In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

import folium
from folium.plugins import MarkerCluster, FastMarkerCluster

import plotly.express as px
import plotly.io as pio
In [2]:
# Load the 2013-2018 Los Angeles collision file into a DataFrame.
# The columns named in text_columns are read as str; case_id is read with
# pandas' nullable 'Int64' dtype. Every other column keeps the inferred dtype.
text_columns = ['location_type', 'ramp_intersection', 'side_of_hwy',
                'pcf_viol_category', 'not_private_property']
column_dtypes = {"case_id": 'Int64', **{name: str for name in text_columns}}

collisions = pd.read_csv(
    'Los_Angeles_Collisions_2013through2018.csv',
    sep=',',
    dtype=column_dtypes,
)
In [3]:
# Preview the first five rows of the loaded collisions frame.
collisions.head(n=5)
Out[3]:
X Y OBJECTID case_id accident_year proc_date juris collision_date collision_time officer_id ... month_ city county state point_x point_y match_addr m_primaryrd m_secondrd int_id
0 -118.323902 34.047565 2001 5814841 2013 2013-03-04T00:00:00.000Z 1942 2013-01-22T00:00:00.000Z 840 36027.0 ... 1 LOS ANGELES LOS ANGELES CA -118.323902 34.047565 PICO BLVD & NORTON AVE, LOS ANGELES, CA PICO BLVD NORTON AVE 99970.0
1 -118.246332 34.062645 2002 6392368 2014 2014-06-06T00:00:00.000Z 1942 2014-01-25T00:00:00.000Z 730 39341.0 ... 1 LOS ANGELES LOS ANGELES CA -118.246332 34.062645 SUNSET BLVD & FIGUEROA ST, LOS ANGELES, CA SUNSET BLVD FIGUEROA ST 135877.0
2 -118.361432 34.062946 2003 6718345 2015 2015-03-16T00:00:00.000Z 1942 2015-01-17T00:00:00.000Z 715 40024.0 ... 1 LOS ANGELES LOS ANGELES CA -118.361432 34.062946 WILSHIRE BLVD & FAIRFAX AVE, LOS ANGELES, CA WILSHIRE BLVD FAIRFAX AVE 130218.0
3 -118.453707 34.032356 2004 5968457 2013 2014-06-09T00:00:00.000Z 1942 2013-01-03T00:00:00.000Z 745 38310.0 ... 1 LOS ANGELES LOS ANGELES CA -118.453707 34.032356 BUNDY DR & OLYMPIC BLVD, LOS ANGELES, CA BUNDY DR OLYMPIC BLVD 135334.0
4 -118.413943 34.221642 2005 6344888 2014 2014-06-05T00:00:00.000Z 1942 2014-01-10T00:00:00.000Z 745 39085.0 ... 1 LOS ANGELES LOS ANGELES CA -118.413943 34.221642 ROSCOE BLVD & COLDWATER CANYON AVE, LOS ANGELE... ROSCOE BLVD COLDWATER CANYON AVE 150465.0

5 rows × 90 columns

In [4]:
# Bar chart of the number of collisions recorded in each accident_year.
year_counts = collisions['accident_year'].value_counts()

fig1 = plt.figure(figsize=(20, 10))
ax1 = fig1.add_subplot()
ax1.bar(year_counts.index, year_counts)

# Write each total above its bar: the text starts 0.1 to the left of the
# bar's x position and 300 above the bar's height.
for year, total in year_counts.items():
    ax1.text(year - .1, total + 300, str(total))

ax1.set_title('Collisions per Year for 2013 - 2018.')
ax1.set_xlabel('Year')
ax1.set_ylabel('Number of Collisions')
plt.show()
In [5]:
# Bar chart of collisions per month number (month_ column), counted across
# all rows of the 2013-2018 data set.
month_counts = collisions['month_'].value_counts()
month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

fig2, ax2 = plt.subplots(figsize=(20, 10))
ax2.bar(month_counts.index, month_counts)

# One tick per month number 1-12, labelled with the month abbreviation.
ax2.set_xticks([*range(1, 13)])
ax2.set_xticklabels(month_names)

# Write each total above its bar (0.15 left of the bar's x position,
# 200 above the bar's height).
for month, total in month_counts.items():
    ax2.text(month - .15, total + 200, str(total))

ax2.set_title('Collisions per Month for 2013-2018')
ax2.set_xlabel('Month')
ax2.set_ylabel('Number of Collisions')
plt.show()
In [6]:
# Collisions under the influence, counted per accident_year.
# pcf_viol_category code used for the selection:
# 01 - Driving or Bicycling Under the Influence of Alcohol or Drug
is_dui = collisions['pcf_viol_category'] == '01'
dui = collisions[is_dui]
dui_counts = dui['accident_year'].value_counts()

fig3 = plt.figure(figsize=(20, 10))
ax3 = fig3.add_subplot()
ax3.bar(dui_counts.index, dui_counts)

# Write each total above its bar (0.05 left of the bar's x position,
# 40 above the bar's height).
for year, total in dui_counts.items():
    ax3.text(year - .05, total + 40, str(total))

ax3.set_title('Collisions under the influence per Year for 2013 - 2018.')
ax3.set_xlabel('Year')
# Trailing semicolon keeps the returned Text object out of the cell output.
ax3.set_ylabel('Number of collisions under the influence');
In [7]:
# Collisions grouped by weather.
# The single-character weather_1 codes are replaced with readable names;
# note that this overwrites the weather_1 column of `collisions` itself.
weather_names = {'A': 'Clear', 'B': 'Cloudy', 'C': 'Raining', 'D': 'Snowing',
                 'E': 'Fog', 'F': 'Other', 'G': 'Wind', '-': 'Not Stated'}
collisions['weather_1'] = collisions['weather_1'].replace(weather_names)

weather_counts = collisions['weather_1'].value_counts()
total_accidents = weather_counts.sum()

# Legend entries of the form "<weather>: <percentage of counted rows>",
# with the percentage shown to three decimal places.
labels = [
    weather + f': {count / total_accidents:.3%}'
    for weather, count in weather_counts.items()
]

fig4, ax4 = plt.subplots(figsize=(10, 10))
# labeldistance=None keeps the labels off the wedges so they appear only
# in the legend.
ax4.pie(x=weather_counts, labels=labels, labeldistance=None)
ax4.legend(loc='upper right')
ax4.set_title('Collisions grouped by Weather Conditions');
In [8]:
# Remove rows where the y point is 0, then report how many rows remain.
has_nonzero_y = collisions['point_y'].ne(0)
collisions = collisions[has_nonzero_y]
print(len(collisions))
213882
In [9]:
# Interactive marker-cluster map of the collisions recorded in 2018.
df_2018 = collisions.loc[collisions['accident_year'] == 2018]

# Marker positions in [point_y, point_x] order (latitude first, then
# longitude, which is the order folium expects). Rows with a missing
# coordinate are dropped: the earlier "!= 0" filter lets NaN through, and a
# marker cannot be placed without both values.
coords_2018 = df_2018[['point_y', 'point_x']].dropna()

# Centre the map on the mean position of the 2018 collisions.
map_2018 = folium.Map(location=[df_2018.point_y.mean(), df_2018.point_x.mean()])

map_2018.add_child(FastMarkerCluster(coords_2018.values.tolist()))

# Added to have the map be exported when saving the notebook as html.
# https://github.com/python-visualization/folium/issues/781#issuecomment-348174560
style_statement = '<style>.leaflet-control{color:#00FF00}</style>'
map_2018.get_root().html.add_child(folium.Element(style_statement))
map_2018
Out[9]:
In [ ]: