In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
In [2]:
from IPython.core.display import HTML
css = open('style-table.css').read() + open('style-notebook.css').read()
HTML('<style>{}</style>'.format(css))
Out[2]:
In [6]:
import geocoder
import csv
location = [43.65501,-79.42021]
In [13]:
from numpy import random
from scipy.spatial import distance
In [14]:
# Bike-count stations (lat, lon) and their observed bike counts for three
# areas with dedicated counts: campus, waterfront, and Castle Frank.
campus = [[43.657946,-79.39993],
[43.663502,-79.40005],
[43.663051,-79.402196],
[43.665429,-79.398975]
]
campus_dict = {(43.657946,-79.39993):4495.8,
(43.663502,-79.400050):2653,
(43.663051,-79.402196):3574,
(43.665429,-79.398975):2304
}
waterfront = [[43.648208,-79.370923],
[43.642711,-79.394043],
[43.639944,-79.387032],
[43.640625,-79.3932],
[43.640093,-79.380152]
]
waterfront_dict = {(43.648208,-79.370923):330,
(43.642711,-79.394043):745,
(43.639944,-79.387032):128,
(43.640625,-79.3932):154,
(43.640093,-79.380152):235
}
castleFrank = [[43.673792,-79.368187]]
castleFrank_dict = {(43.673792,-79.368187):3413}
In [15]:
col_df = pd.read_csv('toronto_cycling_central_mode.csv', index_col='id')
col_df = col_df.sort_index()
col_df.head()
Out[15]:
In [102]:
coords = central_col_df[["stname1","stname_2","street_1_type","street_2_type"]]
coords = coords.values.tolist()
coordinate_list = []
for pair in coords:
    # Put the two cross streets in alphabetical order so every collision at
    # the same intersection produces one canonical name.
    if isinstance(pair[1], str):
        if pair[1] <= pair[0]:
            pair[0], pair[1] = pair[1], pair[0]
            pair[2], pair[3] = pair[3], pair[2]
for pair in coords:
    coordinate_list.append(str(pair[0]) + ' ' + str(pair[2]) + ' & ' + str(pair[1]) + ' ' + str(pair[3]) + ', Toronto')
In [ ]:
coords = col_df[["STNAME1","STNAME 2","STET 1 TYPE","STET 2 TYPE"]]
coords = coords.values.tolist()
coordinate_list = []
for pair in coords:
    # Same alphabetical normalization as above, applied to the raw column names.
    if isinstance(pair[1], str):
        if pair[1] <= pair[0]:
            pair[0], pair[1] = pair[1], pair[0]
            pair[2], pair[3] = pair[3], pair[2]
for pair in coords:
    coordinate_list.append(str(pair[0]) + ' ' + str(pair[2]) + ' & ' + str(pair[1]) + ' ' + str(pair[3]) + ', Toronto')
In [16]:
ped_counts_df = pd.read_csv('Vehicle and Pedestrian Counts/TrafficPedestrianVolumes_2011.csv', index_col=0)
Using the power fit for the bike/pedestrian ratio, we get a function that predicts the bike volume at any single intersection from its pedestrian count.
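Written out, the power fit gives the ratio $r(P) = 500.2146799711\,P^{-0.8950759596}$, with $P$ the 8-hour pedestrian volume. Multiplying back by $P$, as the cell below does, the exponents combine into a single power law:

$$\mathrm{bikes}(P) = 500.2146799711\,P^{-0.8950759596}\cdot P = 500.2146799711\,P^{0.1049240404}$$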
In [17]:
ped_counts_df['bike_prediction'] = (500.2146799711*ped_counts_df['8HrPedVol']**(-0.8950759596))*ped_counts_df['8HrPedVol']
In [18]:
ped_counts_df.head()
Out[18]:
In [19]:
ped_coords = ped_counts_df[['Latitude','Longitude']]
ped_coords = ped_coords.replace(np.nan,0)
ped_coordinate_list = ped_coords.values.tolist()
In [20]:
ped_counts_df['coordinates'] = ped_counts_df[['Latitude','Longitude']].apply(tuple, axis=1)
ped_counts_df.head()
Out[20]:
In [21]:
ped_dict = ped_counts_df.set_index('coordinates').to_dict()['bike_prediction']
In [22]:
col_df['coordinates'] = col_df[['lat','long']].apply(tuple, axis=1)
col_df.head()
Out[22]:
In [23]:
central_col_df = col_df.copy()  # copy so the column additions below don't also modify col_df
In [38]:
# Only run after the cell above has already produced its output;
# otherwise the file doesn't exist yet.
import csv
with open('closest_traffic_count.csv', 'r') as f:
    reader = csv.reader(f)
    closest_traffic_count = list(reader)
In [24]:
import matplotlib.path as mplPath
import numpy as np
waterfront_Path = mplPath.Path(np.array([[43.635497, -79.398156],
[43.639000, -79.400725],
[43.640822, -79.401427],
[43.646984, -79.376977],
[43.649889, -79.370343],
[43.651614, -79.362725],
[43.648090, -79.361191],
[43.646451, -79.361937],
[43.641209, -79.376739],
[43.639969, -79.379965],
[43.637698, -79.391847],
[43.635666, -79.398368],
[43.636489, -79.399603]]))
# Sanity check: a point far outside the polygon should return False.
waterfront_Path.contains_point((200, 100))
Out[24]:
In [25]:
campus_Path = mplPath.Path(np.array([[43.659838, -79.399772],
[43.661388, -79.401006],
[43.665592, -79.402705],
[43.666768, -79.401354],
[43.668213, -79.393958],
[43.663141, -79.392719],
[43.659264, -79.394100],
[43.658329, -79.398204]]
))
# Sanity check: a point inside the campus polygon should return True.
campus_Path.contains_point((43.661013, -79.397200))
Out[25]:
In [26]:
castleFrank_Path = mplPath.Path(np.array([[43.672105, -79.376696],
[43.671562, -79.370962],
[43.674418, -79.366821],
[43.676086, -79.358731],
[43.677056, -79.354021],
[43.677040, -79.355126],
[43.677622, -79.358516],
[43.676194, -79.359503],
[43.675170, -79.364760],
[43.674580, -79.367539],
[43.672019, -79.371112],
[43.672710, -79.376927]]
))
# Sanity check: a point inside the Castle Frank polygon should return True.
castleFrank_Path.contains_point((43.676120, -79.359035))
Out[26]:
In [27]:
import csv
# For each collision, find the nearest count location. Points inside one of
# the three bike-count polygons get the observed bike count for that area;
# all other points fall back to the nearest pedestrian-count intersection
# and its predicted bike volume.
closest_traffic_point = []
bike_traffic = []
for i in range(len(central_col_df)):
    point = central_col_df['coordinates'].iloc[i]
    if waterfront_Path.contains_point(point):
        closest = waterfront[distance.cdist([point], waterfront).argmin()]
        closest_traffic_point.append(tuple(closest))
        bike_traffic.append(waterfront_dict[tuple(closest)])
    elif campus_Path.contains_point(point):
        closest = campus[distance.cdist([point], campus).argmin()]
        closest_traffic_point.append(tuple(closest))
        bike_traffic.append(campus_dict[tuple(closest)])
    elif castleFrank_Path.contains_point(point):
        closest = castleFrank[distance.cdist([point], castleFrank).argmin()]
        closest_traffic_point.append(tuple(closest))
        bike_traffic.append(castleFrank_dict[tuple(closest)])
    else:
        closest = ped_coordinate_list[distance.cdist([point], ped_coordinate_list).argmin()]
        closest_traffic_point.append(tuple(closest))
        bike_traffic.append(ped_dict[tuple(closest)])

with open('closest_intersection.csv', 'w') as myfile3:
    wr = csv.writer(myfile3)
    wr.writerow(closest_traffic_point)
with open('closest_int_bike_predictions.csv', 'w') as myfile4:
    wr = csv.writer(myfile4)
    wr.writerow(bike_traffic)
In [28]:
bike_traffic[:10]
Out[28]:
In [29]:
central_col_df['closest_traffic'] = tuple(closest_traffic_point)
central_col_df['traffic_count'] = bike_traffic
# Note: rename() returns a copy and the result isn't assigned, so the original
# column names are kept; the cells below still refer to 'traffic_count'.
central_col_df.rename(columns={'closest_traffic': 'closest_ped_count', 'traffic_count': 'predicted_bike_count'})
central_col_df.head()
Out[29]:
In [41]:
intersection_df = central_col_df.groupby('intersection').agg(
    {'traffic_count': [np.size, np.mean]}
).sort_values(by=[('traffic_count', 'mean')], ascending=False)
intersection_df.head(10)
Out[41]:
In [40]:
intersection_df = central_col_df.groupby('intersection').agg(
    {'traffic_count': [np.size, np.mean]}
).sort_values(by=[('traffic_count', 'size')], ascending=False)
intersection_df.head(10)
Out[40]:
In [60]:
# Normalized yearly accident rate: total collisions / (24 * estimated traffic).
intersection_df['traffic_count', 'normalized_accident_rate'] = (
    intersection_df['traffic_count', 'size'] / (24 * intersection_df['traffic_count', 'mean'])
)
intersection_df = intersection_df.sort_values(
    by=[('traffic_count', 'normalized_accident_rate')], ascending=False)
intersection_df.head(20)
Out[60]:
In [61]:
df_test = intersection_df['traffic_count','normalized_accident_rate']
df = df_test.to_frame(name='normalized yearly accident rate')
df['total collisions'] = intersection_df['traffic_count','size']
df['traffic estimate'] = intersection_df['traffic_count','mean']
df = df.dropna()
len(df)
Out[61]:
In [75]:
sns.set_context("notebook", font_scale=1.1)
scatter = sns.jointplot(x='total collisions', y='normalized yearly accident rate',
                        data=df, ylim=(0, 0.005))
In [47]:
intersection_df.to_csv('totals_test.csv')
df.head(10)
Out[47]:
In [52]:
import scipy.stats as stats
In [49]:
sns.distplot(df["traffic estimate"], bins = 100)
Out[49]:
In [35]:
sns.distplot(df["total collisions"], bins = 15)
Out[35]:
In [82]:
sns.distplot(df["total collisions"], fit=stats.gamma, bins=range(0, 75, 1), kde=False)
Out[82]:
A gamma distribution looks like a good choice. We are dealing with sums of Poisson distributions: each intersection is essentially its own Poisson process, with its mean yearly accident count as the parameter. The histogram above is therefore summing up the number of accidents each year over these Poisson processes. While we could try to fit each intersection individually, that would be a great deal of work, and it's not clear how those per-intersection estimates would combine into a prior distribution for the ensemble.
A gamma distribution also makes intuitive sense, since the conjugate prior for a Poisson rate is a gamma. Reading up on how an ensemble of Poisson processes behaves when each has its own parameter (rather than a shared one) hasn't led me to a principled reason to use a gamma for the whole ensemble, but it's a nice fit, as the quick simulation sketched below also suggests.
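As a rough check on that intuition, here is a minimal simulation sketch (not part of the original analysis; the intersection count and the gamma shape/scale below are made-up illustrative values, not fitted to the Toronto data). It draws a rate for each intersection from a gamma distribution, draws accident totals from Poisson processes with those rates, and overlays a fitted gamma on the simulated ensemble. Strictly speaking, a gamma-mixed Poisson is negative binomial rather than gamma, but the fitted gamma tends to track the histogram closely.
In [ ]:
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt

rng = np.random.RandomState(0)

# Illustrative (not fitted) values: number of intersections, years of data,
# and gamma parameters for the per-intersection accident rates.
n_intersections = 1000
years = 24
rates = rng.gamma(shape=2.0, scale=3.0, size=n_intersections)

# Each intersection is its own Poisson process; accumulate totals over the period.
totals = rng.poisson(lam=rates * years)

# Fit a gamma to the simulated ensemble and compare it to the histogram.
a, loc, scale = stats.gamma.fit(totals)
x = np.linspace(totals.min(), totals.max(), 200)
plt.hist(totals, bins=50, density=True, alpha=0.5, label='simulated totals')
plt.plot(x, stats.gamma.pdf(x, a, loc=loc, scale=scale), label='gamma fit')
plt.legend()
plt.show()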
In [80]:
stats.gamma.fit(df['total collisions'])
Out[80]:
In [ ]: