In [4]:
import numpy as np
import pandas as pd
import itertools
from __future__ import division
import geoplotlib as glp
from geoplotlib.utils import BoundingBox, DataAccessObject

%matplotlib inline
pd.set_option('display.max_columns', None)

In [2]:
filePath = 'datasets/NYPD_Motor_Vehicle_Collisions_weather3.csv'

collisions = pd.read_csv(filePath)

In [3]:
collisions = collisions[pd.notnull(collisions.BOROUGH)]
collisions


Out[3]:
DATE TIME BOROUGH ZIP CODE LATITUDE LONGITUDE LOCATION ON STREET NAME CROSS STREET NAME OFF STREET NAME NUMBER OF PERSONS INJURED NUMBER OF PERSONS KILLED NUMBER OF PEDESTRIANS INJURED NUMBER OF PEDESTRIANS KILLED NUMBER OF CYCLIST INJURED NUMBER OF CYCLIST KILLED NUMBER OF MOTORIST INJURED NUMBER OF MOTORIST KILLED CONTRIBUTING FACTOR VEHICLE 1 CONTRIBUTING FACTOR VEHICLE 2 CONTRIBUTING FACTOR VEHICLE 3 CONTRIBUTING FACTOR VEHICLE 4 CONTRIBUTING FACTOR VEHICLE 5 UNIQUE KEY VEHICLE TYPE CODE 1 VEHICLE TYPE CODE 2 VEHICLE TYPE CODE 3 VEHICLE TYPE CODE 4 VEHICLE TYPE CODE 5
0 03/14/2016 3:27 QUEENS 11372 40.747734 -73.882999 (40.7477341, -73.8829986) ROOSEVELT AVENUE 83 STREET NaN 1 0 1 0 0 0 0 0 Unspecified NaN NaN NaN NaN 3405169 OTHER NaN NaN NaN NaN
3 03/14/2016 0:45 MANHATTAN 10035 40.808279 -73.938793 (40.8082795, -73.9387929) EAST 129 STREET MADISON AVENUE NaN 0 0 0 0 0 0 0 0 Unspecified Unspecified NaN NaN NaN 3405059 PASSENGER VEHICLE PASSENGER VEHICLE NaN NaN NaN
4 03/13/2016 23:00 BROOKLYN 11206 40.706653 -73.950406 (40.7066527, -73.9504063) UNION AVENUE MONTROSE AVENUE NaN 0 0 0 0 0 0 0 0 Driver Inattention/Distraction Unspecified NaN NaN NaN 3405121 PASSENGER VEHICLE PASSENGER VEHICLE NaN NaN NaN
6 03/13/2016 9:48 BROOKLYN 11212 40.661997 -73.919593 (40.661997, -73.9195931) KINGS HIGHWAY EAST 98 STREET NaN 0 0 0 0 0 0 0 0 Passenger Distraction Unspecified NaN NaN NaN 3404744 PASSENGER VEHICLE UNKNOWN NaN NaN NaN
9 03/13/2016 9:46 QUEENS 11106 40.756580 -73.929752 (40.75658, -73.9297516) 36 AVENUE 31 STREET NaN 0 0 0 0 0 0 0 0 Failure to Yield Right-of-Way Unspecified NaN NaN NaN 3404995 LIVERY VEHICLE PASSENGER VEHICLE NaN NaN NaN
11 03/13/2016 16:00 QUEENS 11413 40.672174 -73.761021 (40.6721741, -73.7610205) BEDELL STREET 140 AVENUE NaN 0 0 0 0 0 0 0 0 Driver Inattention/Distraction Driver Inattention/Distraction NaN NaN NaN 3404948 PASSENGER VEHICLE SPORT UTILITY / STATION WAGON NaN NaN NaN
12 03/13/2016 15:20 QUEENS 11105 40.779280 -73.900874 (40.7792798, -73.9008742) 37 STREET 19 AVENUE NaN 0 0 0 0 0 0 0 0 Unspecified Unspecified NaN NaN NaN 3405001 PASSENGER VEHICLE UNKNOWN NaN NaN NaN
14 03/13/2016 15:20 MANHATTAN 10019 40.761719 -73.982807 (40.7617191, -73.9828066) WEST 51 STREET 7 AVENUE NaN 0 0 0 0 0 0 0 0 Driver Inattention/Distraction Unspecified NaN NaN NaN 3404581 PASSENGER VEHICLE SPORT UTILITY / STATION WAGON NaN NaN NaN
15 03/13/2016 9:15 STATEN ISLAND 10304 40.591737 -74.096676 (40.5917371, -74.0966763) ALTER AVENUE JEFFERSON STREET NaN 0 0 0 0 0 0 0 0 Prescription Medication Unspecified NaN NaN NaN 3405044 SPORT UTILITY / STATION WAGON UNKNOWN NaN NaN NaN
18 03/13/2016 15:15 MANHATTAN 10036 40.757828 -73.993070 (40.7578281, -73.9930696) 9 AVENUE WEST 41 STREET NaN 0 0 0 0 0 0 0 0 Fatigued/Drowsy Unspecified NaN NaN NaN 3404556 PASSENGER VEHICLE SPORT UTILITY / STATION WAGON NaN NaN NaN
19 03/13/2016 15:10 BROOKLYN 11206 40.701076 -73.940420 (40.7010761, -73.9404204) SUMNER PLACE FLUSHING AVENUE NaN 0 0 0 0 0 0 0 0 Unspecified Unspecified NaN NaN NaN 3404826 SPORT UTILITY / STATION WAGON SPORT UTILITY / STATION WAGON NaN NaN NaN
20 03/13/2016 14:52 BRONX 10459 40.819365 -73.893068 (40.819365, -73.8930684) BARRETTO STREET SOUTHERN BOULEVARD NaN 0 0 0 0 0 0 0 0 Fatigued/Drowsy Unspecified NaN NaN NaN 3404650 PASSENGER VEHICLE PASSENGER VEHICLE NaN NaN NaN
21 03/13/2016 14:45 MANHATTAN 10023 40.775487 -73.982164 (40.7754866, -73.9821639) WEST 68 STREET BROADWAY NaN 0 0 0 0 0 0 0 0 Unspecified Unspecified NaN NaN NaN 3404597 SPORT UTILITY / STATION WAGON TAXI NaN NaN NaN
23 03/13/2016 9:15 QUEENS 11357 40.787170 -73.806965 (40.7871699, -73.8069646) CRYDERS LANE 155 STREET NaN 0 0 0 0 0 0 0 0 Unspecified NaN NaN NaN NaN 3405145 PASSENGER VEHICLE SPORT UTILITY / STATION WAGON NaN NaN NaN
24 03/13/2016 14:43 MANHATTAN 10128 40.783248 -73.944723 (40.7832478, -73.9447234) EAST 96 STREET 1 AVENUE NaN 0 0 0 0 0 0 0 0 Other Vehicular Other Vehicular NaN NaN NaN 3404594 TAXI TAXI NaN NaN NaN
25 03/13/2016 14:43 BROOKLYN 11221 40.693862 -73.929702 (40.6938623, -73.9297023) DEKALB AVENUE BROADWAY NaN 0 0 0 0 0 0 0 0 Unspecified Unspecified NaN NaN NaN 3404827 SPORT UTILITY / STATION WAGON PASSENGER VEHICLE NaN NaN NaN
28 03/13/2016 14:37 BROOKLYN 11213 40.666022 -73.929461 (40.6660219, -73.9294613) FORD STREET CARROLL STREET NaN 0 0 0 0 0 0 0 0 Driver Inattention/Distraction NaN NaN NaN NaN 3405097 SPORT UTILITY / STATION WAGON NaN NaN NaN NaN
29 03/13/2016 9:10 MANHATTAN 10017 40.753242 -73.966617 (40.7532425, -73.9666173) EAST 49 STREET 1 AVENUE NaN 0 0 0 0 0 0 0 0 Alcohol Involvement NaN NaN NaN NaN 3405055 PASSENGER VEHICLE NaN NaN NaN NaN
32 03/13/2016 9:00 MANHATTAN 10019 40.771464 -73.994346 (40.7714641, -73.9943462) WEST 57 STREET WEST SIDE HIGHWAY NaN 0 0 0 0 0 0 0 0 Driver Inattention/Distraction Unspecified NaN NaN NaN 3405057 PASSENGER VEHICLE TAXI NaN NaN NaN
33 03/13/2016 14:30 BROOKLYN 11215 40.662743 -73.981729 (40.6627428, -73.9817291) 14 STREET 8 AVENUE NaN 0 0 0 0 0 0 0 0 Unspecified Unspecified NaN NaN NaN 3404794 PASSENGER VEHICLE PASSENGER VEHICLE NaN NaN NaN
35 03/13/2016 8:20 BROOKLYN 11239 40.644042 -73.877519 (40.644042, -73.8775195) PENNSYLVANIA AVENUE SEAVIEW AVENUE NaN 0 0 0 0 0 0 0 0 Prescription Medication Unspecified NaN NaN NaN 3405098 PASSENGER VEHICLE PASSENGER VEHICLE NaN NaN NaN
36 03/13/2016 8:15 MANHATTAN 10065 40.768024 -73.970281 (40.768024, -73.9702814) 5 AVENUE EAST 65 STREET NaN 1 0 0 0 1 0 0 0 Other Vehicular Physical Disability NaN NaN NaN 3404591 TAXI BICYCLE NaN NaN NaN
37 03/13/2016 14:30 BROOKLYN 11211 40.715582 -73.960139 (40.7155824, -73.9601394) BEDFORD AVENUE NORTH 3 STREET NaN 0 0 0 0 0 0 0 0 Failure to Yield Right-of-Way Unspecified NaN NaN NaN 3404848 PASSENGER VEHICLE PASSENGER VEHICLE NaN NaN NaN
38 03/13/2016 14:28 BRONX 10463 40.876641 -73.901380 (40.8766407, -73.9013799) KINGSBRIDGE TERRACE ALBANY CRESCENT NaN 0 0 0 0 0 0 0 0 Unspecified Unspecified NaN NaN NaN 3404711 TAXI PASSENGER VEHICLE NaN NaN NaN
40 03/13/2016 14:20 BROOKLYN 11212 40.662413 -73.909857 (40.6624125, -73.909857) CHESTER STREET LIVONIA AVENUE NaN 0 0 0 0 0 0 0 0 Unspecified Unspecified NaN NaN NaN 3404776 PASSENGER VEHICLE UNKNOWN NaN NaN NaN
41 03/13/2016 14:14 BROOKLYN 11233 40.674723 -73.910324 (40.6747232, -73.9103235) NaN NaN 2333 DEAN STREET 0 0 0 0 0 0 0 0 Unspecified Unspecified Unspecified NaN NaN 3404777 OTHER PASSENGER VEHICLE SPORT UTILITY / STATION WAGON NaN NaN
42 03/13/2016 8:10 BRONX 10475 40.877476 -73.836600 (40.8774757, -73.8366002) BAYCHESTER AVENUE GIVAN AVENUE NaN 0 0 0 0 0 0 0 0 Unspecified Unspecified Unspecified NaN NaN 3404701 PASSENGER VEHICLE SPORT UTILITY / STATION WAGON PASSENGER VEHICLE NaN NaN
44 03/13/2016 20:50 MANHATTAN 10002 40.718556 -73.988200 (40.7185558, -73.9882) DELANCEY STREET ESSEX STREET NaN 0 0 0 0 0 0 0 0 Unspecified Unspecified NaN NaN NaN 3405050 PASSENGER VEHICLE PASSENGER VEHICLE NaN NaN NaN
45 03/13/2016 8:00 BROOKLYN 11233 40.676888 -73.920945 (40.6768884, -73.9209451) ATLANTIC AVENUE PRESCOTT PLACE NaN 1 0 0 0 0 0 1 0 Following Too Closely Unspecified NaN NaN NaN 3404823 PASSENGER VEHICLE SPORT UTILITY / STATION WAGON NaN NaN NaN
47 03/13/2016 14:00 QUEENS 11422 40.675463 -73.733840 (40.6754631, -73.7338404) 133 AVENUE LAURELTON PARKWAY SERVICE ROAD S NaN 0 0 0 0 0 0 0 0 Unspecified Unspecified NaN NaN NaN 3404869 SPORT UTILITY / STATION WAGON PASSENGER VEHICLE NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
769017 07/01/2012 16:00 MANHATTAN 10022 40.758537 -73.977202 (40.758537, -73.9772016) 5 AVENUE EAST 50 STREET NaN 0 0 0 0 0 0 0 0 Turning Improperly Unspecified NaN NaN NaN 37641 TAXI PASSENGER VEHICLE NaN NaN NaN
769018 07/01/2012 16:00 QUEENS 11422 40.664748 -73.740018 (40.6647479, -73.7400181) BROOKVILLE BOULEVARD 141 AVENUE NaN 0 0 0 0 0 0 0 0 Prescription Medication Unspecified NaN NaN NaN 219399 SPORT UTILITY / STATION WAGON UNKNOWN NaN NaN NaN
769019 07/01/2012 18:00 MANHATTAN 10024 40.784975 -73.982638 (40.7849752, -73.982638) WEST 79 STREET RIVERSIDE DRIVE NaN 0 0 0 0 0 0 0 0 Failure to Yield Right-of-Way Unspecified NaN NaN NaN 52343 MOTORCYCLE PASSENGER VEHICLE NaN NaN NaN
769020 07/01/2012 18:00 QUEENS 11372 40.753731 -73.885038 (40.7537307, -73.8850382) 82 STREET 34 AVENUE NaN 0 0 0 0 0 0 0 0 Unspecified Unspecified NaN NaN NaN 279643 PASSENGER VEHICLE PASSENGER VEHICLE NaN NaN NaN
769021 07/01/2012 18:00 QUEENS 11414 40.652425 -73.838200 (40.6524254, -73.8382001) CROSSBAY BOULEVARD 163 AVENUE NaN 0 0 0 0 0 0 0 0 Fatigued/Drowsy Unspecified NaN NaN NaN 227948 PASSENGER VEHICLE PASSENGER VEHICLE NaN NaN NaN
769022 07/01/2012 18:30 BROOKLYN 11208 40.683133 -73.882582 (40.6831329, -73.882582) SHEPHERD AVENUE RIDGEWOOD AVENUE NaN 1 0 1 0 0 0 0 0 Unspecified NaN NaN NaN NaN 163378 PASSENGER VEHICLE NaN NaN NaN NaN
769023 07/01/2012 18:35 MANHATTAN 10037 40.814774 -73.940369 (40.8147743, -73.9403692) LENOX AVENUE WEST 136 STREET NaN 0 0 0 0 0 0 0 0 Unspecified Unspecified NaN NaN NaN 66342 SPORT UTILITY / STATION WAGON MOTORCYCLE NaN NaN NaN
769025 07/01/2012 19:55 BROOKLYN 11221 40.689218 -73.917649 (40.6892183, -73.9176492) PUTNAM AVENUE BUSHWICK AVENUE NaN 1 0 1 0 0 0 0 0 Unspecified Unspecified NaN NaN NaN 184522 LIVERY VEHICLE PASSENGER VEHICLE NaN NaN NaN
769026 07/01/2012 1:00 QUEENS 11434 40.679937 -73.776362 (40.6799371, -73.7763624) BAISLEY BOULEVARD 167 STREET NaN 0 0 0 0 0 0 0 0 Unspecified Unspecified NaN NaN NaN 268099 PASSENGER VEHICLE SPORT UTILITY / STATION WAGON NaN NaN NaN
769028 07/01/2012 1:00 BRONX 10456 40.828729 -73.914723 (40.8287288, -73.9147231) EAST 165 STREET FINDLAY AVENUE NaN 1 0 0 0 0 0 1 0 Physical Disability Unspecified NaN NaN NaN 85161 SPORT UTILITY / STATION WAGON PASSENGER VEHICLE NaN NaN NaN
769029 07/01/2012 1:01 BROOKLYN 11217 40.681852 -73.980036 (40.6818517, -73.9800365) 4 AVENUE ST MARKS PLACE NaN 0 0 0 0 0 0 0 0 Backing Unsafely Illness NaN NaN NaN 175826 SPORT UTILITY / STATION WAGON PASSENGER VEHICLE NaN NaN NaN
769031 07/01/2012 20:00 BROOKLYN 11235 40.586876 -73.956972 (40.5868757, -73.9569723) WILLIAMS COURT HOMECREST AVENUE NaN 0 0 0 0 0 0 0 0 Unspecified Unspecified NaN NaN NaN 116268 PASSENGER VEHICLE UNKNOWN NaN NaN NaN
769032 07/01/2012 21:00 BRONX 10452 40.841818 -73.922437 (40.8418183, -73.9224369) EDWARD L GRANT HIGHWAY NELSON AVENUE NaN 0 0 0 0 0 0 0 0 Driver Inattention/Distraction Driver Inattention/Distraction NaN NaN NaN 85205 PASSENGER VEHICLE PASSENGER VEHICLE NaN NaN NaN
769033 07/01/2012 21:00 MANHATTAN 10002 40.715357 -73.978039 (40.7153567, -73.9780389) BARUCH DRIVE DELANCEY STREET NaN 0 0 0 0 0 0 0 0 Backing Unsafely Unspecified NaN NaN NaN 12183 PASSENGER VEHICLE UNKNOWN NaN NaN NaN
769034 07/01/2012 21:00 MANHATTAN 10018 40.759527 -73.999242 (40.7595273, -73.999242) WEST 40 STREET 11 AVENUE NaN 0 0 0 0 0 0 0 0 Unspecified Unspecified NaN NaN NaN 16527 PASSENGER VEHICLE OTHER NaN NaN NaN
769035 07/01/2012 21:05 MANHATTAN 10023 40.778607 -73.981622 (40.7786073, -73.9816215) WEST 72 STREET AMSTERDAM AVENUE NaN 0 0 0 0 0 0 0 0 Unspecified Unspecified NaN NaN NaN 52341 PASSENGER VEHICLE TAXI NaN NaN NaN
769036 07/01/2012 21:08 BROOKLYN 11212 40.672075 -73.911336 (40.6720753, -73.9113364) EAST NEW YORK AVENUE ROCKAWAY AVENUE NaN 1 0 0 0 1 0 0 0 Reaction to Other Uninvolved Vehicle Unspecified NaN NaN NaN 160106 PASSENGER VEHICLE BICYCLE NaN NaN NaN
769037 07/01/2012 22:00 MANHATTAN 10032 40.831568 -73.942981 (40.8315678, -73.9429813) WEST 155 STREET AMSTERDAM AVENUE NaN 1 0 1 0 0 0 0 0 Unspecified NaN NaN NaN NaN 68163 BUS NaN NaN NaN NaN
769039 07/01/2012 5:00 MANHATTAN 10017 40.751441 -73.973959 (40.7514411, -73.9739594) EAST 43 STREET 3 AVENUE NaN 0 0 0 0 0 0 0 0 Unspecified Unspecified NaN NaN NaN 32050 TAXI TAXI NaN NaN NaN
769040 07/01/2012 17:30 MANHATTAN 10035 40.807598 -73.937179 (40.8075979, -73.9371788) EAST 129 STREET PARK AVENUE NaN 1 0 0 0 0 0 1 0 Unspecified Unspecified NaN NaN NaN 59632 SPORT UTILITY / STATION WAGON PASSENGER VEHICLE NaN NaN NaN
769041 07/01/2012 17:32 BROOKLYN 11235 40.579187 -73.964008 (40.5791872, -73.9640079) BRIGHTON 4 STREET OCEANVIEW AVENUE NaN 0 0 0 0 0 0 0 0 Unspecified Unspecified NaN NaN NaN 113563 PASSENGER VEHICLE SPORT UTILITY / STATION WAGON NaN NaN NaN
769044 07/01/2012 12:00 MANHATTAN 10012 40.723365 -73.995703 (40.7233651, -73.9957032) MULBERRY STREET PRINCE STREET NaN 0 0 0 0 0 0 0 0 Unspecified Unspecified NaN NaN NaN 5298 PASSENGER VEHICLE UNKNOWN NaN NaN NaN
769045 07/01/2012 12:00 BROOKLYN 11220 40.633795 -74.011327 (40.6337945, -74.0113274) 8 AVENUE 63 STREET NaN 0 0 0 0 0 0 0 0 Turning Improperly Unspecified NaN NaN NaN 140805 SPORT UTILITY / STATION WAGON PASSENGER VEHICLE NaN NaN NaN
769046 07/01/2012 11:55 STATEN ISLAND 10303 40.626218 -74.157824 (40.6262181, -74.1578241) FOREST AVENUE UNION AVENUE NaN 2 0 0 0 0 0 2 0 Unspecified Unspecified NaN NaN NaN 284649 PASSENGER VEHICLE PASSENGER VEHICLE NaN NaN NaN
769047 07/01/2012 10:50 BROOKLYN 11229 40.603873 -73.936096 (40.6038728, -73.9360956) STUART STREET AVENUE T NaN 0 0 0 0 0 0 0 0 Backing Unsafely Unspecified NaN NaN NaN 116255 SPORT UTILITY / STATION WAGON OTHER NaN NaN NaN
769048 07/01/2012 10:11 MANHATTAN 10002 40.720477 -73.989719 (40.7204773, -73.9897185) ALLEN STREET RIVINGTON STREET NaN 0 0 0 0 0 0 0 0 Failure to Yield Right-of-Way Unspecified NaN NaN NaN 12177 PASSENGER VEHICLE PASSENGER VEHICLE NaN NaN NaN
769049 07/01/2012 6:55 QUEENS 11385 40.708991 -73.909700 (40.7089912, -73.9097003) HARMAN STREET FAIRVIEW AVENUE NaN 1 0 0 0 0 0 1 0 Unspecified Unspecified NaN NaN NaN 213209 PASSENGER VEHICLE UNKNOWN NaN NaN NaN
769050 07/01/2012 6:55 BROOKLYN 11225 40.669780 -73.958292 (40.6697798, -73.9582923) FRANKLIN AVENUE UNION STREET NaN 0 0 0 0 0 0 0 0 Lost Consciousness Unspecified NaN NaN NaN 152367 PASSENGER VEHICLE PASSENGER VEHICLE NaN NaN NaN
769051 07/01/2012 6:50 BRONX 10468 40.862679 -73.909040 (40.8626793, -73.9090397) SEDGWICK AVENUE WEST FORDHAM ROAD NaN 1 0 0 0 1 0 0 0 Driver Inattention/Distraction Unspecified NaN NaN NaN 109483 PASSENGER VEHICLE BICYCLE NaN NaN NaN
769053 07/01/2012 14:40 STATEN ISLAND 10303 40.636818 -74.157939 (40.6368184, -74.1579392) UNION AVENUE RICHMOND TERRACE NaN 0 0 0 0 0 0 0 0 Unspecified Unspecified NaN NaN NaN 284652 PASSENGER VEHICLE UNKNOWN NaN NaN NaN

584753 rows × 29 columns


In [4]:
print "Collisions in all %d" % len(collisions)


Collisions in all 584753

In [5]:
borough = collisions.BOROUGH.unique()
contributions = collisions['CONTRIBUTING FACTOR VEHICLE 1'].unique()
vehicle_types = collisions['VEHICLE TYPE CODE 1'].unique()

In [6]:
pd.DataFrame(collisions.columns)


Out[6]:
0
0 DATE
1 TIME
2 BOROUGH
3 ZIP CODE
4 LATITUDE
5 LONGITUDE
6 LOCATION
7 ON STREET NAME
8 CROSS STREET NAME
9 OFF STREET NAME
10 NUMBER OF PERSONS INJURED
11 NUMBER OF PERSONS KILLED
12 NUMBER OF PEDESTRIANS INJURED
13 NUMBER OF PEDESTRIANS KILLED
14 NUMBER OF CYCLIST INJURED
15 NUMBER OF CYCLIST KILLED
16 NUMBER OF MOTORIST INJURED
17 NUMBER OF MOTORIST KILLED
18 CONTRIBUTING FACTOR VEHICLE 1
19 CONTRIBUTING FACTOR VEHICLE 2
20 CONTRIBUTING FACTOR VEHICLE 3
21 CONTRIBUTING FACTOR VEHICLE 4
22 CONTRIBUTING FACTOR VEHICLE 5
23 UNIQUE KEY
24 VEHICLE TYPE CODE 1
25 VEHICLE TYPE CODE 2
26 VEHICLE TYPE CODE 3
27 VEHICLE TYPE CODE 4
28 VEHICLE TYPE CODE 5

In [7]:
injured = collisions['NUMBER OF PEDESTRIANS INJURED'].sum() + \
collisions['NUMBER OF CYCLIST INJURED'].sum() + \
collisions['NUMBER OF MOTORIST INJURED'].sum()

killed = collisions['NUMBER OF PEDESTRIANS KILLED'].sum() + collisions['NUMBER OF CYCLIST KILLED'].sum() + collisions['NUMBER OF MOTORIST KILLED'].sum()

In [8]:
print 'People killed in NYC between 2012 and 2016 marts: %d' % killed
print 'People injured in NYC between 2012 and 2016 marts: %d' % injured


People killed in NYC between 2012 and 2016 marts: 713
People injured in NYC between 2012 and 2016 marts: 146081

In [9]:
def sumKillings(df):
    return df['NUMBER OF PEDESTRIANS KILLED'].sum() + df['NUMBER OF CYCLIST KILLED'].sum() + df['NUMBER OF MOTORIST KILLED'].sum()

def sumInjuries(df):
    return df['NUMBER OF PEDESTRIANS INJURED'].sum() + df['NUMBER OF CYCLIST INJURED'].sum() + df['NUMBER OF MOTORIST INJURED'].sum()

xs = []
ys = []
for b in collisions['BOROUGH'].unique():
    mask = (collisions.BOROUGH == b)
    col_by_bo = collisions[mask]
    ys.append(sumInjuries(col_by_bo))
    xs.append(b)
df = pd.DataFrame(pd.Series(ys, index=xs, name="Injuries by Borough"))
    
df.plot(kind='bar', figsize=(8,8), fontsize=20, rot=45)


Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c365c18>

In [11]:
xs = []
ys = []
for b in collisions['BOROUGH'].unique():
    mask = (collisions.BOROUGH == b)
    col_by_bo = collisions[mask]
    ys.append(sumKillings(col_by_bo))
    xs.append(b)
df = pd.DataFrame(pd.Series(ys, index=xs, name="Fatalities by borough"))

df.plot(kind='bar', figsize=(8,8), fontsize=20, rot=45)


Out[11]:
<matplotlib.axes._subplots.AxesSubplot at 0x15146668>

What is the cause of deaths?

  1. First overall
  2. Then per district

In [12]:
mask_killings = ((collisions['NUMBER OF MOTORIST KILLED'] >= 1) | (collisions['NUMBER OF PEDESTRIANS KILLED'] >= 1) | (collisions['NUMBER OF CYCLIST KILLED'] >= 1))
mask_injuired = ((collisions['NUMBER OF MOTORIST INJURED'] >= 1) | (collisions['NUMBER OF PEDESTRIANS INJURED'] >= 1) | (collisions['NUMBER OF CYCLIST INJURED'] >= 1))

collisions_with_killings = collisions[mask_killings]

collisions_with_injuries = collisions[mask_injuired]

print 'Collisions with killings %d' % len(collisions_with_killings)
print 'Collisions with injuries %d' % len(collisions_with_injuries)


Collisions with killings 688
Collisions with injuries 109552

In [1]:
def filter_cause(cause):
    return collisions[(collisions['CONTRIBUTING FACTOR VEHICLE 1'] == cause) | \
                (collisions['CONTRIBUTING FACTOR VEHICLE 2'] == cause) | \
                (collisions['CONTRIBUTING FACTOR VEHICLE 3'] == cause) | \
                (collisions['CONTRIBUTING FACTOR VEHICLE 4'] == cause) | \
                (collisions['CONTRIBUTING FACTOR VEHICLE 5'] == cause)]

def draw_kde(data):
    bbox = BoundingBox(north=data.LATITUDE.max()-0.055,\
                       west=data.LONGITUDE.min()+0.055,\
                       south=data.LATITUDE.min()-0.055,\
                       east=data.LONGITUDE.max()+0.055)

    #bbox = BoundingBox(north=40.915256, west=-74.255735, south=40.496044, east=-73.700272)

    coords = {'lat': data.LATITUDE.values.tolist(), 'lon': data.LONGITUDE.values.tolist()}
    
    glp.kde(coords, bw=2, cut_below=1e-4)
    glp.set_bbox(bbox)
    glp.inline()

In [14]:
for c in collisions['CONTRIBUTING FACTOR VEHICLE 1'].unique().tolist()[:2]:
    filtered = filter_cause(c)
    print "CAUSE: " + c
    print "COUNT: " + str(len(filtered)) + " of " + str(len(collisions))
    draw_kde(filtered)


CAUSE: Unspecified
COUNT: 641662 of 769054
('smallest non-zero count', 8.955579620337621e-09)
('max count:', 39.043010631210215)
CAUSE: Other Vehicular
COUNT: 31153 of 769054
('smallest non-zero count', 4.4777898101688105e-09)
('max count:', 13.024112185700403)

In [15]:
# Draw a heat map of all collisions with killed people
draw_kde(collisions_with_killings)


('smallest non-zero count', 4.4777898101688105e-09)
('max count:', 0.14918840773812889)

In [16]:
draw_kde(collisions_with_injuries)


('smallest non-zero count', 4.4777898101688105e-09)
('max count:', 6.8284101814494713)

In [17]:
draw_kde(collisions_with_killings[collisions_with_killings['NUMBER OF CYCLIST KILLED'] >= 1])


('smallest non-zero count', 4.4777898101688105e-09)
('max count:', 0.11981243389582699)

In [18]:
collisions_with_killings['NUMBER OF CYCLIST KILLED'].sum()
collisions_with_injuries['NUMBER OF CYCLIST INJURED'].sum()


Out[18]:
13686L

In [19]:
glp.geojson('datasets/police_districts.txt')
glp.inline()


---------------------------------------------------------------------------
IOError                                   Traceback (most recent call last)
<ipython-input-19-bb596986b4d6> in <module>()
----> 1 glp.geojson('datasets/police_districts.txt')
      2 glp.inline()

C:\Users\Casper\Anaconda2\lib\site-packages\geoplotlib\__init__.pyc in geojson(filename, color, linewidth, fill, f_tooltip)
    235     """
    236     from geoplotlib.layers import GeoJSONLayer
--> 237     _global_config.layers.append(GeoJSONLayer(filename, color=color, linewidth=linewidth, fill=fill, f_tooltip=f_tooltip))
    238 
    239 

C:\Users\Casper\Anaconda2\lib\site-packages\geoplotlib\layers.pyc in __init__(self, geojson_or_fname, color, linewidth, fill, f_tooltip)
    895 
    896         if type(geojson_or_fname) == str:
--> 897             with open(geojson_or_fname) as fin:
    898                 self.data = json.load(fin)
    899         elif type(geojson_or_fname) == dict:

IOError: [Errno 2] No such file or directory: 'datasets/police_districts.txt'

In [ ]:
glp.geojson('datasets/School_Districts.geojson')
glp.geojson('datasets/schools.geojson')
glp.inline()

In [23]:
collisions.head()

years = ['2012', '2013', '2014', '2015']

series = {}

for year in years:
    coll_by_year = collisions[collisions.DATE.str.contains(year)]
    xs = []
    ys = []
    
    
    for b in coll_by_year['BOROUGH'].unique():
        mask = (coll_by_year.BOROUGH == b)
        col_by_bo = coll_by_year[mask]
        ys.append(sumInjuries(col_by_bo))
        xs.append(b)
    series[year] = pd.Series(ys, index=xs, name=year).sort_values();
df = pd.DataFrame(series)
df.plot(kind='bar', subplots=False, figsize=(10,10), layout=(2,2), sharey=True, fontsize=20, rot=50)


Out[23]:
<matplotlib.axes._subplots.AxesSubplot at 0x14ba19b0>

In [24]:
quarters = [
    ('q1', ['01','02','03']),
    ('q2', ['04','05','06']),
    ('q3', ['07','08','09']),
    ('q4', ['10','11','12'])
]

In [25]:
series = {}

for b in collisions['BOROUGH'].unique():
    xs = []
    ys = []
    boroughs = collisions[(collisions.BOROUGH == b)]
    print b
    for y in years:
        print y
        for (q,m) in quarters:
            col = boroughs[(boroughs.DATE.str.contains(y)) & (boroughs.DATE.str.split('/').str[0].isin(m))]
            ys.append(len(col))
            xs.append(y + ' ' + q)
    series[b] = pd.Series(ys, index=xs, name=b)


QUEENS
2012
2013
2014
2015
MANHATTAN
2012
2013
2014
2015
BROOKLYN
2012
2013
2014
2015
STATEN ISLAND
2012
2013
2014
2015
BRONX
2012
2013
2014
2015

In [26]:
pd.DataFrame(series).plot(kind='bar', subplots=True, layout=(3,2), figsize=(14,14))


Out[26]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x0000000019FD5A20>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x00000000199374A8>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x0000000013802D68>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000000001644FCC0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x000000001F322550>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000000001DF33128>]], dtype=object)

In [27]:
series = {}

for b in collisions['BOROUGH'].unique():
    xs = []
    ys = []
    boroughs = collisions[(collisions.BOROUGH == b)]
    print b
    for y in years:
        print y
        for m in range(1,13):
            month = str(m)
            if (m < 10):
                month = '0' + str(m)
            col = boroughs[(boroughs.DATE.str.contains(y)) & (boroughs.DATE.str.split('/').str[0] == month)]
            ys.append(len(col))
            xs.append(y + ' ' + str(m))
    series[b] = pd.Series(ys, index=xs, name=b)


QUEENS
2012
2013
2014
2015
MANHATTAN
2012
2013
2014
2015
BROOKLYN
2012
2013
2014
2015
STATEN ISLAND
2012
2013
2014
2015
BRONX
2012
2013
2014
2015

In [28]:
pd.DataFrame(series).plot(kind='bar', subplots=True, layout=(5,1), figsize=(14,14), fontsize=15)


Out[28]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x000000000F322668>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x000000001697B4A8>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x000000001597ADA0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x0000000013F24D68>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x0000000018D865F8>]], dtype=object)

Plot vehicle types


In [29]:
vehicle_types_index = ['VEHICLE TYPE CODE 1','VEHICLE TYPE CODE 2','VEHICLE TYPE CODE 3','VEHICLE TYPE CODE 4','VEHICLE TYPE CODE 5']
vehicle_types = collisions['VEHICLE TYPE CODE 1'].unique()


def countVehicleType(dataset):
    series = None
    for i in vehicle_types_index:
        c = dataset[i].value_counts()
        if series is None:
            series = c
        else:
            for k,v in c.iteritems():
                series[k] += v
    return series


#pd.DataFrame(vehicle_type.value_counts()).plot(kind='bar')
vt_overall = countVehicleType(collisions)
vt_overall.name = 'Vehicle types'
pd.DataFrame(vt_overall).sort_values(by='Vehicle types').plot(kind='bar', figsize=(14,14))

#series


Out[29]:
<matplotlib.axes._subplots.AxesSubplot at 0x202d1fd0>

In [30]:
series = {}

for b in collisions['BOROUGH'].unique():
    series[b] = countVehicleType(collisions[collisions.BOROUGH == b])
    
pd.DataFrame(series).plot(kind='bar', figsize=(12,12), subplots=True)


Out[30]:
array([<matplotlib.axes._subplots.AxesSubplot object at 0x0000000018B4BD68>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000000000F990160>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000000001D0DCA58>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000000001C5D8588>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000000001FF4F320>], dtype=object)