In [8]:
# this ipython notebook might be used for simple statistics exploration, if needed 

import pandas as pd
pd.set_option('display.mpl_style', 'default')
pd.set_option('display.width', 5000) 
pd.set_option('display.max_columns', 60)
import numpy as np
import os

In [9]:
dpath = "./"
df = pd.read_csv(os.path.join(dpath,'data','150days_crashdata.csv'))
df.columns


Out[9]:
Index([u'DATE', u'TIME', u'BOROUGH', u'ZIP_CODE', u'LATITUDE', u'LONGITUDE', u'LOCATION', u'ON_STREET_NAME', u'CROSS_STREET_NAME', u'OFF_STREET_NAME', u'NUMBER_OF_PERSONS_INJURED', u'NUMBER_OF_PERSONS_KILLED', u'NUMBER_OF_PEDESTRIANS_INJURED', u'NUMBER_OF_PEDESTRIANS_KILLED', u'NUMBER_OF_CYCLIST_INJURED', u'NUMBER_OF_CYCLIST_KILLED', u'NUMBER_OF_MOTORIST_INJURED', u'NUMBER_OF_MOTORIST_KILLED', u'CONTRIBUTING_FACTOR_VEHICLE_1', u'CONTRIBUTING_FACTOR_VEHICLE_2', u'CONTRIBUTING_FACTOR_VEHICLE_3', u'CONTRIBUTING_FACTOR_VEHICLE_4', u'CONTRIBUTING_FACTOR_VEHICLE_5', u'UNIQUE_KEY', u'VEHICLE_TYPE_CODE_1', u'VEHICLE_TYPE_CODE_2', u'VEHICLE_TYPE_CODE_3', u'VEHICLE_TYPE_CODE_4', u'VEHICLE_TYPE_CODE_5'], dtype='object')

In [10]:
df.groupby('CONTRIBUTING_FACTOR_VEHICLE_1')['CONTRIBUTING_FACTOR_VEHICLE_1'].count()


Out[10]:
CONTRIBUTING_FACTOR_VEHICLE_1
Accelerator Defective                                       40
Aggressive Driving/Road Rage                               265
Alcohol Involvement                                        675
Animals Action                                              46
Backing Unsafely                                          2499
Brakes Defective                                           206
Cell Phone (hand-held)                                       7
Cell Phone (hands-free)                                     11
Driver Inattention/Distraction                           10458
Driver Inexperience                                        997
Drugs (Illegal)                                             43
Failure to Keep Right                                      158
Failure to Yield Right-of-Way                             3938
Fatigued/Drowsy                                           5092
Fell Asleep                                                101
Following Too Closely                                       42
Glare                                                      220
Headlights Defective                                         6
Illness                                                    223
Lane Marking Improper/Inadequate                            45
Lost Consciousness                                        2205
Obstruction/Debris                                         149
Other Electronic Device                                    254
Other Lighting Defects                                      11
Other Vehicular                                           3859
Outside Car Distraction                                    968
Oversized Vehicle                                          563
Passenger Distraction                                      359
Passing or Lane Usage Improper                              26
Pavement Defective                                         104
Pavement Slippery                                         2512
Pedestrian/Bicyclist/Other Pedestrian Error/Confusion       10
Physical Disability                                        944
Prescription Medication                                   1544
Reaction to Other Uninvolved Vehicle                       139
Shoulders Defective/Improper                                 3
Steering Failure                                            79
Tire Failure/Inadequate                                     70
Tow Hitch Defective                                          6
Traffic Control Device Improper/Non-Working                 37
Traffic Control Disregarded                               1173
Turning Improperly                                        2288
Unsafe Lane Changing                                        24
Unsafe Speed                                               100
Unspecified                                              37653
View Obstructed/Limited                                    315
Windshield Inadequate                                        5
Name: CONTRIBUTING_FACTOR_VEHICLE_1, dtype: int64

In [11]:
df.groupby('CONTRIBUTING_FACTOR_VEHICLE_2')['CONTRIBUTING_FACTOR_VEHICLE_2'].count()


Out[11]:
CONTRIBUTING_FACTOR_VEHICLE_2
Accelerator Defective                                        5
Aggressive Driving/Road Rage                                65
Alcohol Involvement                                         87
Animals Action                                               6
Backing Unsafely                                           407
Brakes Defective                                            27
Cell Phone (hand-held)                                       1
Cell Phone (hands-free)                                      1
Driver Inattention/Distraction                            3220
Driver Inexperience                                        376
Drugs (Illegal)                                             11
Failure to Keep Right                                       29
Failure to Yield Right-of-Way                              800
Fatigued/Drowsy                                           1376
Fell Asleep                                                 42
Following Too Closely                                        7
Glare                                                       31
Headlights Defective                                         6
Illness                                                     56
Lane Marking Improper/Inadequate                            27
Lost Consciousness                                         651
Obstruction/Debris                                          30
Other Electronic Device                                     80
Other Lighting Defects                                       3
Other Vehicular                                           2452
Outside Car Distraction                                    232
Oversized Vehicle                                          125
Passenger Distraction                                       78
Passing or Lane Usage Improper                               6
Pavement Defective                                          15
Pavement Slippery                                          605
Pedestrian/Bicyclist/Other Pedestrian Error/Confusion        1
Physical Disability                                        252
Prescription Medication                                    389
Reaction to Other Uninvolved Vehicle                        44
Steering Failure                                             5
Tire Failure/Inadequate                                      5
Traffic Control Device Improper/Non-Working                 25
Traffic Control Disregarded                                367
Turning Improperly                                         610
Unsafe Lane Changing                                         3
Unsafe Speed                                                21
Unspecified                                              57624
View Obstructed/Limited                                     95
Name: CONTRIBUTING_FACTOR_VEHICLE_2, dtype: int64

In [12]:
df.groupby('VEHICLE_TYPE_CODE_1')['VEHICLE_TYPE_CODE_1'].count()


Out[12]:
VEHICLE_TYPE_CODE_1
AMBULANCE                           246
BICYCLE                              45
BUS                                1472
FIRE TRUCK                           90
LARGE COM VEH(6 OR MORE TIRES)     1547
LIVERY VEHICLE                     1167
MOTORCYCLE                          148
OTHER                              2566
PASSENGER VEHICLE                 42385
PEDICAB                               1
PICK-UP TRUCK                      1216
SCOOTER                              21
SMALL COM VEH(4 TIRES)             1332
SPORT UTILITY / STATION WAGON     20098
TAXI                               3348
UNKNOWN                            2277
VAN                                2658
Name: VEHICLE_TYPE_CODE_1, dtype: int64

In [13]:
df.groupby('VEHICLE_TYPE_CODE_2')['VEHICLE_TYPE_CODE_2'].count()


Out[13]:
VEHICLE_TYPE_CODE_2
AMBULANCE                           124
BICYCLE                             960
BUS                                1308
FIRE TRUCK                           50
LARGE COM VEH(6 OR MORE TIRES)     1441
LIVERY VEHICLE                      853
MOTORCYCLE                          146
OTHER                              2423
PASSENGER VEHICLE                 31924
PEDICAB                               3
PICK-UP TRUCK                      1109
SCOOTER                              15
SMALL COM VEH(4 TIRES)             1377
SPORT UTILITY / STATION WAGON     15414
TAXI                               2899
UNKNOWN                            8929
VAN                                2354
Name: VEHICLE_TYPE_CODE_2, dtype: int64

In [14]:
#normilize by square at least; population
byboro = df.groupby('BOROUGH')
byboro['NUMBER_OF_PERSONS_INJURED'].count()


Out[14]:
BOROUGH
BRONX             7860
BROOKLYN         18849
MANHATTAN        15230
QUEENS           16118
STATEN ISLAND     2388
Name: NUMBER_OF_PERSONS_INJURED, dtype: int64

In [15]:
byboro['NUMBER_OF_PERSONS_KILLED'].count()


Out[15]:
BOROUGH
BRONX             7860
BROOKLYN         18849
MANHATTAN        15230
QUEENS           16118
STATEN ISLAND     2388
Name: NUMBER_OF_PERSONS_KILLED, dtype: int64

In [16]:
#normilize by length at least; traffic
byonstreet = df.groupby('ON_STREET_NAME')
byonstreet['NUMBER_OF_PERSONS_INJURED'].count()


Out[16]:
ON_STREET_NAME
1 AVENUE          275
1 PLACE             1
1 STREET           11
10 AVENUE         358
10 STREET          12
100 AVENUE          5
100 DRIVE           1
100 STREET         21
101 AVENUE         89
101 ROAD            1
101 STREET         19
102 AVENUE         10
102 STREET         34
103 AVENUE         55
103 ROAD            1
...
WYCKOFF AVENUE           35
WYCKOFF STREET            6
WYONA STREET              5
WYTHE AVENUE             50
WYTHE PLACE               6
XENIA STREET              1
YATES AVENUE              5
YELLOWSTONE BOULEVARD    47
YORK AVENUE              89
YORK STREET               5
YOUNG AVENUE              2
ZEISER PLACE              2
ZEREGA AVENUE            28
ZOE STREET                2
ZULETTE AVENUE            5
Name: NUMBER_OF_PERSONS_INJURED, Length: 4241, dtype: int64

In [17]:
byonstreet['NUMBER_OF_PERSONS_KILLED'].count()


Out[17]:
ON_STREET_NAME
1 AVENUE          275
1 PLACE             1
1 STREET           11
10 AVENUE         358
10 STREET          12
100 AVENUE          5
100 DRIVE           1
100 STREET         21
101 AVENUE         89
101 ROAD            1
101 STREET         19
102 AVENUE         10
102 STREET         34
103 AVENUE         55
103 ROAD            1
...
WYCKOFF AVENUE           35
WYCKOFF STREET            6
WYONA STREET              5
WYTHE AVENUE             50
WYTHE PLACE               6
XENIA STREET              1
YATES AVENUE              5
YELLOWSTONE BOULEVARD    47
YORK AVENUE              89
YORK STREET               5
YOUNG AVENUE              2
ZEISER PLACE              2
ZEREGA AVENUE            28
ZOE STREET                2
ZULETTE AVENUE            5
Name: NUMBER_OF_PERSONS_KILLED, Length: 4241, dtype: int64

In [18]:
byboro_veh1 = df.groupby(['BOROUGH','VEHICLE_TYPE_CODE_1'])
byboro_veh1['NUMBER_OF_PERSONS_INJURED'].count()[['BRONX','BROOKLYN']]


Out[18]:
BOROUGH   VEHICLE_TYPE_CODE_1           
BRONX     AMBULANCE                            56
          BICYCLE                               6
          BUS                                 173
          FIRE TRUCK                            9
          LARGE COM VEH(6 OR MORE TIRES)      118
          LIVERY VEHICLE                      188
          MOTORCYCLE                            8
          OTHER                               398
          PASSENGER VEHICLE                  4287
          PICK-UP TRUCK                        78
          SCOOTER                               1
          SMALL COM VEH(4 TIRES)              112
          SPORT UTILITY / STATION WAGON      1721
          TAXI                                114
          UNKNOWN                             372
          VAN                                 213
BROOKLYN  AMBULANCE                            60
          BICYCLE                              11
          BUS                                 361
          FIRE TRUCK                           27
          LARGE COM VEH(6 OR MORE TIRES)      301
          LIVERY VEHICLE                      155
          MOTORCYCLE                           32
          OTHER                               707
          PASSENGER VEHICLE                 10331
          PICK-UP TRUCK                       225
          SCOOTER                               6
          SMALL COM VEH(4 TIRES)              301
          SPORT UTILITY / STATION WAGON      4852
          TAXI                                272
          UNKNOWN                             562
          VAN                                 626
Name: NUMBER_OF_PERSONS_INJURED, dtype: int64

In [19]:
byboro_veh1['NUMBER_OF_PERSONS_INJURED'].count()[['MANHATTAN','STATEN ISLAND']]


Out[19]:
BOROUGH        VEHICLE_TYPE_CODE_1           
MANHATTAN      AMBULANCE                           59
               BICYCLE                             18
               BUS                                426
               FIRE TRUCK                          17
               LARGE COM VEH(6 OR MORE TIRES)     388
               LIVERY VEHICLE                     442
               MOTORCYCLE                          47
               OTHER                              574
               PASSENGER VEHICLE                 5914
               PEDICAB                              1
               PICK-UP TRUCK                      349
               SCOOTER                              8
               SMALL COM VEH(4 TIRES)             447
               SPORT UTILITY / STATION WAGON     3085
               TAXI                              2263
               UNKNOWN                            318
               VAN                                833
STATEN ISLAND  AMBULANCE                            2
               BUS                                 26
               FIRE TRUCK                           4
               LARGE COM VEH(6 OR MORE TIRES)       3
               LIVERY VEHICLE                       3
               MOTORCYCLE                           4
               OTHER                               55
               PASSENGER VEHICLE                 1252
               PICK-UP TRUCK                       61
               SMALL COM VEH(4 TIRES)               6
               SPORT UTILITY / STATION WAGON      857
               TAXI                                 1
               UNKNOWN                             67
               VAN                                 47
Name: NUMBER_OF_PERSONS_INJURED, dtype: int64

In [20]:
byboro_veh1['NUMBER_OF_PERSONS_INJURED'].count()[['QUEENS']]


Out[20]:
BOROUGH  VEHICLE_TYPE_CODE_1           
QUEENS   AMBULANCE                           29
         BICYCLE                              8
         BUS                                244
         FIRE TRUCK                          16
         LARGE COM VEH(6 OR MORE TIRES)     261
         LIVERY VEHICLE                     132
         MOTORCYCLE                          26
         OTHER                              296
         PASSENGER VEHICLE                 9178
         PICK-UP TRUCK                      190
         SCOOTER                              2
         SMALL COM VEH(4 TIRES)             213
         SPORT UTILITY / STATION WAGON     4362
         TAXI                               210
         UNKNOWN                            549
         VAN                                399
Name: NUMBER_OF_PERSONS_INJURED, dtype: int64

In [20]: