In [8]:
# this ipython notebook might be used for simple statistics exploration, if needed
import pandas as pd
pd.set_option('display.mpl_style', 'default')
pd.set_option('display.width', 5000)
pd.set_option('display.max_columns', 60)
import numpy as np
import os
In [9]:
dpath = "./"
df = pd.read_csv(os.path.join(dpath,'data','150days_crashdata.csv'))
df.columns
Out[9]:
Index([u'DATE', u'TIME', u'BOROUGH', u'ZIP_CODE', u'LATITUDE', u'LONGITUDE', u'LOCATION', u'ON_STREET_NAME', u'CROSS_STREET_NAME', u'OFF_STREET_NAME', u'NUMBER_OF_PERSONS_INJURED', u'NUMBER_OF_PERSONS_KILLED', u'NUMBER_OF_PEDESTRIANS_INJURED', u'NUMBER_OF_PEDESTRIANS_KILLED', u'NUMBER_OF_CYCLIST_INJURED', u'NUMBER_OF_CYCLIST_KILLED', u'NUMBER_OF_MOTORIST_INJURED', u'NUMBER_OF_MOTORIST_KILLED', u'CONTRIBUTING_FACTOR_VEHICLE_1', u'CONTRIBUTING_FACTOR_VEHICLE_2', u'CONTRIBUTING_FACTOR_VEHICLE_3', u'CONTRIBUTING_FACTOR_VEHICLE_4', u'CONTRIBUTING_FACTOR_VEHICLE_5', u'UNIQUE_KEY', u'VEHICLE_TYPE_CODE_1', u'VEHICLE_TYPE_CODE_2', u'VEHICLE_TYPE_CODE_3', u'VEHICLE_TYPE_CODE_4', u'VEHICLE_TYPE_CODE_5'], dtype='object')
In [10]:
df.groupby('CONTRIBUTING_FACTOR_VEHICLE_1')['CONTRIBUTING_FACTOR_VEHICLE_1'].count()
Out[10]:
CONTRIBUTING_FACTOR_VEHICLE_1
Accelerator Defective 40
Aggressive Driving/Road Rage 265
Alcohol Involvement 675
Animals Action 46
Backing Unsafely 2499
Brakes Defective 206
Cell Phone (hand-held) 7
Cell Phone (hands-free) 11
Driver Inattention/Distraction 10458
Driver Inexperience 997
Drugs (Illegal) 43
Failure to Keep Right 158
Failure to Yield Right-of-Way 3938
Fatigued/Drowsy 5092
Fell Asleep 101
Following Too Closely 42
Glare 220
Headlights Defective 6
Illness 223
Lane Marking Improper/Inadequate 45
Lost Consciousness 2205
Obstruction/Debris 149
Other Electronic Device 254
Other Lighting Defects 11
Other Vehicular 3859
Outside Car Distraction 968
Oversized Vehicle 563
Passenger Distraction 359
Passing or Lane Usage Improper 26
Pavement Defective 104
Pavement Slippery 2512
Pedestrian/Bicyclist/Other Pedestrian Error/Confusion 10
Physical Disability 944
Prescription Medication 1544
Reaction to Other Uninvolved Vehicle 139
Shoulders Defective/Improper 3
Steering Failure 79
Tire Failure/Inadequate 70
Tow Hitch Defective 6
Traffic Control Device Improper/Non-Working 37
Traffic Control Disregarded 1173
Turning Improperly 2288
Unsafe Lane Changing 24
Unsafe Speed 100
Unspecified 37653
View Obstructed/Limited 315
Windshield Inadequate 5
Name: CONTRIBUTING_FACTOR_VEHICLE_1, dtype: int64
In [11]:
df.groupby('CONTRIBUTING_FACTOR_VEHICLE_2')['CONTRIBUTING_FACTOR_VEHICLE_2'].count()
Out[11]:
CONTRIBUTING_FACTOR_VEHICLE_2
Accelerator Defective 5
Aggressive Driving/Road Rage 65
Alcohol Involvement 87
Animals Action 6
Backing Unsafely 407
Brakes Defective 27
Cell Phone (hand-held) 1
Cell Phone (hands-free) 1
Driver Inattention/Distraction 3220
Driver Inexperience 376
Drugs (Illegal) 11
Failure to Keep Right 29
Failure to Yield Right-of-Way 800
Fatigued/Drowsy 1376
Fell Asleep 42
Following Too Closely 7
Glare 31
Headlights Defective 6
Illness 56
Lane Marking Improper/Inadequate 27
Lost Consciousness 651
Obstruction/Debris 30
Other Electronic Device 80
Other Lighting Defects 3
Other Vehicular 2452
Outside Car Distraction 232
Oversized Vehicle 125
Passenger Distraction 78
Passing or Lane Usage Improper 6
Pavement Defective 15
Pavement Slippery 605
Pedestrian/Bicyclist/Other Pedestrian Error/Confusion 1
Physical Disability 252
Prescription Medication 389
Reaction to Other Uninvolved Vehicle 44
Steering Failure 5
Tire Failure/Inadequate 5
Traffic Control Device Improper/Non-Working 25
Traffic Control Disregarded 367
Turning Improperly 610
Unsafe Lane Changing 3
Unsafe Speed 21
Unspecified 57624
View Obstructed/Limited 95
Name: CONTRIBUTING_FACTOR_VEHICLE_2, dtype: int64
In [12]:
df.groupby('VEHICLE_TYPE_CODE_1')['VEHICLE_TYPE_CODE_1'].count()
Out[12]:
VEHICLE_TYPE_CODE_1
AMBULANCE 246
BICYCLE 45
BUS 1472
FIRE TRUCK 90
LARGE COM VEH(6 OR MORE TIRES) 1547
LIVERY VEHICLE 1167
MOTORCYCLE 148
OTHER 2566
PASSENGER VEHICLE 42385
PEDICAB 1
PICK-UP TRUCK 1216
SCOOTER 21
SMALL COM VEH(4 TIRES) 1332
SPORT UTILITY / STATION WAGON 20098
TAXI 3348
UNKNOWN 2277
VAN 2658
Name: VEHICLE_TYPE_CODE_1, dtype: int64
In [13]:
df.groupby('VEHICLE_TYPE_CODE_2')['VEHICLE_TYPE_CODE_2'].count()
Out[13]:
VEHICLE_TYPE_CODE_2
AMBULANCE 124
BICYCLE 960
BUS 1308
FIRE TRUCK 50
LARGE COM VEH(6 OR MORE TIRES) 1441
LIVERY VEHICLE 853
MOTORCYCLE 146
OTHER 2423
PASSENGER VEHICLE 31924
PEDICAB 3
PICK-UP TRUCK 1109
SCOOTER 15
SMALL COM VEH(4 TIRES) 1377
SPORT UTILITY / STATION WAGON 15414
TAXI 2899
UNKNOWN 8929
VAN 2354
Name: VEHICLE_TYPE_CODE_2, dtype: int64
In [14]:
#normilize by square at least; population
byboro = df.groupby('BOROUGH')
byboro['NUMBER_OF_PERSONS_INJURED'].count()
Out[14]:
BOROUGH
BRONX 7860
BROOKLYN 18849
MANHATTAN 15230
QUEENS 16118
STATEN ISLAND 2388
Name: NUMBER_OF_PERSONS_INJURED, dtype: int64
In [15]:
byboro['NUMBER_OF_PERSONS_KILLED'].count()
Out[15]:
BOROUGH
BRONX 7860
BROOKLYN 18849
MANHATTAN 15230
QUEENS 16118
STATEN ISLAND 2388
Name: NUMBER_OF_PERSONS_KILLED, dtype: int64
In [16]:
#normilize by length at least; traffic
byonstreet = df.groupby('ON_STREET_NAME')
byonstreet['NUMBER_OF_PERSONS_INJURED'].count()
Out[16]:
ON_STREET_NAME
1 AVENUE 275
1 PLACE 1
1 STREET 11
10 AVENUE 358
10 STREET 12
100 AVENUE 5
100 DRIVE 1
100 STREET 21
101 AVENUE 89
101 ROAD 1
101 STREET 19
102 AVENUE 10
102 STREET 34
103 AVENUE 55
103 ROAD 1
...
WYCKOFF AVENUE 35
WYCKOFF STREET 6
WYONA STREET 5
WYTHE AVENUE 50
WYTHE PLACE 6
XENIA STREET 1
YATES AVENUE 5
YELLOWSTONE BOULEVARD 47
YORK AVENUE 89
YORK STREET 5
YOUNG AVENUE 2
ZEISER PLACE 2
ZEREGA AVENUE 28
ZOE STREET 2
ZULETTE AVENUE 5
Name: NUMBER_OF_PERSONS_INJURED, Length: 4241, dtype: int64
In [17]:
byonstreet['NUMBER_OF_PERSONS_KILLED'].count()
Out[17]:
ON_STREET_NAME
1 AVENUE 275
1 PLACE 1
1 STREET 11
10 AVENUE 358
10 STREET 12
100 AVENUE 5
100 DRIVE 1
100 STREET 21
101 AVENUE 89
101 ROAD 1
101 STREET 19
102 AVENUE 10
102 STREET 34
103 AVENUE 55
103 ROAD 1
...
WYCKOFF AVENUE 35
WYCKOFF STREET 6
WYONA STREET 5
WYTHE AVENUE 50
WYTHE PLACE 6
XENIA STREET 1
YATES AVENUE 5
YELLOWSTONE BOULEVARD 47
YORK AVENUE 89
YORK STREET 5
YOUNG AVENUE 2
ZEISER PLACE 2
ZEREGA AVENUE 28
ZOE STREET 2
ZULETTE AVENUE 5
Name: NUMBER_OF_PERSONS_KILLED, Length: 4241, dtype: int64
In [18]:
byboro_veh1 = df.groupby(['BOROUGH','VEHICLE_TYPE_CODE_1'])
byboro_veh1['NUMBER_OF_PERSONS_INJURED'].count()[['BRONX','BROOKLYN']]
Out[18]:
BOROUGH VEHICLE_TYPE_CODE_1
BRONX AMBULANCE 56
BICYCLE 6
BUS 173
FIRE TRUCK 9
LARGE COM VEH(6 OR MORE TIRES) 118
LIVERY VEHICLE 188
MOTORCYCLE 8
OTHER 398
PASSENGER VEHICLE 4287
PICK-UP TRUCK 78
SCOOTER 1
SMALL COM VEH(4 TIRES) 112
SPORT UTILITY / STATION WAGON 1721
TAXI 114
UNKNOWN 372
VAN 213
BROOKLYN AMBULANCE 60
BICYCLE 11
BUS 361
FIRE TRUCK 27
LARGE COM VEH(6 OR MORE TIRES) 301
LIVERY VEHICLE 155
MOTORCYCLE 32
OTHER 707
PASSENGER VEHICLE 10331
PICK-UP TRUCK 225
SCOOTER 6
SMALL COM VEH(4 TIRES) 301
SPORT UTILITY / STATION WAGON 4852
TAXI 272
UNKNOWN 562
VAN 626
Name: NUMBER_OF_PERSONS_INJURED, dtype: int64
In [19]:
byboro_veh1['NUMBER_OF_PERSONS_INJURED'].count()[['MANHATTAN','STATEN ISLAND']]
Out[19]:
BOROUGH VEHICLE_TYPE_CODE_1
MANHATTAN AMBULANCE 59
BICYCLE 18
BUS 426
FIRE TRUCK 17
LARGE COM VEH(6 OR MORE TIRES) 388
LIVERY VEHICLE 442
MOTORCYCLE 47
OTHER 574
PASSENGER VEHICLE 5914
PEDICAB 1
PICK-UP TRUCK 349
SCOOTER 8
SMALL COM VEH(4 TIRES) 447
SPORT UTILITY / STATION WAGON 3085
TAXI 2263
UNKNOWN 318
VAN 833
STATEN ISLAND AMBULANCE 2
BUS 26
FIRE TRUCK 4
LARGE COM VEH(6 OR MORE TIRES) 3
LIVERY VEHICLE 3
MOTORCYCLE 4
OTHER 55
PASSENGER VEHICLE 1252
PICK-UP TRUCK 61
SMALL COM VEH(4 TIRES) 6
SPORT UTILITY / STATION WAGON 857
TAXI 1
UNKNOWN 67
VAN 47
Name: NUMBER_OF_PERSONS_INJURED, dtype: int64
In [20]:
byboro_veh1['NUMBER_OF_PERSONS_INJURED'].count()[['QUEENS']]
Out[20]:
BOROUGH VEHICLE_TYPE_CODE_1
QUEENS AMBULANCE 29
BICYCLE 8
BUS 244
FIRE TRUCK 16
LARGE COM VEH(6 OR MORE TIRES) 261
LIVERY VEHICLE 132
MOTORCYCLE 26
OTHER 296
PASSENGER VEHICLE 9178
PICK-UP TRUCK 190
SCOOTER 2
SMALL COM VEH(4 TIRES) 213
SPORT UTILITY / STATION WAGON 4362
TAXI 210
UNKNOWN 549
VAN 399
Name: NUMBER_OF_PERSONS_INJURED, dtype: int64
In [20]:
Content source: yanyuchka/infovisproj-temp
Similar notebooks: