In [2]:
import pandas as pd

In [3]:
data = pd.read_csv("Accidents7904.csv")


C:\st\Anaconda\lib\site-packages\pandas\io\parsers.py:1139: DtypeWarning: Columns (13,31) have mixed types. Specify dtype option on import or set low_memory=False.
  data = self._reader.read(nrows)

In [8]:
list(data)


Out[8]:
['\xef\xbb\xbfAccident_Index',
 'Location_Easting_OSGR',
 'Location_Northing_OSGR',
 'Longitude',
 'Latitude',
 'Police_Force',
 'Accident_Severity',
 'Number_of_Vehicles',
 'Number_of_Casualties',
 'Date',
 'Day_of_Week',
 'Time',
 'Local_Authority_(District)',
 'Local_Authority_(Highway)',
 '1st_Road_Class',
 '1st_Road_Number',
 'Road_Type',
 'Speed_limit',
 'Junction_Detail',
 'Junction_Control',
 '2nd_Road_Class',
 '2nd_Road_Number',
 'Pedestrian_Crossing-Human_Control',
 'Pedestrian_Crossing-Physical_Facilities',
 'Light_Conditions',
 'Weather_Conditions',
 'Road_Surface_Conditions',
 'Special_Conditions_at_Site',
 'Carriageway_Hazards',
 'Urban_or_Rural_Area',
 'Did_Police_Officer_Attend_Scene_of_Accident',
 'LSOA_of_Accident_Location']

In [9]:
data2 = data[data.Number_of_Casualties > 10]
data2.head()


Out[9]:
Accident_Index Location_Easting_OSGR Location_Northing_OSGR Longitude Latitude Police_Force Accident_Severity Number_of_Vehicles Number_of_Casualties Date ... Pedestrian_Crossing-Human_Control Pedestrian_Crossing-Physical_Facilities Light_Conditions Weather_Conditions Road_Surface_Conditions Special_Conditions_at_Site Carriageway_Hazards Urban_or_Rural_Area Did_Police_Officer_Attend_Scene_of_Accident LSOA_of_Accident_Location
9191 197901CJJGC15 290170 915000 NaN NaN 1 2 3 12 19/03/1979 ... -1 -1 1 8 1 -1 0 -1 -1 NaN
12352 197901DAVJD06 275770 136000 NaN NaN 1 2 1 16 10/04/1979 ... -1 -1 4 8 1 -1 0 -1 -1 NaN
47712 197901LEADX69 413790 289000 NaN NaN 1 2 3 12 14/12/1979 ... -1 -1 4 2 2 -1 0 -1 -1 NaN
51599 197903E102440 348000 538780 NaN NaN 3 1 2 25 07/04/1979 ... 0 0 6 1 2 0 0 -1 -1 NaN
53716 1979040BF0740 350320 447620 NaN NaN 4 3 2 11 19/06/1979 ... 0 0 1 1 1 4 0 -1 -1 NaN

5 rows × 32 columns


In [12]:
data3 = data2[data2.Number_of_Vehicles > 20]
data3.head()


Out[12]:
Accident_Index Location_Easting_OSGR Location_Northing_OSGR Longitude Latitude Police_Force Accident_Severity Number_of_Vehicles Number_of_Casualties Date ... Pedestrian_Crossing-Human_Control Pedestrian_Crossing-Physical_Facilities Light_Conditions Weather_Conditions Road_Surface_Conditions Special_Conditions_at_Site Carriageway_Hazards Urban_or_Rural_Area Did_Police_Officer_Attend_Scene_of_Accident LSOA_of_Accident_Location
161828 1979420020053 573600 187600 NaN NaN 42 2 25 20 21/01/1979 ... 0 0 6 7 4 0 0 -1 -1 NaN
898168 19823382N0080 448120 310260 NaN NaN 33 1 35 21 18/02/1982 ... 0 0 1 7 2 0 0 -1 -1 NaN
1438603 198445BC00331 542740 154610 NaN NaN 45 1 26 22 11/12/1984 ... 0 0 6 7 2 0 0 -1 -1 NaN
1644332 198534BD00087 457440 274200 NaN NaN 34 1 27 15 21/01/1985 ... 0 0 1 2 2 0 0 -1 -1 NaN
1840183 198613EA11586 409200 418150 NaN NaN 13 2 51 15 24/03/1986 ... 0 0 1 7 2 0 0 -1 -1 NaN

5 rows × 32 columns


In [18]:
data4  = data3[(data3.Weather_Conditions == 2) | (data3.Weather_Conditions == 5)]
data4.head()


Out[18]:
Accident_Index Location_Easting_OSGR Location_Northing_OSGR Longitude Latitude Police_Force Accident_Severity Number_of_Vehicles Number_of_Casualties Date ... Pedestrian_Crossing-Human_Control Pedestrian_Crossing-Physical_Facilities Light_Conditions Weather_Conditions Road_Surface_Conditions Special_Conditions_at_Site Carriageway_Hazards Urban_or_Rural_Area Did_Police_Officer_Attend_Scene_of_Accident LSOA_of_Accident_Location
1644332 198534BD00087 457440 274200 NaN NaN 34 1 27 15 21/01/1985 ... 0 0 1 2 2 0 0 -1 -1 NaN
2072675 1987077M10739 364600 390600 NaN NaN 7 3 23 21 08/11/1987 ... 0 0 4 2 2 0 0 -1 -1 NaN
3948125 199501JO00307 546500 200010 NaN NaN 1 2 22 15 17/09/1995 ... 0 0 1 2 2 0 0 2 -1 NaN
4320578 19964100T1118 509130 215230 NaN NaN 41 2 29 20 10/08/1996 ... 0 0 1 2 2 0 0 2 -1 NaN
4815403 1998440BB0404 467100 151900 NaN NaN 44 3 32 24 05/04/1998 ... 0 0 1 2 2 0 0 1 -1 NaN

5 rows × 32 columns


In [19]:
data5 = data4[data.Light_Conditions == 5]
data5.head()
# No values


C:\st\Anaconda\lib\site-packages\pandas\core\frame.py:1706: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  "DataFrame index.", UserWarning)
Out[19]:
Accident_Index Location_Easting_OSGR Location_Northing_OSGR Longitude Latitude Police_Force Accident_Severity Number_of_Vehicles Number_of_Casualties Date ... Pedestrian_Crossing-Human_Control Pedestrian_Crossing-Physical_Facilities Light_Conditions Weather_Conditions Road_Surface_Conditions Special_Conditions_at_Site Carriageway_Hazards Urban_or_Rural_Area Did_Police_Officer_Attend_Scene_of_Accident LSOA_of_Accident_Location

0 rows × 32 columns


In [21]:
data5 = data4[data.Light_Conditions == 4]
data5.head()


Out[21]:
Accident_Index Location_Easting_OSGR Location_Northing_OSGR Longitude Latitude Police_Force Accident_Severity Number_of_Vehicles Number_of_Casualties Date ... Pedestrian_Crossing-Human_Control Pedestrian_Crossing-Physical_Facilities Light_Conditions Weather_Conditions Road_Surface_Conditions Special_Conditions_at_Site Carriageway_Hazards Urban_or_Rural_Area Did_Police_Officer_Attend_Scene_of_Accident LSOA_of_Accident_Location
2072675 1987077M10739 364600 390600 NaN NaN 7 3 23 21 08/11/1987 ... 0 0 4 2 2 0 0 -1 -1 NaN

1 rows × 32 columns


In [26]:
# Incidents when more than 4 people died due to snow in London
data6 = data[(data.Weather_Conditions == 3) & (data.Number_of_Casualties > 5) & (data.Police_Force == 1)]
print len(data6)
data6.head()


4
Out[26]:
Accident_Index Location_Easting_OSGR Location_Northing_OSGR Longitude Latitude Police_Force Accident_Severity Number_of_Vehicles Number_of_Casualties Date ... Pedestrian_Crossing-Human_Control Pedestrian_Crossing-Physical_Facilities Light_Conditions Weather_Conditions Road_Surface_Conditions Special_Conditions_at_Site Carriageway_Hazards Urban_or_Rural_Area Did_Police_Officer_Attend_Scene_of_Accident LSOA_of_Accident_Location
49143 197901LLTJD05 263470 545000 NaN NaN 1 3 4 8 21/12/1979 ... -1 -1 4 3 2 -1 0 -1 -1 NaN
1025877 1983010KG0202 548670 185730 NaN NaN 1 2 2 7 02/04/1983 ... 0 0 4 3 2 0 0 -1 -1 NaN
1514727 198501HD00027 535410 181580 NaN NaN 1 2 2 7 09/02/1985 ... 0 0 1 3 3 0 0 -1 -1 NaN
3033694 199101TC00731 519000 178490 NaN NaN 1 2 1 6 31/12/1991 ... 0 0 4 3 2 0 0 -1 -1 NaN

4 rows × 32 columns


In [ ]: