In [2]:
import pandas as pd
In [3]:
data = pd.read_csv("Accidents7904.csv")
C:\st\Anaconda\lib\site-packages\pandas\io\parsers.py:1139: DtypeWarning: Columns (13,31) have mixed types. Specify dtype option on import or set low_memory=False.
data = self._reader.read(nrows)
In [8]:
list(data)
Out[8]:
['\xef\xbb\xbfAccident_Index',
'Location_Easting_OSGR',
'Location_Northing_OSGR',
'Longitude',
'Latitude',
'Police_Force',
'Accident_Severity',
'Number_of_Vehicles',
'Number_of_Casualties',
'Date',
'Day_of_Week',
'Time',
'Local_Authority_(District)',
'Local_Authority_(Highway)',
'1st_Road_Class',
'1st_Road_Number',
'Road_Type',
'Speed_limit',
'Junction_Detail',
'Junction_Control',
'2nd_Road_Class',
'2nd_Road_Number',
'Pedestrian_Crossing-Human_Control',
'Pedestrian_Crossing-Physical_Facilities',
'Light_Conditions',
'Weather_Conditions',
'Road_Surface_Conditions',
'Special_Conditions_at_Site',
'Carriageway_Hazards',
'Urban_or_Rural_Area',
'Did_Police_Officer_Attend_Scene_of_Accident',
'LSOA_of_Accident_Location']
In [9]:
data2 = data[data.Number_of_Casualties > 10]
data2.head()
Out[9]:
Accident_Index
Location_Easting_OSGR
Location_Northing_OSGR
Longitude
Latitude
Police_Force
Accident_Severity
Number_of_Vehicles
Number_of_Casualties
Date
...
Pedestrian_Crossing-Human_Control
Pedestrian_Crossing-Physical_Facilities
Light_Conditions
Weather_Conditions
Road_Surface_Conditions
Special_Conditions_at_Site
Carriageway_Hazards
Urban_or_Rural_Area
Did_Police_Officer_Attend_Scene_of_Accident
LSOA_of_Accident_Location
9191
197901CJJGC15
290170
915000
NaN
NaN
1
2
3
12
19/03/1979
...
-1
-1
1
8
1
-1
0
-1
-1
NaN
12352
197901DAVJD06
275770
136000
NaN
NaN
1
2
1
16
10/04/1979
...
-1
-1
4
8
1
-1
0
-1
-1
NaN
47712
197901LEADX69
413790
289000
NaN
NaN
1
2
3
12
14/12/1979
...
-1
-1
4
2
2
-1
0
-1
-1
NaN
51599
197903E102440
348000
538780
NaN
NaN
3
1
2
25
07/04/1979
...
0
0
6
1
2
0
0
-1
-1
NaN
53716
1979040BF0740
350320
447620
NaN
NaN
4
3
2
11
19/06/1979
...
0
0
1
1
1
4
0
-1
-1
NaN
5 rows × 32 columns
In [12]:
data3 = data2[data2.Number_of_Vehicles > 20]
data3.head()
Out[12]:
Accident_Index
Location_Easting_OSGR
Location_Northing_OSGR
Longitude
Latitude
Police_Force
Accident_Severity
Number_of_Vehicles
Number_of_Casualties
Date
...
Pedestrian_Crossing-Human_Control
Pedestrian_Crossing-Physical_Facilities
Light_Conditions
Weather_Conditions
Road_Surface_Conditions
Special_Conditions_at_Site
Carriageway_Hazards
Urban_or_Rural_Area
Did_Police_Officer_Attend_Scene_of_Accident
LSOA_of_Accident_Location
161828
1979420020053
573600
187600
NaN
NaN
42
2
25
20
21/01/1979
...
0
0
6
7
4
0
0
-1
-1
NaN
898168
19823382N0080
448120
310260
NaN
NaN
33
1
35
21
18/02/1982
...
0
0
1
7
2
0
0
-1
-1
NaN
1438603
198445BC00331
542740
154610
NaN
NaN
45
1
26
22
11/12/1984
...
0
0
6
7
2
0
0
-1
-1
NaN
1644332
198534BD00087
457440
274200
NaN
NaN
34
1
27
15
21/01/1985
...
0
0
1
2
2
0
0
-1
-1
NaN
1840183
198613EA11586
409200
418150
NaN
NaN
13
2
51
15
24/03/1986
...
0
0
1
7
2
0
0
-1
-1
NaN
5 rows × 32 columns
In [18]:
data4 = data3[(data3.Weather_Conditions == 2) | (data3.Weather_Conditions == 5)]
data4.head()
Out[18]:
Accident_Index
Location_Easting_OSGR
Location_Northing_OSGR
Longitude
Latitude
Police_Force
Accident_Severity
Number_of_Vehicles
Number_of_Casualties
Date
...
Pedestrian_Crossing-Human_Control
Pedestrian_Crossing-Physical_Facilities
Light_Conditions
Weather_Conditions
Road_Surface_Conditions
Special_Conditions_at_Site
Carriageway_Hazards
Urban_or_Rural_Area
Did_Police_Officer_Attend_Scene_of_Accident
LSOA_of_Accident_Location
1644332
198534BD00087
457440
274200
NaN
NaN
34
1
27
15
21/01/1985
...
0
0
1
2
2
0
0
-1
-1
NaN
2072675
1987077M10739
364600
390600
NaN
NaN
7
3
23
21
08/11/1987
...
0
0
4
2
2
0
0
-1
-1
NaN
3948125
199501JO00307
546500
200010
NaN
NaN
1
2
22
15
17/09/1995
...
0
0
1
2
2
0
0
2
-1
NaN
4320578
19964100T1118
509130
215230
NaN
NaN
41
2
29
20
10/08/1996
...
0
0
1
2
2
0
0
2
-1
NaN
4815403
1998440BB0404
467100
151900
NaN
NaN
44
3
32
24
05/04/1998
...
0
0
1
2
2
0
0
1
-1
NaN
5 rows × 32 columns
In [19]:
data5 = data4[data.Light_Conditions == 5]
data5.head()
# No values
C:\st\Anaconda\lib\site-packages\pandas\core\frame.py:1706: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
"DataFrame index.", UserWarning)
Out[19]:
Accident_Index
Location_Easting_OSGR
Location_Northing_OSGR
Longitude
Latitude
Police_Force
Accident_Severity
Number_of_Vehicles
Number_of_Casualties
Date
...
Pedestrian_Crossing-Human_Control
Pedestrian_Crossing-Physical_Facilities
Light_Conditions
Weather_Conditions
Road_Surface_Conditions
Special_Conditions_at_Site
Carriageway_Hazards
Urban_or_Rural_Area
Did_Police_Officer_Attend_Scene_of_Accident
LSOA_of_Accident_Location
0 rows × 32 columns
In [21]:
data5 = data4[data.Light_Conditions == 4]
data5.head()
Out[21]:
Accident_Index
Location_Easting_OSGR
Location_Northing_OSGR
Longitude
Latitude
Police_Force
Accident_Severity
Number_of_Vehicles
Number_of_Casualties
Date
...
Pedestrian_Crossing-Human_Control
Pedestrian_Crossing-Physical_Facilities
Light_Conditions
Weather_Conditions
Road_Surface_Conditions
Special_Conditions_at_Site
Carriageway_Hazards
Urban_or_Rural_Area
Did_Police_Officer_Attend_Scene_of_Accident
LSOA_of_Accident_Location
2072675
1987077M10739
364600
390600
NaN
NaN
7
3
23
21
08/11/1987
...
0
0
4
2
2
0
0
-1
-1
NaN
1 rows × 32 columns
In [26]:
# Incidents when more than 4 people died due to snow in London
data6 = data[(data.Weather_Conditions == 3) & (data.Number_of_Casualties > 5) & (data.Police_Force == 1)]
print len(data6)
data6.head()
4
Out[26]:
Accident_Index
Location_Easting_OSGR
Location_Northing_OSGR
Longitude
Latitude
Police_Force
Accident_Severity
Number_of_Vehicles
Number_of_Casualties
Date
...
Pedestrian_Crossing-Human_Control
Pedestrian_Crossing-Physical_Facilities
Light_Conditions
Weather_Conditions
Road_Surface_Conditions
Special_Conditions_at_Site
Carriageway_Hazards
Urban_or_Rural_Area
Did_Police_Officer_Attend_Scene_of_Accident
LSOA_of_Accident_Location
49143
197901LLTJD05
263470
545000
NaN
NaN
1
3
4
8
21/12/1979
...
-1
-1
4
3
2
-1
0
-1
-1
NaN
1025877
1983010KG0202
548670
185730
NaN
NaN
1
2
2
7
02/04/1983
...
0
0
4
3
2
0
0
-1
-1
NaN
1514727
198501HD00027
535410
181580
NaN
NaN
1
2
2
7
09/02/1985
...
0
0
1
3
3
0
0
-1
-1
NaN
3033694
199101TC00731
519000
178490
NaN
NaN
1
2
1
6
31/12/1991
...
0
0
4
3
2
0
0
-1
-1
NaN
4 rows × 32 columns
In [ ]:
Content source: shantnu/data_mine
Similar notebooks: