In [1]:
import pandas as pd

In [2]:
data = pd.read_csv("Accidents7904.csv")


C:\st\Anaconda\lib\site-packages\pandas\io\parsers.py:1139: DtypeWarning: Columns (13,31) have mixed types. Specify dtype option on import or set low_memory=False.
  data = self._reader.read(nrows)

In [3]:
print type(data)


<class 'pandas.core.frame.DataFrame'>

In [4]:
list(data)


Out[4]:
['\xef\xbb\xbfAccident_Index',
 'Location_Easting_OSGR',
 'Location_Northing_OSGR',
 'Longitude',
 'Latitude',
 'Police_Force',
 'Accident_Severity',
 'Number_of_Vehicles',
 'Number_of_Casualties',
 'Date',
 'Day_of_Week',
 'Time',
 'Local_Authority_(District)',
 'Local_Authority_(Highway)',
 '1st_Road_Class',
 '1st_Road_Number',
 'Road_Type',
 'Speed_limit',
 'Junction_Detail',
 'Junction_Control',
 '2nd_Road_Class',
 '2nd_Road_Number',
 'Pedestrian_Crossing-Human_Control',
 'Pedestrian_Crossing-Physical_Facilities',
 'Light_Conditions',
 'Weather_Conditions',
 'Road_Surface_Conditions',
 'Special_Conditions_at_Site',
 'Carriageway_Hazards',
 'Urban_or_Rural_Area',
 'Did_Police_Officer_Attend_Scene_of_Accident',
 'LSOA_of_Accident_Location']

In [5]:
data['\xef\xbb\xbfAccident_Index'][:5]


Out[5]:
0    197901A11AD14
1    197901A1BAW34
2    197901A1BFD77
3    197901A1BGC20
4    197901A1BGF95
Name: Accident_Index, dtype: object

In [6]:
data_london = data[data['Police_Force'] == 1]
print data_london[:5]


  Accident_Index  Location_Easting_OSGR  Location_Northing_OSGR  Longitude  \
0   197901A11AD14                    NaN                     NaN        NaN   
1   197901A1BAW34                 198460                  894000        NaN   
2   197901A1BFD77                 406380                  307000        NaN   
3   197901A1BGC20                 281680                  440000        NaN   
4   197901A1BGF95                 153960                  795000        NaN   

   Latitude  Police_Force  Accident_Severity  Number_of_Vehicles  \
0       NaN             1                  3                   2   
1       NaN             1                  3                   1   
2       NaN             1                  3                   2   
3       NaN             1                  3                   2   
4       NaN             1                  2                   2   

   Number_of_Casualties        Date     ...      \
0                     1  18/01/1979     ...       
1                     1  01/01/1979     ...       
2                     3  01/01/1979     ...       
3                     2  01/01/1979     ...       
4                     1  01/01/1979     ...       

   Pedestrian_Crossing-Human_Control Pedestrian_Crossing-Physical_Facilities  \
0                                 -1                                      -1   
1                                 -1                                      -1   
2                                 -1                                      -1   
3                                 -1                                      -1   
4                                 -1                                      -1   

   Light_Conditions Weather_Conditions  Road_Surface_Conditions  \
0                 1                  8                        1   
1                 4                  8                        3   
2                 4                  8                        3   
3                 4                  8                        3   
4                 4                  3                        3   

   Special_Conditions_at_Site  Carriageway_Hazards  Urban_or_Rural_Area  \
0                          -1                    0                   -1   
1                          -1                    0                   -1   
2                          -1                    0                   -1   
3                          -1                    0                   -1   
4                          -1                    0                   -1   

   Did_Police_Officer_Attend_Scene_of_Accident  LSOA_of_Accident_Location  
0                                           -1                        NaN  
1                                           -1                        NaN  
2                                           -1                        NaN  
3                                           -1                        NaN  
4                                           -1                        NaN  

[5 rows x 32 columns]

In [7]:
type(data_london['Date'][0])


Out[7]:
str

In [8]:
data_london['Date'] = pd.to_datetime(data_london['Date'])


-c:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_index,col_indexer] = value instead

In [9]:
data_london['Date'][:5]


Out[9]:
0   1979-01-18
1   1979-01-01
2   1979-01-01
3   1979-01-01
4   1979-01-01
Name: Date, dtype: datetime64[ns]

In [10]:
data_london['Date'][0] > pd.to_datetime('2000-01-01')


Out[10]:
False

In [11]:
data_london['Date'][0] > pd.to_datetime('1979-01-01')


Out[11]:
True

In [12]:
data_london[:1]


Out[12]:
Accident_Index Location_Easting_OSGR Location_Northing_OSGR Longitude Latitude Police_Force Accident_Severity Number_of_Vehicles Number_of_Casualties Date ... Pedestrian_Crossing-Human_Control Pedestrian_Crossing-Physical_Facilities Light_Conditions Weather_Conditions Road_Surface_Conditions Special_Conditions_at_Site Carriageway_Hazards Urban_or_Rural_Area Did_Police_Officer_Attend_Scene_of_Accident LSOA_of_Accident_Location
0 197901A11AD14 NaN NaN NaN NaN 1 3 2 1 1979-01-18 ... -1 -1 1 8 1 -1 0 -1 -1 NaN

1 rows × 32 columns


In [13]:
#print type(data_london['Date'])
#print data_london['Date'][0]
data_london_new = data_london[data_london['Date'] > pd.to_datetime('2000-01-01')]
print len(data_london_new)
print data_london_new[:5]


167598
        Accident_Index  Location_Easting_OSGR  Location_Northing_OSGR  \
5118264   2000010SU0982                 522270                  200330   
5118265   2000010SU0983                 536010                  204970   
5118266   2000010SU0984                 519480                  204200   
5118267   2000010SU0985                 520760                  202280   
5118268   2000010SU0986                 523250                  199890   

         Longitude   Latitude  Police_Force  Accident_Severity  \
5118264  -0.232572  51.688371             1                  3   
5118265  -0.032110  51.726908             1                  3   
5118266  -0.271588  51.723752             1                  3   
5118267  -0.253731  51.706222             1                  3   
5118268  -0.218557  51.684203             1                  3   

         Number_of_Vehicles  Number_of_Casualties       Date    ...      \
5118264                   4                     3 2000-03-21    ...       
5118265                   2                     1 2000-09-03    ...       
5118266                   4                     1 2000-03-23    ...       
5118267                   3                     1 2000-03-15    ...       
5118268                   3                     1 2000-06-03    ...       

         Pedestrian_Crossing-Human_Control  \
5118264                                  0   
5118265                                  0   
5118266                                  0   
5118267                                  0   
5118268                                  0   

        Pedestrian_Crossing-Physical_Facilities  Light_Conditions  \
5118264                                       0                 1   
5118265                                       0                 1   
5118266                                       0                 1   
5118267                                       0                 1   
5118268                                       0                 1   

        Weather_Conditions  Road_Surface_Conditions  \
5118264                  1                        1   
5118265                  1                        1   
5118266                  1                        1   
5118267                  1                        1   
5118268                  1                        1   

         Special_Conditions_at_Site  Carriageway_Hazards  Urban_or_Rural_Area  \
5118264                           0                    0                    2   
5118265                           0                    0                    2   
5118266                           0                    0                    2   
5118267                           0                    0                    2   
5118268                           0                    0                    2   

         Did_Police_Officer_Attend_Scene_of_Accident  \
5118264                                            1   
5118265                                            1   
5118266                                            1   
5118267                                            1   
5118268                                            1   

         LSOA_of_Accident_Location  
5118264                  E01023584  
5118265                  E01023310  
5118266                  E01023584  
5118267                  E01023584  
5118268                  E01023584  

[5 rows x 32 columns]

In [14]:
data_london_new['Time'][:5]


Out[14]:
5118264    08:20
5118265    10:25
5118266    13:06
5118267    08:40
5118268    11:20
Name: Time, dtype: object

In [15]:
print (data_london_new['Time'].iloc[1])
print type(data_london_new['Time'].iloc[1])


10:25
<type 'str'>

In [16]:
pd.to_datetime(data_london['Time'].iloc[0])


Out[16]:
Timestamp('2015-02-11 08:00:00')

In [17]:
data_london.iloc[0]


Out[17]:
Accident_Index                                      197901A11AD14
Location_Easting_OSGR                                          NaN
Location_Northing_OSGR                                         NaN
Longitude                                                      NaN
Latitude                                                       NaN
Police_Force                                                     1
Accident_Severity                                                3
Number_of_Vehicles                                               2
Number_of_Casualties                                             1
Date                                           1979-01-18 00:00:00
Day_of_Week                                                      5
Time                                                         08:00
Local_Authority_(District)                                      11
Local_Authority_(Highway)                                     9999
1st_Road_Class                                                   3
1st_Road_Number                                                  4
Road_Type                                                        1
Speed_limit                                                     30
Junction_Detail                                                  1
Junction_Control                                                 4
2nd_Road_Class                                                  -1
2nd_Road_Number                                                 -1
Pedestrian_Crossing-Human_Control                               -1
Pedestrian_Crossing-Physical_Facilities                         -1
Light_Conditions                                                 1
Weather_Conditions                                               8
Road_Surface_Conditions                                          1
Special_Conditions_at_Site                                      -1
Carriageway_Hazards                                              0
Urban_or_Rural_Area                                             -1
Did_Police_Officer_Attend_Scene_of_Accident                     -1
LSOA_of_Accident_Location                                      NaN
Name: 0, dtype: object

In [59]:
data_london_new2 = data_london_new[['Date', 'Time', 'Number_of_Casualties']]
data_london_new2.head()


Out[59]:
Date Time Number_of_Casualties
5118264 2000-03-21 08:20 3
5118265 2000-09-03 10:25 1
5118266 2000-03-23 13:06 1
5118267 2000-03-15 08:40 1
5118268 2000-06-03 11:20 1

In [60]:
data_london_new2.sort('Date', inplace=True)
#data_london_new2.set_index('Date', inplace=True)
data_london_new2.head()


C:\st\Anaconda\lib\site-packages\pandas\core\frame.py:2679: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame
  inplace=inplace, kind=kind, na_position=na_position)
Out[60]:
Date Time Number_of_Casualties
5119933 2000-01-02 18:15 1
5123755 2000-01-02 20:22 1
5123741 2000-01-02 08:05 1
5132053 2000-01-02 18:05 1
5150304 2000-01-02 15:30 2

In [61]:
data_london_new2.sort('Time', inplace=True)
#data_london_new2.set_index('Date', inplace=True)
data_london_new2.head()


Out[61]:
Date Time Number_of_Casualties
5829898 2003-01-31 00:01 2
5360237 2001-11-21 00:01 1
5582938 2002-07-06 00:01 1
5132218 2000-02-28 00:01 1
5829674 2003-12-21 00:01 2

In [63]:
writer = pd.ExcelWriter('output.xlsx')
data_london_new2.to_excel(writer,'Sheet1')
writer.save()

In [65]:


In [ ]: