notebook.community

Edit and run



In [285]:

    
import numpy as np
import pandas as pd
# import tensorflow as tf
from matplotlib import pyplot as plt
import  datetime
import os
import gzip
%matplotlib inline

from sklearn.preprocessing import OneHotEncoder, LabelEncoder, LabelBinarizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import RobustScaler



In [58]:

    
waze = pd.read_csv("../waze (4).csv", sep=";")
print(waze['subtype'].unique())









    



['ROAD_CLOSED_EVENT' 'HAZARD_ON_ROAD_CONSTRUCTION'
 'HAZARD_ON_SHOULDER_CAR_STOPPED' nan 'POLICE_VISIBLE' 'ACCIDENT_MINOR'
 'HAZARD_ON_ROAD_CAR_STOPPED' 'HAZARD_ON_ROAD_LANE_CLOSED'
 'ROAD_CLOSED_HAZARD' 'POLICE_HIDING' 'HAZARD_ON_ROAD_OBJECT'
 'HAZARD_ON_SHOULDER_ANIMALS' 'ACCIDENT_MAJOR' 'HAZARD_ON_SHOULDER'
 'HAZARD_ON_ROAD_POT_HOLE' 'HAZARD_WEATHER'
 'HAZARD_ON_SHOULDER_MISSING_SIGN' 'HAZARD_ON_ROAD_ROAD_KILL'
 'HAZARD_ON_ROAD' 'ROAD_CLOSED_CONSTRUCTION' 'HAZARD_ON_ROAD_ICE']



In [67]:

    
waze.columns









    Out[67]:





Index(['WID', 'area', 'publicatedOn', 'updatedOn', 'closedOn', 'country',
       'nThumbsUp', 'city', 'reportRating', 'reliability', 'type', 'uuid',
       'speed', 'reportMood', 'subtype', 'street', 'id', 'additionalInfo',
       'nearBy', 'roadType', 'wazeid', 'nComments', 'reportBy', 'inscale',
       'comments', 'isThumbsUp', 'isJamUnifiedAlert', 'confidence', 'nImages',
       'magvar', 'showFacebookPic', 'wazeData', 'reportDescription', 'x', 'y',
       'pubMillis', 'provider', 'providerId', 'imageUrl', 'imageId',
       'reportByActiveGroupName', 'reportByActiveGroupIcon', 'missing',
       'hour'],
      dtype='object')



In [69]:

    
w_g = waze.groupby(['x', 'y'])['area'].count()



In [70]:

    
plt.plot(waze['x'], waze['y'], ".")
plt.show()



In [62]:

    
waze['reportDescription'].unique()









    Out[62]:





array(['Storing',
       'Calandbrug over Calandkanaal. Open sinds: 00:09 uur. Gemiddeld open: 15 minuten.',
       'Calandbrug over Calandkanaal. Open sinds: 00:25 uur. Gemiddeld open: 15 minuten.',
       ..., 'Voorwerp op de weg: A4 bij hmp 71,4',
       'Calandbrug over Calandkanaal. Open sinds: 23:41 uur. Gemiddeld open: 16 minuten.',
       'Roparun'], dtype=object)



In [63]:

    
waze['publicatedOn'] = pd.to_datetime(waze['publicatedOn'], format="%Y%m%d%H%M%S")

waze["hour"] = waze.publicatedOn.dt.hour

plt.hist(waze.loc[waze['subtype'] == 'ACCIDENT_MAJOR']["hour"], bins=23)
waze.columns = ['WID', 'area', 'publicatedOn', 'updatedOn', 'closedOn', 'country',
       'nThumbsUp', 'city', 'reportRating', 'reliability', 'type', 'uuid',
       'speed', 'reportMood', 'subtype', 'street', 'id', 'additionalInfo',
       'nearBy', 'roadType', 'wazeid', 'nComments', 'reportBy', 'inscale',
       'comments', 'isThumbsUp', 'isJamUnifiedAlert', 'confidence', 'nImages',
       'magvar', 'showFacebookPic', 'wazeData', 'reportDescription',
       'x', 'y', 'pubMillis', 'provider', 'providerId',
       'imageUrl', 'imageId', 'reportByActiveGroupName',
       'reportByActiveGroupIcon', 'missing', 'hour']



In [50]:

    
incidents = pd.read_excel("../Incidents_WPH_Car.xlsx")



In [51]:

    
incidents.head()









    Out[51]:







  
    
      
      datum
      gemelde locatie
      werkelijke locatie
      tijdstip melding
      tijdstip aankomst
    
  
  
    
      0
      2017-05-22
      16 L 20.5
      16 L 20.68
      06:37:16
      06:57:27
    
    
      1
      2017-05-22
      15 R 60.8
      29 L 9.56 s
      06:49:51
      06:49:26
    
    
      2
      2017-05-22
      15 R 53.3 a
      15 R 53.27 a
      07:18:40
      07:30:07
    
    
      3
      2017-05-22
      20 L 29.6
      20 L 29.69 s
      07:23:40
      07:36:28
    
    
      4
      2017-05-22
      20 L 34.3
      20 L 34.25 v
      07:24:07
      07:29:15



In [23]:

    
flitsmeister = pd.read_csv("../flitsmeister week 21 22/2017-05-22_17.csv")



In [24]:

    
flitsmeister.head()









    Out[24]:







  
    
      
      RowID
      SessionID
      Latitude
      Longitude
      Bearing
      Speed
      created_at
      geohash
      SessionCount
    
  
  
    
      0
      456393953
      FC847645-41FA-4C24-AB47-134042D9C1FB
      51.864525
      4.526764
      284
      46
      Mon May 22 2017 14:58:03 GMT+0200 (CEST)
      u15pp18
      NaN
    
    
      1
      456393954
      FC847645-41FA-4C24-AB47-134042D9C1FB
      51.864556
      4.526617
      286
      39
      Mon May 22 2017 14:58:03 GMT+0200 (CEST)
      u15pp18
      NaN
    
    
      2
      456393955
      FC847645-41FA-4C24-AB47-134042D9C1FB
      51.864584
      4.526479
      287
      37
      Mon May 22 2017 14:58:03 GMT+0200 (CEST)
      u15pp18
      NaN
    
    
      3
      456393956
      FC847645-41FA-4C24-AB47-134042D9C1FB
      51.864581
      4.526385
      285
      28
      Mon May 22 2017 14:58:03 GMT+0200 (CEST)
      u15pp18
      NaN
    
    
      4
      456393957
      FC847645-41FA-4C24-AB47-134042D9C1FB
      51.864601
      4.526271
      286
      28
      Mon May 22 2017 14:58:03 GMT+0200 (CEST)
      u15pncx
      NaN



In [26]:

    
flitsmeister.shape









    Out[26]:





(2094754, 9)



In [28]:

    
flitsmeister["SessionID"].nunique()









    Out[28]:





4617



In [31]:

    
plt.hist(flitsmeister["Speed"], bins=35)









    Out[31]:





(array([  1.83640000e+05,   1.19281000e+05,   1.14670000e+05,
          1.01321000e+05,   9.55420000e+04,   1.05778000e+05,
          8.79620000e+04,   8.17410000e+04,   7.74440000e+04,
          8.09580000e+04,   1.20480000e+05,   1.31225000e+05,
          1.50358000e+05,   1.89395000e+05,   1.88347000e+05,
          1.43809000e+05,   7.21450000e+04,   3.29810000e+04,
          1.24630000e+04,   3.30700000e+03,   8.80000000e+02,
          3.12000000e+02,   1.44000000e+02,   6.20000000e+01,
          3.70000000e+01,   2.10000000e+01,   6.40000000e+01,
          9.00000000e+01,   1.06000000e+02,   4.80000000e+01,
          6.90000000e+01,   5.30000000e+01,   2.00000000e+01,
          0.00000000e+00,   1.00000000e+00]),
 array([   0. ,    7.2,   14.4,   21.6,   28.8,   36. ,   43.2,   50.4,
          57.6,   64.8,   72. ,   79.2,   86.4,   93.6,  100.8,  108. ,
         115.2,  122.4,  129.6,  136.8,  144. ,  151.2,  158.4,  165.6,
         172.8,  180. ,  187.2,  194.4,  201.6,  208.8,  216. ,  223.2,
         230.4,  237.6,  244.8,  252. ]),
 <a list of 35 Patch objects>)



In [41]:

    
flitsmeister['time'] = pd.to_datetime(flitsmeister['created_at'])



In [64]:

    
flitsmeister[np.array(flitsmeister['time'] <= datetime.datetime(2017, 5, 22, 17, 8, 3))].head()









    Out[64]:







  
    
      
      RowID
      SessionID
      Latitude
      Longitude
      Bearing
      Speed
      created_at
      geohash
      SessionCount
      time
    
  
  
    
      0
      456393953
      FC847645-41FA-4C24-AB47-134042D9C1FB
      51.864525
      4.526764
      284
      46
      Mon May 22 2017 14:58:03 GMT+0200 (CEST)
      u15pp18
      NaN
      2017-05-22 16:58:03
    
    
      1
      456393954
      FC847645-41FA-4C24-AB47-134042D9C1FB
      51.864556
      4.526617
      286
      39
      Mon May 22 2017 14:58:03 GMT+0200 (CEST)
      u15pp18
      NaN
      2017-05-22 16:58:03
    
    
      2
      456393955
      FC847645-41FA-4C24-AB47-134042D9C1FB
      51.864584
      4.526479
      287
      37
      Mon May 22 2017 14:58:03 GMT+0200 (CEST)
      u15pp18
      NaN
      2017-05-22 16:58:03
    
    
      3
      456393956
      FC847645-41FA-4C24-AB47-134042D9C1FB
      51.864581
      4.526385
      285
      28
      Mon May 22 2017 14:58:03 GMT+0200 (CEST)
      u15pp18
      NaN
      2017-05-22 16:58:03
    
    
      4
      456393957
      FC847645-41FA-4C24-AB47-134042D9C1FB
      51.864601
      4.526271
      286
      28
      Mon May 22 2017 14:58:03 GMT+0200 (CEST)
      u15pncx
      NaN
      2017-05-22 16:58:03



In [29]:

    
incident = pd.read_csv("../incident_with_xy.csv")
incident.columns = ['Unnamed: 0', 'V1', 'date', 'report_time', 'gemelde.locatie', 'y', 'x']
incident['publicatedOn'] = pd.to_datetime(incident['date'] + " " +incident['report_time'])



In [33]:

    
plt.plot(incident["y"], incident["x"], ".")
plt.show()



In [71]:

    
plt.plot(waze['x'], waze['y'], ".")
plt.show()



In [35]:

    
waze['subtype'].unique()









    Out[35]:





array(['ROAD_CLOSED_EVENT', 'HAZARD_ON_ROAD_CONSTRUCTION',
       'HAZARD_ON_SHOULDER_CAR_STOPPED', nan, 'POLICE_VISIBLE',
       'ACCIDENT_MINOR', 'HAZARD_ON_ROAD_CAR_STOPPED',
       'HAZARD_ON_ROAD_LANE_CLOSED', 'ROAD_CLOSED_HAZARD', 'POLICE_HIDING',
       'HAZARD_ON_ROAD_OBJECT', 'HAZARD_ON_SHOULDER_ANIMALS',
       'ACCIDENT_MAJOR', 'HAZARD_ON_SHOULDER', 'HAZARD_ON_ROAD_POT_HOLE',
       'HAZARD_WEATHER', 'HAZARD_ON_SHOULDER_MISSING_SIGN',
       'HAZARD_ON_ROAD_ROAD_KILL', 'HAZARD_ON_ROAD',
       'ROAD_CLOSED_CONSTRUCTION', 'HAZARD_ON_ROAD_ICE'], dtype=object)



In [44]:

    
waze['publicatedOn'].min()









    Out[44]:





Timestamp('2017-05-21 00:08:55')



In [320]:

    
incident.head()









    Out[320]:







  
    
      
      Unnamed: 0
      V1
      date
      report_time
      gemelde.locatie
      y
      x
      publicated_on
      publicatedOn
    
  
  
    
      0
      1
      0
      2017-05-23
      09:30:12.999000
      27/37,9
      51.855369
      4.954711
      2017-05-23 09:30:12.999
      2017-05-23 09:30:12.999
    
    
      1
      2
      1
      2017-05-24
      07:49:54.999000
      15/95,8
      51.846133
      4.950630
      2017-05-24 07:49:54.999
      2017-05-24 07:49:54.999
    
    
      2
      3
      2
      2017-05-24
      10:08:08.999000
      15/102,5
      51.839749
      5.047208
      2017-05-24 10:08:08.999
      2017-05-24 10:08:08.999
    
    
      3
      4
      3
      2017-05-26
      17:44:37
      15/65,3
      51.866261
      4.584767
      2017-05-26 17:44:37.000
      2017-05-26 17:44:37.000
    
    
      4
      5
      4
      2017-05-26
      20:26:48.999000
      15/79,1
      51.837240
      4.721092
      2017-05-26 20:26:48.999
      2017-05-26 20:26:48.999



In [51]:

    
pd.to_datetime(incident['date']).max()









    Out[51]:





Timestamp('2017-06-04 00:00:00')



In [72]:

    
print(waze["x"].min(), waze["y"].max())









    



4.22436306271 51.955889



In [74]:

    
print(waze["x"].max(), waze["y"].min())









    



4.576501 51.862621



In [75]:

    
diff_x = waze["x"].max() - waze["x"].min()
diff_y = waze["y"].max() - waze["y"].min()



In [76]:

    
diff_x /= 10
diff_y /= 10



In [77]:

    
x_co = [waze["x"].min()+diff_x*i for i in range(11)]



In [78]:

    
y_co = [waze["y"].max()-diff_y*i for i in range(11)]



In [79]:

    
counter = 1
dict_coord = {}
for y in range(1,10):
    for x in range(1, 10):
        dict_coord[x, y] = counter
        counter +=1



In [80]:

    
def grider(row):
    for i, x in enumerate(x_co[:-1]):
        if x <= row['x'] <= x_co[i+1]:
            x_op = i

    for i, y in enumerate(y_co[:-1]):
        if y >= row['y'] >= y_co[i+1]:
            y_op = i

    try:
        num = dict_coord[(x_op, y_op)]
    except:
        num = None
    return num



In [331]:

    
data_train = pd.DataFrame()
for index, row in waze.iterrows():
    if grider(row) != None:
            data_train = data_train.append(pd.DataFrame([1, grider(row), row['publicatedOn']]).T)



In [334]:

    
data_train.to_csv("../feature_for_grid.csv")



In [333]:

    
data_train.head()









    Out[333]:







  
    
      
      0
      1
      2
    
  
  
    
      0
      1
      52
      2017-05-21 00:08:55
    
    
      0
      1
      22
      2017-05-21 00:42:35
    
    
      0
      1
      69
      2017-05-21 01:25:09
    
    
      0
      1
      78
      2017-05-21 01:25:47
    
    
      0
      1
      23
      2017-05-21 04:16:59



In [208]:

    
temp = pd.read_csv("../flitsmeister week 21 22/2017-05-22_00.csv")



In [ ]:

    
#func = {'Speed':['mean', 'max', "std", "min"], 'SessionID':['nunique']}
features = pd.DataFrame()
for file in os.listdir("../flitsmeister week 21 22/"):
    if file.endswith(".gz"):
        with gzip.open("../flitsmeister week 21 22/" + file, "r") as f:
            try:
                temp = pd.read_csv(f)
                temp.columns = ['RowID', 'SessionID', 'y', 'x', 'Bearing', 'Speed',
           'created_at', 'geohash', 'SessionCount']
                #temp["grid"] = temp.apply(lambda x: grider(x), axis=1)
                #to_concat = temp.groupby(["grid"])["Speed"].agg(func)
                #to_concat = pd.DataFrame(to_concat.reset_index().as_matrix(), columns=['grid', "speed_mean", "speed_max", "speed_std", "speed_min", "count" ])
                func = {'Speed':['mean', 'max', "std", "min"], 'y':['mean'], "x":['mean']}
                temp_agg = temp.groupby(["SessionID"]).agg(func)
                temp_agg = pd.DataFrame(temp_agg.as_matrix(), columns=['speed_mean', 'speed_max', "speed_std", "speed_min",
                                                           "y", "x"])
                temp_agg["grid"] = -1
                for ix, row in temp_agg.iterrows():
                    temp_agg.loc[ix, "grid"] = grider(row)
                temp_agg["date"] = file.split(".")[0]
                print(file.split(".")[0])
                features = features.append(temp_agg)
                print("Finished with " + file)
            except:
                continue



In [367]:

    
#features.to_csv("../features.csv")



In [5]:

    
features = pd.read_csv("../features.csv")
features.head(10)









    Out[5]:







  
    
      
      Unnamed: 0
      speed_mean
      speed_max
      speed_std
      speed_min
      y
      x
      grid
      date
    
  
  
    
      0
      0
      107.674931
      146.0
      27.909145
      66.0
      51.923162
      4.504515
      25.0
      2017-05-22_00
    
    
      1
      1
      139.439560
      178.0
      17.145918
      115.0
      51.876314
      4.572905
      72.0
      2017-05-22_00
    
    
      2
      2
      119.498054
      141.0
      20.024423
      19.0
      51.885456
      4.564509
      63.0
      2017-05-22_00
    
    
      3
      3
      101.987520
      118.0
      9.905010
      0.0
      51.883272
      4.425730
      59.0
      2017-05-22_00
    
    
      4
      4
      63.180233
      82.0
      18.516851
      17.0
      51.937475
      4.431121
      5.0
      2017-05-22_00
    
    
      5
      5
      121.197107
      134.0
      12.097694
      82.0
      51.878232
      4.466523
      69.0
      2017-05-22_00
    
    
      6
      6
      75.811111
      83.0
      3.934561
      66.0
      51.934747
      4.449704
      15.0
      2017-05-22_00
    
    
      7
      7
      137.302885
      151.0
      9.648889
      108.0
      51.913590
      4.543788
      36.0
      2017-05-22_00
    
    
      8
      8
      75.790123
      87.0
      3.700650
      67.0
      51.939301
      4.429959
      5.0
      2017-05-22_00
    
    
      9
      9
      114.485591
      203.0
      26.753035
      59.0
      51.921418
      4.508817
      26.0
      2017-05-22_00



In [7]:

    
times = pd.to_datetime(features["date"], format="%Y-%m-%d_%H")



In [18]:

    
times_hour = times.dt.strftime("%Y-%m-%d %H")



In [19]:

    
features["time"] = times_hour



In [126]:

    
features.head()









    Out[126]:







  
    
      
      Unnamed: 0
      speed_mean
      speed_max
      speed_std
      speed_min
      y
      x
      grid
      date
      time
    
  
  
    
      0
      0
      107.674931
      146.0
      27.909145
      66.0
      51.923162
      4.504515
      25.0
      2017-05-22_00
      2017-05-22 00
    
    
      1
      1
      139.439560
      178.0
      17.145918
      115.0
      51.876314
      4.572905
      72.0
      2017-05-22_00
      2017-05-22 00
    
    
      2
      2
      119.498054
      141.0
      20.024423
      19.0
      51.885456
      4.564509
      63.0
      2017-05-22_00
      2017-05-22 00
    
    
      3
      3
      101.987520
      118.0
      9.905010
      0.0
      51.883272
      4.425730
      59.0
      2017-05-22_00
      2017-05-22 00
    
    
      4
      4
      63.180233
      82.0
      18.516851
      17.0
      51.937475
      4.431121
      5.0
      2017-05-22_00
      2017-05-22 00



In [134]:

    
features.groupby(["time", "grid"]).agg(func).reset_index().head()









    Out[134]:







  
    
      
      time
      grid
      speed_mean
      speed_max
      speed_std
      speed_min
      date
    
    
      
      
      
      mean
      mean
      mean
      mean
      count
    
  
  
    
      0
      2017-05-22 00
      3.0
      98.000000
      145.000000
      62.707940
      0.000000
      1
    
    
      1
      2017-05-22 00
      5.0
      74.076514
      93.416667
      9.276480
      52.833333
      12
    
    
      2
      2017-05-22 00
      6.0
      76.518359
      97.571429
      15.152162
      36.428571
      7
    
    
      3
      2017-05-22 00
      7.0
      80.315252
      117.916667
      21.320476
      31.166667
      12
    
    
      4
      2017-05-22 00
      8.0
      93.493739
      110.285714
      14.092003
      46.857143
      7



In [137]:

    
func = {'speed_mean':['mean'], 'speed_max':['mean'], 'speed_std':['mean'], 'speed_min':['mean'], 'date':['count']}
features = pd.DataFrame(features.groupby(["time", "grid"]).agg(func).reset_index().as_matrix(), columns = ["time", 
                                                                                               "grid", 
                                                                                               "speed_mean",
                                                                                               "speed_max",
                                                                                               "speed_std", 
                                                                                               "speed_min",
                                                                                               "traf_flow"])



In [32]:

    
weather = pd.read_csv("../weather.csv", encoding="latin-1")
weather["time"] = pd.to_datetime(weather["Time"])



In [33]:

    
weather["time"] = weather["time"].dt.strftime("%Y-%m-%d %H")



In [37]:

    
del weather["Time"]



In [42]:

    
weather = weather.groupby(["time"]).first().reset_index()



In [44]:

    
merged_feat_weat = pd.merge(weather, features)



In [45]:

    
merged_feat_weat.head()









    Out[45]:







  
    
      
      time
      Temp
      Weather
      Wind
      Humidity
      Barometer
      Visibility
      Unnamed: 0
      speed_mean
      speed_max
      speed_std
      speed_min
      y
      x
      grid
      date
    
  
  
    
      0
      2017-05-22 00
      12
      Cool.
      9 km/h
      88%
      1022 mbar
      NaN
      0
      107.674931
      146.0
      27.909145
      66.0
      51.923162
      4.504515
      25.0
      2017-05-22_00
    
    
      1
      2017-05-22 00
      12
      Cool.
      9 km/h
      88%
      1022 mbar
      NaN
      1
      139.439560
      178.0
      17.145918
      115.0
      51.876314
      4.572905
      72.0
      2017-05-22_00
    
    
      2
      2017-05-22 00
      12
      Cool.
      9 km/h
      88%
      1022 mbar
      NaN
      2
      119.498054
      141.0
      20.024423
      19.0
      51.885456
      4.564509
      63.0
      2017-05-22_00
    
    
      3
      2017-05-22 00
      12
      Cool.
      9 km/h
      88%
      1022 mbar
      NaN
      3
      101.987520
      118.0
      9.905010
      0.0
      51.883272
      4.425730
      59.0
      2017-05-22_00
    
    
      4
      2017-05-22 00
      12
      Cool.
      9 km/h
      88%
      1022 mbar
      NaN
      4
      63.180233
      82.0
      18.516851
      17.0
      51.937475
      4.431121
      5.0
      2017-05-22_00



In [46]:

    
incidents_xy = pd.read_csv("../incident_with_xy.csv")



In [49]:

    
incidents_xy['time'] = incidents_xy["date"] + " " + incidents_xy["report_time"]



In [54]:

    
incidents_xy['time'] = pd.to_datetime(incidents_xy["time"]).dt.strftime("%Y-%m-%d %H")



In [88]:

    
incidents_xy.columns = ['Unnamed: 0', 'V1', 'date', 'report_time', 'gemelde.locatie', 'y', 'x',
       'time', 'grid']



In [89]:

    
incidents_xy["grid"] = -1
for ix, row in incidents_xy.iterrows():
    incidents_xy.loc[ix, "grid"] = grider(row)



In [90]:

    
incidents_xy.head()









    Out[90]:







  
    
      
      Unnamed: 0
      V1
      date
      report_time
      gemelde.locatie
      y
      x
      time
      grid
    
  
  
    
      0
      1
      0
      2017-05-23
      09:30:12.999000
      27/37,9
      51.855369
      4.954711
      2017-05-23 09
      NaN
    
    
      1
      2
      1
      2017-05-24
      07:49:54.999000
      15/95,8
      51.846133
      4.950630
      2017-05-24 07
      NaN
    
    
      2
      3
      2
      2017-05-24
      10:08:08.999000
      15/102,5
      51.839749
      5.047208
      2017-05-24 10
      NaN
    
    
      3
      4
      3
      2017-05-26
      17:44:37
      15/65,3
      51.866261
      4.584767
      2017-05-26 17
      NaN
    
    
      4
      5
      4
      2017-05-26
      20:26:48.999000
      15/79,1
      51.837240
      4.721092
      2017-05-26 20
      NaN



In [93]:

    
incidents_xy = incidents_xy.dropna()



In [97]:

    
incidents_xy.head()









    Out[97]:







  
    
      
      Unnamed: 0
      V1
      date
      report_time
      gemelde.locatie
      y
      x
      time
      grid
    
  
  
    
      9
      10
      9
      2017-06-01
      15:02:48
      15/50,2
      51.873703
      4.381252
      2017-06-01 15
      67.0
    
    
      15
      16
      15
      2017-06-02
      22:28:28
      13/18,0
      51.940589
      4.427824
      2017-06-02 22
      5.0
    
    
      17
      18
      0
      2017-05-22
      06:37:16
      16/L/20,5
      51.907173
      4.540709
      2017-05-22 06
      44.0
    
    
      18
      19
      1
      2017-05-22
      06:49:51
      15/R/60,8
      51.863643
      4.531110
      2017-05-22 06
      80.0
    
    
      20
      21
      3
      2017-05-22
      07:23:40
      20/L/29,6
      51.934234
      4.446092
      2017-05-22 07
      15.0



In [99]:

    
incidents_small = incidents_xy[["time", "grid"]]
incidents_small["output"] = 1









    



C:\Users\Zoltan\Anaconda3\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [114]:

    
incidents_small.head()









    Out[114]:







  
    
      
      time
      grid
      output
    
  
  
    
      9
      2017-06-01 15
      67.0
      1
    
    
      15
      2017-06-02 22
      5.0
      1
    
    
      17
      2017-05-22 06
      44.0
      1
    
    
      18
      2017-05-22 06
      80.0
      1
    
    
      20
      2017-05-22 07
      15.0
      1



In [181]:

    
data_all_merged = pd.merge(features, incidents_small, on=['time', "grid"], how="outer")



In [182]:

    
data_all_merged["output"] = data_all_merged["output"].fillna(value=0)



In [183]:

    
data_all_merged = pd.merge(data_all_merged, weather)



In [184]:

    
data_all_merged.head()









    Out[184]:







  
    
      
      time
      grid
      speed_mean
      speed_max
      speed_std
      speed_min
      traf_flow
      output
      Temp
      Weather
      Wind
      Humidity
      Barometer
      Visibility
    
  
  
    
      0
      2017-05-22 00
      3.0
      98
      145
      62.7079
      0
      1
      0.0
      12
      Cool.
      9 km/h
      88%
      1022 mbar
      NaN
    
    
      1
      2017-05-22 00
      5.0
      74.0765
      93.4167
      9.27648
      52.8333
      12
      0.0
      12
      Cool.
      9 km/h
      88%
      1022 mbar
      NaN
    
    
      2
      2017-05-22 00
      6.0
      76.5184
      97.5714
      15.1522
      36.4286
      7
      0.0
      12
      Cool.
      9 km/h
      88%
      1022 mbar
      NaN
    
    
      3
      2017-05-22 00
      7.0
      80.3153
      117.917
      21.3205
      31.1667
      12
      0.0
      12
      Cool.
      9 km/h
      88%
      1022 mbar
      NaN
    
    
      4
      2017-05-22 00
      8.0
      93.4937
      110.286
      14.092
      46.8571
      7
      0.0
      12
      Cool.
      9 km/h
      88%
      1022 mbar
      NaN



In [185]:

    
data_all_merged["Visibility"] = data_all_merged["Visibility"].fillna("2 km")
data_all_merged["Visibility"].value_counts(dropna=False)









    Out[185]:





2 km    16139
9 km      457
7 km      218
6 km      217
0 km      131
1 km      127
8 km       62
5 km       60
3 km       47
Name: Visibility, dtype: int64



In [186]:

    
data_all_merged["Wind"] = data_all_merged["Wind"].str.replace("km/h", "")
data_all_merged["Humidity"] = data_all_merged["Humidity"].str.replace("%", "")
data_all_merged["Visibility"] = data_all_merged["Visibility"].str.replace("km", "")



In [187]:

    
data_all_merged["hour"] = pd.to_datetime(data_all_merged["time"]).dt.hour
data_all_merged["day"] = pd.to_datetime(data_all_merged["time"]).dt.weekday



In [189]:

    
times = pd.to_datetime(data_all_merged["time"])



In [190]:

    
data_all_merged["times"] = times



In [203]:

    
data_all_merged.sort_values(["grid", "times"], inplace=True)



In [204]:

    
data_all_merged["output_shifted"] = data_all_merged.groupby(["grid", "times" ])["output"].sum().shift(1).reset_index()["output"]



In [207]:

    
data_all_merged["output_shifted"].describe()









    Out[207]:





count    17443.000000
mean         0.015135
std          0.128945
min          0.000000
25%          0.000000
50%          0.000000
75%          0.000000
max          3.000000
Name: output_shifted, dtype: float64



In [210]:

    
data_all_merged["output_shifted"].loc[np.array(data_all_merged["output_shifted"] >= 1)] = 1









    



C:\Users\Zoltan\Anaconda3\lib\site-packages\pandas\core\indexing.py:179: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)



In [315]:

    
data_all_merged.head()









    Out[315]:







  
    
      
      time
      grid
      speed_mean
      speed_max
      speed_std
      speed_min
      traf_flow
      output
      Temp
      Weather
      Wind
      Humidity
      Barometer
      Visibility
      hour
      day
      times
      output_shifted
    
  
  
    
      44
      2017-05-22 01
      3.0
      108.021
      130
      19.1822
      64
      1
      0.0
      11
      Cool.
      7
      94
      1022 mbar
      2
      1
      0
      2017-05-22 01:00:00
      0.0
    
    
      179
      2017-05-22 05
      3.0
      105.15
      124
      14.6055
      64
      1
      0.0
      9
      Cool.
      6
      94
      1020 mbar
      2
      5
      0
      2017-05-22 05:00:00
      0.0
    
    
      228
      2017-05-22 06
      3.0
      101.368
      118.125
      14.2641
      71.125
      8
      0.0
      9
      Passing clouds.
      6
      100
      1020 mbar
      2
      6
      0
      2017-05-22 06:00:00
      0.0
    
    
      287
      2017-05-22 07
      3.0
      93.9023
      115.4
      15.4173
      55.7
      10
      0.0
      13
      Cool.
      2
      82
      1020 mbar
      2
      7
      0
      2017-05-22 07:00:00
      0.0
    
    
      348
      2017-05-22 08
      3.0
      88.8137
      117.2
      19.8105
      45.9
      10
      0.0
      15
      Cool.
      9
      68
      1020 mbar
      2
      8
      0
      2017-05-22 08:00:00
      0.0



In [364]:

    
#dummies_train = pd.concat([pd.get_dummies(train_feat_cat[col]) for col in train_feat_cat.columns], axis=1, keys=train_feat_cat.columns).as_matrix()
#dummies_test  = pd.concat([pd.get_dummies(test_feat_cat[col]) for col in test_feat_cat.columns], axis=1, keys=train_feat_cat.columns).as_matrix()
dummies = pd.concat([pd.get_dummies(feat_cat[col]) for col in feat_cat.columns], axis=1, keys=feat_cat.columns).as_matrix()



In [367]:

    
data_all_merged = data_all_merged[data_all_merged.Wind.str.contains("No") == False]
data_all_merged = data_all_merged.dropna()



In [370]:

    
features_to_model_numeric = ["Temp", "speed_mean", "speed_max", "speed_std",
                             "speed_min", "traf_flow", "Wind", "Humidity", "Visibility" ]
features_to_model_categoric = ["grid", "hour", "day", "Weather"]
features_to_model_label = ["Weather"]



In [371]:

    
feat_cat = data_all_merged[features_to_model_categoric]
feat_num = data_all_merged[features_to_model_numeric]
train_feat_num = train_data[features_to_model_numeric]
train_feat_cat = train_data[features_to_model_categoric]
train_feat_lab = train_data[features_to_model_label]
y_train = train_data.output_shifted

test_feat_num = test_data[features_to_model_numeric]
test_feat_cat = test_data[features_to_model_categoric]
test_feat_lab = test_data[features_to_model_label]
y_test = test_data.output_shifted



In [400]:

    
rob_scale = RobustScaler()
feat_num = rob_scale.fit_transform(feat_num)



In [401]:

    
all_data = pd.concat([times, pd.DataFrame(feat_num), pd.DataFrame(dummies), pd.DataFrame(data_all_merged['output_shifted'])], ignore_index=True, axis=1)



In [402]:

    
all_data.dropna(inplace=True)



In [403]:

    
all_data.head()









    Out[403]:







  
    
      
      0
      1
      2
      3
      4
      5
      6
      7
      8
      9
      ...
      126
      127
      128
      129
      130
      131
      132
      133
      134
      135
    
  
  
    
      1
      2017-05-22
      -1.666667
      0.482282
      0.168314
      -0.481384
      0.759373
      -0.333333
      -0.5
      1.04
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      2
      2017-05-22
      -1.666667
      0.313752
      -0.248041
      -0.518721
      0.974311
      -0.121212
      -0.5
      1.28
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      3
      2017-05-22
      -1.000000
      -0.018871
      -0.441159
      -0.392590
      0.508988
      -0.060606
      -0.9
      0.56
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      4
      2017-05-22
      -0.666667
      -0.245602
      -0.313595
      0.087910
      0.213352
      -0.060606
      -0.2
      0.00
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      5
      2017-05-22
      -0.166667
      0.041523
      0.117693
      0.610843
      0.061225
      -0.151515
      -0.4
      -0.32
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
  

5 rows × 136 columns



In [414]:

    
train_data = all_data.loc[~(pd.to_datetime(all_data[0]) >= "2017-06-02")]
test_data = all_data.loc[pd.to_datetime(all_data[0]) <= "2017-06-02"]



In [415]:

    
x_train = train_data.iloc[:, 1:-1]
x_test = test_data.iloc[:, 1:-1]
y_train = train_data.iloc[:, -1]
y_test = test_data.iloc[:, -1]



In [416]:

    
print(x_train.shape)
print(y_train.shape)









    



(14618, 134)
(14618,)



In [417]:

    
rf = RandomForestClassifier()



In [418]:

    
rf.fit(X=x_train, y = y_train)









    Out[418]:





RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False)



In [419]:

    
rf.score(X=x_test, y = y_test)









    Out[419]:





0.99679552737437782



In [423]:

    
y_test.sum() / y_test.shape[0]









    Out[423]:





0.011727006204404445



In [425]:

    
rf.predict_proba(X=x_test)









    Out[425]:





array([[ 1. ,  0. ],
       [ 0.9,  0.1],
       [ 1. ,  0. ],
       ..., 
       [ 1. ,  0. ],
       [ 1. ,  0. ],
       [ 1. ,  0. ]])



In [422]:

    
np.where(y == 1)









    Out[422]:





(array([  111,   138,   235,   251,   264,   311,   324,   336,   359,
          380,   510,   540,   570,   571,   598,   639,   645,   885,
          923,   954,  1153,  1304,  1309,  1317,  1666,  1699,  1745,
         1807,  1868,  1973,  2072,  2186,  2218,  2239,  2252,  2276,
         2319,  2324,  2327,  2345,  2573,  2589,  2620,  2653,  2754,
         2927,  3042,  3061,  3284,  3301,  3329,  3416,  3477,  3649,
         3665,  3810,  3839,  3853,  4055,  4070,  4155,  4183,  4236,
         4275,  4282,  4305,  4316,  4541,  4701,  4869,  4907,  4992,
         5100,  5215,  5336,  5435,  5463,  5576,  5586,  5588,  5635,
         5710,  5766,  5886,  5891,  5944,  6043,  6045,  6076,  6096,
         6124,  6157,  6248,  6259,  6372,  6405,  6445,  6473,  6492,
         6542,  6648,  6719,  6785,  6883,  6919,  6931,  6964,  7045,
         7144,  7304,  7341,  7382,  7400,  7610,  7635,  7659,  7673,
         7677,  7704,  7736,  7741,  7777,  7899,  7901,  7905,  7918,
         8188,  8196,  8205,  8334,  8414,  8546,  8613,  8634,  8636,
         8702,  8753,  8878,  8880,  8901,  8907,  8910,  8982,  9030,
         9115,  9162,  9192,  9201,  9248,  9452,  9460,  9512,  9610,
         9621,  9670,  9732,  9769,  9895,  9899,  9918,  9944,  9959,
         9987, 10002, 10232, 10281, 10320, 10399, 10467, 10971, 10985,
        11027, 11183, 11211, 11277, 11281, 11591, 11996, 12097, 12130,
        12156, 12166, 12415, 12485, 12576, 12604, 12614, 12726, 12759,
        12810, 12861, 12907, 12966, 13049, 13068, 13086, 13103, 13222,
        13243, 13294, 13512, 13561, 13569, 13604, 13812, 13833, 13894,
        13923, 14023, 14133, 14160, 14171, 14341, 14378, 14389, 14424,
        14624, 14658, 14670, 14697, 14925, 15070, 15174, 15235, 15345,
        15513, 15577, 15737, 15875, 15890, 16026, 16072, 16153, 16155,
        16258, 16383, 16472, 16480, 16529, 16564, 16673, 16729, 16901,
        17004, 17010, 17302, 17309], dtype=int64),)



In [429]:

    
x_train









    Out[429]:







  
    
      
      1
      2
      3
      4
      5
      6
      7
      8
      9
      10
      ...
      125
      126
      127
      128
      129
      130
      131
      132
      133
      134
    
  
  
    
      1
      -1.666667
      0.482282
      0.168314
      -0.481384
      0.759373
      -0.333333
      -0.5
      1.04
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      2
      -1.666667
      0.313752
      -0.248041
      -0.518721
      0.974311
      -0.121212
      -0.5
      1.28
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
    
    
      3
      -1.000000
      -0.018871
      -0.441159
      -0.392590
      0.508988
      -0.060606
      -0.9
      0.56
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      4
      -0.666667
      -0.245602
      -0.313595
      0.087910
      0.213352
      -0.060606
      -0.2
      0.00
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      5
      -0.166667
      0.041523
      0.117693
      0.610843
      0.061225
      -0.151515
      -0.4
      -0.32
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      6
      0.166667
      1.339987
      0.664396
      -1.348249
      1.966047
      -0.333333
      -0.2
      -0.60
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      7
      0.333333
      0.873521
      0.646679
      -1.039961
      1.641753
      -0.242424
      0.2
      -0.88
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      8
      0.500000
      0.191321
      -1.071892
      -1.657638
      1.558794
      -0.303030
      0.6
      -0.84
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      9
      0.500000
      0.344550
      0.640773
      -0.878810
      1.151542
      -0.272727
      0.4
      -1.08
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      10
      0.833333
      0.225793
      0.008859
      -0.712070
      1.023333
      -0.242424
      0.2
      -1.16
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      11
      0.833333
      0.139226
      -0.693924
      -0.869712
      0.869984
      -0.181818
      0.2
      -1.16
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      12
      0.833333
      0.338001
      -0.162408
      -0.601957
      0.856577
      -0.090909
      0.2
      -1.16
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      13
      0.833333
      0.226974
      -0.049745
      -0.370417
      0.722244
      0.030303
      0.2
      -1.28
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      14
      0.666667
      -0.187041
      -0.434072
      -0.712001
      1.005017
      0.060606
      -0.2
      -0.84
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      15
      0.833333
      0.685817
      0.699830
      -0.511093
      1.040930
      -0.181818
      -0.5
      -1.16
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      16
      0.333333
      0.250703
      -0.115162
      -0.469498
      0.953302
      -0.151515
      -0.4
      -0.72
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      17
      0.166667
      1.161532
      1.160478
      -0.515405
      1.513544
      -0.303030
      -0.5
      -0.48
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      18
      -0.333333
      2.436924
      3.782626
      -0.524577
      2.569384
      -0.333333
      -0.7
      0.20
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      19
      -0.333333
      0.266766
      -0.256900
      0.114260
      -0.296467
      -0.333333
      -0.5
      -0.16
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      20
      -0.833333
      3.680763
      4.633053
      -1.663490
      3.957059
      -0.333333
      -0.7
      0.56
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      21
      -1.000000
      0.651751
      -0.186031
      -0.476741
      0.668872
      -0.333333
      -0.2
      0.56
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
    
    
      22
      -1.000000
      0.027192
      -0.670301
      -0.548392
      0.246536
      -0.181818
      -0.2
      0.56
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      23
      -0.666667
      0.271654
      -0.140932
      -0.519964
      0.707266
      -0.030303
      0.0
      0.56
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      24
      -0.333333
      -0.278698
      -0.239182
      0.004930
      0.476558
      0.121212
      0.0
      0.20
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      25
      -0.333333
      -1.033741
      -0.752982
      0.783758
      -0.458904
      0.030303
      0.2
      0.36
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      26
      -0.166667
      0.321321
      0.522658
      0.081901
      0.397370
      -0.272727
      0.2
      0.00
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      27
      -0.166667
      0.075694
      -0.280523
      -0.296385
      0.397370
      -0.272727
      0.9
      0.20
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      28
      -0.333333
      -0.016141
      -0.356116
      -0.248716
      0.566305
      -0.060606
      0.6
      0.20
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      29
      -0.166667
      0.230678
      -0.631492
      -0.994696
      0.948993
      -0.151515
      0.6
      0.00
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      30
      0.000000
      0.247990
      -0.115162
      -0.532081
      0.319798
      -0.151515
      0.8
      -0.32
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      14713
      -1.000000
      1.235104
      1.433829
      -0.559588
      1.263590
      -0.151515
      -0.2
      0.56
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
    
    
      14714
      -1.000000
      0.348043
      0.071550
      -0.741557
      0.995486
      1.212121
      -0.2
      0.56
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      14715
      -0.666667
      0.060236
      -0.141272
      -0.382105
      0.641563
      2.515152
      0.0
      0.56
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      14716
      -0.333333
      -0.639377
      -0.347546
      0.542771
      -0.230755
      3.545455
      0.0
      0.20
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      14717
      -0.333333
      0.124107
      0.006061
      -0.500616
      0.834790
      1.939394
      0.2
      0.36
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      14718
      -0.166667
      0.431231
      -0.142259
      -1.048929
      1.155091
      0.666667
      0.2
      0.00
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      14719
      -0.166667
      0.452053
      -0.149639
      -1.095060
      1.267317
      0.757576
      0.9
      0.20
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      14720
      -0.333333
      0.345246
      -0.033390
      -0.767642
      1.023333
      1.212121
      0.6
      0.20
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      14721
      -0.166667
      0.446474
      -0.075790
      -0.892068
      1.130760
      1.000000
      0.6
      0.00
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      14722
      0.000000
      0.380425
      -0.024668
      -0.912402
      1.087495
      1.606061
      0.8
      -0.32
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
    
    
      14723
      -0.166667
      0.351441
      -0.045455
      -0.934052
      1.129782
      1.484848
      0.8
      -0.16
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      14724
      -0.166667
      -0.243337
      -0.003662
      0.047228
      0.197465
      1.909091
      0.9
      -0.32
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      14725
      -0.166667
      -1.247287
      -0.480317
      1.413601
      -0.725450
      3.212121
      0.9
      -0.32
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      14726
      -0.166667
      -0.050909
      -0.141919
      -0.076959
      0.323800
      2.606061
      0.8
      -0.32
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      14727
      -0.666667
      0.636859
      0.215989
      -0.991920
      1.317734
      1.303030
      0.0
      0.36
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
    
    
      14728
      -0.666667
      0.852902
      0.706273
      -0.914066
      1.364081
      0.303030
      -0.2
      0.36
      0.0
      0.0
      ...
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      14729
      -0.833333
      0.604413
      0.052824
      -1.027006
      1.138134
      0.454545
      -0.7
      0.56
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      14730
      -0.833333
      0.386250
      0.001266
      -0.782044
      0.886504
      0.060606
      -0.9
      0.56
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
    
    
      14731
      -0.833333
      0.921996
      1.295773
      -0.002221
      0.816964
      -0.030303
      -0.9
      0.56
      0.0
      0.0
      ...
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      14732
      -1.000000
      0.522820
      0.451789
      -0.270501
      0.744289
      -0.242424
      -0.9
      0.80
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
    
    
      14733
      -1.166667
      1.271312
      0.770699
      -1.390146
      2.056547
      -0.303030
      -0.4
      0.80
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
    
    
      14734
      -1.166667
      -0.195572
      -0.398637
      0.155663
      0.518038
      -0.333333
      0.0
      1.04
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
    
    
      14735
      -1.000000
      0.407472
      -0.682113
      -1.391854
      1.573878
      -0.303030
      -0.2
      1.04
      5.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      14736
      -1.000000
      0.981943
      0.855742
      -0.629457
      0.877023
      -0.060606
      -0.4
      1.04
      4.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      14737
      -1.166667
      0.474675
      0.294728
      -0.772700
      1.027613
      0.757576
      -0.4
      1.28
      4.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      14738
      -0.666667
      0.429494
      -0.020183
      -1.036811
      1.234889
      2.575758
      -0.2
      1.04
      7.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      14739
      -0.500000
      0.482351
      0.124810
      -0.933299
      1.223225
      2.696970
      -0.4
      0.60
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      14740
      -0.333333
      0.382463
      0.054742
      -0.837344
      1.002254
      2.000000
      -0.4
      0.36
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      14741
      -0.333333
      0.401085
      -0.111787
      -1.030423
      1.263590
      0.909091
      0.0
      0.36
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      14742
      -0.166667
      0.589347
      0.387363
      -0.715347
      1.129602
      0.969697
      0.2
      0.20
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
  

14618 rows × 134 columns



In [430]:

    
all_data.columns









    Out[430]:





RangeIndex(start=0, stop=136, step=1)



In [ ]:

	datum	gemelde locatie	werkelijke locatie	tijdstip melding	tijdstip aankomst
0	2017-05-22	16 L 20.5	16 L 20.68	06:37:16	06:57:27
1	2017-05-22	15 R 60.8	29 L 9.56 s	06:49:51	06:49:26
2	2017-05-22	15 R 53.3 a	15 R 53.27 a	07:18:40	07:30:07
3	2017-05-22	20 L 29.6	20 L 29.69 s	07:23:40	07:36:28
4	2017-05-22	20 L 34.3	20 L 34.25 v	07:24:07	07:29:15

	RowID	SessionID	Latitude	Longitude	Bearing	Speed	created_at	geohash	SessionCount
0	456393953	FC847645-41FA-4C24-AB47-134042D9C1FB	51.864525	4.526764	284	46	Mon May 22 2017 14:58:03 GMT+0200 (CEST)	u15pp18	NaN
1	456393954	FC847645-41FA-4C24-AB47-134042D9C1FB	51.864556	4.526617	286	39	Mon May 22 2017 14:58:03 GMT+0200 (CEST)	u15pp18	NaN
2	456393955	FC847645-41FA-4C24-AB47-134042D9C1FB	51.864584	4.526479	287	37	Mon May 22 2017 14:58:03 GMT+0200 (CEST)	u15pp18	NaN
3	456393956	FC847645-41FA-4C24-AB47-134042D9C1FB	51.864581	4.526385	285	28	Mon May 22 2017 14:58:03 GMT+0200 (CEST)	u15pp18	NaN
4	456393957	FC847645-41FA-4C24-AB47-134042D9C1FB	51.864601	4.526271	286	28	Mon May 22 2017 14:58:03 GMT+0200 (CEST)	u15pncx	NaN

	Unnamed: 0	V1	date	report_time	gemelde.locatie	y	x	publicated_on	publicatedOn
0	1	0	2017-05-23	09:30:12.999000	27/37,9	51.855369	4.954711	2017-05-23 09:30:12.999	2017-05-23 09:30:12.999
1	2	1	2017-05-24	07:49:54.999000	15/95,8	51.846133	4.950630	2017-05-24 07:49:54.999	2017-05-24 07:49:54.999
2	3	2	2017-05-24	10:08:08.999000	15/102,5	51.839749	5.047208	2017-05-24 10:08:08.999	2017-05-24 10:08:08.999
3	4	3	2017-05-26	17:44:37	15/65,3	51.866261	4.584767	2017-05-26 17:44:37.000	2017-05-26 17:44:37.000
4	5	4	2017-05-26	20:26:48.999000	15/79,1	51.837240	4.721092	2017-05-26 20:26:48.999	2017-05-26 20:26:48.999

0	1	2
1	52	2017-05-21 00:08:55
1	22	2017-05-21 00:42:35
1	69	2017-05-21 01:25:09
1	78	2017-05-21 01:25:47
1	23	2017-05-21 04:16:59

	Unnamed: 0	speed_mean	speed_max	speed_std	speed_min	y	x	grid	date
0	0	107.674931	146.0	27.909145	66.0	51.923162	4.504515	25.0	2017-05-22_00
1	1	139.439560	178.0	17.145918	115.0	51.876314	4.572905	72.0	2017-05-22_00
2	2	119.498054	141.0	20.024423	19.0	51.885456	4.564509	63.0	2017-05-22_00
3	3	101.987520	118.0	9.905010	0.0	51.883272	4.425730	59.0	2017-05-22_00
4	4	63.180233	82.0	18.516851	17.0	51.937475	4.431121	5.0	2017-05-22_00
5	5	121.197107	134.0	12.097694	82.0	51.878232	4.466523	69.0	2017-05-22_00
6	6	75.811111	83.0	3.934561	66.0	51.934747	4.449704	15.0	2017-05-22_00
7	7	137.302885	151.0	9.648889	108.0	51.913590	4.543788	36.0	2017-05-22_00
8	8	75.790123	87.0	3.700650	67.0	51.939301	4.429959	5.0	2017-05-22_00
9	9	114.485591	203.0	26.753035	59.0	51.921418	4.508817	26.0	2017-05-22_00

	time	grid	speed_mean	speed_max	speed_std	speed_min	date
			mean	mean	mean	mean	count
0	2017-05-22 00	3.0	98.000000	145.000000	62.707940	0.000000	1
1	2017-05-22 00	5.0	74.076514	93.416667	9.276480	52.833333	12
2	2017-05-22 00	6.0	76.518359	97.571429	15.152162	36.428571	7
3	2017-05-22 00	7.0	80.315252	117.916667	21.320476	31.166667	12
4	2017-05-22 00	8.0	93.493739	110.285714	14.092003	46.857143	7

	time	Temp	Weather	Wind	Humidity	Barometer	Visibility	Unnamed: 0	speed_mean	speed_max	speed_std	speed_min	y	x	grid	date
0	2017-05-22 00	12	Cool.	9 km/h	88%	1022 mbar	NaN	0	107.674931	146.0	27.909145	66.0	51.923162	4.504515	25.0	2017-05-22_00
1	2017-05-22 00	12	Cool.	9 km/h	88%	1022 mbar	NaN	1	139.439560	178.0	17.145918	115.0	51.876314	4.572905	72.0	2017-05-22_00
2	2017-05-22 00	12	Cool.	9 km/h	88%	1022 mbar	NaN	2	119.498054	141.0	20.024423	19.0	51.885456	4.564509	63.0	2017-05-22_00
3	2017-05-22 00	12	Cool.	9 km/h	88%	1022 mbar	NaN	3	101.987520	118.0	9.905010	0.0	51.883272	4.425730	59.0	2017-05-22_00
4	2017-05-22 00	12	Cool.	9 km/h	88%	1022 mbar	NaN	4	63.180233	82.0	18.516851	17.0	51.937475	4.431121	5.0	2017-05-22_00

	Unnamed: 0	V1	date	report_time	gemelde.locatie	y	x	time	grid
9	10	9	2017-06-01	15:02:48	15/50,2	51.873703	4.381252	2017-06-01 15	67.0
15	16	15	2017-06-02	22:28:28	13/18,0	51.940589	4.427824	2017-06-02 22	5.0
17	18	0	2017-05-22	06:37:16	16/L/20,5	51.907173	4.540709	2017-05-22 06	44.0
18	19	1	2017-05-22	06:49:51	15/R/60,8	51.863643	4.531110	2017-05-22 06	80.0
20	21	3	2017-05-22	07:23:40	20/L/29,6	51.934234	4.446092	2017-05-22 07	15.0

	time	grid	speed_mean	speed_max	speed_std	speed_min	traf_flow	Temp	Weather	Wind	Humidity	Barometer	Visibility
0	2017-05-22 00	3.0	98	145	62.7079	0	1	12	Cool.	9 km/h	88%	1022 mbar	NaN
1	2017-05-22 00	5.0	74.0765	93.4167	9.27648	52.8333	12	12	Cool.	9 km/h	88%	1022 mbar	NaN
2	2017-05-22 00	6.0	76.5184	97.5714	15.1522	36.4286	7	12	Cool.	9 km/h	88%	1022 mbar	NaN
3	2017-05-22 00	7.0	80.3153	117.917	21.3205	31.1667	12	12	Cool.	9 km/h	88%	1022 mbar	NaN
4	2017-05-22 00	8.0	93.4937	110.286	14.092	46.8571	7	12	Cool.	9 km/h	88%	1022 mbar	NaN

	time	grid	speed_mean	speed_max	speed_std	speed_min	traf_flow	Temp	Weather	Wind	Humidity	Barometer	Visibility	hour	times
44	2017-05-22 01	3.0	108.021	130	19.1822	64	1	11	Cool.	7	94	1022 mbar	2	1	2017-05-22 01:00:00
179	2017-05-22 05	3.0	105.15	124	14.6055	64	1	9	Cool.	6	94	1020 mbar	2	5	2017-05-22 05:00:00
228	2017-05-22 06	3.0	101.368	118.125	14.2641	71.125	8	9	Passing clouds.	6	100	1020 mbar	2	6	2017-05-22 06:00:00
287	2017-05-22 07	3.0	93.9023	115.4	15.4173	55.7	10	13	Cool.	2	82	1020 mbar	2	7	2017-05-22 07:00:00
348	2017-05-22 08	3.0	88.8137	117.2	19.8105	45.9	10	15	Cool.	9	68	1020 mbar	2	8	2017-05-22 08:00:00

	0	1	2	3	4	5	6	7	8	...	130
1	2017-05-22	-1.666667	0.482282	0.168314	-0.481384	0.759373	-0.333333	-0.5	1.04	...	0.0
2	2017-05-22	-1.666667	0.313752	-0.248041	-0.518721	0.974311	-0.121212	-0.5	1.28	...	1.0
3	2017-05-22	-1.000000	-0.018871	-0.441159	-0.392590	0.508988	-0.060606	-0.9	0.56	...	0.0
4	2017-05-22	-0.666667	-0.245602	-0.313595	0.087910	0.213352	-0.060606	-0.2	0.00	...	0.0
5	2017-05-22	-0.166667	0.041523	0.117693	0.610843	0.061225	-0.151515	-0.4	-0.32	...	0.0