Loading the necessary libraries


In [22]:
%matplotlib inline

from pandas import Series, DataFrame
import pandas as pd
from itertools import *
import numpy as np
import csv
import math
import matplotlib.pyplot as plt
from matplotlib import pylab
from scipy.signal import hilbert, chirp
import scipy
import networkx as nx

Loading the dataset 0750-0805

Description of the dataset is at: D:/zzzLola/PhD/DataSet/US101/US101_time_series/US-101-Main-Data/vehicle-trajectory-data/trajectory-data-dictionary.htm


In [2]:
c_dataset = ['vID','fID', 'tF', 'Time', 'lX', 'lY', 'gX', 'gY', 'vLen', 'vWid', 'vType','vVel', 'vAcc', 'vLane', 'vPrec', 'vFoll', 'spac','headway' ]

dataset = pd.read_table('D:\\zzzLola\\PhD\\DataSet\\US101\\coding\\trajectories-0750am-0805am.txt', sep=r"\s+", 
                        header=None, names=c_dataset)

In [3]:
dataset[:10]


Out[3]:
vID fID tF Time lX lY gX gY vLen vWid vType vVel vAcc vLane vPrec vFoll spac headway
0 2 13 437 1118846980200 16.467 35.381 6451137.641 1873344.962 14.5 4.9 2 40.00 0.00 2 0 0 0.0 0.0
1 2 14 437 1118846980300 16.447 39.381 6451140.329 1873342.000 14.5 4.9 2 40.00 0.00 2 0 0 0.0 0.0
2 2 15 437 1118846980400 16.426 43.381 6451143.018 1873339.038 14.5 4.9 2 40.00 0.00 2 0 0 0.0 0.0
3 2 16 437 1118846980500 16.405 47.380 6451145.706 1873336.077 14.5 4.9 2 40.00 0.00 2 0 0 0.0 0.0
4 2 17 437 1118846980600 16.385 51.381 6451148.395 1873333.115 14.5 4.9 2 40.00 0.00 2 0 0 0.0 0.0
5 2 18 437 1118846980700 16.364 55.381 6451151.084 1873330.153 14.5 4.9 2 40.00 0.00 2 0 0 0.0 0.0
6 2 19 437 1118846980800 16.344 59.381 6451153.772 1873327.192 14.5 4.9 2 40.00 0.00 2 0 0 0.0 0.0
7 2 20 437 1118846980900 16.323 63.379 6451156.461 1873324.230 14.5 4.9 2 40.02 0.25 2 0 0 0.0 0.0
8 2 21 437 1118846981000 16.303 67.383 6451159.149 1873321.268 14.5 4.9 2 40.03 0.13 2 0 0 0.0 0.0
9 2 22 437 1118846981100 16.282 71.398 6451161.838 1873318.307 14.5 4.9 2 39.93 -1.63 2 0 13 0.0 0.0
  • What is the number of different vehicles for the 15 min
  • How many timestamps? Are the timestamps of the vehicles matched?
  • To transfor the distaces, veloc and acceleration to meters, m/s.
  • To compute the distances all to all.
  • Compute the time cycles.

In [4]:
numV = dataset['vID'].unique()
len(numV)


Out[4]:
2169

In [5]:
numTS = dataset['Time'].unique()
len(numTS)


Out[5]:
9529

15min = 900 s = 9000 ms // 9529ms = 952.9s = 15min 52.9s The actual temporal length of this dataset is 15min 52.9s. Looks like the timestamp of the vehicles is matches. Which make sense attending to the way the data is obtained. There is no GPS on the vehicles, but from cameras synchronized localized at different buildings.


In [6]:
#Converting to meters
dataset['lX'] = dataset.lX * 0.3048
dataset['lY'] = dataset.lY * 0.3048
dataset['gX'] = dataset.gX * 0.3048
dataset['gY'] = dataset.gY * 0.3048
dataset['vLen'] = dataset.vLen * 0.3048
dataset['vWid'] = dataset.vWid * 0.3048
dataset['spac'] = dataset.spac * 0.3048
dataset['vVel'] = dataset.vVel * 0.3048
dataset['vAcc'] = dataset.vAcc * 0.3048

In [7]:
dataset[:10]


Out[7]:
vID fID tF Time lX lY gX gY vLen vWid vType vVel vAcc vLane vPrec vFoll spac headway
0 2 13 437 1118846980200 5.019142 10.784129 1.966307e+06 570995.544418 4.4196 1.49352 2 12.192000 0.000000 2 0 0 0.0 0.0
1 2 14 437 1118846980300 5.013046 12.003329 1.966308e+06 570994.641600 4.4196 1.49352 2 12.192000 0.000000 2 0 0 0.0 0.0
2 2 15 437 1118846980400 5.006645 13.222529 1.966308e+06 570993.738782 4.4196 1.49352 2 12.192000 0.000000 2 0 0 0.0 0.0
3 2 16 437 1118846980500 5.000244 14.441424 1.966309e+06 570992.836270 4.4196 1.49352 2 12.192000 0.000000 2 0 0 0.0 0.0
4 2 17 437 1118846980600 4.994148 15.660929 1.966310e+06 570991.933452 4.4196 1.49352 2 12.192000 0.000000 2 0 0 0.0 0.0
5 2 18 437 1118846980700 4.987747 16.880129 1.966311e+06 570991.030634 4.4196 1.49352 2 12.192000 0.000000 2 0 0 0.0 0.0
6 2 19 437 1118846980800 4.981651 18.099329 1.966312e+06 570990.128122 4.4196 1.49352 2 12.192000 0.000000 2 0 0 0.0 0.0
7 2 20 437 1118846980900 4.975250 19.317919 1.966312e+06 570989.225304 4.4196 1.49352 2 12.198096 0.076200 2 0 0 0.0 0.0
8 2 21 437 1118846981000 4.969154 20.538338 1.966313e+06 570988.322486 4.4196 1.49352 2 12.201144 0.039624 2 0 0 0.0 0.0
9 2 22 437 1118846981100 4.962754 21.762110 1.966314e+06 570987.419974 4.4196 1.49352 2 12.170664 -0.496824 2 0 13 0.0 0.0

For every time stamp, check how many vehicles are accelerating when the one behind is also or not... :

  • vehicle_acceleration vs precedin_vehicl_acceleration
  • vehicle_acceleration vs follower_vehicl_acceleration

When is a vehicle changing lanes?


In [8]:
dataset['tF'].describe()


Out[8]:
count    1.180598e+06
mean     5.855417e+02
std      1.602528e+02
min      1.770000e+02
25%      4.700000e+02
50%      5.410000e+02
75%      6.870000e+02
max      1.010000e+03
Name: tF, dtype: float64

In [9]:
des_all = dataset.describe()
des_all


Out[9]:
vID fID tF Time lX lY gX gY vLen vWid vType vVel vAcc vLane vPrec vFoll spac headway
count 1.180598e+06 1.180598e+06 1.180598e+06 1.180598e+06 1.180598e+06 1.180598e+06 1.180598e+06 1.180598e+06 1.180598e+06 1.180598e+06 1.180598e+06 1.180598e+06 1.180598e+06 1.180598e+06 1.180598e+06 1.180598e+06 1.180598e+06 1.180598e+06
mean 1.687341e+03 4.972144e+03 5.855417e+02 1.118847e+12 8.997239e+00 2.996211e+02 1.966516e+06 5.707959e+05 4.496373e+00 1.873111e+00 2.009898e+00 1.142891e+01 1.002950e-01 2.965503e+00 1.603584e+03 1.610456e+03 2.342746e+01 1.663407e+02
std 8.627387e+02 2.631668e+03 1.602528e+02 2.631668e+05 5.078526e+00 1.812429e+02 1.356282e+02 1.206658e+02 1.559558e+00 3.132597e-01 1.874141e-01 4.534187e+00 1.537803e+00 1.468343e+00 9.163973e+02 9.168453e+02 1.479561e+01 1.269462e+03
min 2.000000e+00 8.000000e+00 1.770000e+02 1.118847e+12 1.554480e-01 0.000000e+00 1.966297e+06 5.705475e+05 1.219200e+00 6.096000e-01 1.000000e+00 0.000000e+00 -3.413760e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
25% 1.023000e+03 2.705000e+03 4.700000e+02 1.118847e+12 5.284622e+00 1.435899e+02 1.966398e+06 5.706979e+05 3.810000e+00 1.645920e+00 2.000000e+00 9.113520e+00 -4.511040e-01 2.000000e+00 8.900000e+02 8.980000e+02 1.477670e+01 1.490000e+00
50% 1.720000e+03 5.074000e+03 5.410000e+02 1.118847e+12 9.035796e+00 2.841087e+02 1.966504e+06 5.708051e+05 4.419600e+00 1.828800e+00 2.000000e+00 1.196340e+01 0.000000e+00 3.000000e+00 1.657000e+03 1.662000e+03 2.064410e+01 2.020000e+00
75% 2.490000e+03 7.387000e+03 6.870000e+02 1.118848e+12 1.277295e+01 4.469289e+02 1.966626e+06 5.708986e+05 5.029200e+00 2.103120e+00 2.000000e+00 1.443838e+01 7.254240e-01 4.000000e+00 2.458000e+03 2.467000e+03 2.919374e+01 2.830000e+00
max 3.109000e+03 9.536000e+03 1.010000e+03 1.118848e+12 2.239518e+01 6.691768e+02 1.966793e+06 5.710041e+05 2.319528e+01 2.590800e+00 3.000000e+00 2.904744e+01 3.413760e+00 8.000000e+00 3.109000e+03 3.109000e+03 2.370521e+02 9.999990e+03

In [10]:
des_all.to_csv('D:\\zzzLola\\PhD\\DataSet\\US101\\coding\\description_allDataset_160502.csv', sep='\t', encoding='utf-8')

In [11]:
dataset.to_csv('D:\\zzzLola\\PhD\\DataSet\\US101\\coding\\dataset_meters_160502.txt', sep='\t', encoding='utf-8',index=False)

In [12]:
#table.groupby('YEARMONTH').CLIENTCODE.nunique()
v_num_lanes = dataset.groupby('vID').vLane.nunique()

In [13]:
v_num_lanes[v_num_lanes > 1].count()


Out[13]:
601

In [14]:
v_num_lanes[v_num_lanes == 1].count()


Out[14]:
1568

In [16]:
#Drop some field are not necessary for the time being. 
dataset = dataset.drop(['fID','tF','lX','lY','vLen','vWid', 'vType','vVel', 'vAcc', 
                        'vLane', 'vPrec', 'vFoll','spac','headway'], axis=1)
dataset[:10]


Out[16]:
vID Time gX gY
0 2 1118846980200 1.966307e+06 570995.544418
1 2 1118846980300 1.966308e+06 570994.641600
2 2 1118846980400 1.966308e+06 570993.738782
3 2 1118846980500 1.966309e+06 570992.836270
4 2 1118846980600 1.966310e+06 570991.933452
5 2 1118846980700 1.966311e+06 570991.030634
6 2 1118846980800 1.966312e+06 570990.128122
7 2 1118846980900 1.966312e+06 570989.225304
8 2 1118846981000 1.966313e+06 570988.322486
9 2 1118846981100 1.966314e+06 570987.419974

In [32]:
def save_graph(graph,file_name):
    #initialze Figure
    plt.figure(num=None, figsize=(20, 20), dpi=80)
    plt.axis('off')
    fig = plt.figure(1)
    pos = nx.random_layout(graph) #spring_layout(graph)
    nx.draw_networkx_nodes(graph,pos)
    nx.draw_networkx_edges(graph,pos)
    nx.draw_networkx_labels(graph,pos)

    #cut = 1.00
    #xmax = cut * max(xx for xx, yy in pos.values())
    #ymax = cut * max(yy for xx, yy in pos.values())
    #plt.xlim(0, xmax)
    #plt.ylim(0, ymax)

    plt.savefig(file_name,bbox_inches="tight")
    pylab.close()
    del fig

In [33]:
times = dataset['Time'].unique()

#data = pd.DataFrame()
#data = data.fillna(0) # with 0s rather than NaNs

dTime = pd.DataFrame()

for time in times:
    #print 'Time %i ' %time
    
    dataTime0 = dataset.loc[dataset['Time'] == time] 
    
    list_vIDs = dataTime0.vID.tolist()
    #print list_vIDs
    
    dataTime = dataTime0.set_index("vID")
    #index_dataTime = dataTime.index.values
    #print dataTime
    
    perm = list(permutations(list_vIDs,2))
    #print perm
    dist = [((((dataTime.loc[p[0],'gX'] - dataTime.loc[p[1],'gX']))**2) + 
            (((dataTime.loc[p[0],'gY'] - dataTime.loc[p[1],'gY']))**2))**0.5 for p in perm]
    dataDist = pd.DataFrame(dist , index=perm, columns = {'dist'}) 
    

    #Create the fields vID and To
    dataDist['FromTo'] = dataDist.index
    dataDist['From'] = dataDist.FromTo.str[0]
    dataDist['To'] = dataDist.FromTo.str[1]
    #I multiply by 100 in order to scale the number
    dataDist['weight'] = (1/dataDist.dist)*100
    
    #Delete the intermediate FromTo field
    dataDist = dataDist.drop('FromTo', 1)
    

    
    graph = nx.from_pandas_dataframe(dataDist, 'From','To',['weight'])
 

    save_graph(graph,'D:\\zzzLola\\PhD\\DataSet\\US101\\coding\\graphs\\%i_my_graph.png' %time)


---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-33-2264bff73fbc> in <module>()
     22     #print perm
     23     dist = [((((dataTime.loc[p[0],'gX'] - dataTime.loc[p[1],'gX']))**2) + 
---> 24             (((dataTime.loc[p[0],'gY'] - dataTime.loc[p[1],'gY']))**2))**0.5 for p in perm]
     25     dataDist = pd.DataFrame(dist , index=perm, columns = {'dist'})
     26 

C:\Anaconda2\lib\site-packages\pandas\core\generic.pyc in _indexer(self)
   1306 
   1307             def _indexer(self):
-> 1308                 i = getattr(self, iname)
   1309                 if i is None:
   1310                     i = indexer(self, name)

KeyboardInterrupt: 

In [ ]: