Loading the necessary libraries
In [22]:
%matplotlib inline
from pandas import Series, DataFrame
import pandas as pd
from itertools import *
import numpy as np
import csv
import math
import matplotlib.pyplot as plt
from matplotlib import pylab
from scipy.signal import hilbert, chirp
import scipy
import networkx as nx
Loading the dataset 0750-0805
Description of the dataset is at: D:/zzzLola/PhD/DataSet/US101/US101_time_series/US-101-Main-Data/vehicle-trajectory-data/trajectory-data-dictionary.htm
In [2]:
# Column names assigned to the raw trajectory file (it is read with header=None).
c_dataset = [
    'vID', 'fID', 'tF', 'Time',
    'lX', 'lY', 'gX', 'gY',
    'vLen', 'vWid', 'vType',
    'vVel', 'vAcc',
    'vLane', 'vPrec', 'vFoll',
    'spac', 'headway',
]

# Whitespace-separated text file -> DataFrame with the column names above.
dataset = pd.read_table(
    'D:\\zzzLola\\PhD\\DataSet\\US101\\coding\\trajectories-0750am-0805am.txt',
    names=c_dataset,
    header=None,
    sep=r"\s+",
)
In [3]:
# Preview the first ten rows of the freshly loaded data.
dataset.head(10)
Out[3]:
In [4]:
# Distinct vehicle IDs in the dataset, followed by how many there are.
numV = dataset.vID.unique()
numV.size
Out[4]:
In [5]:
# Distinct timestamps in the dataset, followed by how many there are.
numTS = dataset.Time.unique()
numTS.size
Out[5]:
15 min = 900 s, i.e. 9000 time steps of 0.1 s. The dataset contains 9529 unique timestamps, i.e. 952.9 s = 15 min 52.9 s (assuming one timestamp every 0.1 s), so the actual temporal length of this dataset is 15 min 52.9 s. It looks like the timestamps of the different vehicles match each other, which makes sense given the way the data was obtained: there is no GPS on the vehicles; the trajectories come from synchronized cameras located on different buildings.
In [6]:
# Converting to meters: every length-based column is scaled by 0.3048.
# NOTE: this rescales `dataset` in place, so running the cell twice applies
# the factor twice -- re-load the data before re-running it.
for column in ('lX', 'lY', 'gX', 'gY', 'vLen', 'vWid', 'spac', 'vVel', 'vAcc'):
    dataset[column] = dataset[column] * 0.3048
In [7]:
# Preview the first ten rows after the unit conversion.
dataset.head(10)
Out[7]:
For every timestamp, check how many vehicles are accelerating, and whether the vehicle behind each of them is accelerating as well:
When is a vehicle changing lanes?
In [8]:
# Summary statistics of the `tF` column.
dataset.tF.describe()
Out[8]:
In [9]:
# Summary statistics of the dataset as produced by DataFrame.describe();
# kept in `des_all` so the table can be both displayed and exported.
des_all = dataset.describe()
des_all
Out[9]:
In [10]:
# Persist the summary statistics as a tab-separated, UTF-8 encoded file.
des_all.to_csv(
    'D:\\zzzLola\\PhD\\DataSet\\US101\\coding\\description_allDataset_160502.csv',
    sep='\t',
    encoding='utf-8',
)
In [11]:
# Persist the converted dataset (tab-separated, UTF-8, without the index column).
dataset.to_csv(
    'D:\\zzzLola\\PhD\\DataSet\\US101\\coding\\dataset_meters_160502.txt',
    sep='\t',
    encoding='utf-8',
    index=False,
)
In [12]:
# For every vehicle ID, the number of distinct `vLane` values it appears with.
v_num_lanes = dataset.groupby('vID')['vLane'].nunique()
In [13]:
# Number of vehicles recorded in more than one lane.
(v_num_lanes > 1).sum()
Out[13]:
In [14]:
# Number of vehicles recorded in exactly one lane.
(v_num_lanes == 1).sum()
Out[14]:
In [16]:
# Drop the fields that are not necessary for the time being; only
# vID, Time, gX and gY remain.
# errors='ignore' keeps the cell idempotent: re-running it after the columns
# are already gone is a no-op instead of raising KeyError.
dataset = dataset.drop(
    ['fID', 'tF', 'lX', 'lY', 'vLen', 'vWid', 'vType', 'vVel', 'vAcc',
     'vLane', 'vPrec', 'vFoll', 'spac', 'headway'],
    axis=1,
    errors='ignore',
)
dataset.head(10)
Out[16]:
In [32]:
def save_graph(graph, file_name):
    """Draw ``graph`` with a random node layout and save the picture to disk.

    Nodes, edges and node labels are drawn on a 20x20 inch, 80 dpi figure
    with the axes hidden.

    Parameters
    ----------
    graph : networkx graph
        The graph to draw.
    file_name : str
        Path of the image file to write (passed to ``Figure.savefig``).
    """
    # Own the figure explicitly instead of relying on pyplot's "current
    # figure": the drawing can then never land on an unrelated open figure.
    fig, ax = plt.subplots(figsize=(20, 20), dpi=80)
    try:
        ax.set_axis_off()
        pos = nx.random_layout(graph)  # alternative: nx.spring_layout(graph)
        nx.draw_networkx_nodes(graph, pos, ax=ax)
        nx.draw_networkx_edges(graph, pos, ax=ax)
        nx.draw_networkx_labels(graph, pos, ax=ax)
        fig.savefig(file_name, bbox_inches="tight")
    finally:
        # Always release the figure -- this function is called once per
        # timestamp, so a leaked figure per call would exhaust memory.
        plt.close(fig)
In [33]:
def build_proximity_graph(snapshot, scale=100):
    """Build the complete, distance-weighted graph of one timestamp.

    Parameters
    ----------
    snapshot : pandas.DataFrame
        Rows of a single timestamp; must provide the columns ``vID``, ``gX``
        and ``gY``. Assumes each ``vID`` appears once -- TODO confirm.
    scale : number, optional
        Factor applied to the inverse distance in order to scale the weights.

    Returns
    -------
    networkx.Graph
        One node per vehicle ID and one edge per pair of vehicles, whose
        ``weight`` attribute is ``(1 / euclidean distance) * scale``.
    """
    vehicle_ids = snapshot['vID'].tolist()
    x = snapshot['gX'].values
    y = snapshot['gY'].values

    # The graph is undirected and the distance symmetric, so every unordered
    # pair is needed only once (upper triangle, diagonal excluded).
    first, second = np.triu_indices(len(vehicle_ids), k=1)
    dist = np.hypot(x[first] - x[second], y[first] - y[second])

    # Two vehicles at the very same position yield an infinite weight, which
    # is what the pandas division used previously produced as well.
    with np.errstate(divide='ignore'):
        weight = (1 / dist) * scale

    graph = nx.Graph()
    # Adding the nodes first keeps a lone vehicle in the picture even though
    # it has no edges.
    graph.add_nodes_from(vehicle_ids)
    graph.add_weighted_edges_from(
        (vehicle_ids[i], vehicle_ids[j], w)
        for i, j, w in zip(first.tolist(), second.tolist(), weight.tolist())
    )
    return graph


times = dataset['Time'].unique()
# Split the dataset by timestamp once instead of re-scanning it for every
# timestamp.
snapshots = dataset.groupby('Time', sort=False)
for time in times:
    graph = build_proximity_graph(snapshots.get_group(time))
    save_graph(graph, 'D:\\zzzLola\\PhD\\DataSet\\US101\\coding\\graphs\\%i_my_graph.png' % time)
In [ ]: