In [373]:
%reset
Loading the necessary libraries
In [522]:
%matplotlib inline
from pandas import Series, DataFrame
import pandas as pd
from itertools import *
import itertools
import numpy as np
import csv
import math
import matplotlib.pyplot as plt
from matplotlib import pylab
from scipy.signal import hilbert, chirp
import scipy
import networkx as nx
Loading the dataset 0750-0805
Description of the dataset is at: D:/zzzLola/PhD/DataSet/US101/US101_time_series/US-101-Main-Data/vehicle-trajectory-data/trajectory-data-dictionary.htm
In [375]:
c_dataset = ['vID','fID', 'tF', 'Time', 'lX', 'lY', 'gX', 'gY', 'vLen', 'vWid', 'vType','vVel', 'vAcc', 'vLane', 'vPrec', 'vFoll', 'spac','headway' ]
dataset = pd.read_table('D:\\zzzLola\\PhD\\DataSet\\US101\\coding\\dataset_meters_sample.txt', sep=r"\s+",
header=None, names=c_dataset)
In [376]:
dataset
Out[376]:
In [377]:
numV = dataset['vID'].unique()
len(numV)
Out[377]:
In [378]:
numTS = dataset['Time'].unique()
len(numTS)
Out[378]:
15 min = 900 s = 9000 frames at the 0.1 s resolution of the recording, while here there are 9529 unique timestamps = 952.9 s = 15 min 52.9 s, so the actual temporal length of this dataset is 15 min 52.9 s. The timestamps of the different vehicles match each other, which makes sense given how the data was obtained: the vehicles carry no GPS; the trajectories come from synchronized cameras placed on different buildings.
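A quick arithmetic check of that duration (a minimal sketch, assuming the 0.1 s frame spacing noted above):
In [ ]:
frame_s = 0.1  # assumed frame spacing in seconds
duration_s = len(numTS) * frame_s
print 'Recorded length: %.1f s = %.1f min' % (duration_s, duration_s / 60)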
For every timestamp, check how many vehicles are accelerating while the vehicle behind them is (or is not) also accelerating.
When is a vehicle changing lanes?
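A minimal sketch of both checks, assuming vFoll holds the vID of the vehicle driving behind at the same timestamp (0 when there is none); the names foll, pairs and ds are introduced here only for illustration:
In [ ]:
#Attach the follower's acceleration to each row by matching (Time, vFoll) against (Time, vID)
foll = dataset[['Time', 'vID', 'vAcc']].rename(columns={'vID': 'vFoll', 'vAcc': 'vAccFoll'})
pairs = pd.merge(dataset[dataset.vFoll != 0], foll, on=['Time', 'vFoll'])
both_accelerating = ((pairs.vAcc > 0) & (pairs.vAccFoll > 0)).sum()
leader_only = ((pairs.vAcc > 0) & (pairs.vAccFoll <= 0)).sum()
print both_accelerating, leader_only
#A lane change shows up as a change of vLane between consecutive frames of the same vehicle
ds = dataset.sort_values(['vID', 'Time'])
lane_change = (ds.vLane != ds.vLane.shift()) & (ds.vID == ds.vID.shift())
print lane_change.sum()   # number of lane-change events in the sample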
In [379]:
dataset['tF'].describe()
Out[379]:
In [380]:
des_all = dataset.describe()
des_all
Out[380]:
In [381]:
#des_all.to_csv('D:\\zzzLola\\PhD\\DataSet\\US101\\coding\\description_allDataset.csv', sep='\t', encoding='utf-8')
In [382]:
#dataset.to_csv('D:\\zzzLola\\PhD\\DataSet\\US101\\coding\\dataset_meters.txt', sep='\t', encoding='utf-8')
In [383]:
#table.groupby('YEARMONTH').CLIENTCODE.nunique()
v_num_lanes = dataset.groupby('vID').vLane.nunique()
In [384]:
v_num_lanes[v_num_lanes > 1].count()
Out[384]:
In [385]:
v_num_lanes[v_num_lanes == 1].count()
Out[385]:
In [386]:
dataset[:10]
Out[386]:
def calculateDistance(x1, y1, x2, y2):
    dist = math.sqrt((x2 - x1)**2 + (y2 - y1)**2)
    return dist

#result = df1.append(df2)   # df1 and df2 are not defined in this notebook

count = 0
dist = 0
columns_dist = ['vIDa','Timea', 'gXa', 'gYa', 'vTypea','vVela', 'vAcca', 'vLanea', 'vPreca', 'vFolla',
                'vIDb','Timeb', 'gXb', 'gYb', 'vTypeb','vVelb', 'vAccb', 'vLaneb', 'vPrecb', 'vFollb']
df_dist = pd.DataFrame(columns=columns_dist)
df_dist = df_dist.fillna(0) # with 0s rather than NaNs

times = dataset['Time'].unique()
for time in times:
    print 'Time %i ' %time
    dataTime = dataset.loc[dataset['Time'] == time]
    row_iterator = dataTime.iterrows()
    #note: iterrows() yields the original dataset index labels, not 0..len(dataTime)-1 positions
    for index, row in row_iterator:
        if index+1 > len(dataTime)-1:
            print 'The index is %i ' %index
        else:
            print row['vID']
            print dataTime.iloc[index+1]['vID']
#while row.notnull == True:
# last = row_iterator.next()
# print last
#if ((index+1)):
# j=index+1
# print 'The index+1 is: %i' %j
# for j, row in dataTime.iterrows():
# #dist = calculateDistance(dataTime[index,'gX'],dataTime[index,'gY'],dataTime[j,'gX'],dataTime[j,'gY'],)
# #i_data = array_data.tolist
# #dist_med = (array_data[i, 3], array_data[i, 0], array_data[j,0], dist, array_data[i, 10], array_data[i, 11],
# #array_data[i, 13],array_data[i, 14], array_data[i, 15])
# #dist_list.append(dist_med)
# count = len(dataTime)
#print ('The count is: %i' %count)
#count = 0
#dist = calculateDistance()
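A quick sanity check of calculateDistance (assuming the draft cell above has been executed); the coordinates are made up, not taken from the dataset:
In [ ]:
print calculateDistance(0.0, 0.0, 3.0, 4.0)   # a 3-4-5 triangle, expected output: 5.0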
In [387]:
#len(dataTime)
if i+1 > len(df)-1:
    pass
elif (df.loc[i+1,'a_d'] == df.loc[i,'a_d']):
    pass
elif (df.loc[i+2,'station'] == df.loc[i,'station'] and (df.loc[i+2,'direction'] == df.loc[i,'direction'])):
    pass
else:
    df.loc[i,'value_id'] = value_id
import pandas as pd
from itertools import izip

df = pd.DataFrame(['AA', 'BB', 'CC'], columns = ['value'])
for id1, id2 in izip(df.iterrows(), df.ix[1:].iterrows()):
    print id1[1]['value']
    print id2[1]['value']
In [388]:
data = dataset.set_index("vID")
In [389]:
data[:13]
Out[389]:
In [390]:
#This drop should probably be done earlier, before setting the index above.
dataset = dataset.drop(['fID','tF','lX','lY','vLen','vWid','spac','headway'], axis=1)
In [391]:
dataset
Out[391]:
In [421]:
times = dataset['Time'].unique()
data = pd.DataFrame()
data = data.fillna(0) # with 0s rather than NaNs
dTime = pd.DataFrame()
for time in times:
    print 'Time %i ' %time
    dataTime0 = dataset.loc[dataset['Time'] == time]
    list_vIDs = dataTime0.vID.tolist()
    #print list_vIDs
    dataTime = dataTime0.set_index("vID")
    #index_dataTime = dataTime.index.values
    #print dataTime
    perm = list(permutations(list_vIDs,2))
    #print perm
    #Euclidean distance between every ordered pair of vehicles present at this timestamp
    dist = pd.DataFrame([(((dataTime.loc[p[0],'gX'] - dataTime.loc[p[1],'gX'])**2) +
                          ((dataTime.loc[p[0],'gY'] - dataTime.loc[p[1],'gY'])**2))**0.5
                         for p in perm], index=perm, columns=['dist'])
    #dist['time'] = time ##Matrix with dist and time
    #merge dataTime with distances
    dist['FromTo'] = dist.index
    dist['vID'] = dist.FromTo.str[0]
    dist['To'] = dist.FromTo.str[1]
    dataTimeDist = pd.merge(dataTime0, dist, on='vID')
    dataTimeDist = dataTimeDist.drop(['gX','gY'], axis=1)
    print dataTimeDist
    data = data.append(dataTimeDist)
data
Out[421]:
In [559]:
def save_graph(graph,file_name):
    #initialize Figure
    plt.figure(num=None, figsize=(20, 20), dpi=80)
    plt.axis('off')
    fig = plt.figure(1)
    pos = nx.spring_layout(graph)
    nx.draw_networkx_nodes(graph,pos)
    nx.draw_networkx_edges(graph,pos)
    nx.draw_networkx_labels(graph,pos)
    #cut = 1.00
    #xmax = cut * max(xx for xx, yy in pos.values())
    #ymax = cut * max(yy for xx, yy in pos.values())
    #plt.xlim(0, xmax)
    #plt.ylim(0, ymax)
    plt.savefig(file_name,bbox_inches="tight")
    pylab.close()
    del fig
In [554]:
times = dataset['Time'].unique()
data = pd.DataFrame()
data = data.fillna(0) # with 0s rather than NaNs
data_graph = pd.DataFrame()
data_graph = data_graph.fillna(0)
dTime = pd.DataFrame()
for time in times:
    #print 'Time %i ' %time
    dataTime0 = dataset.loc[dataset['Time'] == time]
    list_vIDs = dataTime0.vID.tolist()
    #print list_vIDs
    dataTime = dataTime0.set_index("vID")
    #index_dataTime = dataTime.index.values
    #print dataTime
    perm = list(permutations(list_vIDs,2))
    #print perm
    dist = [(((dataTime.loc[p[0],'gX'] - dataTime.loc[p[1],'gX'])**2) +
             ((dataTime.loc[p[0],'gY'] - dataTime.loc[p[1],'gY'])**2))**0.5 for p in perm]
    dataDist = pd.DataFrame(dist, index=perm, columns=['dist'])
    #Convert the matrix into a square matrix
    #Create the fields vID and To
    dataDist['FromTo'] = dataDist.index
    dataDist['vID'] = dataDist.FromTo.str[0]
    dataDist['To'] = dataDist.FromTo.str[1]
    #I multiply by 100 in order to scale the number
    dataDist['inv_dist'] = (1/dataDist.dist)*100
    #Delete the intermediate FromTo field
    dataDist = dataDist.drop('FromTo', 1)
    #With pivot and the 3 columns I can generate the square matrix
    #Here is where I should have the condition of the max distance: THRESHOLD
    dataGraph = dataDist.pivot(index='vID', columns='To', values='inv_dist').fillna(0)
    print dataDist
    #graph = nx.from_numpy_matrix(dataGraph.values)
    #graph = nx.relabel_nodes(graph, dict(enumerate(dataGraph.columns)))
    #save_graph(graph,'my_graph+%i.png' %time)
    #print dataDist
    #data = data.append(dist)
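The THRESHOLD mentioned in the comment above is not applied yet; a minimal sketch of that filter, using a hypothetical maximum distance of 50 m, would drop the distant pairs before the pivot so that they produce no entry in the square matrix:
In [ ]:
THRESHOLD = 50.0   # hypothetical maximum distance in metres
dataDistClose = dataDist[dataDist.dist <= THRESHOLD]
dataGraph = dataDistClose.pivot(index='vID', columns='To', values='inv_dist').fillna(0)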
In [574]:
def save_graph(graph,my_weight,file_name):
    #initialize Figure
    plt.figure(num=None, figsize=(20, 20), dpi=80)
    plt.axis('off')
    fig = plt.figure(1)
    pos = nx.spring_layout(graph,weight=my_weight)   #use the given edge attribute for the layout
    nx.draw_networkx_nodes(graph,pos)
    nx.draw_networkx_edges(graph,pos)
    nx.draw_networkx_labels(graph,pos)
    #cut = 1.00
    #xmax = cut * max(xx for xx, yy in pos.values())
    #ymax = cut * max(yy for xx, yy in pos.values())
    #plt.xlim(0, xmax)
    #plt.ylim(0, ymax)
    plt.savefig(file_name,bbox_inches="tight")
    pylab.close()
    del fig
In [575]:
times = dataset['Time'].unique()
data = pd.DataFrame()
data = data.fillna(0) # with 0s rather than NaNs
dTime = pd.DataFrame()
for time in times:
    #print 'Time %i ' %time
    dataTime0 = dataset.loc[dataset['Time'] == time]
    list_vIDs = dataTime0.vID.tolist()
    #print list_vIDs
    dataTime = dataTime0.set_index("vID")
    #index_dataTime = dataTime.index.values
    #print dataTime
    perm = list(permutations(list_vIDs,2))
    #print perm
    dist = [(((dataTime.loc[p[0],'gX'] - dataTime.loc[p[1],'gX'])**2) +
             ((dataTime.loc[p[0],'gY'] - dataTime.loc[p[1],'gY'])**2))**0.5 for p in perm]
    dataDist = pd.DataFrame(dist, index=perm, columns=['dist'])
    #Create the fields From and To
    dataDist['FromTo'] = dataDist.index
    dataDist['From'] = dataDist.FromTo.str[0]
    dataDist['To'] = dataDist.FromTo.str[1]
    #I multiply by 100 in order to scale the number
    dataDist['weight'] = (1/dataDist.dist)*100
    #Delete the intermediate FromTo field
    dataDist = dataDist.drop('FromTo', 1)
    graph = nx.from_pandas_dataframe(dataDist, 'From','To',['weight'])
    save_graph(graph,'weight','000_my_graph+%i.png' %time)
In [581]:
dataDist
Out[581]:
In [ ]:
In [582]:
graph[1917][1919]['weight']
Out[582]:
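Since the edge weight was defined as (1/dist)*100, the separation between two vehicles (in metres, as the dataset is in metres) can be recovered by inverting the stored weight; graph here holds the network of the last processed timestamp:
In [ ]:
w = graph[1917][1919]['weight']
print 100.0 / w   # distance between vehicles 1917 and 1919 at that timestamp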
In [ ]: