In [373]:
%reset


Once deleted, variables cannot be recovered. Proceed (y/[n])? y

Loading the necessary libraries


In [522]:
%matplotlib inline

from pandas import Series, DataFrame
import pandas as pd
from itertools import *
import itertools
import numpy as np
import csv
import math
import matplotlib.pyplot as plt
from matplotlib import pylab
from scipy.signal import hilbert, chirp
import scipy
import networkx as nx

Loading the dataset 0750-0805

Description of the dataset is at: D:/zzzLola/PhD/DataSet/US101/US101_time_series/US-101-Main-Data/vehicle-trajectory-data/trajectory-data-dictionary.htm


In [375]:
# Column names for the trajectory file (it is read with header=None).
c_dataset = [
    'vID', 'fID', 'tF', 'Time',
    'lX', 'lY', 'gX', 'gY',
    'vLen', 'vWid', 'vType', 'vVel', 'vAcc',
    'vLane', 'vPrec', 'vFoll', 'spac', 'headway',
]

# Whitespace-separated sample file, one row per (vehicle, timestamp) record.
dataset = pd.read_table(
    'D:\\zzzLola\\PhD\\DataSet\\US101\\coding\\dataset_meters_sample.txt',
    names=c_dataset,
    header=None,
    sep=r"\s+",
)

In [376]:
dataset


Out[376]:
vID fID tF Time lX lY gX gY vLen vWid vType vVel vAcc vLane vPrec vFoll spac headway
0 2 60 437 1118846984900 1.380168 20.013916 1.966345e+06 570955.526311 1.347094 0.455225 2 3.715193 0.000000 2 0 13 0.000000 0.00
1 5 60 452 1118846984900 3.541185 21.475188 1.966343e+06 570947.061406 1.579352 0.733934 2 3.032355 -0.317728 4 0 8 0.000000 0.00
2 8 60 448 1118846984900 3.505418 16.129268 1.966331e+06 570959.912078 1.439997 0.501676 2 3.403038 -0.561134 4 5 21 5.345641 1.57
3 9 60 409 1118846984900 2.535231 9.537426 1.966318e+06 570978.013541 1.765158 0.594579 2 3.804379 0.640102 3 0 0 0.000000 0.00
4 10 60 436 1118846984900 0.408123 11.131921 1.966327e+06 570978.935866 1.579352 0.641031 2 3.376096 -1.011714 1 0 12 0.000000 0.00
5 2 66 437 1118846985500 1.453004 22.195372 1.966349e+06 570950.110015 1.347094 0.455225 2 3.690109 1.040514 3 0 13 0.000000 0.00
6 5 66 452 1118846985500 3.673851 23.545161 1.966348e+06 570941.785318 1.579352 0.733934 2 3.710547 -0.023226 4 0 8 0.000000 0.00
7 8 66 448 1118846985500 3.554656 18.140712 1.966335e+06 570955.029487 1.439997 0.501676 2 3.330574 -0.530476 3 5 21 5.404170 1.62
8 9 66 409 1118846985500 2.562730 11.800173 1.966323e+06 570972.518606 1.765158 0.594579 2 3.711476 0.007432 3 0 0 0.000000 0.00
9 10 66 436 1118846985500 0.428469 13.084557 1.966331e+06 570974.203236 1.579352 0.641031 2 3.251606 0.000000 1 0 12 0.000000 0.00
10 12 66 443 1118846985500 0.360278 9.040952 1.966322e+06 570984.058944 1.347094 0.548128 2 3.866625 -1.040514 1 10 23 4.044069 1.05
11 1917 5963 479 1118847575200 5.116263 197.107735 1.966773e+06 570561.850289 1.021933 0.371612 2 4.187140 0.000000 5 0 1922 0.000000 0.00
12 1919 5963 522 1118847575200 3.117269 188.362493 1.966755e+06 570585.009602 0.929030 0.464515 2 3.254393 -0.016723 3 1915 1920 4.560610 1.40
  • What is the number of different vehicles in the 15 min?
  • How many timestamps are there? Are the timestamps of the vehicles matched?
  • Transform the distances, velocities and accelerations to metric units (m, m/s, m/s²).
  • Compute the all-to-all distances.
  • Compute the time cycles.

In [377]:
# Distinct vehicle IDs present in the dataset, and how many there are.
numV = pd.unique(dataset['vID'])
numV.size


Out[377]:
8

In [378]:
# Distinct timestamps present in the dataset, and how many there are.
numTS = pd.unique(dataset['Time'])
numTS.size


Out[378]:
3

15 min = 900 s = 9000 time steps of 100 ms // 9529 time steps = 952.9 s = 15 min 52.9 s. The actual temporal length of this dataset is 15 min 52.9 s. It looks like the timestamps of the vehicles are matched, which makes sense given the way the data were obtained: the positions do not come from GPS units on the vehicles but from synchronized cameras located on different buildings.

For every timestamp, check how many vehicles are accelerating when the one behind is also accelerating, or not:

  • vehicle_acceleration vs preceding_vehicle_acceleration
  • vehicle_acceleration vs follower_vehicle_acceleration

When is a vehicle changing lanes?


In [379]:
dataset['tF'].describe()


Out[379]:
count     13.000000
mean     446.769231
std       28.934276
min      409.000000
25%      436.000000
50%      443.000000
75%      452.000000
max      522.000000
Name: tF, dtype: float64

In [380]:
des_all = dataset.describe()
des_all


Out[380]:
vID fID tF Time lX lY gX gY vLen vWid vType vVel vAcc vLane vPrec vFoll spac headway
count 13.000000 13.000000 13.000000 1.300000e+01 13.000000 13.000000 1.300000e+01 13.000000 13.000000 13.000000 13.0 13.000000 13.000000 13.000000 13.000000 13.000000 13.000000 13.000000
mean 301.230769 970.923077 446.769231 1.118847e+12 2.433588 43.197298 1.966400e+06 570903.385746 1.439997 0.556704 2.0 3.564118 -0.139497 2.846154 148.846154 305.615385 1.488807 0.433846
std 717.549203 2215.548896 28.934276 2.215549e+05 1.509820 66.570052 1.620189e+02 147.121205 0.250149 0.111578 0.0 0.317733 0.580500 1.281025 530.672348 716.963451 2.347330 0.689499
min 2.000000 60.000000 409.000000 1.118847e+12 0.360278 9.040952 1.966318e+06 570561.850289 0.929030 0.371612 2.0 3.032355 -1.040514 1.000000 0.000000 0.000000 0.000000 0.000000
25% 5.000000 60.000000 436.000000 1.118847e+12 1.380168 11.800173 1.966327e+06 570947.061406 1.347094 0.464515 2.0 3.330574 -0.530476 2.000000 0.000000 8.000000 0.000000 0.000000
50% 9.000000 66.000000 443.000000 1.118847e+12 2.562730 18.140712 1.966335e+06 570955.526311 1.439997 0.548128 2.0 3.690109 -0.016723 3.000000 0.000000 13.000000 0.000000 0.000000
75% 10.000000 66.000000 452.000000 1.118847e+12 3.541185 22.195372 1.966348e+06 570974.203236 1.579352 0.641031 2.0 3.715193 0.000000 4.000000 5.000000 21.000000 4.044069 1.050000
max 1919.000000 5963.000000 522.000000 1.118848e+12 5.116263 197.107735 1.966773e+06 570984.058944 1.765158 0.733934 2.0 4.187140 1.040514 5.000000 1915.000000 1922.000000 5.404170 1.620000

In [381]:
#des_all.to_csv('D:\\zzzLola\\PhD\\DataSet\\US101\\coding\\description_allDataset.csv', sep='\t', encoding='utf-8')

In [382]:
#dataset.to_csv('D:\\zzzLola\\PhD\\DataSet\\US101\\coding\\dataset_meters.txt', sep='\t', encoding='utf-8')

In [383]:
# For every vehicle, count the distinct lanes it was recorded in.
v_num_lanes = dataset.groupby('vID')['vLane'].nunique()

In [384]:
v_num_lanes[v_num_lanes > 1].count()


Out[384]:
2

In [385]:
v_num_lanes[v_num_lanes == 1].count()


Out[385]:
6

In [386]:
dataset[:10]


Out[386]:
vID fID tF Time lX lY gX gY vLen vWid vType vVel vAcc vLane vPrec vFoll spac headway
0 2 60 437 1118846984900 1.380168 20.013916 1.966345e+06 570955.526311 1.347094 0.455225 2 3.715193 0.000000 2 0 13 0.000000 0.00
1 5 60 452 1118846984900 3.541185 21.475188 1.966343e+06 570947.061406 1.579352 0.733934 2 3.032355 -0.317728 4 0 8 0.000000 0.00
2 8 60 448 1118846984900 3.505418 16.129268 1.966331e+06 570959.912078 1.439997 0.501676 2 3.403038 -0.561134 4 5 21 5.345641 1.57
3 9 60 409 1118846984900 2.535231 9.537426 1.966318e+06 570978.013541 1.765158 0.594579 2 3.804379 0.640102 3 0 0 0.000000 0.00
4 10 60 436 1118846984900 0.408123 11.131921 1.966327e+06 570978.935866 1.579352 0.641031 2 3.376096 -1.011714 1 0 12 0.000000 0.00
5 2 66 437 1118846985500 1.453004 22.195372 1.966349e+06 570950.110015 1.347094 0.455225 2 3.690109 1.040514 3 0 13 0.000000 0.00
6 5 66 452 1118846985500 3.673851 23.545161 1.966348e+06 570941.785318 1.579352 0.733934 2 3.710547 -0.023226 4 0 8 0.000000 0.00
7 8 66 448 1118846985500 3.554656 18.140712 1.966335e+06 570955.029487 1.439997 0.501676 2 3.330574 -0.530476 3 5 21 5.404170 1.62
8 9 66 409 1118846985500 2.562730 11.800173 1.966323e+06 570972.518606 1.765158 0.594579 2 3.711476 0.007432 3 0 0 0.000000 0.00
9 10 66 436 1118846985500 0.428469 13.084557 1.966331e+06 570974.203236 1.579352 0.641031 2 3.251606 0.000000 1 0 12 0.000000 0.00

def calculateDistance(x1,y1,x2,y2):
    dist = math.sqrt((x2 - x1)**2 + (y2 - y1)**2)
    return dist

result = df1.append(df2)

count = 0 dist = 0

create an empty dataframe

index = pd.date_range(todays_date-datetime.timedelta(10), periods=10, freq='D')

columns_dist = ['vIDa','Timea', 'gXa', 'gYa', 'vTypea','vVela', 'vAcca', 'vLanea', 'vPreca', 'vFolla', 'vIDb','Timeb', 'gXb', 'gYb', 'vTypeb','vVelb', 'vAccb', 'vLaneb', 'vPrecb', 'vFollb']

df_ = pd.DataFrame(index=index, columns=columns)

df_dist = pd.DataFrame(columns=columns_dist) df_dist = df_dist.fillna(0) # with 0s rather than NaNs

Fill the dataframe

df = df.append(data)

times = dataset['Time'].unique()
for time in times:
    print 'Time %i ' %time
    dataTime = dataset.loc[dataset['Time'] == time]

row_iterator = dataTime.iterrows()

for index, row in row_iterator:
    if index+1 > len(dataTime)-1:
        print 'The index is %i ' %index
        print row['vID']
        print dataTime.iloc[index+1]['vID']


    #while row.notnull == True:
    #    last = row_iterator.next()

    #    print last


    #if ((index+1)):
    #    j=index+1
    #    print 'The index+1 is: %i' %j

    #    for j, row in dataTime.iterrows():
    #        #dist = calculateDistance(dataTime[index,'gX'],dataTime[index,'gY'],dataTime[j,'gX'],dataTime[j,'gY'],)
    #        #i_data = array_data.tolist
    #        #dist_med = (array_data[i, 3], array_data[i, 0], array_data[j,0], dist, array_data[i, 10], array_data[i, 11], 
    #                    #array_data[i, 13],array_data[i, 14], array_data[i, 15])
    #        #dist_list.append(dist_med)
    #        count = len(dataTime)
    #print ('The count is: %i' %count)
#count = 0


#dist = calculateDistance()

In [387]:
#len(dataTime)

if i+1 > len(df)-1:
    pass
elif (df.loc[i+1,'a_d'] == df.loc [i,'a_d']):
    pass
elif (df.loc [i+2,'station'] == df.loc [i,'station'] and (df.loc [i+2,'direction'] == df.loc [i,'direction'])):
    pass
else:
    df.loc[i,'value_id'] = value_id

import pandas as pd
from itertools import izip

df = pd.DataFrame(['AA', 'BB', 'CC'], columns = ['value'])

for id1, id2 in izip(df.iterrows(),df.ix[1:].iterrows()):
    print id1[1]['value']
    print id2[1]['value']

Calculation of DISTANCES


In [388]:
data = dataset.set_index("vID")

In [389]:
data[:13]


Out[389]:
fID tF Time lX lY gX gY vLen vWid vType vVel vAcc vLane vPrec vFoll spac headway
vID
2 60 437 1118846984900 1.380168 20.013916 1.966345e+06 570955.526311 1.347094 0.455225 2 3.715193 0.000000 2 0 13 0.000000 0.00
5 60 452 1118846984900 3.541185 21.475188 1.966343e+06 570947.061406 1.579352 0.733934 2 3.032355 -0.317728 4 0 8 0.000000 0.00
8 60 448 1118846984900 3.505418 16.129268 1.966331e+06 570959.912078 1.439997 0.501676 2 3.403038 -0.561134 4 5 21 5.345641 1.57
9 60 409 1118846984900 2.535231 9.537426 1.966318e+06 570978.013541 1.765158 0.594579 2 3.804379 0.640102 3 0 0 0.000000 0.00
10 60 436 1118846984900 0.408123 11.131921 1.966327e+06 570978.935866 1.579352 0.641031 2 3.376096 -1.011714 1 0 12 0.000000 0.00
2 66 437 1118846985500 1.453004 22.195372 1.966349e+06 570950.110015 1.347094 0.455225 2 3.690109 1.040514 3 0 13 0.000000 0.00
5 66 452 1118846985500 3.673851 23.545161 1.966348e+06 570941.785318 1.579352 0.733934 2 3.710547 -0.023226 4 0 8 0.000000 0.00
8 66 448 1118846985500 3.554656 18.140712 1.966335e+06 570955.029487 1.439997 0.501676 2 3.330574 -0.530476 3 5 21 5.404170 1.62
9 66 409 1118846985500 2.562730 11.800173 1.966323e+06 570972.518606 1.765158 0.594579 2 3.711476 0.007432 3 0 0 0.000000 0.00
10 66 436 1118846985500 0.428469 13.084557 1.966331e+06 570974.203236 1.579352 0.641031 2 3.251606 0.000000 1 0 12 0.000000 0.00
12 66 443 1118846985500 0.360278 9.040952 1.966322e+06 570984.058944 1.347094 0.548128 2 3.866625 -1.040514 1 10 23 4.044069 1.05
1917 5963 479 1118847575200 5.116263 197.107735 1.966773e+06 570561.850289 1.021933 0.371612 2 4.187140 0.000000 5 0 1922 0.000000 0.00
1919 5963 522 1118847575200 3.117269 188.362493 1.966755e+06 570585.009602 0.929030 0.464515 2 3.254393 -0.016723 3 1915 1920 4.560610 1.40

In [390]:
# Keep only the columns used by the distance / graph computations.
# The survivors are selected (instead of calling `drop`) so the cell is
# idempotent: re-running it once the columns are already gone used to raise
# a KeyError.
# Original author's note: "Must be before, I guess." -- presumably this step
# belongs earlier in the notebook; TODO confirm.
_unused_cols = ['fID','tF','lX','lY','vLen','vWid','spac','headway']
dataset = dataset[[col for col in dataset.columns if col not in _unused_cols]]

In [391]:
dataset


Out[391]:
vID Time gX gY vType vVel vAcc vLane vPrec vFoll
0 2 1118846984900 1.966345e+06 570955.526311 2 3.715193 0.000000 2 0 13
1 5 1118846984900 1.966343e+06 570947.061406 2 3.032355 -0.317728 4 0 8
2 8 1118846984900 1.966331e+06 570959.912078 2 3.403038 -0.561134 4 5 21
3 9 1118846984900 1.966318e+06 570978.013541 2 3.804379 0.640102 3 0 0
4 10 1118846984900 1.966327e+06 570978.935866 2 3.376096 -1.011714 1 0 12
5 2 1118846985500 1.966349e+06 570950.110015 2 3.690109 1.040514 3 0 13
6 5 1118846985500 1.966348e+06 570941.785318 2 3.710547 -0.023226 4 0 8
7 8 1118846985500 1.966335e+06 570955.029487 2 3.330574 -0.530476 3 5 21
8 9 1118846985500 1.966323e+06 570972.518606 2 3.711476 0.007432 3 0 0
9 10 1118846985500 1.966331e+06 570974.203236 2 3.251606 0.000000 1 0 12
10 12 1118846985500 1.966322e+06 570984.058944 2 3.866625 -1.040514 1 10 23
11 1917 1118847575200 1.966773e+06 570561.850289 2 4.187140 0.000000 5 0 1922
12 1919 1118847575200 1.966755e+06 570585.009602 2 3.254393 -0.016723 3 1915 1920

This code works!!

DO NOT TOUCH


In [421]:
times = dataset['Time'].unique()

dTime = pd.DataFrame()  # not used below; kept so the notebook namespace is unchanged

# Per-timestamp results are collected here and concatenated once after the
# loop. Growing `data` with DataFrame.append on every pass copies the whole
# frame each time, and DataFrame.append no longer exists in pandas 2.x.
frames = []

for time in times:
    print('Time %i ' % time)

    # Rows recorded at this timestamp, and the vehicles they belong to.
    dataTime0 = dataset.loc[dataset['Time'] == time]
    list_vIDs = dataTime0.vID.tolist()
    dataTime = dataTime0.set_index("vID")

    # Every ordered (from, to) pair of distinct vehicles.
    perm = list(itertools.permutations(list_vIDs, 2))
    if not perm:
        # Fewer than two vehicles at this timestamp: no pair to measure.
        continue

    # Euclidean distance between the (gX, gY) positions of each pair.
    # The pair columns are built straight from `perm` rather than by
    # round-tripping the tuples through the index and the `.str` accessor.
    dist = pd.DataFrame(
        {
            'dist': [((dataTime.loc[src, 'gX'] - dataTime.loc[dst, 'gX']) ** 2 +
                      (dataTime.loc[src, 'gY'] - dataTime.loc[dst, 'gY']) ** 2) ** 0.5
                     for src, dst in perm],
            'FromTo': perm,
            'vID': [src for src, dst in perm],
            'To': [dst for src, dst in perm],
        },
        columns=['dist', 'FromTo', 'vID', 'To'])

    # Attach the attributes of the "from" vehicle to each of its pairs; the
    # coordinates are no longer needed once the distance is known.
    dataTimeDist = pd.merge(dataTime0, dist, on='vID')
    dataTimeDist = dataTimeDist.drop(['gX', 'gY'], axis=1)

    print(dataTimeDist)

    frames.append(dataTimeDist)

# Row labels restart at 0 for every timestamp, exactly as with repeated append.
data = pd.concat(frames) if frames else pd.DataFrame()

data


Time 1118846984900 
    vID           Time  vType      vVel      vAcc  vLane  vPrec  vFoll  \
0     2  1118846984900      2  3.715193  0.000000      2      0     13   
1     2  1118846984900      2  3.715193  0.000000      2      0     13   
2     2  1118846984900      2  3.715193  0.000000      2      0     13   
3     2  1118846984900      2  3.715193  0.000000      2      0     13   
4     5  1118846984900      2  3.032355 -0.317728      4      0      8   
5     5  1118846984900      2  3.032355 -0.317728      4      0      8   
6     5  1118846984900      2  3.032355 -0.317728      4      0      8   
7     5  1118846984900      2  3.032355 -0.317728      4      0      8   
8     8  1118846984900      2  3.403038 -0.561134      4      5     21   
9     8  1118846984900      2  3.403038 -0.561134      4      5     21   
10    8  1118846984900      2  3.403038 -0.561134      4      5     21   
11    8  1118846984900      2  3.403038 -0.561134      4      5     21   
12    9  1118846984900      2  3.804379  0.640102      3      0      0   
13    9  1118846984900      2  3.804379  0.640102      3      0      0   
14    9  1118846984900      2  3.804379  0.640102      3      0      0   
15    9  1118846984900      2  3.804379  0.640102      3      0      0   
16   10  1118846984900      2  3.376096 -1.011714      1      0     12   
17   10  1118846984900      2  3.376096 -1.011714      1      0     12   
18   10  1118846984900      2  3.376096 -1.011714      1      0     12   
19   10  1118846984900      2  3.376096 -1.011714      1      0     12   

         dist   FromTo  To  
0    8.636367   (2, 5)   5  
1   14.525891   (2, 8)   8  
2   34.661598   (2, 9)   9  
3   29.350098  (2, 10)  10  
4    8.636367   (5, 2)   2  
5   17.675203   (5, 8)   8  
6   39.577558   (5, 9)   9  
7   35.660957  (5, 10)  10  
8   14.525891   (8, 2)   2  
9   17.675203   (8, 5)   5  
10  22.014576   (8, 9)   9  
11  19.410592  (8, 10)  10  
12  34.661598   (9, 2)   2  
13  39.577558   (9, 5)   5  
14  22.014576   (9, 8)   8  
15   8.722292  (9, 10)  10  
16  29.350098  (10, 2)   2  
17  35.660957  (10, 5)   5  
18  19.410592  (10, 8)   8  
19   8.722292  (10, 9)   9  
Time 1118846985500 
    vID           Time  vType      vVel      vAcc  vLane  vPrec  vFoll  \
0     2  1118846985500      2  3.690109  1.040514      3      0     13   
1     2  1118846985500      2  3.690109  1.040514      3      0     13   
2     2  1118846985500      2  3.690109  1.040514      3      0     13   
3     2  1118846985500      2  3.690109  1.040514      3      0     13   
4     2  1118846985500      2  3.690109  1.040514      3      0     13   
5     5  1118846985500      2  3.710547 -0.023226      4      0      8   
6     5  1118846985500      2  3.710547 -0.023226      4      0      8   
7     5  1118846985500      2  3.710547 -0.023226      4      0      8   
8     5  1118846985500      2  3.710547 -0.023226      4      0      8   
9     5  1118846985500      2  3.710547 -0.023226      4      0      8   
10    8  1118846985500      2  3.330574 -0.530476      3      5     21   
11    8  1118846985500      2  3.330574 -0.530476      3      5     21   
12    8  1118846985500      2  3.330574 -0.530476      3      5     21   
13    8  1118846985500      2  3.330574 -0.530476      3      5     21   
14    8  1118846985500      2  3.330574 -0.530476      3      5     21   
15    9  1118846985500      2  3.711476  0.007432      3      0      0   
16    9  1118846985500      2  3.711476  0.007432      3      0      0   
17    9  1118846985500      2  3.711476  0.007432      3      0      0   
18    9  1118846985500      2  3.711476  0.007432      3      0      0   
19    9  1118846985500      2  3.711476  0.007432      3      0      0   
20   10  1118846985500      2  3.251606  0.000000      1      0     12   
21   10  1118846985500      2  3.251606  0.000000      1      0     12   
22   10  1118846985500      2  3.251606  0.000000      1      0     12   
23   10  1118846985500      2  3.251606  0.000000      1      0     12   
24   10  1118846985500      2  3.251606  0.000000      1      0     12   
25   12  1118846985500      2  3.866625 -1.040514      1     10     23   
26   12  1118846985500      2  3.866625 -1.040514      1     10     23   
27   12  1118846985500      2  3.866625 -1.040514      1     10     23   
28   12  1118846985500      2  3.866625 -1.040514      1     10     23   
29   12  1118846985500      2  3.866625 -1.040514      1     10     23   

         dist    FromTo  To  
0    8.532400    (2, 5)   5  
1   15.162025    (2, 8)   8  
2   34.576029    (2, 9)   9  
3   30.276465   (2, 10)  10  
4   43.500070   (2, 12)  12  
5    8.532400    (5, 2)   2  
6   18.191300    (5, 8)   8  
7   39.279143    (5, 9)   9  
8   36.359139   (5, 10)  10  
9   49.279846   (5, 12)  12  
10  15.162025    (8, 2)   2  
11  18.191300    (8, 5)   5  
12  21.204421    (8, 9)   9  
13  19.585214   (8, 10)  10  
14  31.748862   (8, 12)  12  
15  34.576029    (9, 2)   2  
16  39.279143    (9, 5)   5  
17  21.204421    (9, 8)   8  
18   8.171956   (9, 10)  10  
19  11.572803   (9, 12)  12  
20  30.276465   (10, 2)   2  
21  36.359139   (10, 5)   5  
22  19.585214   (10, 8)   8  
23   8.171956   (10, 9)   9  
24  13.254505  (10, 12)  12  
25  43.500070   (12, 2)   2  
26  49.279846   (12, 5)   5  
27  31.748862   (12, 8)   8  
28  11.572803   (12, 9)   9  
29  13.254505  (12, 10)  10  
Time 1118847575200 
    vID           Time  vType      vVel      vAcc  vLane  vPrec  vFoll  \
0  1917  1118847575200      2  4.187140  0.000000      5      0   1922   
1  1919  1118847575200      2  3.254393 -0.016723      3   1915   1920   

        dist        FromTo    To  
0  29.564074  (1917, 1919)  1919  
1  29.564074  (1919, 1917)  1917  
Out[421]:
vID Time vType vVel vAcc vLane vPrec vFoll dist FromTo To
0 2 1118846984900 2 3.715193 0.000000 2 0 13 8.636367 (2, 5) 5
1 2 1118846984900 2 3.715193 0.000000 2 0 13 14.525891 (2, 8) 8
2 2 1118846984900 2 3.715193 0.000000 2 0 13 34.661598 (2, 9) 9
3 2 1118846984900 2 3.715193 0.000000 2 0 13 29.350098 (2, 10) 10
4 5 1118846984900 2 3.032355 -0.317728 4 0 8 8.636367 (5, 2) 2
5 5 1118846984900 2 3.032355 -0.317728 4 0 8 17.675203 (5, 8) 8
6 5 1118846984900 2 3.032355 -0.317728 4 0 8 39.577558 (5, 9) 9
7 5 1118846984900 2 3.032355 -0.317728 4 0 8 35.660957 (5, 10) 10
8 8 1118846984900 2 3.403038 -0.561134 4 5 21 14.525891 (8, 2) 2
9 8 1118846984900 2 3.403038 -0.561134 4 5 21 17.675203 (8, 5) 5
10 8 1118846984900 2 3.403038 -0.561134 4 5 21 22.014576 (8, 9) 9
11 8 1118846984900 2 3.403038 -0.561134 4 5 21 19.410592 (8, 10) 10
12 9 1118846984900 2 3.804379 0.640102 3 0 0 34.661598 (9, 2) 2
13 9 1118846984900 2 3.804379 0.640102 3 0 0 39.577558 (9, 5) 5
14 9 1118846984900 2 3.804379 0.640102 3 0 0 22.014576 (9, 8) 8
15 9 1118846984900 2 3.804379 0.640102 3 0 0 8.722292 (9, 10) 10
16 10 1118846984900 2 3.376096 -1.011714 1 0 12 29.350098 (10, 2) 2
17 10 1118846984900 2 3.376096 -1.011714 1 0 12 35.660957 (10, 5) 5
18 10 1118846984900 2 3.376096 -1.011714 1 0 12 19.410592 (10, 8) 8
19 10 1118846984900 2 3.376096 -1.011714 1 0 12 8.722292 (10, 9) 9
0 2 1118846985500 2 3.690109 1.040514 3 0 13 8.532400 (2, 5) 5
1 2 1118846985500 2 3.690109 1.040514 3 0 13 15.162025 (2, 8) 8
2 2 1118846985500 2 3.690109 1.040514 3 0 13 34.576029 (2, 9) 9
3 2 1118846985500 2 3.690109 1.040514 3 0 13 30.276465 (2, 10) 10
4 2 1118846985500 2 3.690109 1.040514 3 0 13 43.500070 (2, 12) 12
5 5 1118846985500 2 3.710547 -0.023226 4 0 8 8.532400 (5, 2) 2
6 5 1118846985500 2 3.710547 -0.023226 4 0 8 18.191300 (5, 8) 8
7 5 1118846985500 2 3.710547 -0.023226 4 0 8 39.279143 (5, 9) 9
8 5 1118846985500 2 3.710547 -0.023226 4 0 8 36.359139 (5, 10) 10
9 5 1118846985500 2 3.710547 -0.023226 4 0 8 49.279846 (5, 12) 12
10 8 1118846985500 2 3.330574 -0.530476 3 5 21 15.162025 (8, 2) 2
11 8 1118846985500 2 3.330574 -0.530476 3 5 21 18.191300 (8, 5) 5
12 8 1118846985500 2 3.330574 -0.530476 3 5 21 21.204421 (8, 9) 9
13 8 1118846985500 2 3.330574 -0.530476 3 5 21 19.585214 (8, 10) 10
14 8 1118846985500 2 3.330574 -0.530476 3 5 21 31.748862 (8, 12) 12
15 9 1118846985500 2 3.711476 0.007432 3 0 0 34.576029 (9, 2) 2
16 9 1118846985500 2 3.711476 0.007432 3 0 0 39.279143 (9, 5) 5
17 9 1118846985500 2 3.711476 0.007432 3 0 0 21.204421 (9, 8) 8
18 9 1118846985500 2 3.711476 0.007432 3 0 0 8.171956 (9, 10) 10
19 9 1118846985500 2 3.711476 0.007432 3 0 0 11.572803 (9, 12) 12
20 10 1118846985500 2 3.251606 0.000000 1 0 12 30.276465 (10, 2) 2
21 10 1118846985500 2 3.251606 0.000000 1 0 12 36.359139 (10, 5) 5
22 10 1118846985500 2 3.251606 0.000000 1 0 12 19.585214 (10, 8) 8
23 10 1118846985500 2 3.251606 0.000000 1 0 12 8.171956 (10, 9) 9
24 10 1118846985500 2 3.251606 0.000000 1 0 12 13.254505 (10, 12) 12
25 12 1118846985500 2 3.866625 -1.040514 1 10 23 43.500070 (12, 2) 2
26 12 1118846985500 2 3.866625 -1.040514 1 10 23 49.279846 (12, 5) 5
27 12 1118846985500 2 3.866625 -1.040514 1 10 23 31.748862 (12, 8) 8
28 12 1118846985500 2 3.866625 -1.040514 1 10 23 11.572803 (12, 9) 9
29 12 1118846985500 2 3.866625 -1.040514 1 10 23 13.254505 (12, 10) 10
0 1917 1118847575200 2 4.187140 0.000000 5 0 1922 29.564074 (1917, 1919) 1919
1 1919 1118847575200 2 3.254393 -0.016723 3 1915 1920 29.564074 (1919, 1917) 1917

Computing the GRAPH

IT WORKS DO NOT TOUCH!!


In [559]:
def save_graph(graph,file_name):
    """Draw `graph` with a spring layout and save the picture to `file_name`.

    Parameters
    ----------
    graph : networkx graph
        Nodes, edges and node labels are drawn.
    file_name : str
        Path handed to ``savefig``.
    """
    # Keep a handle on the figure created here and draw on its own axes.
    # The previous version created a figure and then switched to
    # ``plt.figure(1)``, which is a *different* figure whenever other figures
    # are open: the drawing landed on the wrong canvas and the 20x20 figure
    # was never closed.
    fig = plt.figure(num=None, figsize=(20, 20), dpi=80)
    ax = fig.gca()
    ax.axis('off')
    try:
        pos = nx.spring_layout(graph)
        nx.draw_networkx_nodes(graph, pos, ax=ax)
        nx.draw_networkx_edges(graph, pos, ax=ax)
        nx.draw_networkx_labels(graph, pos, ax=ax)

        fig.savefig(file_name, bbox_inches="tight")
    finally:
        # Release the figure even when layout, drawing or saving fails.
        plt.close(fig)

In [554]:
times = dataset['Time'].unique()

# Empty placeholders, as in the original cell (none of them is filled below).
data = pd.DataFrame()
data_graph = pd.DataFrame()
dTime = pd.DataFrame()

for time in times:
    # Rows recorded at this timestamp, and the vehicles they belong to.
    dataTime0 = dataset.loc[dataset['Time'] == time]
    list_vIDs = dataTime0.vID.tolist()
    dataTime = dataTime0.set_index("vID")

    # Every ordered (from, to) pair of distinct vehicles.
    perm = list(itertools.permutations(list_vIDs, 2))

    # Euclidean distance between the (gX, gY) positions of each pair.
    dist = [((dataTime.loc[src, 'gX'] - dataTime.loc[dst, 'gX']) ** 2 +
             (dataTime.loc[src, 'gY'] - dataTime.loc[dst, 'gY']) ** 2) ** 0.5
            for src, dst in perm]
    # `columns` must be a list: a set has no order and newer pandas rejects it.
    dataDist = pd.DataFrame(dist, index=perm, columns=['dist'])

    # Endpoints of each pair, taken directly from `perm` (no intermediate
    # 'FromTo' column that has to be dropped again afterwards).
    dataDist['vID'] = [src for src, dst in perm]
    dataDist['To'] = [dst for src, dst in perm]
    # Inverse distance, multiplied by 100 to scale the number.
    dataDist['inv_dist'] = (1/dataDist.dist)*100

    # With pivot and the 3 columns I can generate the square matrix.
    # Here is where I should have the condition of the max distance: THRESHOLD
    dataGraph = dataDist.pivot(index='vID', columns='To', values='inv_dist').fillna(0)

    print(dataDist)

    # Building and saving a graph from the square matrix is left disabled:
    #graph = nx.from_numpy_matrix(dataGraph.values)
    #graph = nx.relabel_nodes(graph, dict(enumerate(dataGraph.columns)))
    #save_graph(graph,'my_graph+%i.png' %time)


              dist  vID  To   inv_dist
(2, 5)    8.636367    2   5  11.578943
(2, 8)   14.525891    2   8   6.884259
(2, 9)   34.661598    2   9   2.885037
(2, 10)  29.350098    2  10   3.407144
(5, 2)    8.636367    5   2  11.578943
(5, 8)   17.675203    5   8   5.657644
(5, 9)   39.577558    5   9   2.526684
(5, 10)  35.660957    5  10   2.804187
(8, 2)   14.525891    8   2   6.884259
(8, 5)   17.675203    8   5   5.657644
(8, 9)   22.014576    8   9   4.542445
(8, 10)  19.410592    8  10   5.151826
(9, 2)   34.661598    9   2   2.885037
(9, 5)   39.577558    9   5   2.526684
(9, 8)   22.014576    9   8   4.542445
(9, 10)   8.722292    9  10  11.464876
(10, 2)  29.350098   10   2   3.407144
(10, 5)  35.660957   10   5   2.804187
(10, 8)  19.410592   10   8   5.151826
(10, 9)   8.722292   10   9  11.464876
               dist  vID  To   inv_dist
(2, 5)     8.532400    2   5  11.720031
(2, 8)    15.162025    2   8   6.595425
(2, 9)    34.576029    2   9   2.892177
(2, 10)   30.276465    2  10   3.302896
(2, 12)   43.500070    2  12   2.298847
(5, 2)     8.532400    5   2  11.720031
(5, 8)    18.191300    5   8   5.497133
(5, 9)    39.279143    5   9   2.545880
(5, 10)   36.359139    5  10   2.750340
(5, 12)   49.279846    5  12   2.029227
(8, 2)    15.162025    8   2   6.595425
(8, 5)    18.191300    8   5   5.497133
(8, 9)    21.204421    8   9   4.715998
(8, 10)   19.585214    8  10   5.105893
(8, 12)   31.748862    8  12   3.149719
(9, 2)    34.576029    9   2   2.892177
(9, 5)    39.279143    9   5   2.545880
(9, 8)    21.204421    9   8   4.715998
(9, 10)    8.171956    9  10  12.236972
(9, 12)   11.572803    9  12   8.640949
(10, 2)   30.276465   10   2   3.302896
(10, 5)   36.359139   10   5   2.750340
(10, 8)   19.585214   10   8   5.105893
(10, 9)    8.171956   10   9  12.236972
(10, 12)  13.254505   10  12   7.544605
(12, 2)   43.500070   12   2   2.298847
(12, 5)   49.279846   12   5   2.029227
(12, 8)   31.748862   12   8   3.149719
(12, 9)   11.572803   12   9   8.640949
(12, 10)  13.254505   12  10   7.544605
                   dist   vID    To  inv_dist
(1917, 1919)  29.564074  1917  1919  3.382484
(1919, 1917)  29.564074  1919  1917  3.382484

Using from_pandas_dataframe


In [574]:
def save_graph(graph,my_weight,file_name):
    """Draw `graph` with a weighted spring layout and save it to `file_name`.

    Parameters
    ----------
    graph : networkx graph
        Nodes, edges and node labels are drawn.
    my_weight : str
        Name of the edge attribute passed to ``spring_layout`` as ``weight``.
    file_name : str
        Path handed to ``savefig``.
    """
    # Keep a handle on the figure created here and draw on its own axes.
    # The previous version created a figure and then switched to
    # ``plt.figure(1)``, which is a *different* figure whenever other figures
    # are open: the drawing landed on the wrong canvas and the 20x20 figure
    # was never closed.
    fig = plt.figure(num=None, figsize=(20, 20), dpi=80)
    ax = fig.gca()
    ax.axis('off')
    try:
        # Pass the parameter itself. The quoted literal 'my_weight' named an
        # edge attribute that does not exist, so the weights were ignored.
        pos = nx.spring_layout(graph, weight=my_weight)
        nx.draw_networkx_nodes(graph, pos, ax=ax)
        nx.draw_networkx_edges(graph, pos, ax=ax)
        nx.draw_networkx_labels(graph, pos, ax=ax)

        fig.savefig(file_name, bbox_inches="tight")
    finally:
        # Release the figure even when layout, drawing or saving fails.
        plt.close(fig)

In [575]:
times = dataset['Time'].unique()

# Empty placeholders, as in the original cell (neither is filled below).
data = pd.DataFrame()
dTime = pd.DataFrame()

# Newer networkx releases provide the edge-list constructor as
# `from_pandas_edgelist`; fall back to the older `from_pandas_dataframe`
# name when it is missing. Both are called as (df, source, target, edge_attr).
_edges_to_graph = getattr(nx, 'from_pandas_edgelist', None) or nx.from_pandas_dataframe

for time in times:
    # Rows recorded at this timestamp, and the vehicles they belong to.
    dataTime0 = dataset.loc[dataset['Time'] == time]
    list_vIDs = dataTime0.vID.tolist()
    dataTime = dataTime0.set_index("vID")

    # Every ordered (from, to) pair of distinct vehicles.
    perm = list(itertools.permutations(list_vIDs, 2))

    # Euclidean distance between the (gX, gY) positions of each pair.
    dist = [((dataTime.loc[src, 'gX'] - dataTime.loc[dst, 'gX']) ** 2 +
             (dataTime.loc[src, 'gY'] - dataTime.loc[dst, 'gY']) ** 2) ** 0.5
            for src, dst in perm]
    # `columns` must be a list: a set has no order and newer pandas rejects it.
    dataDist = pd.DataFrame(dist, index=perm, columns=['dist'])

    # Endpoints of each pair, taken directly from `perm` (no intermediate
    # 'FromTo' column that has to be dropped again afterwards).
    dataDist['From'] = [src for src, dst in perm]
    dataDist['To'] = [dst for src, dst in perm]
    # I multiply by 100 in order to scale the number
    dataDist['weight'] = (1/dataDist.dist)*100

    # One edge per vehicle pair, carrying the inverse distance as 'weight'.
    graph = _edges_to_graph(dataDist, 'From', 'To', ['weight'])

    save_graph(graph,'weight','000_my_graph+%i.png' %time)

In [581]:
dataDist


Out[581]:
dist From To weight
(1917, 1919) 29.564074 1917 1919 3.382484
(1919, 1917) 29.564074 1919 1917 3.382484

In [ ]:


In [582]:
graph[1917][1919]['weight']


Out[582]:
3.3824837714769527

In [ ]: