In [ ]:
# IPython magic: clear the user-defined names from the interactive namespace
# so the cells below start from a clean state.
%reset

In [5]:
from datetime import datetime
import pandas as pd
import time
import csv
import math


def calculateDistance(x1,y1,x2,y2):
    """Return the Euclidean distance between the points (x1, y1) and (x2, y2).

    The computation is delegated to ``math.hypot`` instead of squaring the
    coordinate differences by hand, so very large differences no longer raise
    ``OverflowError`` while the squares are being formed.

    Parameters
    ----------
    x1, y1 : numbers
        Coordinates of the first point.
    x2, y2 : numbers
        Coordinates of the second point.

    Returns
    -------
    float
        The straight-line distance between the two points.
    """
    return math.hypot(x2 - x1, y2 - y1)

#print calculateDistance(x1, y1, x2, y2) 



# Compute, for every record, the distance to every other vehicle that was
# recorded at the same timestamp, and write the result as tab-separated text.
#
# Columns read from the input (0-based): 0 = vehicle ID, 3 = timestamp,
# 6 and 7 = the two coordinates passed to calculateDistance, 10 = a value
# copied through unchanged as the last output column.
in_path = 'D:\\zzzLola\\PhD\\DataSet\\US101\\test\\100ts.txt'
out_path = 'D:\\zzzLola\\PhD\\DataSet\\US101\\test\\dist_100ts.txt'

# Read the input once; the `with` block guarantees the handle is closed
# (previously a second, never-closed handle was opened for the inner loop).
# Blank lines come back from csv.reader as empty lists and are skipped.
with open(in_path) as file1:
    rows = [row for row in csv.reader(file1, delimiter='\t') if row]

# Index the records by timestamp so each record is only compared against the
# records sharing its timestamp instead of against the whole file. Each list
# keeps file order, so the output order is the same as a full nested scan.
rows_by_ts = {}
for row in rows:
    rows_by_ts.setdefault(row[3], []).append(row)

with open(out_path, 'w') as file_out:
    writer = csv.writer(file_out, delimiter='\t', lineterminator='\n')

    for row1 in rows:
        for row2 in rows_by_ts[row1[3]]:
            # Only pair points that belong to different vehicles.
            if row1[0] == row2[0]:
                continue
            dist = calculateDistance(float(row1[6]), float(row1[7]),
                                     float(row2[6]), float(row2[7]))
            # Output columns: timestamp, vehicle ID 1, vehicle ID 2,
            # distance, column 10 of the first record.
            writer.writerow([row1[3], row1[0], row2[0], str(dist), str(row1[10])])

In [116]:
# Labels assigned to the four columns of the header-less distance file.
c_names = ['timestamp', 'vID_o', 'vID_d', 'dist']

# Load the tab-separated distance file into a DataFrame.
distances = pd.read_csv(
    'D:\\zzzLola\\PhD\\DataSet\\US101\\test\\dist.txt',
    sep='\t',
    header=None,
    names=c_names,
)

In [117]:
# Display the loaded distance table (the notebook renders a truncated view).
distances


Out[117]:
timestamp vID_o vID_d dist
0 1118846980200 2 5 33.229792
1 1118846980300 2 5 33.229792
2 1118846980400 2 5 33.228793
3 1118846980500 2 5 33.229751
4 1118846980600 2 5 33.229792
5 1118846980700 2 5 33.228793
6 1118846980800 2 5 33.229792
7 1118846980900 2 5 33.229792
8 1118846981000 2 5 33.228751
9 1118846981100 2 5 33.229792
10 1118846981200 2 5 33.208608
11 1118846981300 2 5 33.227132
12 1118846981400 2 5 33.336621
13 1118846981500 2 5 33.479722
14 1118846981600 2 5 33.588825
15 1118846981700 2 5 33.630050
16 1118846981800 2 5 33.590917
17 1118846981900 2 5 33.469737
18 1118846982000 2 5 33.347473
19 1118846982100 2 5 33.226709
20 1118846982200 2 5 33.208650
21 1118846982300 2 5 33.228793
22 1118846982400 2 5 33.229792
23 1118846982500 2 5 33.229792
24 1118846982600 2 5 33.229792
25 1118846982700 2 5 33.229792
26 1118846982800 2 5 33.229792
27 1118846982900 2 5 33.229792
28 1118846983000 2 5 33.246280
29 1118846983100 2 5 33.249679
... ... ... ... ...
110940 1118847026700 216 4 2075.236479
110941 1118847026700 216 6 2003.147881
110942 1118847026800 216 6 2005.195079
110943 1118847026900 216 6 2007.257628
110944 1118847027000 216 6 2009.326349
110945 1118847027100 216 6 2011.355955
110946 1118847027200 216 6 2013.408414
110947 1118847027300 216 6 2015.578361
110948 1118847027400 216 6 2017.705072
110949 1118847027500 216 6 2019.706931
110950 1118847027600 216 6 2021.704958
110951 1118847027700 216 6 2023.797811
110952 1118847027800 216 6 2026.042606
110953 1118847027900 216 6 2028.412340
110954 1118847028000 216 6 2030.716956
110955 1118847028100 216 6 2033.088126
110956 1118847028200 216 6 2035.603534
110957 1118847028300 216 6 2038.257239
110958 1118847028400 216 6 2041.054453
110959 1118847028500 216 6 2043.919571
110960 1118847028600 216 6 2046.753280
110961 1118847027800 220 6 2083.399848
110962 1118847027900 220 6 2085.766594
110963 1118847028000 220 6 2088.099204
110964 1118847028100 220 6 2090.431526
110965 1118847028200 220 6 2092.764843
110966 1118847028300 220 6 2095.097141
110967 1118847028400 220 6 2097.400835
110968 1118847028500 220 6 2099.776557
110969 1118847028600 220 6 2102.231965

110970 rows × 4 columns


In [122]:
# Average the remaining columns for every (timestamp, vID_o) pair.
# NOTE(review): the mean is also taken over vID_d, which looks like an
# identifier column (the rendered output shows values such as 3.5) -- confirm
# whether only 'dist' should be averaged.
dist_ts = (
    distances
    .groupby(by=['timestamp', 'vID_o'])
    .mean()
)

In [123]:
# Display the grouped means held in dist_ts.
dist_ts


Out[123]:
vID_d dist
timestamp vID_o
1118846980200 2 5.0 33.229792
5 2.0 33.229792
1118846980300 2 5.0 33.229792
5 2.0 33.229792
1118846980400 2 5.0 33.228793
5 2.0 33.228793
1118846980500 2 5.0 33.229751
5 2.0 33.229751
1118846980600 2 5.0 33.229792
5 2.0 33.229792
1118846980700 2 5.0 33.228793
5 2.0 33.228793
1118846980800 2 5.0 33.229792
5 2.0 33.229792
1118846980900 2 5.0 33.229792
5 2.0 33.229792
1118846981000 2 5.0 33.228751
5 2.0 33.228751
1118846981100 2 5.0 33.229792
5 2.0 33.229792
13 3.5 49.643380
1118846981200 2 5.0 33.208608
5 2.0 33.208608
13 3.5 49.659262
1118846981300 2 5.0 33.227132
5 2.0 33.227132
13 3.5 49.638807
1118846981400 2 5.0 33.336621
5 2.0 33.336621
13 3.5 50.051401
... ... ... ...
1118847028600 163 6.0 1706.796479
164 6.0 1705.933245
167 6.0 1733.487544
169 6.0 1844.828471
170 6.0 1732.772386
174 6.0 1899.631218
175 6.0 1779.045284
176 6.0 1829.753790
179 6.0 1830.828562
180 6.0 1829.539209
181 6.0 1877.246257
182 6.0 1788.916197
183 6.0 1971.994403
184 6.0 1890.755858
190 6.0 2000.520373
191 6.0 1904.542866
192 6.0 1871.061029
194 6.0 1930.598507
196 6.0 1965.277446
198 6.0 2045.346492
199 6.0 1948.983203
200 6.0 2024.048435
203 6.0 2079.096924
204 6.0 2116.004679
205 6.0 2057.727665
206 6.0 2017.684943
213 6.0 2058.677899
215 6.0 2096.307909
216 6.0 2046.753280
220 6.0 2102.231965

32180 rows × 2 columns


In [125]:
# Copy to dist_noRep.txt every record of dist.txt for which the mirrored
# record exists, i.e. a record with the same column 0 whose columns 1 and 2
# are swapped. A record is written once per mirrored record found.
#
# NOTE(review): judging by the output file name this step is presumably meant
# to drop repeated (a, b) / (b, a) pairs, but the condition keeps both
# directions whenever both are present -- confirm the intended filter.
dist_path = 'D:\\zzzLola\\PhD\\DataSet\\US101\\test\\dist.txt'
no_rep_path = 'D:\\zzzLola\\PhD\\DataSet\\US101\\test\\dist_noRep.txt'

# Read the input once; the `with` block guarantees the handle is closed
# (previously a second, never-closed handle was opened for the inner loop).
# Blank lines come back from csv.reader as empty lists and are skipped.
with open(dist_path) as dist1:
    rows = [row for row in csv.reader(dist1, delimiter='\t') if row]

# Count how often each (col 0, col 1, col 2) triple occurs so the mirrored
# record can be looked up directly instead of rescanning the whole file.
triple_counts = {}
for row in rows:
    key = (row[0], row[1], row[2])
    triple_counts[key] = triple_counts.get(key, 0) + 1

with open(no_rep_path, 'w') as file_out:
    writer = csv.writer(file_out, delimiter='\t', lineterminator='\n')

    for row1 in rows:
        mirrored = (row1[0], row1[2], row1[1])
        # One output line per mirrored record, matching a full nested scan.
        for _ in range(triple_counts.get(mirrored, 0)):
            writer.writerow([row1[0], row1[1], row1[2], row1[3]])

In [ ]: