In [1]:
import csv
import pandas as pd
import numpy as np
import src.Paths as path

import sys
import os

# Log the working directory (the relative data paths used in this notebook
# resolve against it) and the interpreter version, one per line.
print(os.getcwd(), sys.version_info, sep="\n")


C:\Users\Chrisi\Desktop\Studium\BigDataScience\sose17-small-data\python\traffic-prediction
sys.version_info(major=3, minor=5, micro=2, releaselevel='final', serial=0)

In [2]:
# Display the trajectories training-file path exposed by src.Paths (imported as `path`).
path.trajectories_training_file


Out[2]:
'../../../new_dataset/training/trajectories(table 5)_training.csv'

In [3]:
# File names of the raw CSV tables.
links_file = "links (table 3).csv"
routes_file = "routes (table 4).csv"
trajectories_file = "trajectories(table 5)_training.csv"
volume_file = "volume(table 6)_training.csv"
weather_file = "weather (table 7)_training.csv"

# Routes, links, weather and volume are read (in that order) from the
# original dataset folder.
training_files = "../../dataset/training/"
routes_df, links_df, weather_df, volume_df = [
    pd.read_csv(training_files + table_file)
    for table_file in (routes_file, links_file, weather_file, volume_file)
]

# Trajectories are read from the "new_dataset" folder instead. `training_files`
# is rebound here and keeps this value for any later cell that uses it.
# NOTE(review): path.trajectories_training_file starts with "../../../" while
# this folder starts with "../../" -- confirm which base directory is intended.
training_files = "../../new_dataset/training/"
trajectories_df = pd.read_csv(training_files + trajectories_file)

In [3]:
# Display the routes table loaded from "routes (table 4).csv".
routes_df


Out[3]:
intersection_id tollgate_id link_seq
0 A 2 110,123,107,108,120,117
1 A 3 110,123,107,108,119,114,118,122
2 B 1 105,100,111,103,116,101,121,106,113
3 B 3 105,100,111,103,122
4 C 1 115,102,109,104,112,111,103,116,101,121,106,113
5 C 3 115,102,109,104,112,111,103,122

In [4]:
# Show the rows whose index value has already occurred earlier in the frame
# (Index.duplicated() flags every occurrence except the first by default).
# NOTE(review): `df` is not defined in any cell visible above this one --
# confirm where it is built so the notebook survives Restart & Run All.
df.loc[df.index.duplicated()]


Out[4]:
A 3 1003251 110#2016-10-17 23:52:18#7.59;123#2016-10-17 23... 1017582 110#2016-10-17 23:54:35#9.10;123#2016-10-17 23... travel_time intersection_id tollgate_id vehicle_id C 1 1063652 187.30 A 2 1006723 73.25 1024591 39.99 1048805 81.56 1056928 41.82 C 1 1034865 165.19 A 2 1056431 54.49 1041225 89.59 1000688 84.44 1008967 60.29 1048805 91.48 1006723 80.88 3 1071183 166.99 2 1001434 60.92 1007748 49.66 3 1056578 155.45 2 1008227 144.21 C 1 1053059 340.41 A 2 1007808 103.29 3 1064766 89.75 B 3 1049332 51.95 1056935 171.71 1056848 100.55 1040366 131.78 A 2 1002179 57.64 B 3 1056788 145.83 A 2 1003848 73.69 1005189 18.68 1004088 33.89 B 3 1056935 125.75 ... ... 1008641 92.76 A 3 1009586 78.12 1022134 83.25 B 1 1001764 119.02 A 3 1008732 112.61 2 1006162 46.06 1003873 77.53 1002030 54.51 3 1032189 109.70 2 1032027 35.63 1008280 51.05 3 1013537 118.87 1046943 82.47 1000004 111.19 2 1003831 48.07 3 1002444 88.86 1001297 86.92 2 1005824 31.83 3 1020146 71.97 B 1 1014648 136.78 A 2 1000196 49.99 3 1011400 83.76 2 1002950 48.36 1000448 48.04 1004272 61.68 1012726 55.54 3 1004601 222.87 B 1 1003456 97.54 A 3 1003251 71.94 1017582 137.38 [25499 rows x 3 columns]
link_id length width lanes in_top out_top lane_width
0 100 58 3 1 105 111 3
1 101 84 3 1 116 121 3
2 102 131 9 3 115 109 3
3 103 23 12 4 111 122,116 3
4 104 293 9 3 109 112 3
5 105 78 6 2 NaN 100 3
6 106 15 3 1 121 113 3
7 107 34 9 3 123 108 3
8 108 40 9 3 107 119,120 3
9 109 135 9 3 102 104 3
10 110 109 9 3