In [14]:
import os
import pandas as pd
import numpy as np
import sys
import shapefile
import operator

Read shapefile


In [15]:
# Load the link shapefile and build a lookup from each record's second
# attribute field (record[1]) to the list of points of the matching shape.
sf_path = os.path.join('new_link', 'new_link_latlon')
link_sf = shapefile.Reader(sf_path)
shapeRecs = link_sf.shapeRecords()

# If two records share the same key, the later one wins.
link_map = {
    shape_rec.record[1]: shape_rec.shape.points
    for shape_rec in shapeRecs
}

Read the vehicle location file and process the locations


In [20]:
# Convert the raw vehicle-location records into a CSV of
# (time_interval, longitude, latitude) rows.
#
# Each input line is split on single spaces; the fields used are:
#   data[1] - integer raw time value
#   data[2] - key into link_map (built from the shapefile in an earlier cell)
#   data[3] - float giving the fractional position along that link's points
vehloc_path = os.path.join('..', '..', '..', 'data', 'input_files_MckeesRocks_SPC', 'veh_loc', 'veh_loc_raw.txt')
out_path = os.path.join('..', '..', '..', 'data', 'input_files_MckeesRocks_SPC', 'veh_loc', 'veh_loc_final.csv')

TIME_BIN_SIZE = 60      # raw time is integer-divided by this (presumably seconds -> minutes; TODO confirm)
MAX_TIME_INTERVAL = 70  # only records whose binned time is strictly below this are exported
KEEP_EVERY_NTH = 2      # of the in-window records, only every 2nd one is written


def _point_at_proportion(points, proportion):
    """Return the element of ``points`` at fraction ``proportion`` along the list.

    The index is floor(proportion * len(points)), clamped to the valid range
    [0, len(points) - 1]. Without the clamp, a proportion of exactly 1.0 would
    index one past the end (IndexError) and a slightly negative proportion
    would silently wrap around to the last point.

    Raises:
        IndexError: if ``points`` is empty.
    """
    idx = int(np.floor(proportion * len(points)))
    idx = max(0, min(idx, len(points) - 1))
    return points[idx]


rows = ['time_interval,longitude,latitude\n']
_line_count = 0  # number of records with binned time below MAX_TIME_INTERVAL
max_time = 0     # largest binned time seen over ALL records, not just exported ones
with open(vehloc_path, 'r') as infile:
    # Iterate the file lazily instead of loading every line with readlines().
    for line in infile:
        if not line.strip():
            # Skip blank lines (e.g. a trailing newline) instead of crashing on data[1].
            continue
        data = line.rstrip().split(' ')
        time_interval = int(data[1]) // TIME_BIN_SIZE
        if time_interval > max_time:
            max_time = time_interval
        if time_interval >= MAX_TIME_INTERVAL:
            continue
        _line_count += 1
        if _line_count % KEEP_EVERY_NTH != 0:
            continue
        proportion = float(data[3])
        point = _point_at_proportion(link_map[data[2]], proportion)
        # point[0] / point[1] are written under the longitude / latitude headers.
        rows.append(','.join(str(e) for e in (time_interval, round(point[0], 7), round(point[1], 7))) + '\n')

# Join once at the end rather than growing a string inside the loop.
final_str = ''.join(rows)

# The context manager guarantees the output file is closed even if the write fails.
with open(out_path, 'w') as outfile:
    outfile.write(final_str)

print(max_time)
print(_line_count)


70
952848

In [ ]: