In [1]:
%matplotlib inline

import csv
import json

import pandas as pd

In [47]:
# Build `buses`: the distinct bus identifiers listed under the 'buses' key of
# every entry in interchangeInfo.json.
with open('/Users/ramz.sivagurunathan/hacks/hackathon/repos/transport-analyser/data/interchangeInfo.json') as fp:
    interchangeInfo = json.load(fp)

# Only the values are needed, so iterate .values() rather than the
# Python-2-only .iteritems() (works unchanged on Python 2 and 3).
# The set drops buses that appear under more than one entry; the order of the
# resulting list is therefore arbitrary.
buses = list({bus for info in interchangeInfo.values() for bus in info['buses']})

In [18]:
def nearly_equal(a, b, sig_fig=5):
    """Return True when ``a`` and ``b`` are within ``10**-sig_fig`` of each other.

    Despite its name, ``sig_fig`` is a number of decimal places, not of
    significant figures: ``sig_fig=4`` accepts differences below 0.0001.

    The comparison is distance based. Truncating both scaled values with
    ``int()`` compares decimal *buckets* instead, which has three problems:
    values a hair apart but on opposite sides of a bucket edge (1.99999 vs
    2.0 at 4 places) compare unequal, values straddling zero get a
    double-width window, and NaN / infinity raise from ``int()``.

    Parameters
    ----------
    a, b : numbers to compare.
    sig_fig : int, number of decimal places that must agree (default 5).

    Returns
    -------
    bool-like: True if the values are equal or differ by less than
    ``10**-sig_fig``; False otherwise (including when either value is NaN).
    """
    # Exact equality first: also covers equal infinities, where a - b is NaN.
    if a == b:
        return True
    return abs(a - b) < 10.0 ** -sig_fig

In [2]:
# Load the Opal CSV extract; later cells read its ORIGINLAT, ORIGINLONG,
# MASKEDCIN and JOURNEYSEGMENTTIME columns.
df = pd.read_csv(
    filepath_or_buffer='/Users/ramz.sivagurunathan/hacks/hackathon/datasets/opal/OpalInsight-4May15.csv',
)

In [51]:
# Group the journey rows by origin coordinates. Iterating the result yields
# ((ORIGINLAT, ORIGINLONG), rows) pairs.
user_travel = df.groupby(by=['ORIGINLAT', 'ORIGINLONG'])

In [79]:
# Annotate each bus JSON file, in place, with:
#   * trips[0]['stops'][i]['usage']  - number of journey rows whose origin
#     coordinates match stop i (only written for stops that matched), and
#   * trips[0]['avg_travel_time']    - summed JOURNEYSEGMENTTIME of all matched
#     rows divided by the number of matched rows (0 when nothing matched).
# Relies on `buses`, `user_travel` and `nearly_equal` from earlier cells.

# The per-origin aggregates do not depend on the bus, so compute them once
# rather than re-walking every group for every bus.
# Each entry is ((lat, lon), row_count, summed_journey_time).
# skipna=False keeps a missing time from being silently dropped from the sum
# while its row is still counted; float() keeps the total JSON-serialisable.
origin_stats = [
    (geoinfo, len(data), float(data['JOURNEYSEGMENTTIME'].sum(skipna=False)))
    for geoinfo, data in user_travel
]

for bus in buses:
    bus_path = '/Users/ramz.sivagurunathan/hacks/hackathon/repos/transport-analyser/data/buses/{0}.json'.format(bus)
    with open(bus_path) as fp:
        bus_info = json.load(fp)

    trip = bus_info['trips'][0]
    trip_stops = trip['stops']
    # A real list (not map()) because it is scanned once per origin group.
    stop_coords = [(float(stop['lat']), float(stop['lon'])) for stop in trip_stops]

    total_time = 0.0
    total_users = 0
    usage_by_stop = {}
    for (origin_lat, origin_lon), journey_count, journey_time in origin_stats:
        for idx, (stop_lat, stop_lon) in enumerate(stop_coords):
            if nearly_equal(stop_lat, origin_lat, 4) and nearly_equal(stop_lon, origin_lon, 4):
                total_time += journey_time
                total_users += journey_count
                # Accumulate: several origin groups can match the same stop,
                # and all of them are counted in total_users.
                usage_by_stop[idx] = usage_by_stop.get(idx, 0) + journey_count
                break  # each origin is attributed to its first matching stop only

    for idx, usage in usage_by_stop.items():
        trip_stops[idx]['usage'] = usage

    # total_time is a float, so this is a true division on Python 2 as well.
    trip['avg_travel_time'] = total_time / total_users if total_users > 0 else 0

    with open(bus_path, 'w') as fp:
        json.dump(bus_info, fp, indent=4)

In [48]:
# Build `stops`: stop id -> {'lat', 'long', 'name'} taken from the first trip
# of every bus file. A stop id seen again in a later bus replaces the earlier
# entry.
stops = {}
for bus in buses:
    bus_path = '/Users/ramz.sivagurunathan/hacks/hackathon/repos/transport-analyser/data/buses/{0}.json'.format(bus)
    with open(bus_path) as fp:
        bus_info = json.load(fp)
    first_trip_stops = bus_info['trips'][0]['stops']
    for stop in first_trip_stops:
        stops[stop['id']] = dict(lat=stop['lat'], long=stop['lon'], name=stop['name'])

In [50]:
# Write one "name, lat, long" row per entry of `stops` to bus_stops.csv
# (no header row is written).
with open('/Users/ramz.sivagurunathan/hacks/hackathon/repos/transport-analyser/data/bus_stops.csv', 'w') as fp:
    writer = csv.writer(fp)
    # The stop ids (dict keys) are not part of the output, so iterate
    # .values() rather than the Python-2-only .iteritems().
    writer.writerows(
        [stop['name'], stop['lat'], stop['long']] for stop in stops.values()
    )

In [ ]:
# NOTE(review): as far as this cell is visible, it reopens bus_stops.csv in
# 'w' mode and creates a csv writer but writes no rows. Opening with 'w'
# truncates the file, so running the cell as shown would leave the CSV empty.
# Confirm whether this cell is unfinished or continues beyond what is shown.
import csv
with open('/Users/ramz.sivagurunathan/hacks/hackathon/repos/transport-analyser/data/bus_stops.csv', 'w') as fp:
    writer = csv.writer(fp)