In [1]:
# Merge Traveline bus stop data with Naptan information
import pandas as pd
from pandas import read_csv

In [6]:
# Import the stops/route data (generated by traveline.py script)
stops_df = read_csv('data/AnnotatedStopPointRef.csv')
routes_df = read_csv('data/RouteSection.csv')

In [3]:
# Import Naptan stops (this file is downloaded from http://data.gov.uk/dataset/naptan - it's the zip of CSV files)
naptan_df = read_csv('../data/naptan/NaPTANcsv/Stops.csv')


/home/normal/Projects/HackDays/BathHacked2014/env/local/lib/python2.7/site-packages/pandas/io/parsers.py:1150: DtypeWarning: Columns (1,2,5,6,7,9,11,12,13,15,20,21,22,23,24,34,35,36) have mixed types. Specify dtype option on import or set low_memory=False.
  data = self._reader.read(nrows)

In [4]:
# Merge traveline and naptan stop data
stops_df = pd.merge(stops_df, naptan_df, left_on='StopPointRef', right_on='AtcoCode')
len(stops_df)

# Write bus stops data
stops_df.to_csv('data/traveline_naptan_stops.csv')
#len(stops_df)

In [20]:
# Enrich traveline routes with Naptan info
stops_lat_long = naptan_df[["AtcoCode", "NaptanCode", "LocalityName", "Latitude", "Longitude"]]
#stops_lat_long
routes_info_df = pd.merge(routes_df, stops_lat_long, left_on='From', right_on='AtcoCode')
del routes_info_df['NaptanCode']
routes_info_df.rename(columns={'AtcoCode': 'From_AtcoCode', 
                               'Latitude': 'From_Latitude', 
                               'Longitude': 'From_Longitude',
                               'LocalityName': 'From_LocalityName'}, inplace=True)
routes_info_df = pd.merge(routes_info_df, stops_lat_long, left_on='To', right_on='AtcoCode')
routes_info_df.rename(columns={'AtcoCode': 'To_AtcoCode', 
                               'Latitude': 'To_Latitude', 
                               'Longitude': 'To_Longitude',
                               'LocalityName': 'To_LocalityName'}, inplace=True)

# Write bus route data
routes_info_df.to_csv('data/routes_with_latlong.csv')
routes_info_df_bath = routes_info_df[routes_info_df['To_LocalityName'] == 'Bath City Centre']
routes_info_df_bath.to_csv('data/routes_with_latlong_bath.csv')

In [ ]: