In [7]:
from mylibrary.connections import cursor, conn, engine, Automapped_Base, session

In [8]:
def get_duration(station):
    tfl_response_json = json.loads(station.tfl_response)
    return_object = {}
    return_object["duration"] = tfl_response_json["journeys"][0]["duration"]
    return return_object

In [9]:
import json
Stations_orig = Automapped_Base.classes.stations_journeytime_stpauls
Stations_dest = Automapped_Base.classes.stations_journeytime_camden

for Stations in (Stations_orig, Stations_dest):
    nlcs = session.query(Stations.nlc).filter(Stations.tfl_message == "ok").all()
    for nlc in nlcs:
        station = session.query(Stations).filter(Stations.nlc == nlc).one()
        journey = get_duration(station)
        station.journey_time = journey["duration"]
        session.add(station)
        session.commit()

In [10]:
# Now we want to figure out which London mainline station this journey will go through

# We first want to scan through these ones to see whether the journey goes through.  
# Otherwise we want to return the last national rail station the journey went through

mainline_stations = {"Waterloo" : "WAT",
"Paddington" : "PAD",
"King's Cross" : "KGX",
"St Pancras" : "STP",
"Euston" : "EUS",
"Charing Cross" : "CHX",
"Victoria" : "VIC",
"London Bridge" : "LBG",
"Fenchurch Street" : "FST",
"Liverpool Street" : "LST"}

import pandas as pd
sql = """
select nlc as mainline_nlc, tlc as mainline_tlc, icscode as mainline_icscode from tt.all_stations
"""
all_stations = pd.read_sql(sql,conn)

mainline_df = pd.DataFrame(pd.Series(mainline_stations)).reset_index()
mainline_df.columns = ["station", "stn_code"]
mainline_df = mainline_df.merge(all_stations, left_on="stn_code", right_on="mainline_tlc", how="left")
mainline_df.drop("stn_code", axis=1)

mainline_icscodes = list(mainline_df["mainline_icscode"])
mainline_ics_lookup = mainline_df[["station", "mainline_icscode"]].to_dict(orient="records")
mainline_ics_lookup = {i['mainline_icscode'] : i["station"] for i in mainline_ics_lookup}

all_national_rail_ics = list(all_stations["mainline_icscode"])

In [11]:
# We will iterate through the legs of the journey finding the first match to a London mainline station

def get_legs(legs):
    all_legs = []
    for leg in legs:
        this_leg_data = {}
        try:
            this_leg_data["departed_from_name"] = leg["departurePoint"]["commonName"]
            this_leg_data["departed_from_icscode"] = leg["departurePoint"]["icsCode"]
        except:
            pass

        try:
            this_leg_data["arrived_at_name"] = leg["arrivalPoint"]["commonName"]
            this_leg_data["arrived_at_icscode"] = leg["arrivalPoint"]["icsCode"]
        except:
            pass
        all_legs.append(this_leg_data.copy())
    
    all_legs = pd.DataFrame(all_legs).reset_index()
    all_legs.rename(columns={"index":"tfl_order"}, inplace=True)
    return all_legs
    
    
def get_mainline_station(station):
    tfl_response_json = json.loads(station.tfl_response)
    first_journey = tfl_response_json["journeys"][0]
    legs = get_legs(first_journey["legs"])
    legs["mainline_station"] = False
    legs["national_rail_station"] = False
    legs["mainline_station"] = legs["arrived_at_icscode"].isin(mainline_icscodes)
    legs["national_rail_station"] = legs["arrived_at_icscode"].isin(all_national_rail_ics)
    
    legs = legs.sort(["mainline_station", "national_rail_station", "tfl_order"], ascending=[False,False,True])
    return legs.iloc[0]["arrived_at_icscode"]
    
#   order by whether mainline, and then original order.  Take first result
for Stations in (Stations_orig, Stations_dest):
    nlcs = session.query(Stations.nlc).filter(Stations.tfl_message == "ok").all() 
    for nlc in nlcs:
        station = session.query(Stations).filter(Stations.nlc == nlc).one()
        mainline_ics = get_mainline_station(station)
        station.london_mainline_station_ics = mainline_ics

        if mainline_ics in mainline_ics_lookup:
            station.london_mainline_station_text = mainline_ics_lookup[mainline_ics]
        else:
            station.london_mainline_station_text = "Other"

        session.add(station)
        session.commit()


/Users/robinlinacre/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:34: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)

In [12]:
"""select a.nlc, v.geom, a.station_name, o.journey_time, o.london_mainline_station_text from tt.stations_voronoi_icscode_ok as v
left join tt.all_stations as a 
on cast(a.nlc as text) = v.nlc
left join tt.stations_journeytime_orig as o
on cast(o.nlc as text) = v.nlc
where tfl_message is not null
and a.london_or_gb = 'gb'
"""


Out[12]:
"select a.nlc, v.geom, a.station_name, o.journey_time, o.london_mainline_station_text from tt.stations_voronoi_icscode_ok as v\nleft join tt.all_stations as a \non cast(a.nlc as text) = v.nlc\nleft join tt.stations_journeytime_orig as o\non cast(o.nlc as text) = v.nlc\nwhere tfl_message is not null\nand a.london_or_gb = 'gb'\n"