In [ ]:
import pandas as pd

In [ ]:
from mylibrary.connections import cursor, conn, engine, Automapped_Base, session

In [ ]:
sql = """
select row_number() over (), s.geom, st_astext(s.geom) as wkt from tt_gh.staff_locations as  s, tt_gh.gb_and_london as g
where st_contains(g.geom,s.geom)  and g.name='london'
"""
london_staff = pd.read_sql(sql, conn)
len(london_staff)

In [ ]:
sql = """
select s.postcode, ST_AsEWKT(s.geom) as wkt from tt_gh.staff_locations as  s, tt_gh.gb_and_london as g
where st_contains(g.geom,s.geom)  and g.name='gb'
"""
non_london_staff = pd.read_sql(sql, conn)
len(non_london_staff)

In [ ]:
# For the London staff we will simply look up their change in journeytime on the TFL API.
# For the non London staff we: (1) look for stations within 10 miles
# And pick the one with the lowest journeytime

In [ ]:
# for each member of staff, find the list of stations within 10 miles, lookup journeytime and distance and pick best route
# sensible radis us 10000
def choose_station(df):
    """
    This function determines which station the member of staff uses to get to work
    """
    
    df = df.sort("st_distance")
    station = df.iloc[0]["nlc"]
    return station

out_table = []

for r in non_london_staff.iterrows():
    
    row = r[1]
    this_out_row = {}
    this_out_row["postcode"] = row["postcode"]
    
    wkt = row["wkt"]

    sql = """
    select nlc, station_name,  st_distance(ST_GeomFromEWKT('{wkt}'), a.geom) from 
    tt_gh.all_stations as a where st_contains(
    ST_Buffer(ST_GeomFromEWKT('{wkt}'),50000),a.geom)
    and nlc in (select nlc from tt_gh.stations_journeytime_stpauls where tfl_message = 'ok')
    """.format(wkt=wkt)
    df = pd.read_sql(sql,conn)
    
    if len(df) ==0:
        print "error"
        continue
    
    station_nlc = choose_station(df)

    sql = """
    select journey_time as journey_time_orig from tt_gh.stations_journeytime_stpauls where nlc = {}
    """.format(station_nlc)

    journey_time_orig = pd.read_sql(sql,conn).iloc[0,0]

    sql = """
    select nlc, station_name,  st_distance(ST_GeomFromEWKT('{wkt}'), a.geom) from tt_gh.all_stations as a where st_contains(
    ST_Buffer(ST_GeomFromEWKT('{wkt}'),50000),a.geom)
    and nlc in (select nlc from tt_gh.stations_journeytime_camden where tfl_message = 'ok')
    """.format(wkt=wkt)
    df = pd.read_sql(sql,conn)
    
    if len(df) ==0:
        print "error"
        continue
        
    station_nlc = choose_station(df)

    sql = """
    select journey_time as journey_time_orig from tt_gh.stations_journeytime_camden where nlc = {}
    """.format(station_nlc)

    journey_time_dest= pd.read_sql(sql,conn).iloc[0,0]

    this_out_row["original_journey_time"] = journey_time_orig
    this_out_row["new_journey_time"] = journey_time_dest
    

    out_table.append(this_out_row)

In [ ]:
out = pd.DataFrame(out_table)
print len(out)
print len(out[~out.isnull().any(axis=1)])
out.head(2)

In [ ]:
out["change_in_journey_time"] = out["new_journey_time"] - out["original_journey_time"]

In [ ]:
# Now write out to postgresgh
out.to_sql("change_in_journey_time_non_london", engine, schema="tt_gh", if_exists="replace")

In [ ]:
sql = """
drop view if exists tt_gh.vw_change_in_journey_time_non_london;
create view tt_gh.vw_change_in_journey_time_non_london as 
select p.postcode,original_journey_time,new_journey_time, change_in_journey_time, p.geom from tt_gh.change_in_journey_time_non_london as c
left join tt_gh.all_postcodes as p
on p.postcode = c.postcode 
"""
conn.rollback()
cur = conn.cursor()
cur.execute(sql)
conn.commit()

In [ ]:
pd.read_sql("select * from tt.vw_change_in_journey_time_non_london", conn)

In [ ]: