In [ ]:
import pandas as pd
In [ ]:
from mylibrary.connections import cursor, conn, engine, Automapped_Base, session
In [ ]:
sql = """
select row_number() over (), s.geom, st_astext(s.geom) as wkt from tt_gh.staff_locations as s, tt_gh.gb_and_london as g
where st_contains(g.geom,s.geom) and g.name='london'
"""
london_staff = pd.read_sql(sql, conn)
len(london_staff)
In [ ]:
sql = """
select s.postcode, ST_AsEWKT(s.geom) as wkt from tt_gh.staff_locations as s, tt_gh.gb_and_london as g
where st_contains(g.geom,s.geom) and g.name='gb'
"""
non_london_staff = pd.read_sql(sql, conn)
len(non_london_staff)
In [ ]:
# For the London staff we will simply look up their change in journeytime on the TFL API.
# For the non London staff we: (1) look for stations within 10 miles
# And pick the one with the lowest journeytime
In [ ]:
# for each member of staff, find the list of stations within 10 miles, lookup journeytime and distance and pick best route
# sensible radis us 10000
def choose_station(df):
"""
This function determines which station the member of staff uses to get to work
"""
df = df.sort("st_distance")
station = df.iloc[0]["nlc"]
return station
out_table = []
for r in non_london_staff.iterrows():
row = r[1]
this_out_row = {}
this_out_row["postcode"] = row["postcode"]
wkt = row["wkt"]
sql = """
select nlc, station_name, st_distance(ST_GeomFromEWKT('{wkt}'), a.geom) from
tt_gh.all_stations as a where st_contains(
ST_Buffer(ST_GeomFromEWKT('{wkt}'),50000),a.geom)
and nlc in (select nlc from tt_gh.stations_journeytime_stpauls where tfl_message = 'ok')
""".format(wkt=wkt)
df = pd.read_sql(sql,conn)
if len(df) ==0:
print "error"
continue
station_nlc = choose_station(df)
sql = """
select journey_time as journey_time_orig from tt_gh.stations_journeytime_stpauls where nlc = {}
""".format(station_nlc)
journey_time_orig = pd.read_sql(sql,conn).iloc[0,0]
sql = """
select nlc, station_name, st_distance(ST_GeomFromEWKT('{wkt}'), a.geom) from tt_gh.all_stations as a where st_contains(
ST_Buffer(ST_GeomFromEWKT('{wkt}'),50000),a.geom)
and nlc in (select nlc from tt_gh.stations_journeytime_camden where tfl_message = 'ok')
""".format(wkt=wkt)
df = pd.read_sql(sql,conn)
if len(df) ==0:
print "error"
continue
station_nlc = choose_station(df)
sql = """
select journey_time as journey_time_orig from tt_gh.stations_journeytime_camden where nlc = {}
""".format(station_nlc)
journey_time_dest= pd.read_sql(sql,conn).iloc[0,0]
this_out_row["original_journey_time"] = journey_time_orig
this_out_row["new_journey_time"] = journey_time_dest
out_table.append(this_out_row)
In [ ]:
out = pd.DataFrame(out_table)
print len(out)
print len(out[~out.isnull().any(axis=1)])
out.head(2)
In [ ]:
out["change_in_journey_time"] = out["new_journey_time"] - out["original_journey_time"]
In [ ]:
# Now write out to postgresgh
out.to_sql("change_in_journey_time_non_london", engine, schema="tt_gh", if_exists="replace")
In [ ]:
sql = """
drop view if exists tt_gh.vw_change_in_journey_time_non_london;
create view tt_gh.vw_change_in_journey_time_non_london as
select p.postcode,original_journey_time,new_journey_time, change_in_journey_time, p.geom from tt_gh.change_in_journey_time_non_london as c
left join tt_gh.all_postcodes as p
on p.postcode = c.postcode
"""
conn.rollback()
cur = conn.cursor()
cur.execute(sql)
conn.commit()
In [ ]:
pd.read_sql("select * from tt.vw_change_in_journey_time_non_london", conn)
In [ ]: