In [ ]:
from mylibrary.connections import cursor, conn, engine, Automapped_Base, session
import pandas as pd
In [ ]:
# First get all the data from the two tables of stations
sql = """
select o.icscode,
o.nlc,
o.station_name,
o.journey_time as journey_time_orig,
d.journey_time as journey_time_dest,
o.london_mainline_station_text as ml_orig,
d.london_mainline_station_text as ml_dest,
o.london_mainline_station_ics as ml_ics_orig,
d.london_mainline_station_ics as ml_ics_dest
from tt_gh.stations_journeytime_stpauls as o
left join tt_gh.stations_journeytime_camden as d
on o.icscode = d.icscode
where o.tfl_message = 'ok' and d.tfl_message = 'ok'
"""
df1 = pd.read_sql(sql,conn)
print len(df1)
df1.head(5)
In [ ]:
# Next join on lat lng and other info
sql = """
select icscode, station_name from tt_gh.all_stations
"""
df2 = pd.read_sql(sql,conn)
df2.head(2)
# df2.groupby("icscode").count()
df2 = df2.drop_duplicates("icscode")
In [ ]:
merged = df1.merge(df2, left_on="ml_ics_orig", right_on="icscode", how="left")
print(len(merged))
merged = merged.drop(["icscode_y"], axis=1)
merged.rename(columns={"icscode_x": "icscode", "station_name_y": "ml_orig_station_name","station_name_x":"station_name"}, inplace=True)
merged = merged.merge(df2, left_on="ml_ics_dest", right_on="icscode", how="left")
merged = merged.drop(["icscode_y"], axis=1)
merged.rename(columns={"icscode_x": "icscode", "station_name_y": "ml_dest_station_name", "station_name_x":"station_name"}, inplace=True)
print(len(merged))
merged.head(5)
In [ ]:
# Finally get the lat lng
# Next join on lat lng and other info
sql = """
select nlc, icscode,
ST_X(ST_TRANSFORM(geom, 4326)) as lng,
ST_Y(ST_TRANSFORM(geom,4326)) as lat from tt_gh.all_stations;
"""
df3 = pd.read_sql(sql,conn)
df3.head(2)
df3 = df3.drop_duplicates("icscode")
df3 = df3.drop("icscode", axis=1)
In [ ]:
merged = merged.merge(df3, left_on="nlc", right_on="nlc", how="left")
merged["duration_change"] = merged["journey_time_dest"] - merged["journey_time_orig"]
In [ ]:
merged = merged[pd.notnull(merged["lat"])]
In [ ]:
merged.to_csv("website/data/stations_results.csv", index=False)
In [ ]: