In [38]:
# `display` and `HTML` belong to the public `IPython.display` API; importing
# them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML

# Inject a CSS rule that forces `.container` elements to 100% width
# (presumably the notebook page wrapper, so cells span the browser window).
display(HTML("<style>.container { width:100% !important; }</style>"))
In [39]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from utilities import GtfsStore, RawAVlDataStore, get_google_maps_plot
import configparser
from bokeh.io import output_notebook, show
In [40]:
# Open the two data sources: the static GTFS feed and the raw AVL (GPS) log
# for the same service day.
gtfs_store = GtfsStore('data/sfmta-60-20160125')
avl_store = RawAVlDataStore('data/sfmtaAVLRawData-2016-01-25.csv')

# The GTFS store hands back five tables in this fixed order.
(
    routes_df,
    shapes_df,
    stop_times_df,
    stops_df,
    trips_df,
) = gtfs_store.get_gtfs_data()

# The AVL table is whatever `init()` returns.
avl_df = avl_store.init()
In [41]:
#avl_df.info()
In [42]:
#trips_df.info()
In [43]:
# Distinct block ids present in the trips table. Binding the list to `_`
# keeps the cell from echoing it.
unique_block_ids = trips_df['block_id'].unique()
_ = list(unique_block_ids)
In [44]:
# Ask the GTFS store for the route id matching route_short_name '14';
# later cells use it to filter the trips table.
route_id_for_bus14 = gtfs_store.get_route_id_for(route_short_name='14')
In [45]:
# List the distinct TRAIN_ASSIGNMENT values in the AVL data, to look for the
# one(s) belonging to bus 14. The result is bound to `_`, so nothing is shown.
_ = avl_df['TRAIN_ASSIGNMENT'].unique()
In [46]:
# Keep only trips on the bus 14 route that run under service_id 1.
# NOTE(review): the meaning of service_id 1 is not visible here; confirm
# against the feed's calendar data.
on_bus14_route = trips_df['route_id'] == route_id_for_bus14
in_service_1 = trips_df['service_id'] == 1
df = trips_df[on_bus14_route & in_service_1]

# Distinct trip ids of the selection (bound to `_`, so nothing is shown).
_ = df['trip_id'].unique()
In [47]:
# Trip ids selected in the previous cell.
my_trips = df['trip_id'].unique()

# Boolean row selector: True where a stop_time row belongs to one of those trips.
mask = stop_times_df['trip_id'].isin(my_trips)

# Stop-time rows restricted to the bus 14 trips.
stop_times_for_bus14 = stop_times_df.loc[mask]
In [48]:
# Peek at the first rows of the filtered stop times; the result is bound
# to `_`, so the cell shows nothing.
_ = stop_times_for_bus14.head()
In [49]:
# Group the bus 14 stop times by trip so per-trip aggregates can be taken.
mygroup = stop_times_for_bus14.groupby(by='trip_id')
In [50]:
# Per-trip minimum of every column, ordered by the smallest arrival_time,
# showing the first few trips. Note that `min()` is taken column by column,
# so a displayed row need not correspond to a single stop_time record.
(
    mygroup
    .min()
    .sort_values('arrival_time')
    .head()
)
Out[50]:
In [ ]:
In [51]:
# Fetch the route coordinates for one bus 14 trip; the call returns two values,
# unpacked as latitudes and longitudes. The commented-out line is the same
# call for an alternative trip id, kept for reference.
#bus14_route_lat, bus14_route_lon = gtfs_store.get_route_coordinates_for_a_trip(trip_id=7091229)
bus14_route_lat, bus14_route_lon = gtfs_store.get_route_coordinates_for_a_trip(trip_id=7091318)
In [52]:
# Number of latitude values returned for the route.
len(bus14_route_lat)
Out[52]:
In [53]:
# AVL rows whose TRAIN_ASSIGNMENT is the string '1403'.
is_assignment_1403 = avl_df['TRAIN_ASSIGNMENT'] == '1403'
df_1403 = avl_df[is_assignment_1403]
In [54]:
# Take the first 90 AVL rows once, then pull both coordinate columns from
# that slice as plain Python lists.
first_gps_fixes = df_1403.head(90)
bus14_gps_lat = first_gps_fixes['LATITUDE'].tolist()
bus14_gps_lon = first_gps_fixes['LONGITUDE'].tolist()
In [55]:
# Stops for trip 7091318 (the same trip id used for the route coordinates).
# A later cell reads its 'stop_lat' and 'stop_lon' columns.
stops_for_bus14 = gtfs_store.get_stops_for_trip(7091318)
# The bare name on the last line displays the table as the cell output.
stops_for_bus14
Out[55]:
In [56]:
# Display what the store reports as distances between stops for trip 7091318.
# NOTE(review): units and return shape are defined by the helper; confirm there.
gtfs_store.get_distances_between_stops_for_trip(7091318)
Out[56]:
In [19]:
# Read the Google Maps API key from a local config file, so the key is not
# hard-coded in the notebook.
config = configparser.ConfigParser()

# `ConfigParser.read()` silently ignores files it cannot open, which would
# surface later as a confusing KeyError. The file is required here, so open it
# explicitly: a missing 'config.ini' then raises FileNotFoundError.
with open('config.ini') as config_file:
    config.read_file(config_file)

google_api_key = config['DEFAULT']['GoogleMapsApiKey']
In [21]:
# Stop coordinates of the trip as plain lists.
bus14_stops_lat = stops_for_bus14['stop_lat'].tolist()
bus14_stops_lon = stops_for_bus14['stop_lon'].tolist()

# Overlay the scheduled stops and the recorded GPS points on one map.
plot = get_google_maps_plot(
    bus14_stops_lat,
    bus14_stops_lon,
    bus14_gps_lat,
    bus14_gps_lon,
)

# Square 800 x 800 canvas.
plot.plot_width = plot.plot_height = 800
show(plot)
In [22]:
# Overlay the static route shape and the recorded GPS points on one map.
# Blue circles are the static route and red circles are GPS points for a trip.
plot = get_google_maps_plot(
    bus14_route_lat,
    bus14_route_lon,
    bus14_gps_lat,
    bus14_gps_lon,
)

# Square 800 x 800 canvas.
plot.plot_width = plot.plot_height = 800
show(plot)
In [ ]:
#TODO
#Use the information from the GTFS database to transform the geo coordinates into a single number
#that indicates how far the bus has progressed along its route. In Cartesian coordinates,
#this means projecting a point onto a known path (polyline) and measuring its distance along the path from the origin.
#Get a number of sets of trips for bus 14 from the data for year 2016, transform them as described above, and apply statistical learning to predict arrival times.