EDA



In [38]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))



In [39]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from utilities import GtfsStore, RawAVlDataStore, get_google_maps_plot
import configparser
from bokeh.io import output_notebook, show

In [40]:
gtfs_store = GtfsStore('data/sfmta-60-20160125')
avl_store = RawAVlDataStore('data/sfmtaAVLRawData-2016-01-25.csv')
routes_df, shapes_df, stop_times_df, stops_df, trips_df = gtfs_store.get_gtfs_data()
avl_df = avl_store.init()

In [41]:
#avl_df.info()

In [42]:
#trips_df.info()

In [43]:
_ = list(trips_df['block_id'].unique())

In [44]:
route_id_for_bus14 = gtfs_store.get_route_id_for(route_short_name='14')

In [45]:
#find train_assignment for bus 14
_ = avl_df['TRAIN_ASSIGNMENT'].unique()

In [46]:
df = trips_df[(trips_df['route_id']==route_id_for_bus14) & (trips_df['service_id']==1)]
_ = df['trip_id'].unique()

In [47]:
my_trips = df['trip_id'].unique()

mask = stop_times_df['trip_id'].isin(my_trips)

stop_times_for_bus14 = stop_times_df[mask]

In [48]:
_ = stop_times_for_bus14.head()

In [49]:
mygroup = stop_times_for_bus14.groupby(by='trip_id')

In [50]:
mygroup.min().sort_values('arrival_time').head()


Out[50]:
arrival_time departure_time stop_id stop_sequence
trip_id
7091187 04:10:00 04:10:00 5572 29
7091318 04:37:00 04:37:00 5334 1
7091188 04:45:00 04:45:00 5572 29
7091229 04:50:00 04:50:00 5572 29
7091376 04:56:00 04:56:00 5334 40

In [ ]:


In [51]:
#bus14_route_lat, bus14_route_lon = gtfs_store.get_route_coordinates_for_a_trip(trip_id=7091229)
bus14_route_lat, bus14_route_lon = gtfs_store.get_route_coordinates_for_a_trip(trip_id=7091318)

In [52]:
len(bus14_route_lat)


Out[52]:
147

In [53]:
df_1403 = avl_df[avl_df['TRAIN_ASSIGNMENT']=='1403']

In [54]:
bus14_gps_lat = df_1403.head(90)['LATITUDE'].tolist()
bus14_gps_lon = df_1403.head(90)['LONGITUDE'].tolist()

In [55]:
stops_for_bus14 = gtfs_store.get_stops_for_trip(7091318)
stops_for_bus14


Out[55]:
trip_id arrival_time departure_time stop_id stop_sequence stop_name stop_lat stop_lon
0 7091318 04:37:00 04:37:00 7099 1 Mission St & San Jose Ave 37.705979 -122.461315
1 7091318 04:38:09 04:38:09 5585 2 Mission St & Evergreen St 37.707410 -122.456530
2 7091318 04:39:15 04:39:15 5574 3 Mission St & Acton St 37.708890 -122.452200
3 7091318 04:39:47 04:39:47 5608 4 Mission St & Oliver St 37.709620 -122.450060
4 7091318 04:40:11 04:40:11 5628 5 Mission St & Whittier St 37.710220 -122.448450
5 7091318 04:41:00 04:41:00 5601 7 Mission St & Lowell St 37.711720 -122.445690
6 7091318 04:41:20 04:41:20 5595 8 Mission St & Guttenberg St 37.712510 -122.444750
7 7091318 04:42:09 04:42:09 5575 9 Mission St & Allison St 37.714480 -122.442640
8 7091318 04:43:00 04:43:00 5593 10 Mission St & Geneva Ave 37.716642 -122.440782
9 7091318 04:43:39 04:43:39 5599 11 Mission St & Italy Ave 37.718650 -122.439259
10 7091318 04:44:34 04:44:34 5615 12 Mission St & Russia Ave 37.721469 -122.437117
11 7091318 04:45:11 04:45:11 5610 14 Mission St & Persia Ave 37.723380 -122.435680
12 7091318 04:45:34 04:45:34 5582 15 Mission St & Brazil Ave 37.724540 -122.434810
13 7091318 04:46:09 04:46:09 5586 16 Mission St & Excelsior Ave 37.726355 -122.433510
14 7091318 04:47:00 04:47:00 5620 17 Mission St & Silver Ave 37.728620 -122.431300
15 7091318 04:47:48 04:47:48 5624 18 Mission St & Trumbull St 37.730660 -122.429270
16 7091318 04:49:08 04:49:08 5605 19 Mission St & Murray St 37.734040 -122.425920
17 7091318 04:49:49 04:49:49 5613 20 Mission St & Richland Ave 37.735910 -122.424420
18 7091318 04:50:18 04:50:18 5596 21 Mission St & Highland Ave 37.737448 -122.423963
19 7091318 04:50:44 04:50:44 5577 22 Mission St & Appleton Ave 37.738867 -122.423871
20 7091318 04:51:29 04:51:29 5583 23 Mission St & Cortland Ave 37.741052 -122.422759
21 7091318 04:52:00 04:52:00 5571 24 Mission St & 30th St 37.742435 -122.421819
22 7091318 04:52:56 04:52:56 5569 25 Mission St & 29th St 37.744290 -122.420640
23 7091318 04:53:46 04:53:46 5587 26 Mission St & Fair Ave 37.745920 -122.419590
24 7091318 04:54:22 04:54:22 5611 27 Mission St & Precita Ave 37.747110 -122.418820
25 7091318 04:55:30 04:55:30 5567 28 Mission St & 26th St 37.749510 -122.418100
26 7091318 04:57:00 04:57:00 5565 29 Mission St & 24th St 37.752730 -122.418410
27 7091318 04:57:36 04:57:36 5563 30 Mission St & 23rd St 37.754280 -122.418560
28 7091318 04:58:12 04:58:12 5561 31 Mission St & 22nd St 37.755820 -122.418710
29 7091318 04:58:51 04:58:51 5559 32 Mission St & 21st St 37.757510 -122.418870
30 7091318 04:59:28 04:59:28 5557 33 Mission St & 20th St 37.759110 -122.419020
31 7091318 05:00:04 05:00:04 5555 34 Mission St & 19th St 37.760660 -122.419170
32 7091318 05:00:50 05:00:50 5553 35 Mission St & 18th St 37.762635 -122.419348
33 7091318 05:02:00 05:02:00 5551 36 Mission St & 16th St 37.765540 -122.419640
34 7091318 05:03:01 05:03:01 5549 37 Mission St & 15th St 37.767140 -122.419790
35 7091318 05:03:58 05:03:58 5547 38 Mission St & 14th St 37.768620 -122.419930
36 7091318 05:05:09 05:05:09 5546 39 Mission St & 13th St 37.770470 -122.419780
37 7091318 05:08:00 05:08:00 5544 40 Mission St & 11th St 37.774216 -122.417120
38 7091318 05:09:20 05:09:20 5542 41 Mission St & 9th St 37.776450 -122.414310
39 7091318 05:10:04 05:10:04 5541 42 Mission St & 8th St 37.777700 -122.412730
40 7091318 05:11:05 05:11:05 7129 43 Mission St & 7th St 37.779354 -122.410505
41 7091318 05:12:08 05:12:08 5537 44 Mission St & 6th St 37.781190 -122.408310
42 7091318 05:13:00 05:13:00 5535 45 Mission St & 5th St 37.782590 -122.406540
43 7091318 05:14:22 05:14:22 5533 46 Mission St & 4th St 37.784640 -122.403930
44 7091318 05:15:36 05:15:36 5531 47 Mission St & 3rd St 37.786510 -122.401570
45 7091318 05:16:30 05:16:30 5530 48 Mission St & 2nd St 37.787870 -122.399850
46 7091318 05:18:01 05:18:01 5591 49 Mission St & Fremont St 37.790150 -122.396950
47 7091318 05:19:08 05:19:08 7759 50 Mission & Main St 37.791831 -122.394792
48 7091318 05:21:00 05:21:00 5334 51 Main St & Howard St 37.790844 -122.393577

In [56]:
gtfs_store.get_distances_between_stops_for_trip(7091318)


Out[56]:
[450.14356543620806,
 415.0584834099844,
 205.06612271105274,
 156.6004916282332,
 294.6440041496877,
 120.67323049201762,
 287.19330277391583,
 290.77766171831007,
 260.45660655049,
 365.82374834682906,
 247.3097973930512,
 150.01693766193674,
 232.02066549024417,
 318.22321912358336,
 288.7465092226585,
 477.67683872648695,
 246.31401931503768,
 175.7255618338082,
 158.0373740044524,
 261.97289056695155,
 174.63691530608764,
 230.91761678614898,
 203.463898859644,
 148.6779317785162,
 274.3506303910595,
 359.1846761502791,
 172.90461940286497,
 171.7955983002685,
 188.4982159538481,
 178.45016500054052,
 172.90453261997766,
 220.22875183750455,
 324.13076822877923,
 178.4500591644854,
 165.0744450552266,
 206.19077919760963,
 477.79980736409897,
 350.38809774390313,
 196.53028083800402,
 268.5235531805297,
 280.9561167108667,
 220.13160566827506,
 323.4655123249812,
 293.76388408800096,
 213.86997540090815,
 359.56720652310355,
 266.3403708123915,
 153.15571766793326]

In [19]:
config = configparser.ConfigParser()
config.read('config.ini')
google_api_key = config['DEFAULT']['GoogleMapsApiKey']

In [21]:
bus14_stops_lat = stops_for_bus14['stop_lat'].tolist()
bus14_stops_lon = stops_for_bus14['stop_lon'].tolist()

plot = get_google_maps_plot(bus14_stops_lat, bus14_stops_lon, bus14_gps_lat, bus14_gps_lon)
plot.plot_width = 800
plot.plot_height = 800
show(plot)



In [22]:
plot = get_google_maps_plot(bus14_route_lat, bus14_route_lon, bus14_gps_lat, bus14_gps_lon)
plot.plot_width = 800
plot.plot_height = 800
show(plot)

#Blue circles are static route and red circles are GPS points for a trip



In [ ]:
#Todo
        
    #use the information from the GTFS database to transform the geo coordinates onto a single new number 
    #that indicates how far along the route the bus has progressed along its route. In Cartesian coordinates, 
    #this will projecting a point onto a known path (polyline) and measuring its distance along the path from the origin. 
    
    # get a number of sets of trips for bus 14 from the data for year 2016 and transorom it as decribed above and apply statistical learning to predict arrival times