In [1]:
from pathlib import Path
import tempfile
import shutil

import pandas as pd
import numpy as np

DATA_DIR = Path('../data')

Problem 3


In [2]:
GTFS_TABLES = [
    'agency',
    'stops',
    'routes',
    'trips',
    'stop_times',
    'calendar',
    'calendar_dates',
    'fare_attributes',
    'fare_rules',
    'shapes',
    'frequencies',
    'transfers',
    'feed_info',
    ]

STR_FIELDS = [
  'agency_id'
  'trip_id',
  'service_id',
  'shape_id',
  'block_id',
  'route_id',
  'stop_id',
  'fare_id',
  'origin_id',
  'destination_id',
  'contains_id',
  'from_stop_id',
  'to_stop_id',
]

def read_gtfs(path):
    """
    Given a path (string or pathlib object) to a (zipped) GTFS feed,
    unzip the feed and save the files to a dictionary whose keys are
    named after GTFS tables ('stops', 'routes', etc.) and whose
    corresponding values are Pandas data frames representing the tables.
    Return the resulting dictionary.

    NOTES:
        - Ignore files that are not valid GTFS; see https://developers.google.com/transit/gtfs/reference/.
        - Ensure that all ID fields that could be string ('stop_id', 'route_id', etc.) are parsed as strings and not as numbers.    
    """
    path = Path(path)
    
    # Unzip feed into temporary directory
    tmp_dir = tempfile.TemporaryDirectory()
    shutil.unpack_archive(str(path), tmp_dir.name, 'zip')

    # Read valid GTFS files into Pandas data frames
    feed = {}
    dtype = {field: str for field in STR_FIELDS} # ensure some string types
    for p in Path(tmp_dir.name).iterdir():
        name = p.stem
        if name in GTFS_TABLES:
            feed[name] = pd.read_csv(p, dtype=dtype)
        
    # Delete temporary directory
    tmp_dir.cleanup()
    
    return feed

In [3]:
path = DATA_DIR/'auckland_gtfs_20161017.zip'
feed = read_gtfs(path)

for name, table in feed.items():
    print('-'*40)
    print(name)
    print(table.dtypes)
    print(table.head())


----------------------------------------
calendar
service_id    object
start_date     int64
end_date       int64
monday         int64
tuesday        int64
wednesday      int64
thursday       int64
friday         int64
saturday       int64
sunday         int64
dtype: object
                          service_id  start_date  end_date  monday  tuesday  \
0  14306060378-20161011155909_v46.26    20161030  20161231       0        0   
1   2366061839-20161011155909_v46.26    20161030  20161231       0        0   
2  14267044430-20161011151756_v46.25    20161017  20161029       0        0   
3   2365061662-20161011155909_v46.26    20161030  20161231       0        0   
4  12913041840-20161011155909_v46.26    20161030  20161231       0        0   

   wednesday  thursday  friday  saturday  sunday  
0          0         0       0         1       1  
1          0         0       0         1       0  
2          0         0       0         0       1  
3          0         0       0         0       1  
4          0         0       0         1       0  
----------------------------------------
feed_info
feed_publisher_name    object
feed_publisher_url     object
feed_lang              object
feed_start_date        object
feed_end_date          object
feed_version           object
dtype: object
Empty DataFrame
Columns: [feed_publisher_name, feed_publisher_url, feed_lang, feed_start_date, feed_end_date, feed_version]
Index: []
----------------------------------------
agency
agency_phone       object
agency_url         object
agency_id          object
agency_name        object
agency_timezone    object
agency_lang        object
dtype: object
   agency_phone                            agency_url agency_id  \
0  (09)355-3553  http://www.aucklandtransport.govt.nz     NZBGW   
1  (09)355-3553  http://www.aucklandtransport.govt.nz       PHH   
2  (09)355-3553  http://www.aucklandtransport.govt.nz       PBC   
3  (09)355-3553  http://www.aucklandtransport.govt.nz     ABEXP   
4  (09)355-3553  http://www.aucklandtransport.govt.nz        AM   

             agency_name   agency_timezone agency_lang  
0                Go West  Pacific/Auckland          en  
1          SeaLink Group  Pacific/Auckland          en  
2  The Party Bus Company  Pacific/Auckland          en  
3                 SkyBus  Pacific/Auckland          en  
4               AT Metro  Pacific/Auckland          en  
----------------------------------------
routes
route_short_name     object
route_long_name      object
route_type            int64
route_text_color    float64
agency_id            object
route_id             object
route_color         float64
dtype: object
  route_short_name                         route_long_name  route_type  \
0              005  Britomart To Pt Chevalier Via Westmere           3   
1              007              St Heliers To Pt Chevalier           3   
2              008                     New Lynn To Otahuhu           3   
3              009                    Onehunga To New Lynn           3   
4              010  Wynyard Quarter To Onehunga Via Unitec           3   

   route_text_color agency_id   route_id  route_color  
0               NaN     NZBML  route_001          NaN  
1               NaN     NZBML  route_002          NaN  
2               NaN        UE  route_003          NaN  
3               NaN        UE  route_004          NaN  
4               NaN     NZBML  route_005          NaN  
----------------------------------------
stops
stop_lat          float64
zone_id           float64
stop_lon          float64
stop_id            object
parent_station    float64
stop_desc         float64
stop_name          object
location_type       int64
stop_code           int64
dtype: object
   stop_lat  zone_id   stop_lon stop_id  parent_station  stop_desc  \
0 -36.77869      NaN  174.75143    3382         41344.0        NaN   
1 -36.82308      NaN  174.80429    3480         41023.0        NaN   
2 -36.87175      NaN  174.60227   21094             NaN        NaN   
3 -36.89001      NaN  174.77511   11542             NaN        NaN   
4 -36.89261      NaN  174.77465   11543             NaN        NaN   

                    stop_name  location_type  stop_code  
0                   9 Nile Rd              0       3382  
1              40 Vauxhall Rd              0       3480  
2       Metcalfe Rd and Ranui              1      21094  
3  Manukau Rd and King George              1      11542  
4   Manukau Rd and Queen Mary              1      11543  
----------------------------------------
stop_times
trip_id                 object
arrival_time            object
departure_time          object
stop_id                 object
stop_sequence            int64
stop_headsign          float64
pickup_type            float64
drop_off_type          float64
shape_dist_traveled    float64
dtype: object
                            trip_id arrival_time departure_time stop_id  \
0  1000046829-20161011151756_v46.25     07:00:00       07:00:00    6920   
1  1000046829-20161011151756_v46.25     07:02:10       07:02:10    6800   
2  1000046829-20161011151756_v46.25     07:02:20       07:02:20    6364   
3  1000046829-20161011151756_v46.25     07:02:30       07:02:30    6366   
4  1000046829-20161011151756_v46.25     07:02:40       07:02:40    6182   

   stop_sequence  stop_headsign  pickup_type  drop_off_type  \
0              1            NaN          NaN            NaN   
1              2            NaN          NaN            NaN   
2              3            NaN          NaN            NaN   
3              4            NaN          NaN            NaN   
4              5            NaN          NaN            NaN   

   shape_dist_traveled  
0             0.000000  
1             1.179775  
2             1.467670  
3             1.734413  
4             2.006302  
----------------------------------------
trips
block_id         object
route_id         object
direction_id      int64
trip_headsign    object
shape_id         object
service_id       object
trip_id          object
dtype: object
  block_id   route_id  direction_id trip_headsign                    shape_id  \
0      NaN  route_091             1       Mangere  1209-20161011155909_v46.26   
1      NaN  route_128             0       Manukau  1232-20161011155909_v46.26   
2      NaN  route_079             0  Civic Centre   267-20161011151756_v46.25   
3      NaN  route_127             0       Manukau  1230-20161011155909_v46.26   
4      NaN  route_270             1     Glenfield    55-20161011155909_v46.26   

                          service_id                            trip_id  
0  14306060378-20161011155909_v46.26  14306060378-20161011155909_v46.26  
1   2366061839-20161011155909_v46.26   2366061839-20161011155909_v46.26  
2  14267044430-20161011151756_v46.25  14267044430-20161011151756_v46.25  
3   2365061662-20161011155909_v46.26   2365061662-20161011155909_v46.26  
4  12913041840-20161011155909_v46.26  12913041840-20161011155909_v46.26  
----------------------------------------
calendar_dates
service_id        object
date               int64
exception_type     int64
dtype: object
                          service_id      date  exception_type
0  14267044430-20161011151756_v46.25  20161024               1
1  15428038620-20161011151756_v46.25  20161024               2
2   3090042187-20161011151756_v46.25  20161024               2
3   3922046164-20161011151756_v46.25  20161024               2
4  14625040096-20161011151756_v46.25  20161024               1
----------------------------------------
shapes
shape_id              object
shape_pt_sequence      int64
shape_pt_lon         float64
shape_pt_lat         float64
dtype: object
                    shape_id  shape_pt_sequence  shape_pt_lon  shape_pt_lat
0  827-20161011151756_v46.25                  0     174.77834     -36.61193
1  827-20161011151756_v46.25                  1     174.77836     -36.61198
2  827-20161011151756_v46.25                  2     174.77844     -36.61196
3  827-20161011151756_v46.25                  3     174.77892     -36.61186
4  827-20161011151756_v46.25                  4     174.77942     -36.61175

Problem 4


In [4]:
def compute_trip_stats(feed):
    """
    Return a data frame of trip stats.
    """
    st = feed['stop_times']
    
    # Ensure stop times are properly sorted
    st = st.sort_values(['trip_id', 'stop_sequence'])
    
    # Aggregate stop times into trip stats
    def my_agg(group):
        d = {}
        d['start_time'] = group['departure_time'].iat[0]
        d['end_time'] = group['departure_time'].iat[-1]
        d['distance'] = group['shape_dist_traveled'].iat[-1]
        return pd.Series(d)
        
    f = st.groupby('trip_id').apply(my_agg).reset_index()
    
    # Append some extra route information
    f = f.merge(
      feed['trips'][['trip_id', 'route_id']]).merge(
      feed['routes'])
    
    return f

In [5]:
ts = compute_trip_stats(feed)
print(ts.head())

# Restrict to buses
f = ts[ts['route_type'] == 3].copy()

# Find shortest and longest bus trips
i, j = f['distance'].argmin(), f['distance'].argmax()

print('-'*40)
print(f.ix[i])

print('-'*40)
print(f.ix[j])


                             trip_id   distance  end_time start_time  \
0   1000046829-20161011151756_v46.25  28.398145  08:12:00   07:00:00   
1   1000046830-20161011151756_v46.25  28.398145  08:45:00   07:25:00   
2   1000046831-20161011151756_v46.25  28.398145  08:20:00   07:10:00   
3  18220046548-20161011151756_v46.25  29.063917  07:39:00   06:30:00   
4  18220046549-20161011151756_v46.25  29.063917  09:05:00   07:45:00   

    route_id route_short_name  \
0  route_157             457X   
1  route_157             457X   
2  route_157             457X   
3  route_157             457X   
4  route_157             457X   

                                     route_long_name  route_type  \
0  Manukau City Centre To Britomart Express Via O...           3   
1  Manukau City Centre To Britomart Express Via O...           3   
2  Manukau City Centre To Britomart Express Via O...           3   
3  Manukau City Centre To Britomart Express Via O...           3   
4  Manukau City Centre To Britomart Express Via O...           3   

   route_text_color agency_id  route_color  
0               NaN     NZBWP          NaN  
1               NaN     NZBWP          NaN  
2               NaN     NZBWP          NaN  
3               NaN     NZBWP          NaN  
4               NaN     NZBWP          NaN  
----------------------------------------
trip_id                              4957006840-20161011151756_v46.25
distance                                                      2.01022
end_time                                                     07:25:00
start_time                                                   07:20:00
route_id                                                    route_283
route_short_name                                                  957
route_long_name     Beach Haven To Albany Station Via Massey Unive...
route_type                                                          3
route_text_color                                                  NaN
agency_id                                                         BTL
route_color                                                       NaN
Name: 31416, dtype: object
----------------------------------------
trip_id             15476039347-20161011151756_v46.25
distance                                      65.4041
end_time                                     18:45:00
start_time                                   17:10:00
route_id                                    route_167
route_short_name                                  476
route_long_name        Pukekohe To Tuakau To Pukekohe
route_type                                          3
route_text_color                                  NaN
agency_id                                       NZBWP
route_color                                       NaN
Name: 19268, dtype: object

In [ ]: