In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import matplotlib.pyplot as plt

pd.options.display.max_columns = 150
pd.options.display.float_format='{:,.2f}'.format

import sys
sys.path.append('C:/CASA/Workspace/sfdata_wrangler/sfdata_wrangler')

from SFMuniDataHelper import SFMuniDataHelper
from DataFrameViewer import DataFrameViewer

sfmuniHelper = SFMuniDataHelper()
viewer = DataFrameViewer()

In [3]:
# eventually convert filenames to arguments
hdffile = "D:/sfbusdatastore/sfmuni_aggregate.h5"
store = pd.HDFStore(hdffile)
store


Out[3]:
<class 'pandas.io.pytables.HDFStore'>
File path: D:/sfbusdatastore/sfmuni_aggregate.h5
/average                      frame_table  (typ->appendable,nrows->28692042,ncols->85,indexers->[index],dc->[MONTH,DOW,ROUTE,PATTCODE,DIR,TRIP,SEQ,NUMDAYS,OBSTRIPS,ROUTEA,VEHNO,SCHOOL,LASTTRIP,NEXTTRIP,HEADWAY,HEADWAY_STD,TOD,QSTOP,STOPNAME,TIMEPOINT,EOL,LAT,LAT_STD,LON,LON_STD,NS,EW,MAXVEL,MAXVEL_STD,MILES,MILES_STD,GODOM,GODOM_STD,VEHMILES,VEHMILES_STD,ON,ON_STD,OFF,OFF_STD,LOAD_ARR,LOAD_ARR_STD,LOAD_DEP,LOAD_DEP_STD,PASSMILES,PASSMILES_STD,PASSHOURS,PASSHOURS_STD,RDBRDNGS,RDBRDNGS_STD,LOADCODE,LOADCODE_STD,CAPACITY,CAPACITY_STD,DOORCYCLES,DOORCYCLES_STD,WHEELCHAIR,WHEELCHAIR_STD,BIKERACK,BIKERACK_STD,TIMESTOP,TIMESTOP_S,TIMESTOP_DEV,TIMESTOP_DEV_STD,DOORCLOSE,DOORCLOSE_S,DOORCLOSE_DEV,DOORCLOSE_DEV_STD,DWELL,DWELL_STD,DWELL_S,PULLOUT,PULLDWELL,PULLDWELL_STD,RUNTIME,RUNTIME_STD,RUNTIME_S,RECOVERY,RECOVERY_STD,RECOVERY_S,DLPMIN,DLPMIN_STD,ONTIME2,ONTIME2_STD,ONTIME10,ONTIME10_STD])
/daily_route_stops            frame_table  (typ->appendable,nrows->987424,ncols->47,indexers->[index],dc->[MONTH,DOW,ROUTE,PATTCODE,DIR,SEQ,NUMDAYS,DAILYTRIPS,TOTTRIPS,OBSTRIPS,ROUTEA,HEADWAY,QSTOP,STOPNAME,TIMEPOINT,EOL,LAT,LON,NS,EW,MAXVEL,MILES,GODOM,VEHMILES,ON,OFF,LOAD_ARR,LOAD_DEP,PASSMILES,PASSHOURS,RDBRDNGS,CAPACITY,DOORCYCLES,WHEELCHAIR,BIKERACK,TIMESTOP_DEV,DOORCLOSE_DEV,DWELL,DWELL_S,PULLDWELL,RUNTIME,RUNTIME_S,RECOVERY,RECOVERY_S,DLPMIN,ONTIME2,ONTIME10])                                                                                                                                                                                                                                                                                                                                                                                                                                        
/daily_routes                 frame_table  (typ->appendable,nrows->12784,ncols->32,indexers->[index],dc->[MONTH,DOW,ROUTE,DIR,NUMDAYS,DAILYTRIPS,TOTTRIPS,OBSTRIPS,ROUTEA,HEADWAY,VEHMILES,ON,OFF,PASSMILES,PASSHOURS,RDBRDNGS,CAPACITY,DOORCYCLES,WHEELCHAIR,BIKERACK,TIMESTOP_DEV,DOORCLOSE_DEV,DWELL,DWELL_S,PULLDWELL,RUNTIME,RUNTIME_S,RECOVERY,RECOVERY_S,DLPMIN,ONTIME2,ONTIME10])                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      
/daily_stops                  frame_table  (typ->appendable,nrows->381941,ncols->25,indexers->[index],dc->[MONTH,DOW,NUMDAYS,DAILYTRIPS,TOTTRIPS,OBSTRIPS,QSTOP,STOPNAME,TIMEPOINT,EOL,LAT,LON,ON,OFF,RDBRDNGS,DOORCYCLES,WHEELCHAIR,BIKERACK,TIMESTOP_DEV,DOORCLOSE_DEV,DWELL,DWELL_S,PULLDWELL,ONTIME2,ONTIME10])                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
/daily_system                 frame_table  (typ->appendable,nrows->121,ncols->28,indexers->[index],dc->[MONTH,DOW,NUMDAYS,DAILYTRIPS,TOTTRIPS,OBSTRIPS,VEHMILES,ON,OFF,PASSMILES,PASSHOURS,RDBRDNGS,CAPACITY,DOORCYCLES,WHEELCHAIR,BIKERACK,TIMESTOP_DEV,DOORCLOSE_DEV,DWELL,DWELL_S,PULLDWELL,RUNTIME,RUNTIME_S,RECOVERY,RECOVERY_S,DLPMIN,ONTIME2,ONTIME10])                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 
/tod_route_stops              frame_table  (typ->appendable,nrows->3810220,ncols->48,indexers->[index],dc->[TOD,MONTH,DOW,ROUTE,PATTCODE,DIR,SEQ,NUMDAYS,DAILYTRIPS,TOTTRIPS,OBSTRIPS,ROUTEA,HEADWAY,QSTOP,STOPNAME,TIMEPOINT,EOL,LAT,LON,NS,EW,MAXVEL,MILES,GODOM,VEHMILES,ON,OFF,LOAD_ARR,LOAD_DEP,PASSMILES,PASSHOURS,RDBRDNGS,CAPACITY,DOORCYCLES,WHEELCHAIR,BIKERACK,TIMESTOP_DEV,DOORCLOSE_DEV,DWELL,DWELL_S,PULLDWELL,RUNTIME,RUNTIME_S,RECOVERY,RECOVERY_S,DLPMIN,ONTIME2,ONTIME10])                                                                                                                                                                                                                                                                                                                                                                                                                                   
/tod_routes                   frame_table  (typ->appendable,nrows->67767,ncols->33,indexers->[index],dc->[TOD,MONTH,DOW,ROUTE,DIR,NUMDAYS,DAILYTRIPS,TOTTRIPS,OBSTRIPS,ROUTEA,HEADWAY,VEHMILES,ON,OFF,PASSMILES,PASSHOURS,RDBRDNGS,CAPACITY,DOORCYCLES,WHEELCHAIR,BIKERACK,TIMESTOP_DEV,DOORCLOSE_DEV,DWELL,DWELL_S,PULLDWELL,RUNTIME,RUNTIME_S,RECOVERY,RECOVERY_S,DLPMIN,ONTIME2,ONTIME10])                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  
/tod_stops                    frame_table  (typ->appendable,nrows->2367349,ncols->26,indexers->[index],dc->[TOD,MONTH,DOW,NUMDAYS,DAILYTRIPS,TOTTRIPS,OBSTRIPS,QSTOP,STOPNAME,TIMEPOINT,EOL,LAT,LON,ON,OFF,RDBRDNGS,DOORCYCLES,WHEELCHAIR,BIKERACK,TIMESTOP_DEV,DOORCLOSE_DEV,DWELL,DWELL_S,PULLDWELL,ONTIME2,ONTIME10])                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       
/tod_system                   frame_table  (typ->appendable,nrows->926,ncols->29,indexers->[index],dc->[TOD,MONTH,DOW,NUMDAYS,DAILYTRIPS,TOTTRIPS,OBSTRIPS,VEHMILES,ON,OFF,PASSMILES,PASSHOURS,RDBRDNGS,CAPACITY,DOORCYCLES,WHEELCHAIR,BIKERACK,TIMESTOP_DEV,DOORCLOSE_DEV,DWELL,DWELL_S,PULLDWELL,RUNTIME,RUNTIME_S,RECOVERY,RECOVERY_S,DLPMIN,ONTIME2,ONTIME10])                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             

In [4]:
# shows available months of data
months = store.select_column('daily_route_stops', 'MONTH').unique()
months.sort()
months


Out[4]:
array(['2008-03-01T00:00:00.000000000+0000',
       '2008-04-01T01:00:00.000000000+0100',
       '2008-05-01T01:00:00.000000000+0100',
       '2008-06-01T01:00:00.000000000+0100',
       '2009-06-01T01:00:00.000000000+0100',
       '2009-07-01T01:00:00.000000000+0100',
       '2009-08-01T01:00:00.000000000+0100',
       '2009-09-01T01:00:00.000000000+0100',
       '2009-10-01T01:00:00.000000000+0100',
       '2009-11-01T00:00:00.000000000+0000',
       '2009-12-01T00:00:00.000000000+0000',
       '2010-01-01T00:00:00.000000000+0000',
       '2010-02-01T00:00:00.000000000+0000',
       '2010-03-01T00:00:00.000000000+0000',
       '2010-05-01T01:00:00.000000000+0100',
       '2010-06-01T01:00:00.000000000+0100',
       '2010-07-01T01:00:00.000000000+0100',
       '2010-08-01T01:00:00.000000000+0100',
       '2010-09-01T01:00:00.000000000+0100',
       '2010-10-01T01:00:00.000000000+0100',
       '2010-11-01T00:00:00.000000000+0000',
       '2010-12-01T00:00:00.000000000+0000',
       '2011-01-01T00:00:00.000000000+0000',
       '2011-02-01T00:00:00.000000000+0000',
       '2011-03-01T00:00:00.000000000+0000',
       '2011-04-01T01:00:00.000000000+0100',
       '2011-05-01T01:00:00.000000000+0100',
       '2011-06-01T01:00:00.000000000+0100',
       '2011-07-01T01:00:00.000000000+0100',
       '2011-08-01T01:00:00.000000000+0100',
       '2011-09-01T01:00:00.000000000+0100',
       '2011-10-01T01:00:00.000000000+0100',
       '2011-11-01T00:00:00.000000000+0000',
       '2011-12-01T00:00:00.000000000+0000',
       '2012-01-01T00:00:00.000000000+0000',
       '2012-02-01T00:00:00.000000000+0000',
       '2012-03-01T00:00:00.000000000+0000',
       '2012-04-01T01:00:00.000000000+0100',
       '2012-05-01T01:00:00.000000000+0100',
       '2012-06-01T01:00:00.000000000+0100',
       '2012-08-01T01:00:00.000000000+0100'], dtype='datetime64[ns]')

In [6]:
# daily system (121 rows, 28 columns) stats - choose time frame here
daily_system = store.select('daily_system',where=['DOW==1','MONTH>2008-03-01','MONTH<2012-03-01'])

In [54]:
#plot line graphs , care in interpreting links where months missing
"""
fig, axes = plt.subplots(nrows=7)
daily_system.plot(ax=axes[0], x='MONTH', y=['ON'],figsize=(20,40)); 
axes[0].set_title('ON')

daily_system.plot(ax=axes[1], x='MONTH', y=['CAPACITY']); 
axes[1].set_title('CAPACITY')

daily_system.plot(ax=axes[2],x='MONTH', y=['PASSMILES']); 
axes[2].set_title('PASSMILES')

daily_system.plot(ax=axes[3],x='MONTH', y=['VEHMILES']); 
axes[3].set_title('VEHMILES')

daily_system.plot(ax=axes[4],x='MONTH', y=['TIMESTOP_DEV']); 
axes[4].set_title('TIMESTOP_DEV')

daily_system.plot(ax=axes[5],x='MONTH', y=['ONTIME2']); 
axes[5].set_title('ONTIME2')

daily_system.plot(ax=axes[6],x='MONTH', y=['ONTIME10']); 
axes[6].set_title('ONTIME10')
"""


Out[54]:
"\nfig, axes = plt.subplots(nrows=7)\ndaily_system.plot(ax=axes[0], x='MONTH', y=['ON'],figsize=(20,40)); \naxes[0].set_title('ON')\n\ndaily_system.plot(ax=axes[1], x='MONTH', y=['CAPACITY']); \naxes[1].set_title('CAPACITY')\n\ndaily_system.plot(ax=axes[2],x='MONTH', y=['PASSMILES']); \naxes[2].set_title('PASSMILES')\n\ndaily_system.plot(ax=axes[3],x='MONTH', y=['VEHMILES']); \naxes[3].set_title('VEHMILES')\n\ndaily_system.plot(ax=axes[4],x='MONTH', y=['TIMESTOP_DEV']); \naxes[4].set_title('TIMESTOP_DEV')\n\ndaily_system.plot(ax=axes[5],x='MONTH', y=['ONTIME2']); \naxes[5].set_title('ONTIME2')\n\ndaily_system.plot(ax=axes[6],x='MONTH', y=['ONTIME10']); \naxes[6].set_title('ONTIME10')\n"

In [90]:
plt.scatter(daily_system.MONTH, daily_system.ON)
daily_system.plot( x='MONTH', y=['ON'],figsize=(20,5)); 
daily_system.plot( x='MONTH', y=['OFF'],figsize=(20,5)); 
axvline(x='May 2010',color='k',ls='dashed')
legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)


Out[90]:
<matplotlib.legend.Legend at 0x2eaab518>

In [92]:
plt.scatter(daily_system.MONTH, daily_system.CAPACITY)
daily_system.plot( x='MONTH', y=['CAPACITY'],figsize=(20,5)); 
axvline(x='May 2010',color='k',ls='dashed')
legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)


Out[92]:
<matplotlib.legend.Legend at 0x2f91b0b8>

In [87]:
plt.scatter(daily_system.MONTH, daily_system.PASSMILES)
daily_system.plot( x='MONTH', y=['PASSMILES'],figsize=(20,5)); 
axvline(x='May 2010',color='k',ls='dashed')
legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)


Out[87]:
<matplotlib.legend.Legend at 0x2e429828>

In [94]:
plt.scatter(daily_system.MONTH, daily_system.VEHMILES)
daily_system.plot( x='MONTH', y=['VEHMILES'],figsize=(20,5)); 
axvline(x='May 2010',color='k',ls='dashed')
legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)


Out[94]:
<matplotlib.legend.Legend at 0x2fd089b0>

In [85]:
plt.scatter(daily_system.MONTH, daily_system.TIMESTOP_DEV)
daily_system.plot( x='MONTH', y=['TIMESTOP_DEV'],figsize=(20,5)); 
axvline(x='May 2010',color='k',ls='dashed')
legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)


Out[85]:
<matplotlib.legend.Legend at 0x2dfb4668>

In [83]:
plt.scatter(daily_system.MONTH, daily_system.ONTIME10)
daily_system.plot( x='MONTH', y=['ONTIME10'],figsize=(20,5));
axvline(x='May 2010',color='k',ls='dashed')
legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)


Out[83]:
<matplotlib.legend.Legend at 0x2dd626d8>

In [96]:
plt.scatter(daily_system.MONTH, daily_system.ONTIME2)
daily_system.plot( x='MONTH', y=['ONTIME2'],figsize=(20,5));
plt.scatter(daily_system.MONTH, daily_system.ONTIME10)
daily_system.plot( x='MONTH', y=['ONTIME10'],figsize=(20,5)); 
axvline(x='May 2010',color='k',ls='dashed')
legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)


Out[96]:
<matplotlib.legend.Legend at 0x2ff6ed68>

In [109]:
# Daily
# Sample: Most affected routes overall
# {3,17,19,27,35,37,44,52}
daily_routes_query3 = store.select('daily_routes',where=['DOW==1','ROUTE==3','DIR==1'])
daily_routes_query17 = store.select('daily_routes',where=['DOW==1','ROUTE==17','DIR==1'])
daily_routes_query19 = store.select('daily_routes',where=['DOW==1','ROUTE==19','DIR==1'])
daily_routes_query27 = store.select('daily_routes',where=['DOW==1','ROUTE==27','DIR==1'])
daily_routes_query35 = store.select('daily_routes',where=['DOW==1','ROUTE==35','DIR==1'])
daily_routes_query37 = store.select('daily_routes',where=['DOW==1','ROUTE==37','DIR==1'])
daily_routes_query44 = store.select('daily_routes',where=['DOW==1','ROUTE==44','DIR==1'])
daily_routes_query52 = store.select('daily_routes',where=['DOW==1','ROUTE==52','DIR==1'])

daily_routes_query3.plot( x='MONTH', y='ON',figsize=(20,10), label='#3')
plt.scatter(daily_routes_query3.MONTH,daily_routes_query3.ON)

daily_routes_query17.plot( x='MONTH', y='ON',figsize=(20,10), label = '#17')
plt.scatter(daily_routes_query17.MONTH,daily_routes_query17.ON)

daily_routes_query19.plot( x='MONTH', y='ON',figsize=(20,10), label = '#19')
plt.scatter(daily_routes_query19.MONTH,daily_routes_query19.ON)

daily_routes_query27.plot( x='MONTH', y='ON',figsize=(20,10),label = '#27')
plt.scatter(daily_routes_query27.MONTH,daily_routes_query27.ON)

daily_routes_query35.plot( x='MONTH', y='ON',figsize=(20,10),label='#35')
plt.scatter(daily_routes_query35.MONTH,daily_routes_query35.ON)

daily_routes_query37.plot( x='MONTH', y='ON',figsize=(20,10),label='#37')
plt.scatter(daily_routes_query37.MONTH,daily_routes_query37.ON)

daily_routes_query44.plot( x='MONTH', y='ON',figsize=(20,10),label='#44')
plt.scatter(daily_routes_query44.MONTH,daily_routes_query44.ON)

daily_routes_query52.plot( x='MONTH', y='ON',figsize=(20,10),label='#52')
plt.scatter(daily_routes_query52.MONTH,daily_routes_query52.ON)

legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)

axvline(x='May 2010',color='k',ls='dashed')


Out[109]:
<matplotlib.lines.Line2D at 0x38442c18>

In [129]:
# Time of day routes, i.e. tod_routes
# Sample: Most affected routes overall
# {3,17,19,27,35,37,44,52}
# Select TODs {600,900,1400,1600,1900,2200}
todv = 'TOD==2200'

daily_routes_query3 = store.select('tod_routes',where=['DOW==1','ROUTE==3','DIR==1',todv])
daily_routes_query17 = store.select('tod_routes',where=['DOW==1','ROUTE==17','DIR==1',todv])
daily_routes_query19 = store.select('tod_routes',where=['DOW==1','ROUTE==19','DIR==1',todv])
daily_routes_query27 = store.select('tod_routes',where=['DOW==1','ROUTE==27','DIR==1',todv])
daily_routes_query35 = store.select('tod_routes',where=['DOW==1','ROUTE==35','DIR==1',todv])
daily_routes_query37 = store.select('tod_routes',where=['DOW==1','ROUTE==37','DIR==1',todv])
daily_routes_query44 = store.select('tod_routes',where=['DOW==1','ROUTE==44','DIR==1',todv])
daily_routes_query52 = store.select('tod_routes',where=['DOW==1','ROUTE==52','DIR==1',todv])

daily_routes_query3.plot( x='MONTH', y='ON',figsize=(20,10), label='#3')
plt.scatter(daily_routes_query3.MONTH,daily_routes_query3.ON)

daily_routes_query17.plot( x='MONTH', y='ON',figsize=(20,10), label = '#17')
plt.scatter(daily_routes_query17.MONTH,daily_routes_query17.ON)

daily_routes_query19.plot( x='MONTH', y='ON',figsize=(20,10), label = '#19')
plt.scatter(daily_routes_query19.MONTH,daily_routes_query19.ON)

daily_routes_query27.plot( x='MONTH', y='ON',figsize=(20,10),label = '#27')
plt.scatter(daily_routes_query27.MONTH,daily_routes_query27.ON)

daily_routes_query35.plot( x='MONTH', y='ON',figsize=(20,10),label='#35')
plt.scatter(daily_routes_query35.MONTH,daily_routes_query35.ON)

daily_routes_query37.plot( x='MONTH', y='ON',figsize=(20,10),label='#37')
plt.scatter(daily_routes_query37.MONTH,daily_routes_query37.ON)

daily_routes_query44.plot( x='MONTH', y='ON',figsize=(20,10),label='#44')
plt.scatter(daily_routes_query44.MONTH,daily_routes_query44.ON)

daily_routes_query52.plot( x='MONTH', y='ON',figsize=(20,10),label='#52')
plt.scatter(daily_routes_query52.MONTH,daily_routes_query52.ON)

legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)

axvline(x='May 2010',color='k',ls='dashed')


Out[129]:
<matplotlib.lines.Line2D at 0x3adbbf98>

In [21]:
#Prepares data for heatmap

# Selects a specific route, in a specific month, in a specific direction, on a specific day of the week.. 
tod_route_stops_query = store.select('tod_route_stops',where=['DOW==1','MONTH>=2010-05-01','MONTH<=2010-05-01','ROUTE==17','DIR==1'])

# Crops to relevant data
tod_route_stops_subset = tod_route_stops_query[['TOD', 'STOPNAME','ONTIME2']]

# Transposes StopName to X Axis
tod_route_stops_subset_trans = tod_route_stops_subset.set_index('ONTIME2')

# Pivots stops into the header for a matrix (Time vs Stop)
tod_route_stops_subset_m = tod_route_stops_subset.pivot(index='STOPNAME', columns='TOD')

# Shortens variable name
trssm = tod_route_stops_subset_m

In [158]:
#import numpy as np 
#from pandas import DataFrame
#import matplotlib.pyplot as plt


fig, ax = subplots(figsize=(0, 40))
tight_layout()
ax = plt.imshow( trssm, interpolation='nearest', cmap='Oranges').get_axes()
ax.set_xticks(np.linspace(0, len(trssm.columns)-1, len(trssm.columns) ))
ax.set_yticks(np.linspace(0, len(trssm.index)-1, len(trssm.index) ))
ax.set_xticklabels( trssm.columns )
ax.set_yticklabels( trssm.index )
ax.grid( 'on' )
ax.xaxis.tick_top()
#for i in range( trssm.index ):
#    for j in range( trssm.columns ):
#        ax.text( j, i, '{:.2f}'.format( trssm.iget_value( i, j ) ),
#                 size='medium', ha='center', va='center',
#                 path_effects=[patheffects.withSimplePatchShadow( shadow_rgbFace=(1,1,1) )])



In [27]:
store.close()