In [1]:
# Load IPython's autoreload extension and set it to mode 2, so imported
# modules are re-imported before each cell runs and edits to the local
# helper modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
In [2]:
# Third-party libraries and notebook-wide display options.
import pandas as pd
import matplotlib.pyplot as plt
# Show up to 150 columns when a DataFrame is displayed.
pd.options.display.max_columns = 150
# Render floats with thousands separators and two decimal places.
pd.options.display.float_format='{:,.2f}'.format
import sys
# Extend the module search path; presumably this directory holds the
# SFMuniDataHelper and DataFrameViewer modules imported below.
# NOTE(review): absolute, machine-specific path - confirm it exists before
# running on another machine.
sys.path.append('C:/CASA/Workspace/sfdata_wrangler/sfdata_wrangler')
from SFMuniDataHelper import SFMuniDataHelper
from DataFrameViewer import DataFrameViewer
# NOTE(review): neither instance is referenced by the other cells visible in
# this notebook.
sfmuniHelper = SFMuniDataHelper()
viewer = DataFrameViewer()
In [3]:
# Path of the aggregated HDF5 store queried by every cell below.
# TODO: eventually convert filenames to arguments
hdffile = "D:/sfbusdatastore/sfmuni_aggregate.h5"
# Open read-only: the notebook only queries the store, and mode='r' fails
# fast on a wrong path instead of creating an empty file (HDFStore's default
# mode is 'a', append).
store = pd.HDFStore(hdffile, mode='r')
store
Out[3]:
In [4]:
# List the distinct MONTH values stored in 'daily_route_stops'.
month_column = store.select_column('daily_route_stops', 'MONTH')
months = month_column.unique()
months.sort()  # sorts the array of unique values in place
months
Out[4]:
In [6]:
# Daily system stats (originally noted as 121 rows, 28 columns) - choose the
# time frame here.
# The date bounds are wrapped in Timestamp(...): a bare 2008-03-01 inside a
# where-expression is not a date literal. It is parsed as integer arithmetic,
# or rejected on Python 3 because of the leading zeros.
daily_system = store.select(
    'daily_system',
    where=[
        'DOW==1',
        "MONTH>Timestamp('2008-03-01')",
        "MONTH<Timestamp('2012-03-01')",
    ],
)
In [54]:
# Plot one line graph per system-level column, stacked in seven subplots.
# Take care when interpreting line segments that span missing months.
# NOTE: the whole cell body is wrapped in a triple-quoted string, so none of
# the plotting code runs; the cell simply evaluates to that string.
"""
fig, axes = plt.subplots(nrows=7)
daily_system.plot(ax=axes[0], x='MONTH', y=['ON'],figsize=(20,40));
axes[0].set_title('ON')
daily_system.plot(ax=axes[1], x='MONTH', y=['CAPACITY']);
axes[1].set_title('CAPACITY')
daily_system.plot(ax=axes[2],x='MONTH', y=['PASSMILES']);
axes[2].set_title('PASSMILES')
daily_system.plot(ax=axes[3],x='MONTH', y=['VEHMILES']);
axes[3].set_title('VEHMILES')
daily_system.plot(ax=axes[4],x='MONTH', y=['TIMESTOP_DEV']);
axes[4].set_title('TIMESTOP_DEV')
daily_system.plot(ax=axes[5],x='MONTH', y=['ONTIME2']);
axes[5].set_title('ONTIME2')
daily_system.plot(ax=axes[6],x='MONTH', y=['ONTIME10']);
axes[6].set_title('ONTIME10')
"""
Out[54]:
In [90]:
# ON and OFF columns of daily_system over MONTH, drawn on a single axes.
# - An explicit figure/axes is used because plt.scatter and each
#   DataFrame.plot() call without ax= ended up on different figures, so the
#   markers, the lines and the event line were never shown together.
# - axvline/legend are called on the axes; the bare names are only defined
#   after %pylab, which none of the visible cells runs.
# Assumes MONTH holds datetime values - TODO confirm.
fig, ax = plt.subplots(figsize=(20, 5))
for metric in ['ON', 'OFF']:
    # marker='o' gives the scatter-plus-line look with one artist per column
    ax.plot(daily_system['MONTH'], daily_system[metric], marker='o', label=metric)
# Dashed vertical line at May 2010, converted to a real date so it lands on
# the date axis.
ax.axvline(x=pd.Timestamp('May 2010'), color='k', ls='dashed')
ax.set_xlabel('MONTH')
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.);
Out[90]:
In [92]:
# CAPACITY column of daily_system over MONTH, on one explicit axes.
# The original mixed plt.scatter with DataFrame.plot() (which draws on a
# figure of its own) and called bare axvline/legend, which are only defined
# after %pylab; everything is now drawn on `ax`.
# Assumes MONTH holds datetime values - TODO confirm.
fig, ax = plt.subplots(figsize=(20, 5))
ax.plot(daily_system['MONTH'], daily_system['CAPACITY'], marker='o', label='CAPACITY')
# Dashed vertical line at May 2010, converted to a real date for the date axis
ax.axvline(x=pd.Timestamp('May 2010'), color='k', ls='dashed')
ax.set_xlabel('MONTH')
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.);
Out[92]:
In [87]:
# PASSMILES column of daily_system over MONTH, on one explicit axes.
# The original mixed plt.scatter with DataFrame.plot() (which draws on a
# figure of its own) and called bare axvline/legend, which are only defined
# after %pylab; everything is now drawn on `ax`.
# Assumes MONTH holds datetime values - TODO confirm.
fig, ax = plt.subplots(figsize=(20, 5))
ax.plot(daily_system['MONTH'], daily_system['PASSMILES'], marker='o', label='PASSMILES')
# Dashed vertical line at May 2010, converted to a real date for the date axis
ax.axvline(x=pd.Timestamp('May 2010'), color='k', ls='dashed')
ax.set_xlabel('MONTH')
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.);
Out[87]:
In [94]:
# VEHMILES column of daily_system over MONTH, on one explicit axes.
# The original mixed plt.scatter with DataFrame.plot() (which draws on a
# figure of its own) and called bare axvline/legend, which are only defined
# after %pylab; everything is now drawn on `ax`.
# Assumes MONTH holds datetime values - TODO confirm.
fig, ax = plt.subplots(figsize=(20, 5))
ax.plot(daily_system['MONTH'], daily_system['VEHMILES'], marker='o', label='VEHMILES')
# Dashed vertical line at May 2010, converted to a real date for the date axis
ax.axvline(x=pd.Timestamp('May 2010'), color='k', ls='dashed')
ax.set_xlabel('MONTH')
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.);
Out[94]:
In [85]:
# TIMESTOP_DEV column of daily_system over MONTH, on one explicit axes.
# The original mixed plt.scatter with DataFrame.plot() (which draws on a
# figure of its own) and called bare axvline/legend, which are only defined
# after %pylab; everything is now drawn on `ax`.
# Assumes MONTH holds datetime values - TODO confirm.
fig, ax = plt.subplots(figsize=(20, 5))
ax.plot(daily_system['MONTH'], daily_system['TIMESTOP_DEV'], marker='o', label='TIMESTOP_DEV')
# Dashed vertical line at May 2010, converted to a real date for the date axis
ax.axvline(x=pd.Timestamp('May 2010'), color='k', ls='dashed')
ax.set_xlabel('MONTH')
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.);
Out[85]:
In [83]:
# ONTIME10 column of daily_system over MONTH, on one explicit axes.
# The original mixed plt.scatter with DataFrame.plot() (which draws on a
# figure of its own) and called bare axvline/legend, which are only defined
# after %pylab; everything is now drawn on `ax`.
# Assumes MONTH holds datetime values - TODO confirm.
fig, ax = plt.subplots(figsize=(20, 5))
ax.plot(daily_system['MONTH'], daily_system['ONTIME10'], marker='o', label='ONTIME10')
# Dashed vertical line at May 2010, converted to a real date for the date axis
ax.axvline(x=pd.Timestamp('May 2010'), color='k', ls='dashed')
ax.set_xlabel('MONTH')
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.);
Out[83]:
In [96]:
# ONTIME2 and ONTIME10 columns of daily_system over MONTH, on one shared axes
# so the two series can be compared directly.
# The original mixed plt.scatter with DataFrame.plot() (which draws on a
# figure of its own) and called bare axvline/legend, which are only defined
# after %pylab; everything is now drawn on `ax`.
# Assumes MONTH holds datetime values - TODO confirm.
fig, ax = plt.subplots(figsize=(20, 5))
for metric in ['ONTIME2', 'ONTIME10']:
    # marker='o' gives the scatter-plus-line look with one artist per column
    ax.plot(daily_system['MONTH'], daily_system[metric], marker='o', label=metric)
# Dashed vertical line at May 2010, converted to a real date for the date axis
ax.axvline(x=pd.Timestamp('May 2010'), color='k', ls='dashed')
ax.set_xlabel('MONTH')
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.);
Out[96]:
In [109]:
# Daily
# Sample: Most affected routes overall
# {3,17,19,27,35,37,44,52}
sample_routes = [3, 17, 19, 27, 35, 37, 44, 52]

# One 'daily_routes' query per route (DOW==1, DIR==1), keyed by route number.
# This replaces eight copy-pasted daily_routes_query<N> variables; use
# daily_routes_by_route[N] instead.
daily_routes_by_route = {}
for route in sample_routes:
    daily_routes_by_route[route] = store.select(
        'daily_routes',
        where=['DOW==1', 'ROUTE==%d' % route, 'DIR==1'])

# Draw every route on one axes. Previously each DataFrame.plot() call drew on
# a figure of its own, so the routes, their scatter markers, the legend and
# the event line could not be compared in a single chart; bare legend/axvline
# are also only defined after %pylab.
# Assumes MONTH holds datetime values - TODO confirm.
fig, ax = plt.subplots(figsize=(20, 10))
for route in sample_routes:
    route_df = daily_routes_by_route[route]
    ax.plot(route_df['MONTH'], route_df['ON'], marker='o', label='#%d' % route)
# Dashed vertical line at May 2010, converted to a real date for the date axis
ax.axvline(x=pd.Timestamp('May 2010'), color='k', ls='dashed')
ax.set_xlabel('MONTH')
ax.set_ylabel('ON')
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.);
Out[109]:
In [129]:
# Time of day routes, i.e. tod_routes
# Sample: Most affected routes overall
# {3,17,19,27,35,37,44,52}
# Select TODs {600,900,1400,1600,1900,2200}
todv = 'TOD==2200'
sample_routes = [3, 17, 19, 27, 35, 37, 44, 52]

# One 'tod_routes' query per route (DOW==1, DIR==1, plus the TOD filter),
# keyed by route number. The original stored these results in variables named
# daily_routes_query<N> even though they come from 'tod_routes'; use
# tod_routes_by_route[N] instead.
tod_routes_by_route = {}
for route in sample_routes:
    tod_routes_by_route[route] = store.select(
        'tod_routes',
        where=['DOW==1', 'ROUTE==%d' % route, 'DIR==1', todv])

# Draw every route on one axes. Previously each DataFrame.plot() call drew on
# a figure of its own, so the routes, their scatter markers, the legend and
# the event line could not be compared in a single chart; bare legend/axvline
# are also only defined after %pylab.
# Assumes MONTH holds datetime values - TODO confirm.
fig, ax = plt.subplots(figsize=(20, 10))
for route in sample_routes:
    route_df = tod_routes_by_route[route]
    ax.plot(route_df['MONTH'], route_df['ON'], marker='o', label='#%d' % route)
# Dashed vertical line at May 2010, converted to a real date for the date axis
ax.axvline(x=pd.Timestamp('May 2010'), color='k', ls='dashed')
ax.set_title(todv)  # record which time-of-day filter the chart shows
ax.set_xlabel('MONTH')
ax.set_ylabel('ON')
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.);
Out[129]:
In [21]:
# Prepares data for the heatmap.
# Selects a specific route, in a specific month, in a specific direction, on a
# specific day of the week.
# The month bounds are wrapped in Timestamp(...): a bare 2010-05-01 inside a
# where-expression is not a date literal. It is parsed as integer arithmetic,
# or rejected on Python 3 because of the leading zeros.
tod_route_stops_query = store.select(
    'tod_route_stops',
    where=[
        'DOW==1',
        "MONTH>=Timestamp('2010-05-01')",
        "MONTH<=Timestamp('2010-05-01')",
        'ROUTE==17',
        'DIR==1',
    ],
)
# Crops to relevant data
tod_route_stops_subset = tod_route_stops_query[['TOD', 'STOPNAME', 'ONTIME2']]
# Copy of the subset indexed by ONTIME2 (this does not transpose anything).
# NOTE(review): not read by this cell or by the heatmap cell; kept only so the
# name stays defined.
tod_route_stops_subset_trans = tod_route_stops_subset.set_index('ONTIME2')
# Pivots into a STOPNAME x TOD matrix of ONTIME2 values. values= is passed
# explicitly so the columns are plain TOD values instead of ('ONTIME2', TOD)
# pairs, which would otherwise show up as tuple tick labels.
tod_route_stops_subset_m = tod_route_stops_subset.pivot(
    index='STOPNAME', columns='TOD', values='ONTIME2')
# Shortens variable name
trssm = tod_route_stops_subset_m
In [158]:
# Heatmap of the pivoted matrix `trssm`: one image row per index entry, one
# image column per column entry.
# Changes from the original cell:
# - figsize=(0, 40) asked for a zero-width figure; width 20 matches the other
#   charts in this notebook (NOTE(review): adjust to taste).
# - subplots/tight_layout/np were bare names that only exist after %pylab;
#   plt and the axes/figure objects are used instead, and the tick positions
#   come from range() so numpy is not needed.
# - The image is drawn directly on `ax` instead of recovering the axes via
#   AxesImage.get_axes().
# - tight_layout runs after the content is drawn so it can account for the
#   tick labels.
fig, ax = plt.subplots(figsize=(20, 40))
ax.imshow(trssm.values, interpolation='nearest', cmap='Oranges')
ax.set_xticks(list(range(len(trssm.columns))))
ax.set_yticks(list(range(len(trssm.index))))
# get_level_values(-1) yields the innermost column level, so the labels are
# the same whether the columns are a flat index or a MultiIndex.
ax.set_xticklabels(trssm.columns.get_level_values(-1))
ax.set_yticklabels(trssm.index)
ax.grid(True)
ax.xaxis.tick_top()
fig.tight_layout();
# Disabled: per-cell value annotations, left as the author wrote them.
#for i in range( trssm.index ):
# for j in range( trssm.columns ):
# ax.text( j, i, '{:.2f}'.format( trssm.iget_value( i, j ) ),
# size='medium', ha='center', va='center',
# path_effects=[patheffects.withSimplePatchShadow( shadow_rgbFace=(1,1,1) )])
In [27]:
# Close the HDF5 store opened near the top of the notebook; any cell that
# queries `store` has to run before this one.
store.close()