In [2]:
import pandas as pd
from pandas import Timestamp
import numpy as np
import tables
import statsmodels.api as sm
from statsmodels.tsa.seasonal import seasonal_decompose
import matplotlib.pyplot as plt
%matplotlib inline
from ggplot import *
In [3]:
# Load Data/orig_data.csv, parsing the starttime and stoptime columns as datetimes.
df = pd.read_csv(
    'Data/orig_data.csv',
    parse_dates=["starttime", "stoptime"],
)
In [4]:
def ts_agg(input_df, freq, aggBy):
    """Count rows per start timestamp and resample those counts to `freq`.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Must contain a datetime-like ``starttime`` column; each row counts once.
    freq : str
        Offset alias handed to ``resample`` (this notebook uses 'MS', 'W', '10Min').
    aggBy : str or callable
        Aggregation applied to every resampled bin (this notebook uses 'sum').

    Returns
    -------
    pandas.DataFrame
        Indexed by a DatetimeIndex named ``Starttime`` with one column ``Count``.
    """
    # size() counts the rows of each distinct start time directly, so no
    # helper column (previously 'bikeid') is needed and no count can be null.
    timeAgg = input_df.groupby('starttime').size().to_frame('Count')
    timeAgg.index.name = 'Starttime'
    # The `how=` keyword of resample() no longer exists in current pandas;
    # aggregate on the resampler object instead.
    timeSeries = timeAgg.resample(freq).agg(aggBy)
    return timeSeries
In [6]:
# Hodrick-Prescott filter
# Restored from the commented-out version because a later cell still calls
# hp_filter(); with the definition commented out that call raises NameError
# on a fresh kernel.
def hp_filter(ts, lamb, starttime, endtime):
    """Split ``ts['Count']`` into HP-filter trend and cycle and plot all three.

    Parameters
    ----------
    ts : pandas.DataFrame
        Time-indexed frame with a ``Count`` column.
    lamb : float
        Smoothing parameter forwarded to ``sm.tsa.filters.hpfilter``.
    starttime, endtime : str or None
        Index labels bounding the plotted window. An empty string or None
        leaves that side of the window open.

    Returns
    -------
    matplotlib.figure.Figure
        The figure holding the Count / Trend / Cycle lines.
    """
    cycle, trend = sm.tsa.filters.hpfilter(ts['Count'], lamb)
    # Work on a copy so the caller's frame is not modified.
    decomp = ts[['Count']].copy()
    decomp['Cycle'] = cycle
    decomp['Trend'] = trend
    fig, ax = plt.subplots()
    # `'' or None` -> None, which makes the label slice open-ended.
    window = decomp.loc[starttime or None:endtime or None, ['Count', 'Trend', 'Cycle']]
    window.plot(ax=ax, fontsize=16)
    return fig
In [5]:
# Trip counts summed into month-start ('MS') bins over the whole data set.
timeSeries_MS = ts_agg(input_df=df, freq='MS', aggBy='sum')
In [8]:
# timeSeries_MS_plot = hp_filter(timeSeries_MS, 129600, '2013-07-01', '')
In [18]:
# Clear the index name (presumably so it is not drawn as the x-axis label).
timeSeries_MS.index.names = [None]
In [21]:
# Monthly trip counts as a single wide line chart, saved to Figures_TS/.
plt.figure()
mnth = timeSeries_MS['Count'].plot(color='#006BB6', legend=False, figsize=(20, 5))
mnth.set_ylabel('Trip Count')
mnth.get_figure().savefig('Figures_TS/ByMonths.png')
In [22]:
# Keep trips that started on or after 2013-08-01 and before 2013-11-01,
# then sum the trip counts into weekly ('W') bins.
df_subset = df.loc[
    df['starttime'].ge(Timestamp('2013-08-01'))
    & df['starttime'].lt(Timestamp('2013-11-01'))
]
timeSeries_W = ts_agg(input_df=df_subset, freq='W', aggBy='sum')
In [10]:
# timeSeries_W_plot = hp_filter(timeSeries_W, 129600, '2013-08-04', '')
In [32]:
Excluding the first week of August because it is not a full week.
# HP-filter the weekly series, plotting from 2013-08-11 with no end bound given.
timeSeries_W_plot = hp_filter(
    ts=timeSeries_W,
    lamb=129600,
    starttime='2013-08-11',
    endtime='',
)
In [37]:
# Weekly trip counts as a line chart; x_compat=True is forwarded to pandas' plot.
timeSeries_W.index.names = [None]
plt.figure()
wk = timeSeries_W['Count'].plot(
    color='#006BB6',
    legend=False,
    figsize=(20, 5),
    x_compat=True,
)
wk.set_ylabel('Trip Count')
wk.get_figure().savefig('Figures_TS/ByWeek.png')
In [38]:
# Trips that started on or after 2013-08-05 and before 2013-10-28.
week_subset = df.loc[
    df['starttime'].ge(Timestamp('2013-08-05'))
    & df['starttime'].lt(Timestamp('2013-10-28'))
]
In [39]:
# Work on an independent copy: the columns added to tripsByDay below would
# otherwise be written into week_subset itself (a filtered slice of df),
# triggering SettingWithCopyWarning and leaking columns into later cells.
tripsByDay = week_subset.copy()
def day_type(x):
    """Return the English weekday name for `x`, any object with a weekday() method."""
    prefixes = ('Mon', 'Tues', 'Wednes', 'Thurs', 'Fri', 'Satur', 'Sun')
    weekday_index = x.weekday()  # 0 = Monday ... 6 = Sunday
    return prefixes[weekday_index] + "day"
# Per-trip calendar fields derived from the start timestamp. The vectorised
# .dt accessors replace the row-by-row apply(lambda ...) calls.
tripsByDay['dayofweek'] = tripsByDay['starttime'].dt.weekday   # 0 = Monday ... 6 = Sunday
tripsByDay['dayofweek_name'] = tripsByDay['starttime'].apply(day_type)
tripsByDay['day'] = tripsByDay['starttime'].dt.date
# Constant 1 per trip.
tripsByDay['count'] = 1
In [43]:
# Bar chart of trip totals per weekday number, relabelled with weekday names.
p = ggplot(aes('dayofweek'), data=tripsByDay) + xlab('Day of Week') + ylab('Count')
tripCnt = (
    p
    + geom_bar(fill='#006bb6')
    + scale_x_continuous(labels=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'])
    + ggtitle('Total Trips Between August 5, 2013 - October 27, 2013')
)
tripCnt
Out[43]:
In [44]:
# Write the day-of-week bar chart (tripCnt) to a PNG under Figures_TS/.
ggsave(tripCnt, 'Figures_TS/TripCntByDayOfWeek.png')
In [98]:
# Subscriber trips only; the weekday restriction is applied later, after averaging.
subscribers = week_subset.loc[week_subset['usertype'] == 'Subscriber']
In [99]:
# Subscriber trip counts summed into 10-minute bins.
timeSeries_D_subs = ts_agg(input_df=subscribers, freq='10Min', aggBy='sum')
In [100]:
# Expose the bin timestamp as a column and split it into weekday number and
# time of day. The vectorised .dt accessors replace the per-row
# apply(lambda ...) calls.
timeSeries_D_subs['starttime'] = timeSeries_D_subs.index
timeSeries_D_subs['dayofweek'] = timeSeries_D_subs['starttime'].dt.weekday   # 0 = Monday
timeSeries_D_subs['time'] = timeSeries_D_subs['starttime'].dt.time
In [106]:
def daybyname(x):
    """Translate a weekday number (0 = Monday ... 6 = Sunday) into its English name.

    Raises KeyError for any value outside 0-6.
    """
    names_by_number = dict(enumerate(
        ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday')
    ))
    return names_by_number[x]
# Mean subscriber trip count for every (weekday, time-of-day) pair.
# Only 'Count' is averaged: the frame also carries the datetime 'starttime'
# column, and taking a blanket mean over it is version-dependent in pandas.
avg10MinTrip_subs = (
    timeSeries_D_subs
    .groupby(['dayofweek', 'time'], as_index=False)[['Count']]
    .mean()
    .rename(columns={'Count': 'Average'})
)
avg10MinTrip_subs['dayofweek_name'] = avg10MinTrip_subs['dayofweek'].apply(daybyname)
In [102]:
# Monday (0) through Friday (4) only.
subs_wkday = avg10MinTrip_subs.loc[avg10MinTrip_subs['dayofweek'].lt(5)]
In [188]:
# Subscriber weekday averages drawn as one continuous line against the row index.
plt.figure()
subs_1 = subs_wkday['Average'].plot(color='#006BB6', legend=False, figsize=(20, 5))
subs_1.set_xlabel('Monday through Friday at 10 Minute Intervals')
subs_1.set_ylabel('Average Trip Count of Subscribers')
subs_1.get_figure().savefig('Figures_TS/Subscribers_EntireWkday.png')
In [108]:
# One row per time of day, one column per weekday name; missing cells become 0.
pivot10Min_subs = subs_wkday.pivot(index="time", columns="dayofweek_name", values="Average").fillna(0)
# Put the columns in calendar order. The original read the misspelled name
# `pivot10Min_sub`, which is never defined and raised NameError.
pivot10Min_reindex_subs = pivot10Min_subs.reindex(columns=['Monday','Tuesday','Wednesday','Thursday','Friday'])
pivot10Min_reindex_subs.index.names = ['Time']
In [184]:
# One line per weekday. DataFrame.plot opens its own figure when no `ax` is
# passed, so the former plt.figure() call only left an empty figure behind;
# it is dropped and the file is saved from the figure that holds the axes.
subs_2 = pivot10Min_reindex_subs.plot(figsize=(20,5), legend=True)
subs_2.set_ylabel('Average Trip Count of Subscribers')
subs_2.get_figure().savefig('Figures_TS/Subscribers_ByDay.png')
In [122]:
# All-user trip counts for the selected weeks, summed into 10-minute bins.
timeSeries_D = ts_agg(input_df=week_subset, freq='10Min', aggBy='sum')
In [147]:
# Expose the bin timestamp as a column and split it into weekday number and
# time of day. The vectorised .dt accessors replace the per-row
# apply(lambda ...) calls.
timeSeries_D['starttime'] = timeSeries_D.index
timeSeries_D['dayofweek'] = timeSeries_D['starttime'].dt.weekday   # 0 = Monday
timeSeries_D['time'] = timeSeries_D['starttime'].dt.time
In [148]:
# Mean trip count (all users) for every (weekday, time-of-day) pair.
# daybyname() is already defined in an earlier cell of this notebook, so the
# identical second definition that used to live here has been removed.
# Only 'Count' is averaged: the frame also carries the datetime 'starttime'
# column, and taking a blanket mean over it is version-dependent in pandas.
avg10MinTrip = (
    timeSeries_D
    .groupby(['dayofweek', 'time'], as_index=False)[['Count']]
    .mean()
    .rename(columns={'Count': 'Average'})
)
avg10MinTrip['dayofweek_name'] = avg10MinTrip['dayofweek'].apply(daybyname)
In [169]:
# Saturday (5) and Sunday (6) only.
wknd = avg10MinTrip.loc[avg10MinTrip['dayofweek'].ge(5)]
In [189]:
# Weekend averages drawn as one continuous line against the row index.
plt.figure()
wknd_fig = wknd['Average'].plot(color='#006BB6', legend=False, figsize=(20, 5))
wknd_fig.set_xlabel('Saturday through Sunday at 10 Minute Intervals')
wknd_fig.set_ylabel('Average Trip Count')
wknd_fig.get_figure().savefig('Figures_TS/Wknd_EntireWknd.png')
In [175]:
# One row per time of day, one column per weekend day; missing cells become 0.
pivot10Min_wknd = wknd.pivot(index = "time", columns = "dayofweek_name", values = "Average").fillna(0)
# Select the two columns directly. The original added a dummy '1' column via
# reindex and deleted it on the next line, which had no effect on the result.
pivot10Min_reindex_wknd = pivot10Min_wknd.reindex(columns=['Saturday','Sunday'])
pivot10Min_reindex_wknd.index.names = ['Time']
In [190]:
# One line per weekend day. DataFrame.plot opens its own figure when no `ax`
# is passed, so the former plt.figure() call only left an empty figure behind;
# it is dropped and the file is saved from the figure that holds the axes.
wknd_fig_2 = pivot10Min_reindex_wknd.plot(figsize=(20,5), legend=True)
wknd_fig_2.set_ylabel('Average Trip Count')
wknd_fig_2.get_figure().savefig('Figures_TS/Wknd_ByDay.png')
In [25]:
# `pivot10Min` is not created anywhere in this notebook, so this cell only
# worked with leftover kernel state and raises NameError on a fresh run.
# NOTE(review): rebuilt here from avg10MinTrip in the same way as the weekend
# pivot (time of day x weekday name, missing cells -> 0) - confirm this
# matches the table the figures were originally produced from.
pivot10Min = (
    avg10MinTrip
    .pivot(index='time', columns='dayofweek_name', values='Average')
    .fillna(0)
    .reindex(columns=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'])
)
# Copy so that re-running the cell does not fold the summary columns added
# below back into the 'Entire Week' mean.
pivot10Min_new = pivot10Min.copy()
pivot10Min_new.columns.name = 'Day Of Week'
pivot10Min_new.index.name = 'Time'
# Row means: all seven days first, then the weekday and weekend groups.
pivot10Min_new['Entire Week'] = pivot10Min_new.mean(axis=1)
pivot10Min_new['Weekday'] = pivot10Min_new[['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']].mean(axis=1)
pivot10Min_new['Weekend'] = pivot10Min_new[['Saturday','Sunday']].mean(axis=1)
In [40]:
# 5 x 2 grid: one panel per column of pivot10Min_new, filled row by row,
# each titled with its column name.
fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(15, 15))
fig.subplots_adjust(hspace=0.4)
panel_order = [
    'Monday', 'Tuesday',
    'Wednesday', 'Thursday',
    'Friday', 'Saturday',
    'Sunday', 'Entire Week',
    'Weekday', 'Weekend',
]
for panel_ax, panel_name in zip(axes.flat, panel_order):
    pivot10Min_new[panel_name].plot(ax=panel_ax, color='#006BB6')
    panel_ax.set_title(panel_name)
fig.savefig('Figures_TS/Figure_7.png')
In [42]:
# Every column of pivot10Min_new in its own stacked subplot.
pivot10Min_new.plot(color='#006BB6', figsize=(15, 25), subplots=True)
# plt.legend acts on the current axes only.
plt.legend(loc='best')
plt.gcf().savefig('Figures_TS/Figure_8.png')