In [2]:
import pandas as pd 
from pandas import Timestamp
import numpy as np
import tables
import statsmodels.api as sm
from statsmodels.tsa.seasonal import seasonal_decompose
import matplotlib.pyplot as plt
%matplotlib inline
from ggplot import *

Time Series functions


In [3]:
# Read the raw trip log; both trip timestamps are parsed into datetimes on load.
df = pd.read_csv(
    'Data/orig_data.csv',
    parse_dates=["starttime", "stoptime"],
)

In [4]:
def ts_agg(input_df, freq, aggBy):
    """Count trips per start timestamp and resample the counts onto a regular grid.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Trip records; must contain a datetime-like ``starttime`` column.
    freq : str
        Resampling frequency handed to ``resample`` (e.g. ``'MS'``, ``'W'``,
        ``'10Min'``).
    aggBy : str or callable
        Aggregation applied to the per-timestamp counts inside each bin
        (e.g. ``'sum'``).

    Returns
    -------
    pandas.DataFrame
        A single ``Count`` column indexed by the resampled ``Starttime`` bins.
    """
    # One entry per distinct start timestamp: how many trips began at that instant.
    # `size` counts rows directly, so the result can never contain nulls and no
    # separate zero-fill step is needed.
    trip_counts = input_df.groupby('starttime').size().rename('Count').to_frame()
    trip_counts.index.name = 'Starttime'

    # `resample(freq, how=...)` is no longer accepted by pandas; aggregate the
    # resampler object instead.
    return trip_counts.resample(freq).agg(aggBy)

In [6]:
# Hodrick-Prescott filter
def hp_filter(ts, lamb, starttime, endtime):
    """Split ``ts.Count`` into trend and cycle with the HP filter and plot them.

    Parameters
    ----------
    ts : pandas.DataFrame
        Time-indexed frame with a ``Count`` column.
    lamb : float
        Smoothing parameter forwarded to ``sm.tsa.filters.hpfilter``.
    starttime, endtime : str
        Index labels bounding the plotted window. An empty string (or ``None``)
        leaves that side of the window open.

    Returns
    -------
    matplotlib.figure.Figure
        The figure holding the Count / Trend / Cycle lines.
    """
    cycle, trend = sm.tsa.filters.hpfilter(ts.Count, lamb)

    # Copy so the new columns are not written into a slice of the caller's frame.
    decomp = ts[['Count']].copy()
    decomp["Cycle"] = cycle
    decomp["Trend"] = trend

    fig, ax = plt.subplots()
    # `'' or None` turns an empty bound into an open-ended slice.
    window = decomp.loc[starttime or None:endtime or None, ["Count", "Trend", "Cycle"]]
    window.plot(ax=ax, fontsize=16)
    plt.show()
    # Return the Figure itself; plt.show() returns None.
    return fig

Time Series by Month - Seasonal Patterns


In [5]:
# Trip counts summed into month-start bins.
timeSeries_MS = ts_agg(df, freq='MS', aggBy='sum')

In [8]:
# timeSeries_MS_plot = hp_filter(timeSeries_MS, 129600, '2013-07-01', '')



In [18]:
# Clear the index name so it is not printed as the x-axis label.
timeSeries_MS.index.names = [None]

In [21]:
# Monthly trip totals as one wide line chart, saved to the figures folder.
plt.figure()
mnth = timeSeries_MS.Count.plot(
    color='#006BB6',
    legend=False,
    figsize=(20, 5),
)
mnth.set_ylabel('Trip Count')
plt.savefig('Figures_TS/ByMonths.png')


Time Series by Week - Weekly Patterns


In [22]:
# Keep trips that started on or after 2013-08-01 and before 2013-11-01,
# then total them per week.
df_subset = df.loc[
    df['starttime'].ge(Timestamp('2013-08-01'))
    & df['starttime'].lt(Timestamp('2013-11-01'))
]
timeSeries_W = ts_agg(df_subset, freq='W', aggBy='sum')

In [10]:
# timeSeries_W_plot = hp_filter(timeSeries_W, 129600, '2013-08-04', '')



In [32]:
# Sans first week of August because it is not a full week.
# NOTE: this call needs `hp_filter` to be defined in the running kernel.
timeSeries_W_plot = hp_filter(timeSeries_W, 129600, '2013-08-11', '')



In [37]:
# Clear the index name so it is not printed as the x-axis label.
timeSeries_W.index.names = [None]

# Weekly trip totals as one wide line chart, saved to the figures folder.
plt.figure()
wk = timeSeries_W['Count'].plot(
    color='#006BB6',
    legend=False,
    figsize=(20, 5),
    x_compat=True,
)
wk.set_ylabel('Trip Count')
plt.savefig('Figures_TS/ByWeek.png')


Trips by day of week between August 5, 2013 and October 27, 2013


In [38]:
# Trips that started on or after 2013-08-05 and before 2013-10-28.
week_subset = df.loc[
    df['starttime'].ge(Timestamp('2013-08-05'))
    & df['starttime'].lt(Timestamp('2013-10-28'))
]

In [39]:
# Take a real copy: the columns added to `tripsByDay` below would otherwise be
# written into `week_subset` (itself a slice of `df`) through the shared reference.
tripsByDay = week_subset.copy()

def day_type(x):
    """Return the English weekday name for a date-like ``x``.

    ``x`` must provide ``weekday()`` returning 0 (Monday) through 6 (Sunday).
    """
    stems = ('Mon', 'Tues', 'Wednes', 'Thurs', 'Fri', 'Satur', 'Sun')
    stem = stems[x.weekday()]
    return "{}day".format(stem)

# Derive the calendar fields with the vectorised `.dt` accessor instead of a
# Python-level lambda per row; the weekday name still goes through `day_type`.
tripsByDay['dayofweek'] = tripsByDay['starttime'].dt.weekday
tripsByDay['dayofweek_name'] = tripsByDay['starttime'].map(day_type)
tripsByDay['day'] = tripsByDay['starttime'].dt.date
# Constant 1 per trip.
tripsByDay['count'] = 1

In [43]:
# Base plot: weekday number on the x axis, with axis titles attached.
p = ggplot(aes('dayofweek'), data=tripsByDay) + xlab('Day of Week') + ylab('Count')

# Bar chart of trips per weekday, with the tick labels spelled out as day names.
tripCnt = (
    p
    + geom_bar(fill='#006bb6')
    + scale_x_continuous(
        labels=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    )
    + ggtitle('Total Trips Between August 5, 2013 - October 27, 2013')
)
tripCnt


Out[43]:
<ggplot: (368319397)>

In [44]:
# Save the day-of-week bar chart (`tripCnt`) as a PNG in the figures folder.
ggsave(tripCnt, 'Figures_TS/TripCntByDayOfWeek.png')


Saving 11.0 x 8.0 in image.

Time Series by Day

Subscribers on weekdays


In [98]:
# Subscriber trips only; the weekday/weekend split is applied after aggregation.
subscribers = week_subset[week_subset['usertype'] == 'Subscriber']

In [99]:
# Subscriber trip counts summed into 10-minute bins.
timeSeries_D_subs = ts_agg(subscribers, freq='10Min', aggBy='sum')

In [100]:
# Expose the bin timestamp, its weekday number (0 = Monday) and its time of day
# as columns. The index attributes are vectorised, so no per-row lambda is needed.
timeSeries_D_subs['starttime'] = timeSeries_D_subs.index
timeSeries_D_subs['dayofweek'] = timeSeries_D_subs.index.weekday
timeSeries_D_subs['time'] = timeSeries_D_subs.index.time

In [106]:
def daybyname(x):
    """Translate a weekday number (0 = Monday ... 6 = Sunday) into its English name.

    Raises ``KeyError`` for any value outside 0-6.
    """
    names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    lookup = dict(enumerate(names))
    return lookup[x]

# Average subscriber count for every (weekday, time-of-day) slot.
# `Count` is selected explicitly so the helper `starttime` column is not averaged
# along with it, and the rename is chained instead of done in place.
avg10MinTrip_subs = (
    timeSeries_D_subs
    .groupby(['dayofweek', 'time'], as_index=False)[['Count']]
    .mean()
    .rename(columns={'Count': 'Average'})
)
avg10MinTrip_subs['dayofweek_name'] = avg10MinTrip_subs['dayofweek'].apply(daybyname)

In [102]:
# Monday (0) through Friday (4) only.
subs_wkday = avg10MinTrip_subs.loc[avg10MinTrip_subs['dayofweek'].lt(5)]

In [188]:
# Subscriber averages for the whole working week drawn as one continuous line.
plt.figure()
subs_1 = subs_wkday['Average'].plot(
    color='#006BB6',
    legend=False,
    figsize=(20, 5),
)
subs_1.set_xlabel('Monday through Friday at 10 Minute Intervals')
subs_1.set_ylabel('Average Trip Count of Subscribers')
plt.savefig('Figures_TS/Subscribers_EntireWkday.png')



In [108]:
# One row per time of day, one column per weekday; slots with no data become 0.
pivot10Min_subs = subs_wkday.pivot(index = "time", columns = "dayofweek_name", values = "Average").fillna(0)
# Put the columns in calendar order. The source frame is `pivot10Min_subs`;
# the previous `pivot10Min_sub` was an undefined name.
pivot10Min_reindex_subs = pivot10Min_subs.reindex(columns=['Monday','Tuesday','Wednesday','Thursday','Friday'])
pivot10Min_reindex_subs.index.names = ['Time']

In [184]:
# Draw on an explicitly created figure: DataFrame.plot opens its own figure when
# no axes are passed, which left the bare plt.figure() behind as an empty output.
fig_subs_2, ax_subs_2 = plt.subplots(figsize=(20,5))
subs_2 = pivot10Min_reindex_subs.plot(ax=ax_subs_2, legend=True)
subs_2.set_ylabel('Average Trip Count of Subscribers')
fig_subs_2.savefig('Figures_TS/Subscribers_ByDay.png')


<matplotlib.figure.Figure at 0x125d2b110>

All user types on weekends


In [122]:
# Trip counts for every user type summed into 10-minute bins.
timeSeries_D = ts_agg(week_subset, freq='10Min', aggBy='sum')

In [147]:
# Expose the bin timestamp, its weekday number (0 = Monday) and its time of day
# as columns. The index attributes are vectorised, so no per-row lambda is needed.
timeSeries_D['starttime'] = timeSeries_D.index
timeSeries_D['dayofweek'] = timeSeries_D.index.weekday
timeSeries_D['time'] = timeSeries_D.index.time

In [148]:
# Average trip count for every (weekday, time-of-day) slot, all user types.
# `daybyname` is the helper defined with the subscriber averages earlier in the
# notebook; the second, identical definition that used to sit in this cell only
# shadowed it and has been dropped.
# `Count` is selected explicitly so the helper `starttime` column is not averaged
# along with it, and the rename is chained instead of done in place.
avg10MinTrip = (
    timeSeries_D
    .groupby(['dayofweek', 'time'], as_index=False)[['Count']]
    .mean()
    .rename(columns={'Count': 'Average'})
)
avg10MinTrip['dayofweek_name'] = avg10MinTrip['dayofweek'].apply(daybyname)

In [169]:
# Saturday (5) and Sunday (6) only.
wknd = avg10MinTrip.loc[avg10MinTrip['dayofweek'].ge(5)]

In [189]:
# Weekend averages (Saturday followed by Sunday) drawn as one continuous line.
plt.figure()
wknd_fig = wknd['Average'].plot(
    color='#006BB6',
    legend=False,
    figsize=(20, 5),
)
wknd_fig.set_xlabel('Saturday through Sunday at 10 Minute Intervals')
wknd_fig.set_ylabel('Average Trip Count')

plt.savefig('Figures_TS/Wknd_EntireWknd.png')



In [175]:
# One row per time of day, one column per weekend day; slots with no data become 0.
pivot10Min_wknd = wknd.pivot(index = "time", columns = "dayofweek_name", values = "Average").fillna(0)
# Fix the column order directly. The earlier version reindexed with a dummy '1'
# column and then deleted it, which yields the same two columns.
pivot10Min_reindex_wknd = pivot10Min_wknd.reindex(columns=['Saturday','Sunday'])
pivot10Min_reindex_wknd.index.names = ['Time']

In [190]:
# Draw on an explicitly created figure: DataFrame.plot opens its own figure when
# no axes are passed, which left the bare plt.figure() behind as an empty output.
fig_wknd_2, ax_wknd_2 = plt.subplots(figsize=(20,5))
wknd_fig_2 = pivot10Min_reindex_wknd.plot(ax=ax_wknd_2, legend=True)
wknd_fig_2.set_ylabel('Average Trip Count')
fig_wknd_2.savefig('Figures_TS/Wknd_ByDay.png')


<matplotlib.figure.Figure at 0x11b677390>

Omitted figures


In [25]:
# NOTE(review): `pivot10Min` is not defined anywhere in this notebook, so this cell
# failed on a fresh kernel. It is rebuilt here from `avg10MinTrip` using the same
# pivot pattern as the weekday/weekend tables — presumably what the lost variable
# held; TODO confirm against the original analysis.
day_columns = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
pivot10Min = (
    avg10MinTrip
    .pivot(index="time", columns="dayofweek_name", values="Average")
    .fillna(0)
    .reindex(columns=day_columns)
)

# Work on a copy and average over the named day columns only, so re-running the
# cell does not fold the derived columns back into 'Entire Week'.
pivot10Min_new = pivot10Min.copy()
pivot10Min_new.columns.name = 'Day Of Week'
pivot10Min_new.index.name = 'Time'
pivot10Min_new['Entire Week'] = pivot10Min_new[day_columns].mean(axis=1)
pivot10Min_new['Weekday'] = pivot10Min_new[['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']].mean(axis=1)
pivot10Min_new['Weekend'] = pivot10Min_new[['Saturday','Sunday']].mean(axis=1)

In [40]:
# 5 x 2 grid of panels: one per day, then the week / weekday / weekend averages.
fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(15,15))
fig.subplots_adjust(hspace=0.4)

panel_names = [
    'Monday', 'Tuesday',
    'Wednesday', 'Thursday',
    'Friday', 'Saturday',
    'Sunday', 'Entire Week',
    'Weekday', 'Weekend',
]
# `axes.flat` walks the grid row by row, matching the left-to-right,
# top-to-bottom order of `panel_names`.
for panel_ax, panel_name in zip(axes.flat, panel_names):
    pivot10Min_new[panel_name].plot(ax=panel_ax, color='#006BB6')
    panel_ax.set_title(panel_name)

plt.savefig('Figures_TS/Figure_7.png')



In [42]:
# Every column of the table in its own stacked subplot.
pivot10Min_new.plot(
    subplots=True,
    figsize=(15, 25),
    color='#006BB6',
)
plt.legend(loc='best')
plt.savefig('Figures_TS/Figure_8.png')