In [1]:
import pandas as pd
import numpy as np
from fbprophet import Prophet
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
In [2]:
# Load the passenger-traffic training and test sets from the working directory.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('ts_PassengerTraffic_Train.csv', 'ts_PassengerTraffic_Test.csv')
)

# Report the training-set dimensions, then preview its first rows.
print(train.shape)
train.head()
Out[2]:
In [3]:
# Preview the first rows of the test set.
test.head()
Out[3]:
In [4]:
# Parse the 'Datetime' text column of both frames into real datetimes.
# The source files use day-month-year with a 24-hour clock.
stamp_format = '%d-%m-%Y %H:%M'
for frame in (train, test):
    frame['Datetime'] = pd.to_datetime(frame['Datetime'], format=stamp_format)

train.head()
Out[4]:
In [5]:
# Derive calendar features from the parsed timestamp.
# 'hour' is the hour of day (0-23) of each observation.
train['hour'] = train['Datetime'].dt.hour

# 'YearMonth' is a 'YYYY-MM' label. The month is zero-padded ('2012-08', not
# '2012-8') so that sorting or grouping on the label is chronological; with
# unpadded months '2012-10' sorts ahead of '2012-8'.
train['YearMonth'] = train['Datetime'].dt.strftime('%Y-%m')
train.head()
Out[5]:
In [40]:
# Bar chart of total passenger count per calendar month.
# The counts are aggregated per month before plotting; plotting the raw frame
# would draw one bar per hourly row rather than one per month. Grouping by a
# monthly Period keeps the bars in chronological order.
monthly_totals = train.groupby(train['Datetime'].dt.to_period('M'))['Count'].sum()

fig, ax = plt.subplots(figsize=(15, 10))
monthly_totals.plot(kind='bar', ax=ax, title="Passenger Count per Month",
                    legend=True, fontsize=12)
ax.set_xlabel("Year-Month", fontsize=12)
ax.set_ylabel("Passenger Count", fontsize=12)
# A bar plot has a categorical x axis, so date locators/formatters do not
# apply; rotate the month labels instead to keep them readable.
ax.tick_params(axis='x', rotation=45)
plt.show()
In [81]:
import plotly.offline as plty
import plotly.graph_objs as go

# Enable inline plotly rendering in the notebook.
plty.init_notebook_mode()

# Daily trend: total passenger count per calendar date.
# Author's observation: it is growing with the population capacity.
daily_count = train.groupby(train['Datetime'].dt.date)['Count'].sum()
daily_trace = go.Scatter(x=daily_count.index, y=daily_count)
plty.iplot([daily_trace])
In [82]:
import plotly.offline as plty
import plotly.graph_objs as go

# Enable inline plotly rendering in the notebook.
plty.init_notebook_mode()

# Monthly trend: total passenger count per 'YearMonth' label.
# groupby returns the groups ordered by the sorted label values.
monthly_count = train.groupby('YearMonth')['Count'].sum()
print(monthly_count.head())

monthly_trace = go.Scatter(x=monthly_count.index, y=monthly_count)
plty.iplot([monthly_trace])
In [6]:
# The hourly data has a lot of noise, so it is aggregated: the model is fit on
# daily totals and the daily forecast is later split across the day using this
# average hourly profile.
#
# hourly_frac: one row per hour of day (index 'hour'), single column 'fraction'
# holding that hour's mean count divided by the sum of all hourly means, so the
# fractions add up to 1.
#
# Only the 'Count' column is aggregated. Averaging the whole frame would also
# try to average the timestamp and string columns, which newer pandas versions
# reject instead of silently dropping.
hourly_mean = train.groupby('hour')['Count'].mean()
hourly_frac = (hourly_mean / hourly_mean.sum()).to_frame('fraction')
hourly_frac.head()
Out[6]:
In [7]:
# Index the training data by timestamp and keep only the passenger count;
# the identifier and the helper calendar columns are no longer needed.
train = train.set_index('Datetime').drop(columns=['ID', 'hour', 'YearMonth'])
train.head()
Out[7]:
In [8]:
# Aggregate to one row per calendar day by summing the values within each day.
# Relies on `train` being indexed by its timestamps.
daily_train = train.resample('D').sum() # sum up daily counts
daily_train.head()
Out[8]:
In [9]:
# Prophet expects the timestamp column to be called 'ds' and the target 'y'.
# Copy the daily index into 'ds', expose the daily total as 'y', and drop the
# original 'Count' column.
daily_train = (
    daily_train
    .assign(ds=daily_train.index, y=daily_train['Count'])
    .drop(columns='Count')
)
daily_train.head()
Out[9]:
In [10]:
# Fit the Prophet model on the daily series.
import warnings

# Suppress all warnings from here on (this affects the whole session).
warnings.filterwarnings("ignore")

# seasonality_prior_scale decides the influence of seasonality.
m = Prophet(
    daily_seasonality=True,
    seasonality_prior_scale=0.1,
)
m.fit(daily_train)
Out[10]:
Prophet()
For details on the Prophet parameters, check https://github.com/facebook/prophet/blob/master/python/fbprophet/forecaster.py
In [11]:
# Forecast horizon in days. The test set is hourly, so every 24 rows are one
# day. Round up rather than down: with flooring, a trailing partial day in the
# test set would get no daily forecast and its rows would have no prediction.
hours_per_day = 24
periods = int(np.ceil(test.shape[0] / hours_per_day))
print(periods)

# Extend the training dates by `periods` days and predict over the whole range
# (history plus the future days).
future = m.make_future_dataframe(periods=periods)
forecast = m.predict(future)
In [12]:
# Preview the forecast frame returned by m.predict.
forecast.head()
Out[12]:
In [13]:
# See the forecast components - trend, yearly seasonality, and weekly seasonality of the time series.
fig1 = m.plot_components(forecast)
In [118]:
# Plot the forecast produced by the fitted model.
# NOTE(review): `firg2` looks like a typo for `fig2`; the name is left unchanged
# in case other cells refer to it.
firg2 = m.plot(forecast)
In [14]:
# Next step: use the hourly fractions computed earlier to convert the daily
# forecast into an hourly forecast. Preview the daily forecast first.
forecast.head()
Out[14]:
In [15]:
# Rename the timestamp column to 'ds' so the test set uses the same column name
# as the forecast frame.
# NOTE(review): index=str also passes every row label through str(); this looks
# unintended - confirm before removing.
test = test.rename(index=str, columns={'Datetime': 'ds'})
test.head()
Out[15]:
In [16]:
# Add hour/day/month/year columns, derived from 'ds', to both the hourly test
# set and the daily forecast.
for frame in (test, forecast):
    for part in ('hour', 'day', 'month', 'year'):
        frame[part] = getattr(frame['ds'].dt, part)

# Attach the matching daily forecast to every hourly test row by calendar date.
# Columns present in both frames (such as 'hour' and 'ds') get _x/_y suffixes.
test_forecast = pd.merge(
    test,
    forecast,
    how='left',
    on=['day', 'month', 'year'],
)
test_forecast.head()
Out[16]:
In [18]:
# Inspect the merged frame again before attaching the hourly fractions.
test_forecast.head()
Out[18]:
In [20]:
# Look up each test row's hour of day ('hour_x', the test-side hour) in the
# hourly profile, which is indexed by hour, to bring in its 'fraction'.
test_forecast = test_forecast.merge(
    hourly_frac,
    how='left',
    left_on='hour_x',
    right_index=True,
)
test_forecast.head()
Out[20]:
In [21]:
# Hourly prediction = forecast daily total (yhat) times that hour's share of the day.
test_forecast['prediction'] = test_forecast['yhat'].mul(test_forecast['fraction'])
In [22]:
# List the columns of the merged frame.
test_forecast.columns
Out[22]:
In [23]:
# Keep only the row identifier, the hour of day, and the hourly prediction.
cols = ['ID', 'hour_x', 'prediction']
test_forecast = test_forecast.loc[:, cols]
test_forecast.head()
Out[23]:
In [24]:
# Share of missing values in each column of the final frame.
test_forecast.isna().mean()
Out[24]: