In [2]:
from __future__ import print_function
import os
import pandas as pd
import numpy as np
from statsmodels.tsa import stattools
from statsmodels.tsa import seasonal
%matplotlib inline
from matplotlib import pyplot as plt
In [5]:
"""
This notebook illustrates time series decomposition by the statsmodels package.
Both additive and multiplicative models are demonstrated.
"""
Out[5]:
In [6]:
"""
Let us demonstrate the additive model using the Wisconsin Employment
Jan. 1961 – Oct. 1975 dataset.
"""
Out[6]:
In [12]:
# Load the Wisconsin employment CSV and label every row by its 'Month' value.
# drop=False keeps 'Month' available as a regular column as well as the index.
wisc_emp = pd.read_csv('datasets/wisconsin-employment-time-series.csv').set_index('Month', drop=False)
In [13]:
# Report the (rows, columns) dimensions of the loaded frame
n_rows, n_cols = wisc_emp.shape
print('Shape of the DataFrame:', (n_rows, n_cols))
In [14]:
# Preview the first 10 rows; the bare expression is rendered as the cell output
wisc_emp.iloc[:10]
Out[14]:
In [15]:
# Flag rows lacking either an 'Employment' or a 'Month' value,
# report how many there are, then keep only the complete rows.
missing = wisc_emp[['Employment', 'Month']].isnull().any(axis=1)
print('Number of missing values found:', missing.sum())
wisc_emp = wisc_emp.loc[~missing, :]
In [16]:
# Augmented Dickey-Fuller test on the raw employment series; lag order chosen by AIC
adf_result = stattools.adfuller(x=wisc_emp['Employment'], autolag='AIC')
In [17]:
# adf_result here comes from the test on the original (undecomposed) series,
# so label the p-value accordingly rather than as "irregular variations".
print('p-val of the ADF test on original employment data:', adf_result[1])
In [18]:
# Additive decomposition of the employment series with a 12-observation seasonal cycle.
# Newer statsmodels releases take the cycle length as `period`; older ones only know
# `freq` and raise TypeError on the unknown keyword, so fall back in that case.
try:
    decompose_model = seasonal.seasonal_decompose(wisc_emp.Employment.tolist(), period=12,
                                                  model='additive')
except TypeError:
    decompose_model = seasonal.seasonal_decompose(wisc_emp.Employment.tolist(), freq=12,
                                                  model='additive')
In [29]:
# Plot the original time series together with the trend, seasonal and
# irregular components of the decomposition, one panel each on a shared x-axis.
fig, axarr = plt.subplots(4, sharex=True)
fig.set_size_inches(5.5, 5.5)

wisc_emp['Employment'].plot(ax=axarr[0], color='b', linestyle='-')
axarr[0].set_title('Monthly Employment')

# Wrap each component array in a Series carrying the frame's index so that
# every panel is drawn against the same labels as the top one.
components = [
    (decompose_model.trend, 'r', 'Trend component in monthly employment'),
    (decompose_model.seasonal, 'g', 'Seasonal component in monthly employment'),
    (decompose_model.resid, 'k', 'Irregular variations in monthly employment'),
]
for ax, (values, color, title) in zip(axarr[1:], components):
    pd.Series(data=values, index=wisc_emp.index).plot(color=color, linestyle='-', ax=ax)
    ax.set_title(title)

plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=2.0)
plt.xticks(rotation=10)

# savefig does not create missing folders, so make sure the target exists first.
plot_path = 'plots/ch2/B07887_02_22.png'
plot_dir = os.path.dirname(plot_path)
if not os.path.isdir(plot_dir):
    os.makedirs(plot_dir)
plt.savefig(plot_path, format='png', dpi=300)
In [13]:
# ADF test on the irregular component, restricted to its finite entries
# (a boolean mask filters out any NaN/inf values before testing).
adf_result = stattools.adfuller(decompose_model.resid[np.isfinite(decompose_model.resid)],
                                autolag='AIC')
In [14]:
# The p-value is read from position 1 of the ADF result
resid_pvalue = adf_result[1]
print('p-val of the ADF test on irregular variations in employment data:', resid_pvalue)
In [15]:
"""
The additive decomposition has reduced the ADF p-value
from 0.98 for the original time series
to 0.066 for the irregular variations left after decomposing.
"""
Out[15]:
In [16]:
"""
Now we will attempt decomposition of the original time series
using a multiplicative model.
"""
Out[16]:
In [17]:
# Multiplicative decomposition of the employment series with a 12-observation seasonal cycle.
# Newer statsmodels releases take the cycle length as `period`; older ones only know
# `freq` and raise TypeError on the unknown keyword, so fall back in that case.
try:
    decompose_model = seasonal.seasonal_decompose(wisc_emp.Employment.tolist(), period=12,
                                                  model='multiplicative')
except TypeError:
    decompose_model = seasonal.seasonal_decompose(wisc_emp.Employment.tolist(), freq=12,
                                                  model='multiplicative')
In [18]:
# Plot the original time series together with the trend, seasonal and
# irregular components of the decomposition, one panel each on a shared x-axis.
fig, axarr = plt.subplots(4, sharex=True)
fig.set_size_inches(5.5, 5.5)

wisc_emp['Employment'].plot(ax=axarr[0], color='b', linestyle='-')
axarr[0].set_title('Monthly Employment')

# Wrap each component array in a Series carrying the frame's index so the lower
# panels are drawn against the same labels as the top one instead of bare
# integer positions.
components = [
    (decompose_model.trend, 'r', 'Trend component in monthly employment'),
    (decompose_model.seasonal, 'g', 'Seasonal component in monthly employment'),
    (decompose_model.resid, 'k', 'Irregular variations in monthly employment'),
]
for ax, (values, color, title) in zip(axarr[1:], components):
    pd.Series(data=values, index=wisc_emp.index).plot(color=color, linestyle='-', ax=ax)
    ax.set_title(title)

# Space the panels so titles do not collide with neighbouring axes, and tilt the tick labels.
plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=2.0)
plt.xticks(rotation=10)

# savefig does not create missing folders, so make sure the target exists first.
plot_path = 'plots/ch2/B07887_02_23.png'
plot_dir = os.path.dirname(plot_path)
if not os.path.isdir(plot_dir):
    os.makedirs(plot_dir)
plt.savefig(plot_path, format='png', dpi=300)
In [19]:
# ADF test on the irregular component, restricted to its finite entries
# (a boolean mask filters out any NaN/inf values before testing).
adf_result = stattools.adfuller(decompose_model.resid[np.isfinite(decompose_model.resid)],
                                autolag='AIC')
In [20]:
# The p-value is read from position 1 of the ADF result
resid_pvalue = adf_result[1]
print('p-val of the ADF test on irregular variations in employment data:', resid_pvalue)
In [21]:
"""
Voila! The p-value has further reduced to 0.00123.
The null hypothesis of non-stationarity of the irregular variations
can be rejected even at a confidence level of 99% (alpha=0.01).
This shows that the decomposition has reduced the non-stationary original time series to
stationary irregular variations. Besides, we have estimates of both the trend-cycle
and seasonal components.
"""
Out[21]: