In [25]:
# NOTE: %pylab inline star-imports numpy and matplotlib.pyplot names (including
# `np` and `plt`) into the notebook namespace and enables inline figure rendering.
%pylab inline
In [26]:
# Import libraries
from __future__ import absolute_import, division, print_function
# Ignore warnings
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
import math
# Graphing Libraries
import matplotlib.pyplot as pyplt
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 15, 6
import seaborn as sns
sns.set_style("white")
from IPython.display import display
In [3]:
# Read the passenger CSV, parse the first column as dates and index the frame by 'Month'.
data = pd.read_csv(
    'data/AirPassengers.csv',
    index_col='Month',
    parse_dates=[0],
)
data.head()
Out[3]:
In [4]:
# Pull the passenger-count column out as a Series and preview its first ten rows.
ts = data.loc[:, '#Passengers']
ts.head(10)
Out[4]:
In [5]:
# Label-based slice on the date index; both endpoints are included.
ts.loc['1949-01-01':'1949-05-01']
Out[5]:
In [6]:
# Plot the raw series on an explicit Axes and label it so the figure stands on
# its own when the notebook is skimmed. The trailing ';' suppresses the repr.
fig, ax = pyplt.subplots()
ax.plot(ts)
ax.set(title='Passengers over time', xlabel='Month', ylabel='#Passengers');
After plotting the data, I can see a general upward trend as well as a recurring seasonal pattern.
In [7]:
from statsmodels.tsa.stattools import adfuller
def test_stationarity(timeseries, window=12):
    """
    Plot a series together with its rolling mean and rolling standard deviation.

    A rolling mean or rolling spread that drifts over time is a visual hint
    that the series is not stationary.

    Parameters
    ----------
    timeseries: pandas series
        The series to inspect.
    window: int, default 12
        Number of observations in each rolling window. The default keeps the
        previously hard-coded value of 12.
    """
    # Series.rolling(...) replaces the module-level pd.rolling_mean /
    # pd.rolling_std helpers, which no longer exist in current pandas.
    rolling = timeseries.rolling(window=window)
    rolmean = rolling.mean()
    rolstd = rolling.std()

    # Plot through the explicitly imported `pyplt` alias instead of the `plt`
    # name that is only present as a side effect of `%pylab inline`.
    pyplt.plot(timeseries, color='blue', label='Original')
    pyplt.plot(rolmean, color='red', label='Rolling Mean')
    pyplt.plot(rolstd, color='black', label='Rolling Std')
    pyplt.legend(loc='best')
    pyplt.title('Rolling Mean & Standard Deviation')
    pyplt.show(block=False)
def Dickey_Fuller(timeseries):
    """
    Run the augmented Dickey-Fuller test on a series and print a summary.

    The printed Series holds the test statistic, p-value, number of lags used
    and number of observations used, followed by one 'Critical Value (...)'
    entry per level returned by `adfuller`.

    Parameters
    ----------
    timeseries: pandas series
        The series to test.
    """
    print ('Results of Dickey-Fuller Test:')
    adf_result = adfuller(timeseries, autolag='AIC')

    # The first four entries are the headline statistics; entry 4 maps each
    # significance level to its critical value.
    headline_labels = ['Test Statistic','p-value','#Lags Used','Number of Observations Used']
    critical_values = adf_result[4]

    # Assemble values and labels up front so the Series is built in one step.
    values = list(adf_result[0:4]) + [critical_values[level] for level in critical_values]
    labels = headline_labels + ['Critical Value (%s)'%level for level in critical_values]

    summary = pd.Series(values, index=labels)
    print (summary)
In [8]:
# Plot the passenger series against its 12-observation rolling mean and rolling std.
test_stationarity(ts)
In [9]:
# Print the Dickey-Fuller test statistic, p-value, lags used, observation count and critical values for ts.
Dickey_Fuller(ts)
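This is not a stationary time series: the rolling mean is not constant over time. Further, the test statistic is much larger than every critical value, so the Dickey-Fuller test cannot reject the null hypothesis that the series is non-stationary.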
In [ ]: