In [1]:
import pandas as pd
import numpy as np
%pylab inline
pylab.style.use('ggplot')
In [2]:
# Load the milk dataset; the relative path is resolved against the working directory.
milk_df = pd.read_csv('milk.csv')
In [3]:
# Preview the first rows of the freshly loaded frame.
milk_df.head()
Out[3]:
In [4]:
# Index by the 'Month' column and collapse the result with squeeze().
# NOTE(review): presumably only one value column remains, so this yields a
# Series rather than a DataFrame — confirm against the CSV.
milk_ts = milk_df.set_index('Month').squeeze()
In [5]:
# Reinterpret the index values as monthly periods (freq='M').
milk_ts.index = pd.PeriodIndex(milk_ts.index, freq='M')
In [6]:
# Inspect the series after the index conversion. milk_df is not modified by
# the preceding cells, so displaying it again would show nothing new.
milk_ts.head()
Out[6]:
In [7]:
# Plot the raw series.
milk_ts.plot()
Out[7]:
In [8]:
from statsmodels.tsa.stattools import adfuller

# Augmented Dickey-Fuller test on the undifferenced series; only the
# statistic, p-value and lag count are reported.
(df_stat, p_val, lag,
 nobs, critical_vals, icbest) = adfuller(milk_ts)
print(
    'DF test statistic: {:.4f}, p_val: {:.4f}, lag={}'.format(df_stat, p_val, lag)
)
In [9]:
# First difference of the series; the leading element is NaN after diff(),
# so it is dropped positionally.
# NOTE: despite the name, the index was given monthly frequency (freq='M'),
# so this is a one-period (month-over-month) difference.
lag_1day = milk_ts.diff(periods=1).iloc[1:]
In [10]:
# Plot the first-differenced series.
lag_1day.plot()
Out[10]:
In [11]:
# Repeat the Augmented Dickey-Fuller test on the first-differenced series.
(df_stat, p_val, lag,
 nobs, critical_vals, icbest) = adfuller(lag_1day)
print(
    'DF test statistic: {:.4f}, p_val: {:.4f}, lag={}'.format(df_stat, p_val, lag)
)
In [12]:
# Difference the already-differenced series again at a lag of 12 periods and
# drop the first 12 entries, which are NaN after diff().
seasonal = lag_1day.diff(periods=12).iloc[12:]
seasonal.plot()
Out[12]:
In [13]:
# Augmented Dickey-Fuller test on the seasonally differenced series.
(df_stat, p_val, lag,
 nobs, critical_vals, icbest) = adfuller(seasonal)
print(
    'DF test statistic: {:.4f}, p_val: {:.4f}, lag={}'.format(df_stat, p_val, lag)
)
In [14]:
from statsmodels.tsa.stattools import acf, pacf
In [15]:
# Autocorrelations for lags 0..19 (20 values). The lag count is requested
# explicitly instead of slicing the default output, because the default
# number of lags differs between statsmodels versions and may be below 20.
milk_acf = acf(seasonal, nlags=19)
In [16]:
# acf() returns lag 0 first, so label the bars from 0; the previous
# range(1, 21) index shifted every lag label by one.
pd.Series(milk_acf, index=range(len(milk_acf))).plot(kind='bar')
Out[16]:
In [17]:
# Partial autocorrelations for lags 0..19 (20 values), requested explicitly
# rather than sliced from the version-dependent default output.
milk_pacf = pacf(seasonal, nlags=19)
# pacf() returns lag 0 first, so label the bars from 0; the previous
# range(1, 21) index shifted every lag label by one.
pd.Series(milk_pacf, index=range(len(milk_pacf))).plot(kind='bar')
Out[17]:
In [18]:
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Seasonal ARIMA on the original (undifferenced) series:
#   order          = (p, d, q)    -> (0, 1, 0)
#   seasonal_order = (P, D, Q, s) -> (1, 1, 1, 12)
# trend='n' fits the model without a trend term.
model = SARIMAX(
    milk_ts,
    trend='n',
    order=(0, 1, 0),
    seasonal_order=(1, 1, 1, 12),
)
results = model.fit()
results.summary()
Out[18]:
In [19]:
# One-step-ahead in-sample predictions.
# The fitted model differences once regularly (d=1) and once seasonally
# (D=1, s=12), so the first d + D*s = 13 predictions are made before the
# filter has seen enough observations and are initialisation artefacts
# (they start near zero). Begin predicting after that burn-in so they do not
# distort the comparison with the actual series.
milk_predicted = results.predict(start=1 + 1 * 12)
In [20]:
# Overlay the observed values and the model's predictions on one chart;
# concat along axis=1 aligns the two series on their shared index.
pd.concat(
    {'actual': milk_ts, 'predicted': milk_predicted},
    axis=1,
).plot(figsize=(10, 4))
Out[20]: