In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
from dateutil.relativedelta import relativedelta
import seaborn as sns
import statsmodels.api as sm
from statsmodels.tsa.stattools import acf
from statsmodels.tsa.stattools import pacf
from statsmodels.tsa.seasonal import seasonal_decompose
In [2]:
from pandas.core import datetools
In [3]:
# Source data https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/AirPassengers.html
# Read the AirPassengers CSV directly over HTTP into a DataFrame.
df = pd.read_csv(
    "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/AirPassengers.csv"
)
In [4]:
# Preview the first rows of the freshly loaded frame.
df.head()
Out[4]:
In [5]:
# Show the first 13 rows.
df.head(13)
Out[5]:
In [7]:
# Show the last 13 rows.
df.tail(13)
Out[7]:
In [9]:
# Anchor date for the series; month offsets are added to it to build the date list.
start = datetime.datetime.strptime(
    "1949-01-01",
    "%Y-%m-%d",
)
type(start)
Out[9]:
In [10]:
# Print the parsed anchor date.
print(start)
In [12]:
# Number of rows in the frame; the date list below is built with one entry per row.
len(df)
Out[12]:
In [13]:
# One datetime per row of df: `start` shifted forward by 0, 1, 2, ... calendar months.
date_list = [
    start + relativedelta(months=month_offset)
    for month_offset in range(df.shape[0])
]
In [15]:
# Print the first four generated dates.
print(date_list[0:4])
In [21]:
# Scratch cell: print every integer from 0 through 143, one per line.
for x in range(144):
    print(x)
In [19]:
# Rebuild the monthly date list: one datetime per row of df, stepping one calendar month at a time from `start`.
date_list = [
    start + relativedelta(months=month_offset)
    for month_offset in range(len(df))
]
In [22]:
# Inspect the entries at positions 140-143 of the date list.
date_list[140:144]
Out[22]:
In [17]:
# Use the generated dates as the frame's (unnamed) index.
# Assigning the list directly yields the same index as adding a temporary
# 'index' column, promoting it with set_index and clearing the index name.
df.index = date_list
df.index.name = None
In [23]:
# Remove the 'Unnamed: 0' and 'time' columns, which the rest of the notebook does not use.
# Each column is checked first so the cell can be re-run without raising KeyError
# once the columns are already gone.
for unused_column in ('Unnamed: 0', 'time'):
    if unused_column in df.columns:
        del df[unused_column]
df.head()
Out[23]:
In [24]:
# Check the last rows after re-indexing and dropping columns.
df.tail()
Out[24]:
In [25]:
# Cast each value to int, then multiply the whole column by 1000.
# NOTE(review): this overwrites the column in place, so every re-run of the cell
# multiplies by 1000 again.
df['AirPassengers'] = df['AirPassengers'].apply(int) * 1000
In [26]:
# Confirm the scaled values.
df.head()
Out[26]:
In [27]:
# Line plot of the passenger series; the current figure is also written to a PNG file.
df['AirPassengers'].plot(
    figsize=(12, 8),
    title='Monthly Passengers',
    fontsize=14,
)
plt.savefig('month_ridership.png', bbox_inches='tight')
In [28]:
# Decompose the passenger series using a 12-observation seasonal cycle.
# NOTE(review): later statsmodels releases renamed the `freq` keyword to `period`;
# switch the keyword if this raises TypeError on the installed version.
decomposition = seasonal_decompose(df.AirPassengers, freq=12)
# decomposition.plot() returns the figure it draws on, so no plt.figure() is created
# beforehand; the earlier one was immediately overwritten and only left an empty figure.
fig = decomposition.plot()
fig.set_size_inches(15, 8)
In [29]:
# Candidate values for the p, d and q parameters: each may be 0 or 1.
# All three names share the same range object.
p = d = q = range(2)
print(p)
In [30]:
import itertools
import warnings
In [31]:
# Every (p, d, q) triplet: the Cartesian product of the three candidate ranges.
pdq = [triplet for triplet in itertools.product(p, d, q)]
print(pdq)
In [32]:
# Every seasonal (P, D, Q, 12) combination: the same triplets with a fixed period of 12 appended.
seasonal_pdq = [
    (seasonal_p, seasonal_d, seasonal_q, 12)
    for seasonal_p, seasonal_d, seasonal_q in itertools.product(p, d, q)
]
print('Examples of parameter combinations for Seasonal ARIMA...')
# Pair a few orders with a few seasonal orders purely as illustration.
for order_pos, seasonal_pos in ((1, 1), (1, 2), (2, 3), (2, 4)):
    print('SARIMAX: {} x {}'.format(pdq[order_pos], seasonal_pdq[seasonal_pos]))
In [33]:
# Alias (not a copy) of df; passed as the data argument to SARIMAX in the grid-search cells.
y=df
In [34]:
# Grid search: fit a SARIMAX model for every order / seasonal-order pair and print its AIC.
warnings.filterwarnings("ignore")  # ignore warning messages from here on (global filter)
for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            mod = sm.tsa.statespace.SARIMAX(y,
                                            order=param,
                                            seasonal_order=param_seasonal,
                                            enforce_stationarity=False,
                                            enforce_invertibility=False)
            results = mod.fit()
            print('ARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal, results.aic))
        except Exception:
            # Best-effort search: skip combinations that fail to fit.
            # `Exception` (not a bare except) lets KeyboardInterrupt / SystemExit
            # through, so the long-running loop can still be interrupted.
            continue
In [35]:
# Grid search again, this time collecting each successful fit's AIC into c3.
# NOTE(review): failed fits are skipped, so positions in c3 correspond to positions
# in another run's list only if the same combinations fail in both runs.
warnings.filterwarnings("ignore")  # ignore warning messages from here on (global filter)
c3 = []
for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            mod = sm.tsa.statespace.SARIMAX(y,
                                            order=param,
                                            seasonal_order=param_seasonal,
                                            enforce_stationarity=False,
                                            enforce_invertibility=False)
            results = mod.fit()
            c3.append(results.aic)
        except Exception:
            # Best-effort search: skip combinations that fail to fit.
            # `Exception` (not a bare except) lets KeyboardInterrupt / SystemExit
            # through, so the long-running loop can still be interrupted.
            continue
In [36]:
# Display the collected AIC values.
c3
Out[36]:
In [37]:
import numpy as np
# Position of the smallest AIC collected in c3.
index_min = np.asarray(c3).argmin()
In [39]:
# Smallest AIC value in c3.
np.min(c3)
Out[39]:
In [38]:
# Display the position of the smallest AIC.
index_min
Out[38]:
In [40]:
# Cross-check: the AIC stored at that position.
c3[index_min]
Out[40]:
In [44]:
# Grid search once more, printing each result and collecting the formatted labels into c4.
# Each label has the form 'ARIMA(p, d, q)x(P, D, Q, 12)12 - AIC:<value>'; later cells
# parse the orders back out of that text.
warnings.filterwarnings("ignore")  # ignore warning messages from here on (global filter)
c4 = []
for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            mod = sm.tsa.statespace.SARIMAX(y,
                                            order=param,
                                            seasonal_order=param_seasonal,
                                            enforce_stationarity=False,
                                            enforce_invertibility=False)
            results = mod.fit()
            # Build the label once and reuse it for both the printout and the list.
            label = 'ARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal, results.aic)
            print(label)
            c4.append(label)
        except Exception:
            # Best-effort search: skip combinations that fail to fit.
            # `Exception` (not a bare except) lets KeyboardInterrupt / SystemExit
            # through, so the long-running loop can still be interrupted.
            continue
In [46]:
# Label stored at the position of the smallest AIC found in c3.
# NOTE(review): assumes c3 and c4 hold the same combinations in the same order — confirm no fit failed in only one run.
c4[index_min]
Out[46]:
In [48]:
# Extract the "p, d, q" text from the selected label, e.g. 'ARIMA(0, 1, 1)x(...)12 - AIC:...'.
# The span between the first '(' and the first ')' is used instead of fixed character
# offsets, so orders with more than one digit are still captured in full.
best_label = c4[index_min]
order1 = best_label[best_label.index('(') + 1:best_label.index(')')]
order1
Out[48]:
In [54]:
# Turn the comma-separated order text into a list of ints.
# Guarded so the cell can be re-run: if order1 has already been converted
# (it is no longer a string) it is left unchanged instead of raising AttributeError.
if isinstance(order1, str):
    order1 = [int(s) for s in order1.split(',')]
order1
Out[54]:
In [49]:
# Extract the "P, D, Q, s" text from the selected label: the parenthesised group that follows 'x'.
# Searching for 'x(' and its closing ')' replaces fixed character offsets, which only
# line up when every order value is a single digit.
best_label = c4[index_min]
seasonal_open = best_label.index('x(') + 2
seasonal_order1 = best_label[seasonal_open:best_label.index(')', seasonal_open)]
seasonal_order1
Out[49]:
In [55]:
# Turn the comma-separated seasonal-order text into a list of ints.
# Guarded so the cell can be re-run: if seasonal_order1 has already been converted
# (it is no longer a string) it is left unchanged instead of raising AttributeError.
if isinstance(seasonal_order1, str):
    seasonal_order1 = [int(s) for s in seasonal_order1.split(',')]
seasonal_order1
Out[55]:
In [56]:
from statsmodels.tsa.x13 import x13_arima_select_order
In [57]:
# Build the final model on the passenger column with the selected orders and no trend term.
# NOTE(review): the grid-search cells pass enforce_stationarity=False and
# enforce_invertibility=False; this call does not pass them — confirm that is intended.
mod = sm.tsa.statespace.SARIMAX(
    df['AirPassengers'],
    trend='n',
    order=order1,
    seasonal_order=seasonal_order1,
)
In [58]:
# Fit the final model and print its summary.
results = mod.fit()
fit_summary = results.summary()
print(fit_summary)
In [61]:
# Actual values for comparison with the predictions, which are requested from position 120.
# The slice now starts at 120 as well (it started at 121, one row late).
df[120:144]
Out[61]:
In [60]:
# Model predictions from position 120 through position 144 (both given as integer positions).
# NOTE(review): if the series has 144 rows, position 144 is one step past the last observation — confirm.
results.predict(start=120,end=144)
Out[60]:
In [47]:
# Load a second CSV into ts2.
# NOTE(review): this is an absolute, machine-specific Windows path, so the cell fails
# on any other machine; ts2 is not used anywhere else in the visible notebook.
ts2 = pd.read_csv(
    'C:\\Users\\KOGENTIX\\Desktop\\trainingWeek2\\co2-ppm-mauna-loa-19651980.csv'
)
In [ ]: