In [20]:
import warnings
# Suppress every warning for the rest of the session. NOTE(review): this also
# hides deprecation and convergence warnings raised by the model fits below.
warnings.filterwarnings('ignore')
In [399]:
import numpy as np
from pprint import pprint
import pandas as pd
from scipy import stats
import quandl
from sklearn.ensemble import RandomForestRegressor
import statsmodels.api as sm
from statsmodels.tsa.arima_model import ARIMA, ARMA
from matplotlib import pylab as plt
%matplotlib inline
In [7]:
def get_dataframe(name, start_date='2015-1-1', end_date='2017-11-1', collapse='weekly'):
    """Fetch one dataset through ``quandl.get``.

    Parameters
    ----------
    name : str
        Dataset code forwarded unchanged to ``quandl.get``
        (for example ``"TSE/7203.4"``).
    start_date, end_date : str
        Date range forwarded to ``quandl.get``. The defaults are the values
        that used to be hard-coded here, so existing one-argument calls
        behave exactly as before.
    collapse : str
        Sampling frequency forwarded to ``quandl.get`` (default ``'weekly'``).

    Returns
    -------
    Whatever ``quandl.get`` returns for the request.
    """
    return quandl.get(
        name,
        start_date=start_date,
        end_date=end_date,
        collapse=collapse,
    )
In [9]:
# Dataset codes in the order dataframe1..dataframe6: the Nikkei index first,
# then five TSE-listed tickers.
dataset_codes = (
    "NIKKEI/INDEX.4",
    "TSE/2502.4",
    "TSE/9020.4",
    "TSE/7203.4",
    "TSE/4503.4",
    "TSE/6758.4",
)
(dataframe1, dataframe2, dataframe3,
 dataframe4, dataframe5, dataframe6) = [get_dataframe(code) for code in dataset_codes]
In [333]:
# Target series: the index frame names its column 'Close Price'; rename it so
# it matches the 'Close' column of the individual stocks. Not truncated here.
ts1 = dataframe1['Close Price'].rename('Close')

# Feature series: the first 120 rows of each stock's 'Close' column.
ts2, ts3, ts4, ts5, ts6 = (
    frame['Close'][:120]
    for frame in (dataframe2, dataframe3, dataframe4, dataframe5, dataframe6)
)
In [436]:
# Augmented Dickey-Fuller test for each feature series. The deterministic
# regression term differs per series ('ctt' or 'ct'); lag length is chosen by AIC.
adf_specs = (
    (ts2, 'ctt'),
    (ts3, 'ct'),
    (ts4, 'ctt'),
    (ts5, 'ct'),
    (ts6, 'ctt'),
)
for adf_series, adf_regression in adf_specs:
    pprint(sm.tsa.stattools.adfuller(adf_series, regression=adf_regression, autolag='AIC'))
In [436]:
In [402]:
In [402]:
In [403]:
In [447]:
# AIC-based ARMA order selection for each feature series.
# ts6 is first-differenced before selection; the other series are used as-is.
ts6_first_diff = (ts6 - ts6.shift(1)).dropna()
for order_candidate in (ts2, ts3, ts4, ts5, ts6_first_diff):
    pprint(sm.tsa.arma_order_select_ic(order_candidate, ic='aic'))
In [446]:
In [446]:
In [446]:
In [446]:
In [266]:
# Fit one ARMA(p, q) model per feature series, in the original order ts2..ts5.
ts2_arima, ts3_arima, ts4_arima, ts5_arima = (
    ARMA(feature_series, order=pq_order).fit()
    for feature_series, pq_order in (
        (ts2, (1, 2)),
        (ts3, (3, 2)),
        (ts4, (1, 0)),
        (ts5, (3, 2)),
    )
)
# ts6 is modelled with one order of differencing: ARIMA(1, 1, 0).
ts6_arima = ARIMA(ts6, order=(1, 1, 0)).fit()
In [448]:
# Random forest mapping the five stock closes (columns) to the index close.
r_forest = RandomForestRegressor(
    n_estimators=1000,
    criterion='mse',
    random_state=1,
    n_jobs=1,
)
# Stack the five series as rows, then transpose to get one column per stock.
rf_train_features = np.vstack(
    [feature.values for feature in (ts2, ts3, ts4, ts5, ts6)]
).T
rf_train_target = ts1[:120].values
r_forest.fit(rf_train_features, rf_train_target)
Out[448]:
In [448]:
In [338]:
# Predict every feature series over the same date window.
ts2_predict = ts2_arima.predict('2015-1-18', '2017-11-5')
ts3_predict = ts3_arima.predict('2015-1-18', '2017-11-5')
ts4_predict = ts4_arima.predict('2015-1-18', '2017-11-5')
ts5_predict = ts5_arima.predict('2015-1-18', '2017-11-5')
# ts6_arima is an ARIMA model with d=1. Its predict() defaults to
# typ='linear', which returns predictions of the *differenced* series.
# The random forest was trained on price levels, so request levels
# explicitly to keep this feature on the same scale as the training data.
ts6_predict = ts6_arima.predict('2015-1-18', '2017-11-5', typ='levels')
In [449]:
# Feed the ARMA/ARIMA feature predictions (one column per stock) to the forest.
rf_predict_features = np.vstack(
    (ts2_predict, ts3_predict, ts4_predict, ts5_predict, ts6_predict)
).T
ts1_pred_with_RF = r_forest.predict(rf_predict_features)

# Weekly index starting at the first predicted date. pd.date_range replaces the
# removed DatetimeIndex(start=..., periods=..., freq=...) constructor form, and
# the length follows the prediction array instead of a hard-coded 147.
ts1_pred_with_RF_dataframe = pd.DataFrame(
    {'Close': ts1_pred_with_RF.tolist()},
    index=pd.date_range(start='2015-1-18', periods=len(ts1_pred_with_RF), freq='W'),
)

# No hold(True): that name is not provided by any import in this notebook, and
# successive plt.plot calls already draw onto the same axes.
plt.plot(ts1_pred_with_RF_dataframe)
plt.plot(ts1)
Out[449]:
In [341]:
In [449]:
In [276]:
# Augmented Dickey-Fuller test on the first 120 observations of the index
# series, with the 'ctt' regression term and AIC-selected lag length.
sm.tsa.stattools.adfuller(
    ts1[:120],
    regression='ctt',
    autolag='AIC',
)
Out[276]:
In [254]:
# AIC-based ARMA order selection on the first 120 observations of the index
# series, with trend='nc'.
sm.tsa.arma_order_select_ic(
    ts1[:120],
    ic='aic',
    trend='nc',
)
Out[254]:
In [321]:
# ARMA(1, 0) fitted directly on the first 120 observations of the index series.
ts1_arima = ARMA(ts1[:120], order=(1, 0)).fit()
ts1_pred_with_ARMA_dataframe = ts1_arima.predict('2015-1-18', '2017-11-5')

# No hold(True): that name is not provided by any import in this notebook, and
# successive plt.plot calls already draw onto the same axes.
plt.plot(ts1_pred_with_ARMA_dataframe)
plt.plot(ts1)
Out[321]:
In [301]:
# Lag-1 random forest: predict the index close from the previous observation.
ts1_s1 = ts1.shift(1).dropna()[:120]   # feature: previous close
ts1_y = ts1[1:][:120]                  # target: current close

r_forest2 = RandomForestRegressor(n_estimators=1000, criterion='mse', random_state=1, n_jobs=1)
# Fit and predict with r_forest2. The previous code created r_forest2 but then
# refitted r_forest, overwriting the five-feature model with a one-feature one.
# reshape(-1, 1) yields the same single-column matrix np.vstack produced.
r_forest2.fit(ts1_s1.values.reshape(-1, 1), ts1_y.values)
ts1_pred_with_RF2 = r_forest2.predict(ts1.shift(1).dropna().values.reshape(-1, 1))

# Weekly index starting at the first predicted date. pd.date_range replaces the
# removed DatetimeIndex(start=..., periods=..., freq=...) constructor form, and
# the length follows the prediction array instead of a hard-coded 147.
ts1_pred_with_RF2_dataframe = pd.DataFrame(
    {'Close': ts1_pred_with_RF2.tolist()},
    index=pd.date_range(start='2015-1-18', periods=len(ts1_pred_with_RF2), freq='W'),
)

# No hold(True): that name is not provided by any import in this notebook, and
# successive plt.plot calls already draw onto the same axes.
plt.plot(ts1_pred_with_RF2_dataframe)
plt.plot(ts1)
Out[301]:
In [451]:
# Residuals on the first 120 aligned observations for the three models.
# train_with_RF_resid was previously never defined, so this cell raised
# NameError; it is built the same way as pred_with_RF_resid in the holdout cell.
train_actual = ts1[1:][:120]
train_with_RF_resid = train_actual - ts1_pred_with_RF[:120]
train_with_ARMA_resid = train_actual - ts1_pred_with_ARMA_dataframe.tolist()[:120]
train_with_RF2_resid = train_actual - ts1_pred_with_RF2[:120]

# Mean squared error of each residual series.
MSE_train_RF = np.square(train_with_RF_resid.values).mean()
MSE_train_ARMA = np.square(train_with_ARMA_resid.values).mean()
MSE_train_RF2 = np.square(train_with_RF2_resid.values).mean()

print('train: MSE')
print('RandomForest and ARIMA model: ', MSE_train_RF)
print(' AR(I)MA model : ', MSE_train_ARMA)
print(' Random Forest : ', MSE_train_RF2)
In [394]:
# Same ARMA prediction as before, but compared against ts1[:120] rather than
# ts1[1:][:120], i.e. without dropping the first observation.
ts1_pred_with_ARMA_dataframe2 = ts1_arima.predict('2015-1-18', '2017-11-5')
arma2_first_120 = ts1_pred_with_ARMA_dataframe2.tolist()[:120]
train_with_ARMA_resid2 = ts1[:120] - arma2_first_120
MSE_train_ARMA2 = np.array(
    [residual * residual for residual in train_with_ARMA_resid2]
).mean()

print('train: MSE')
print(' AR(I)MA model? : ', MSE_train_ARMA2)
In [361]:
# Residuals on everything after the first 120 aligned observations.
holdout_actual = ts1[1:][120:]
pred_with_RF_resid = holdout_actual - ts1_pred_with_RF[120:]
pred_with_ARMA_resid = holdout_actual - ts1_pred_with_ARMA_dataframe.tolist()[120:]
pred_with_RF2_resid = holdout_actual - ts1_pred_with_RF2[120:]

# Mean of the squared residuals for each model.
MSE_pred_RF = np.array([r * r for r in pred_with_RF_resid]).mean()
MSE_pred_ARMA = np.array([r * r for r in pred_with_ARMA_resid]).mean()
MSE_pred_RF2 = np.array([r * r for r in pred_with_RF2_resid]).mean()

print('predict: MSE')
print('RandomForest and ARIMA model: ', MSE_pred_RF)
print(' AR(I)MA model : ', MSE_pred_ARMA)
print(' Random Forest : ', MSE_pred_RF2)

# Same comparison for the second ARMA prediction series.
pred_with_ARMA_resid2 = holdout_actual - ts1_pred_with_ARMA_dataframe2.tolist()[120:]
MSE_pred_ARMA2 = np.array([r * r for r in pred_with_ARMA_resid2]).mean()
print(' AR(I)MA model? : ', MSE_pred_ARMA2)
In [369]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Two stacked panels: autocorrelation on top, partial autocorrelation below,
# both for the full index series up to lag 40.
fig = plt.figure(figsize=(12, 8))
ax1 = fig.add_subplot(2, 1, 1)
fig = sm.graphics.tsa.plot_acf(ts1, lags=40, ax=ax1)
ax2 = fig.add_subplot(2, 1, 2)
fig = sm.graphics.tsa.plot_pacf(ts1, lags=40, ax=ax2)
In [469]:
# Seasonal ARIMA on the index series: order (1, 0, 0), seasonal order (1, 1, 0, 12).
# NOTE(review): this model is fitted on all of ts1, whereas the other models
# use only the first 120 observations - confirm that is intended.
ts1_SARIMA = sm.tsa.SARIMAX(ts1, order=(1, 0, 0), seasonal_order=(1, 1, 0, 12)).fit()
ts1_pred_with_SARIMA_dataframe = ts1_SARIMA.predict('2015-1-18', '2017-11-5')

# No hold(True): that name is not provided by any import in this notebook, and
# successive plt.plot calls already draw onto the same axes.
plt.plot(ts1_pred_with_SARIMA_dataframe)
plt.plot(ts1)
Out[469]:
In [382]:
# Same comparison with the first 15 points of both series dropped.
# NOTE(review): presumably this hides the start-up period of the seasonal
# model - confirm.
sarima_pred_trimmed = ts1_pred_with_SARIMA_dataframe[15:]
plt.plot(sarima_pred_trimmed)
plt.plot(ts1[15:])
Out[382]:
In [393]:
# Actual values and SARIMA predictions, both with the first 15 points dropped
# (the actual series is shifted by one more, as in the other MSE cells).
sarima_actual = ts1[15:][1:]
sarima_predicted = ts1_pred_with_SARIMA_dataframe.tolist()[15:]

# The first 105 remaining points (120 - 15) are the "train" window used by the
# other models; everything after is the "predict" window. The labels were
# previously swapped: the [105:] tail was printed as train and the [:105] head
# as predict, and MSE_pred_SARIMA ended up holding the head-slice value.
train_with_SARIMA_resid = sarima_actual[:105] - sarima_predicted[:105]
pred_with_SARIMA_resid2 = sarima_actual[105:] - sarima_predicted[105:]

MSE_train_SARIMA = np.square(train_with_SARIMA_resid.values).mean()
MSE_pred_SARIMA = np.square(pred_with_SARIMA_resid2.values).mean()

print('train: MSE')
print(' SARIMA model? : ', MSE_train_SARIMA)
print('predict: MSE')
print(' SARIMA model? : ', MSE_pred_SARIMA)
In [327]:
In [395]:
# Show the last five rows of the truncated ts2 feature series.
ts2.tail(5)
Out[395]:
In [396]:
# Show the last five rows of the first 120 observations of the index series.
ts1[:120].tail(5)
Out[396]:
In [ ]:
In [387]: