In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

# download the dataset here: 
## https://datamarket.com/data/set/22u3/international-airline-passengers-monthly-totals-in-thousands-jan-49-dec-60#!ds=22u3&display=line

In [3]:
np.random.seed(410)

In [4]:
series = pd.read_csv('international-airline-passengers.csv', header=0, parse_dates=[0], index_col=0, squeeze=True)
plt.plot(series)
plt.show()



In [5]:
series.head()


Out[5]:
Month
1949-01-01    112
1949-02-01    118
1949-03-01    132
1949-04-01    129
1949-05-01    121
Name: International airline passengers: monthly totals in thousands. Jan 49 ? Dec 60, dtype: int64

In [6]:
series.columns = ['mothly_passengers']

In [7]:
series.head()


Out[7]:
Month
1949-01-01    112
1949-02-01    118
1949-03-01    132
1949-04-01    129
1949-05-01    121
Name: International airline passengers: monthly totals in thousands. Jan 49 ? Dec 60, dtype: int64

In [9]:
from statsmodels.tsa.arima_model import ARIMA
from pandas.plotting import autocorrelation_plot

autocorrelation_plot(series)
plt.show()



In [10]:
# Check Stationary
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller

def test_stationarity(timeseries):
    
    #Determing rolling statistics
    rolmean = timeseries.rolling(window=12,center=False).mean()
    rolstd = timeseries.rolling(window=12,center=False).std()

    #Plot rolling statistics:
    plt.figure(figsize=(9,7))
    orig = plt.plot(timeseries, color='blue',label='Original')
    mean = plt.plot(rolmean, color='green', label='Rolling Mean')
    std = plt.plot(rolstd, color='red', label = 'Rolling Std')
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    plt.show(block=False)
    
    #Perform Dickey-Fuller test:
    print 'Results of Dickey-Fuller Test:'
    dftest = adfuller(timeseries, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
    for key,value in dftest[4].items():
        dfoutput['Critical Value (%s)'%key] = value
    print dfoutput

In [11]:
test_stationarity(series)

# not stationary
# mean is changing along the time
# Test Statistics is higher than any Critical Value


Results of Dickey-Fuller Test:
Test Statistic                   0.815369
p-value                          0.991880
#Lags Used                      13.000000
Number of Observations Used    130.000000
Critical Value (5%)             -2.884042
Critical Value (1%)             -3.481682
Critical Value (10%)            -2.578770
dtype: float64

In [12]:
# try to make the data tom stationary first
## reduce trend by using log()
import numpy as np

ts_log = np.log(series)
plt.figure(figsize=(9,7))
plt.plot(ts_log)
plt.show()



In [13]:
## differentiating
## both series and ts_log are showing seasonality, so, I will just try differentiating and decomposing

ts_log_diff = ts_log - ts_log.shift()
plt.figure(figsize=(9,7))
plt.plot(ts_log_diff)
plt.show()



In [14]:
ts_log_diff.head()


Out[14]:
Month
1949-01-01         NaN
1949-02-01    0.052186
1949-03-01    0.112117
1949-04-01   -0.022990
1949-05-01   -0.064022
Name: International airline passengers: monthly totals in thousands. Jan 49 ? Dec 60, dtype: float64

In [15]:
ts_log_diff.dropna(inplace=True)
test_stationarity(ts_log_diff)

# Test Statistic is lower than 10% Critical Value, which means there is 90% confidence that the data is stationary now


Results of Dickey-Fuller Test:
Test Statistic                  -2.717131
p-value                          0.071121
#Lags Used                      14.000000
Number of Observations Used    128.000000
Critical Value (5%)             -2.884398
Critical Value (1%)             -3.482501
Critical Value (10%)            -2.578960
dtype: float64

In [16]:
# try forcasting to see how it works
from statsmodels.tsa.stattools import acf, pacf

lag_acf = acf(ts_log_diff, nlags=20)
lag_pacf = pacf(ts_log_diff, nlags=20, method='ols')

plt.figure(figsize=(20,10))

#Plot ACF: 
## q – The lag value where the ACF chart crosses the upper confidence interval for the first time (in this case q=2)
plt.subplot(121) 
plt.plot(lag_acf)
plt.axhline(y=0,linestyle='--',color='gray')
plt.axhline(y=-1.96/np.sqrt(len(ts_log_diff)),linestyle='--',color='gray')
plt.axhline(y=1.96/np.sqrt(len(ts_log_diff)),linestyle='--',color='gray')
plt.title('Autocorrelation Function')

#Plot PACF:
## p – The lag value where the PACF chart crosses the upper confidence interval for the first time (in this case p=2)
plt.subplot(122)
plt.plot(lag_pacf)
plt.axhline(y=0,linestyle='--',color='gray')
plt.axhline(y=-1.96/np.sqrt(len(ts_log_diff)),linestyle='--',color='gray')
plt.axhline(y=1.96/np.sqrt(len(ts_log_diff)),linestyle='--',color='gray')
plt.title('Partial Autocorrelation Function')
plt.tight_layout()
plt.show()



In [17]:
from statsmodels.tsa.arima_model import ARIMA
# RSS is for the values of residuals

# AR model
model = ARIMA(ts_log, order=(2, 1, 0))  
results_AR = model.fit(disp=-1) 
plt.figure(figsize=(20,10))
plt.plot(ts_log_diff)
plt.plot(results_AR.fittedvalues, color='red')
plt.title('Differenting RSS: %.4f'% sum((results_AR.fittedvalues-ts_log_diff)**2))
plt.show()



In [18]:
# MA Model

model = ARIMA(ts_log, order=(0, 1, 2))  
results_MA = model.fit(disp=-1)  
plt.figure(figsize=(20,10))
plt.plot(ts_log_diff)
plt.plot(results_MA.fittedvalues, color='red')
plt.title('Differenting RSS: %.4f'% sum((results_MA.fittedvalues-ts_log_diff)**2))
plt.show()



In [19]:
# combined mobel

model = ARIMA(ts_log, order=(2, 1, 2))  
results_ARIMA = model.fit(disp=-1)  
plt.figure(figsize=(20,10))
plt.plot(ts_log_diff)
plt.plot(results_ARIMA.fittedvalues, color='red')
plt.title('Differenting RSS: %.4f'% sum((results_ARIMA.fittedvalues-ts_log_diff)**2))
plt.show()



In [20]:
# Based on RSS above, Combined model for Differenting is better, try to see forcasted results
## Scale back to original value, and see how this combined value performs

# Step 1 - store predicted result as seperated result
predictions_ARIMA_diff = pd.Series(results_ARIMA.fittedvalues, copy=True)
print predictions_ARIMA_diff.head()


Month
1949-02-01    0.009580
1949-03-01    0.017491
1949-04-01    0.027670
1949-05-01   -0.004521
1949-06-01   -0.023889
dtype: float64

In [21]:
# Step 2 - Convert the differencing to log scale
## Add these differences consecutively to the base number
## An easy way to do it is to first determine the cumulative sum at index and then add it to the base number

predictions_ARIMA_diff_cumsum = predictions_ARIMA_diff.cumsum()
print predictions_ARIMA_diff_cumsum.head()


Month
1949-02-01    0.009580
1949-03-01    0.027071
1949-04-01    0.054742
1949-05-01    0.050221
1949-06-01    0.026331
dtype: float64

In [22]:
predictions_ARIMA_log = pd.Series(ts_log.iloc[0], index=ts_log.index)
predictions_ARIMA_log = predictions_ARIMA_log.add(predictions_ARIMA_diff_cumsum,fill_value=0)
predictions_ARIMA_log.head()


Out[22]:
Month
1949-01-01    4.718499
1949-02-01    4.728079
1949-03-01    4.745570
1949-04-01    4.773241
1949-05-01    4.768720
dtype: float64

In [23]:
predictions_ARIMA = np.exp(predictions_ARIMA_log)
plt.figure(figsize=(20,10))
plt.plot(series, color='red')
plt.plot(predictions_ARIMA, color='green')
plt.title('RMSE: %.4f'% np.sqrt(sum((predictions_ARIMA-series)**2)/len(series)))
plt.show()



In [24]:
## try decomposing, and see whether it could make it better

from statsmodels.tsa.seasonal import seasonal_decompose

# trend, seasonality are separated out from data, and we can model the residuals
decomposition = seasonal_decompose(ts_log)
trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid

plt.figure(figsize=(10,10))
plt.subplot(411)
plt.plot(ts_log, label='Original')
plt.legend(loc='best')
plt.subplot(412)
plt.plot(trend, label='Trend')
plt.legend(loc='best')
plt.subplot(413)
plt.plot(seasonal,label='Seasonality')
plt.legend(loc='best')
plt.subplot(414)
plt.plot(residual, label='Residuals')
plt.legend(loc='best')
plt.tight_layout()
plt.show()



In [25]:
ts_log_decompose = residual
ts_log_decompose.dropna(inplace=True)
test_stationarity(ts_log_decompose)

# This one has even lower than 1% Critical Value, which means 99% confidence for stationary
## but it's mean and variance looks more unstable


Results of Dickey-Fuller Test:
Test Statistic                -6.332387e+00
p-value                        2.885059e-08
#Lags Used                     9.000000e+00
Number of Observations Used    1.220000e+02
Critical Value (5%)           -2.885538e+00
Critical Value (1%)           -3.485122e+00
Critical Value (10%)          -2.579569e+00
dtype: float64

In [26]:
# try forecast with decomposed result

from statsmodels.tsa.stattools import acf, pacf

lag_acf = acf(ts_log_decompose, nlags=20)
lag_pacf = pacf(ts_log_decompose, nlags=20, method='ols')

plt.figure(figsize=(20,10))

#Plot ACF: 
## q – The lag value where the ACF chart crosses the upper confidence interval for the first time (in this case q=2)
plt.subplot(121) 
plt.plot(lag_acf)
plt.axhline(y=0,linestyle='--',color='gray')
plt.axhline(y=-1.96/np.sqrt(len(ts_log_decompose)),linestyle='--',color='gray')
plt.axhline(y=1.96/np.sqrt(len(ts_log_decompose)),linestyle='--',color='gray')
plt.title('Autocorrelation Function')

#Plot PACF:
## p – The lag value where the PACF chart crosses the upper confidence interval for the first time (in this case p=2)
plt.subplot(122)
plt.plot(lag_pacf)
plt.axhline(y=0,linestyle='--',color='gray')
plt.axhline(y=-1.96/np.sqrt(len(ts_log_decompose)),linestyle='--',color='gray')
plt.axhline(y=1.96/np.sqrt(len(ts_log_decompose)),linestyle='--',color='gray')
plt.title('Partial Autocorrelation Function')
plt.tight_layout()
plt.show()



In [27]:
# AR model
model = ARIMA(ts_log, order=(2, 1, 0))  
results_AR = model.fit(disp=-1) 
plt.figure(figsize=(20,10))
plt.plot(ts_log_decompose)
plt.plot(results_AR.fittedvalues, color='red')
result = (results_AR.fittedvalues-ts_log_decompose)**2
result.dropna(inplace=True)
plt.title('Decompose RSS: %.4f'% sum(result))
plt.show()



In [28]:
# MA Model

model = ARIMA(ts_log, order=(0, 1, 2))  
results_MA = model.fit(disp=-1)  
plt.figure(figsize=(20,10))
plt.plot(ts_log_decompose)
plt.plot(results_MA.fittedvalues, color='red')
result = (results_MA.fittedvalues-ts_log_decompose)**2
result.dropna(inplace=True)
plt.title('Decompose RSS: %.4f'% sum(result))
plt.show()



In [29]:
# combined mobel

model = ARIMA(ts_log, order=(2, 1, 2))  
results_ARIMA = model.fit(disp=-1)  
plt.figure(figsize=(20,10))
plt.plot(ts_log_decompose)
plt.plot(results_ARIMA.fittedvalues, color='red')
result = (results_ARIMA.fittedvalues-ts_log_decompose)**2
result.dropna(inplace=True)
plt.title('Decompose RSS: %.4f'% sum(result))
plt.show()



In [122]:
# It's seems that decompose AR model works best. Let's try forecasting here
# So far, it's quite challenging to add seasonality and trend back to predicted result,
## I'm using predicted result to compare with residual here

predictions_AR = pd.Series(results_AR.fittedvalues, copy=True)
print predictions_AR.head()
plt.figure(figsize=(20,10))
plt.plot(residual, color='red')
plt.plot(predictions_AR, color='green')
result = (predictions_AR-residual)**2
result.dropna(inplace=True)
plt.title('RMSE: %.4f'% np.sqrt(sum(result)/len(series)))
plt.show()


Month
1949-02-01    0.009600
1949-03-01    0.018167
1949-04-01    0.026435
1949-05-01   -0.015768
1949-06-01   -0.002144
dtype: float64

In [31]:
# LSTM, using decomposed result

print residual.head()
print
print(residual.isnull().sum())


Month
1949-07-01   -0.050884
1949-08-01   -0.048415
1949-09-01    0.001223
1949-10-01    0.003156
1949-11-01    0.005749
Name: International airline passengers: monthly totals in thousands. Jan 49 ? Dec 60, dtype: float64

0

In [32]:
# split into training and testing data
training_size = int(len(residual)*0.67)

train_data = residual[0:training_size]
test_data = residual[training_size:len(residual)]
print train_data.head()
print test_data.head()


Month
1949-07-01   -0.050884
1949-08-01   -0.048415
1949-09-01    0.001223
1949-10-01    0.003156
1949-11-01    0.005749
Name: International airline passengers: monthly totals in thousands. Jan 49 ? Dec 60, dtype: float64
Month
1956-11-01   -0.005110
1956-12-01   -0.008792
1957-01-01   -0.004277
1957-02-01   -0.032018
1957-03-01   -0.008046
Name: International airline passengers: monthly totals in thousands. Jan 49 ? Dec 60, dtype: float64

In [33]:
residual.values[4:10]


Out[33]:
array([ 0.00574867,  0.01104541, -0.03909288,  0.06930588,  0.03723574,
        0.01402768])

In [34]:
# Now we need to create 2 numpy arrays X, Y
## X represent time t value, while Y is time t+1 value

def look_forward_dataset(data, forward_step=1):
    X = [] 
    Y = []
    for i in range(len(data)-forward_step):
        X.append([data[i]])   # pay attention to the structure here, important
        Y.append([data[i+1]])
    return np.array(X), np.array(Y)

time_step = 1
train_X, train_Y = look_forward_dataset(train_data, time_step)
test_X, test_Y = look_forward_dataset(test_data, time_step)

In [35]:
# LSTM expects the input in a format as [samples, time steps, dimension]
## In this case, time steps is 1

train_X = np.reshape(train_X, train_X.shape + (1,))
test_X = np.reshape(test_X, test_X.shape + (1,))

train_X[4:10]


Out[35]:
array([[[ 0.00574867]],

       [[ 0.01104541]],

       [[-0.03909288]],

       [[ 0.06930588]],

       [[ 0.03723574]],

       [[ 0.01402768]]])

In [36]:
# fit LSTM model

model = Sequential()
# 4 LSTM blocks, and LSTM doesn't care sample amount, so input_shape just need (time_step, dimension)
model.add(LSTM(4, input_shape=train_X.shape[1:]))  
model.add(Dense(1))  # bring the output down to 1 single output
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(train_X, train_Y, epochs=100, batch_size=1, verbose=2,
         validation_data=(test_X, test_Y))  # using the default sigmoid function here


Train on 87 samples, validate on 43 samples
Epoch 1/100
1s - loss: 0.0010 - val_loss: 0.0011
Epoch 2/100
0s - loss: 0.0010 - val_loss: 0.0011
Epoch 3/100
1s - loss: 9.9073e-04 - val_loss: 0.0011
Epoch 4/100
0s - loss: 0.0010 - val_loss: 0.0011
Epoch 5/100
0s - loss: 9.9286e-04 - val_loss: 0.0011
Epoch 6/100
0s - loss: 9.7113e-04 - val_loss: 0.0011
Epoch 7/100
0s - loss: 9.8000e-04 - val_loss: 0.0010
Epoch 8/100
0s - loss: 9.8346e-04 - val_loss: 0.0010
Epoch 9/100
0s - loss: 0.0010 - val_loss: 0.0010
Epoch 10/100
0s - loss: 9.6136e-04 - val_loss: 0.0010
Epoch 11/100
0s - loss: 9.7869e-04 - val_loss: 0.0010
Epoch 12/100
0s - loss: 9.5682e-04 - val_loss: 9.9886e-04
Epoch 13/100
0s - loss: 9.5129e-04 - val_loss: 0.0010
Epoch 14/100
0s - loss: 9.7353e-04 - val_loss: 0.0010
Epoch 15/100
0s - loss: 9.6266e-04 - val_loss: 9.8584e-04
Epoch 16/100
0s - loss: 9.5312e-04 - val_loss: 0.0011
Epoch 17/100
0s - loss: 9.5837e-04 - val_loss: 9.7942e-04
Epoch 18/100
0s - loss: 9.6167e-04 - val_loss: 9.8237e-04
Epoch 19/100
0s - loss: 9.4510e-04 - val_loss: 9.7537e-04
Epoch 20/100
0s - loss: 9.6189e-04 - val_loss: 9.7178e-04
Epoch 21/100
0s - loss: 9.5502e-04 - val_loss: 9.7753e-04
Epoch 22/100
0s - loss: 9.3877e-04 - val_loss: 9.7342e-04
Epoch 23/100
0s - loss: 9.4863e-04 - val_loss: 9.8268e-04
Epoch 24/100
0s - loss: 9.7728e-04 - val_loss: 9.7095e-04
Epoch 25/100
0s - loss: 9.8090e-04 - val_loss: 9.6369e-04
Epoch 26/100
0s - loss: 9.3707e-04 - val_loss: 9.6466e-04
Epoch 27/100
0s - loss: 9.5575e-04 - val_loss: 9.6479e-04
Epoch 28/100
1s - loss: 9.5589e-04 - val_loss: 9.6817e-04
Epoch 29/100
1s - loss: 9.5850e-04 - val_loss: 9.8148e-04
Epoch 30/100
0s - loss: 9.5160e-04 - val_loss: 9.5948e-04
Epoch 31/100
0s - loss: 9.5779e-04 - val_loss: 9.7799e-04
Epoch 32/100
0s - loss: 9.4994e-04 - val_loss: 9.7067e-04
Epoch 33/100
0s - loss: 9.6580e-04 - val_loss: 9.5956e-04
Epoch 34/100
0s - loss: 9.3503e-04 - val_loss: 9.5895e-04
Epoch 35/100
0s - loss: 9.4640e-04 - val_loss: 9.9898e-04
Epoch 36/100
0s - loss: 9.7489e-04 - val_loss: 9.5777e-04
Epoch 37/100
0s - loss: 9.4231e-04 - val_loss: 9.5781e-04
Epoch 38/100
1s - loss: 9.4026e-04 - val_loss: 0.0010
Epoch 39/100
0s - loss: 9.2442e-04 - val_loss: 9.8301e-04
Epoch 40/100
0s - loss: 9.7390e-04 - val_loss: 9.5673e-04
Epoch 41/100
0s - loss: 9.6592e-04 - val_loss: 9.7247e-04
Epoch 42/100
0s - loss: 9.6211e-04 - val_loss: 9.5836e-04
Epoch 43/100
0s - loss: 9.4818e-04 - val_loss: 9.5676e-04
Epoch 44/100
0s - loss: 9.5135e-04 - val_loss: 9.5464e-04
Epoch 45/100
0s - loss: 9.4677e-04 - val_loss: 0.0010
Epoch 46/100
0s - loss: 9.5731e-04 - val_loss: 9.5500e-04
Epoch 47/100
0s - loss: 9.5175e-04 - val_loss: 9.5726e-04
Epoch 48/100
0s - loss: 9.6880e-04 - val_loss: 9.5649e-04
Epoch 49/100
1s - loss: 9.5665e-04 - val_loss: 9.6950e-04
Epoch 50/100
0s - loss: 9.4427e-04 - val_loss: 9.5334e-04
Epoch 51/100
0s - loss: 9.4278e-04 - val_loss: 9.5464e-04
Epoch 52/100
0s - loss: 9.8447e-04 - val_loss: 0.0010
Epoch 53/100
0s - loss: 9.6631e-04 - val_loss: 9.5658e-04
Epoch 54/100
0s - loss: 9.3981e-04 - val_loss: 9.5373e-04
Epoch 55/100
1s - loss: 9.5274e-04 - val_loss: 9.5823e-04
Epoch 56/100
0s - loss: 9.7583e-04 - val_loss: 9.6614e-04
Epoch 57/100
0s - loss: 9.3902e-04 - val_loss: 9.5871e-04
Epoch 58/100
0s - loss: 9.4225e-04 - val_loss: 9.6571e-04
Epoch 59/100
0s - loss: 9.6034e-04 - val_loss: 0.0010
Epoch 60/100
0s - loss: 9.2899e-04 - val_loss: 9.6858e-04
Epoch 61/100
0s - loss: 0.0010 - val_loss: 9.5743e-04
Epoch 62/100
0s - loss: 9.4800e-04 - val_loss: 9.5534e-04
Epoch 63/100
0s - loss: 9.4481e-04 - val_loss: 9.9591e-04
Epoch 64/100
0s - loss: 9.5475e-04 - val_loss: 9.5425e-04
Epoch 65/100
0s - loss: 9.3985e-04 - val_loss: 9.7277e-04
Epoch 66/100
0s - loss: 9.5598e-04 - val_loss: 9.5588e-04
Epoch 67/100
0s - loss: 9.8072e-04 - val_loss: 9.5893e-04
Epoch 68/100
0s - loss: 9.4318e-04 - val_loss: 9.6434e-04
Epoch 69/100
0s - loss: 9.5864e-04 - val_loss: 9.6357e-04
Epoch 70/100
0s - loss: 9.4019e-04 - val_loss: 9.6739e-04
Epoch 71/100
0s - loss: 9.5251e-04 - val_loss: 9.5487e-04
Epoch 72/100
0s - loss: 9.4879e-04 - val_loss: 9.8176e-04
Epoch 73/100
0s - loss: 9.4780e-04 - val_loss: 9.5236e-04
Epoch 74/100
0s - loss: 9.4922e-04 - val_loss: 9.6136e-04
Epoch 75/100
0s - loss: 9.3717e-04 - val_loss: 9.5946e-04
Epoch 76/100
0s - loss: 9.6821e-04 - val_loss: 9.5545e-04
Epoch 77/100
0s - loss: 9.3704e-04 - val_loss: 9.6060e-04
Epoch 78/100
0s - loss: 9.5066e-04 - val_loss: 9.5962e-04
Epoch 79/100
0s - loss: 9.7445e-04 - val_loss: 9.5590e-04
Epoch 80/100
0s - loss: 9.4655e-04 - val_loss: 9.6180e-04
Epoch 81/100
0s - loss: 9.4774e-04 - val_loss: 9.5288e-04
Epoch 82/100
0s - loss: 9.6671e-04 - val_loss: 9.5448e-04
Epoch 83/100
0s - loss: 9.4605e-04 - val_loss: 9.5629e-04
Epoch 84/100
0s - loss: 9.3969e-04 - val_loss: 9.6426e-04
Epoch 85/100
0s - loss: 9.4583e-04 - val_loss: 9.8196e-04
Epoch 86/100
0s - loss: 0.0010 - val_loss: 9.5571e-04
Epoch 87/100
0s - loss: 9.3956e-04 - val_loss: 9.6144e-04
Epoch 88/100
0s - loss: 9.2661e-04 - val_loss: 9.8803e-04
Epoch 89/100
0s - loss: 9.7872e-04 - val_loss: 9.5296e-04
Epoch 90/100
0s - loss: 9.5604e-04 - val_loss: 9.6114e-04
Epoch 91/100
0s - loss: 9.4365e-04 - val_loss: 9.5866e-04
Epoch 92/100
0s - loss: 9.2929e-04 - val_loss: 9.7198e-04
Epoch 93/100
0s - loss: 0.0010 - val_loss: 9.6042e-04
Epoch 94/100
0s - loss: 9.3881e-04 - val_loss: 9.9456e-04
Epoch 95/100
0s - loss: 9.3018e-04 - val_loss: 9.9477e-04
Epoch 96/100
0s - loss: 9.4075e-04 - val_loss: 9.5374e-04
Epoch 97/100
0s - loss: 9.4169e-04 - val_loss: 9.7204e-04
Epoch 98/100
0s - loss: 9.7392e-04 - val_loss: 9.8222e-04
Epoch 99/100
0s - loss: 9.7375e-04 - val_loss: 9.5988e-04
Epoch 100/100
0s - loss: 9.6583e-04 - val_loss: 9.5451e-04
Out[36]:
<keras.callbacks.History at 0x12b9fad90>

In [51]:
# make predictions
train_predict = model.predict(train_X)
test_predict = model.predict(test_X)

train_RMSE = math.sqrt(mean_squared_error(train_Y[:,0], train_predict[:,0]))
print 'Train RMSE: %.7f' % (train_RMSE)
test_RMSE = math.sqrt(mean_squared_error(test_Y[:,0], test_predict[:,0]))
print 'Test RMSE: %.7f' % (test_RMSE)


Train RMSE: 0.0304029
Test RMSE: 0.0308951

In [105]:
# In order to plot residual, you need to change its format

origin_values = residual.values
origin_data = []
for elem in origin_values:
    origin_data.append([elem])
origin_data = np.array(origin_data)
origin_data[4:10]


Out[105]:
array([[ 0.00574867],
       [ 0.01104541],
       [-0.03909288],
       [ 0.06930588],
       [ 0.03723574],
       [ 0.01402768]])

In [116]:
train_predict_plot = np.empty_like(origin_data)
train_predict_plot[:, :] = np.nan
train_predict_plot[0:len(train_predict),:] = train_predict

test_predict_plot = np.empty_like(origin_data)
test_predict_plot[:, :] = np.nan
test_predict_plot[len(train_predict)+2:len(origin_data),:] = test_predict

In [118]:
plt.plot(origin_data)
plt.plot(train_predict_plot)
plt.plot(test_predict_plot)
plt.show()