In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

# download the dataset here: 
## https://datamarket.com/data/set/22u3/international-airline-passengers-monthly-totals-in-thousands-jan-49-dec-60#!ds=22u3&display=line

In [3]:
# METHOD 1 - use time t to predict time t+1
## look_back is 1, time_step is 1

np.random.seed(410)

In [4]:
series = pd.read_csv('international-airline-passengers.csv', header=0, parse_dates=[0], index_col=0, squeeze=True)
plt.plot(series)
plt.show()



In [5]:
series.head()


Out[5]:
Month
1949-01-01    112
1949-02-01    118
1949-03-01    132
1949-04-01    129
1949-05-01    121
Name: International airline passengers: monthly totals in thousands. Jan 49 – Dec 60, dtype: int64

In [6]:
origin_values = series.values.astype('float32')

# normalize the values
scaler = MinMaxScaler(feature_range=(0,1))
origin_values = scaler.fit_transform(origin_values.reshape(-1,1))   # reshape(-1,1) here is important
origin_values[4:10]


Out[6]:
array([[ 0.03281853],
       [ 0.05984557],
       [ 0.08494207],
       [ 0.08494207],
       [ 0.06177607],
       [ 0.02895753]], dtype=float32)

In [7]:
train_size = int(len(origin_values)*0.67)
train, test = origin_values[0:train_size,:], origin_values[train_size:len(origin_values),:]
print len(train), len(test)
print train[4:10]


96 48
[[ 0.03281853]
 [ 0.05984557]
 [ 0.08494207]
 [ 0.08494207]
 [ 0.06177607]
 [ 0.02895753]]

In [8]:
# dataX holds the window from time t-look_back+1 through t, while dataY holds the value at time t+1
def shift_time(dataset, look_back):
    dataX = []
    dataY = []
    for i in range(len(dataset)-look_back):
        dataX.append(dataset[i:(i+look_back),0])
        dataY.append(dataset[i+look_back,0])
    return np.array(dataX), np.array(dataY)
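As a quick illustration (a minimal sketch, not part of the original notebook), applying shift_time to a tiny toy array with look_back=2 shows how each row of X lines up with its target in Y:

toy = np.array([[0.1], [0.2], [0.3], [0.4], [0.5]])
toyX, toyY = shift_time(toy, look_back=2)
# toyX -> [[0.1, 0.2], [0.2, 0.3], [0.3, 0.4]]
# toyY -> [0.3, 0.4, 0.5]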

In [9]:
time_step = 1
look_back = 1
trainX, trainY = shift_time(train, look_back)
testX, testY = shift_time(test, look_back)

print trainX[4:10]
print trainY[4:10]
print trainX.shape


[[ 0.03281853]
 [ 0.05984557]
 [ 0.08494207]
 [ 0.08494207]
 [ 0.06177607]
 [ 0.02895753]]
[ 0.05984557  0.08494207  0.08494207  0.06177607  0.02895753  0.        ]
(95, 1)

In [10]:
# reshape data into [samples, time_step, dimension] so that the LSTM layer can read it
## X and Y keep the same number of samples, since each row of Y is the value predicted from the corresponding window in X

trainX = np.reshape(trainX, (trainX.shape[0], time_step, trainX.shape[1]))
testX = np.reshape(testX, (testX.shape[0], time_step, testX.shape[1]))

In [11]:
print trainX[4:10]
print testX[4:10]


[[[ 0.03281853]]

 [[ 0.05984557]]

 [[ 0.08494207]]

 [[ 0.08494207]]

 [[ 0.06177607]]

 [[ 0.02895753]]]
[[[ 0.48455599]]

 [[ 0.61389959]]

 [[ 0.69691122]]

 [[ 0.70077217]]

 [[ 0.57915056]]

 [[ 0.46911195]]]

In [47]:
trainX.shape[1:]


Out[47]:
(1, 1)

In [48]:
# LSTM model

model = Sequential()
model.add(LSTM(4, input_shape=trainX.shape[1:]))  # the LSTM layer only needs (time_step, dimension) as input_shape
model.add(Dense(1))  # single output
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=2,
         validation_data=(testX, testY))


Train on 95 samples, validate on 47 samples
Epoch 1/100
1s - loss: 0.0328 - val_loss: 0.1836
Epoch 2/100
0s - loss: 0.0150 - val_loss: 0.1219
Epoch 3/100
0s - loss: 0.0123 - val_loss: 0.1048
Epoch 4/100
0s - loss: 0.0115 - val_loss: 0.0894
Epoch 5/100
0s - loss: 0.0107 - val_loss: 0.0869
Epoch 6/100
0s - loss: 0.0100 - val_loss: 0.0771
Epoch 7/100
0s - loss: 0.0091 - val_loss: 0.0699
Epoch 8/100
0s - loss: 0.0083 - val_loss: 0.0627
Epoch 9/100
0s - loss: 0.0074 - val_loss: 0.0569
Epoch 10/100
0s - loss: 0.0066 - val_loss: 0.0438
Epoch 11/100
0s - loss: 0.0059 - val_loss: 0.0374
Epoch 12/100
0s - loss: 0.0051 - val_loss: 0.0308
Epoch 13/100
0s - loss: 0.0045 - val_loss: 0.0296
Epoch 14/100
0s - loss: 0.0039 - val_loss: 0.0224
Epoch 15/100
0s - loss: 0.0035 - val_loss: 0.0197
Epoch 16/100
0s - loss: 0.0031 - val_loss: 0.0159
Epoch 17/100
0s - loss: 0.0028 - val_loss: 0.0137
Epoch 18/100
0s - loss: 0.0025 - val_loss: 0.0120
Epoch 19/100
0s - loss: 0.0024 - val_loss: 0.0116
Epoch 20/100
0s - loss: 0.0022 - val_loss: 0.0092
Epoch 21/100
0s - loss: 0.0022 - val_loss: 0.0090
Epoch 22/100
0s - loss: 0.0022 - val_loss: 0.0088
Epoch 23/100
0s - loss: 0.0021 - val_loss: 0.0089
Epoch 24/100
0s - loss: 0.0021 - val_loss: 0.0087
Epoch 25/100
0s - loss: 0.0021 - val_loss: 0.0089
Epoch 26/100
0s - loss: 0.0021 - val_loss: 0.0087
Epoch 27/100
0s - loss: 0.0021 - val_loss: 0.0088
Epoch 28/100
0s - loss: 0.0021 - val_loss: 0.0087
Epoch 29/100
0s - loss: 0.0021 - val_loss: 0.0089
Epoch 30/100
0s - loss: 0.0021 - val_loss: 0.0086
Epoch 31/100
0s - loss: 0.0021 - val_loss: 0.0087
Epoch 32/100
0s - loss: 0.0021 - val_loss: 0.0088
Epoch 33/100
0s - loss: 0.0021 - val_loss: 0.0087
Epoch 34/100
0s - loss: 0.0021 - val_loss: 0.0085
Epoch 35/100
0s - loss: 0.0021 - val_loss: 0.0087
Epoch 36/100
0s - loss: 0.0021 - val_loss: 0.0086
Epoch 37/100
0s - loss: 0.0021 - val_loss: 0.0085
Epoch 38/100
0s - loss: 0.0021 - val_loss: 0.0085
Epoch 39/100
0s - loss: 0.0022 - val_loss: 0.0086
Epoch 40/100
0s - loss: 0.0021 - val_loss: 0.0088
Epoch 41/100
0s - loss: 0.0021 - val_loss: 0.0086
Epoch 42/100
0s - loss: 0.0021 - val_loss: 0.0086
Epoch 43/100
0s - loss: 0.0020 - val_loss: 0.0085
Epoch 44/100
0s - loss: 0.0021 - val_loss: 0.0087
Epoch 45/100
0s - loss: 0.0021 - val_loss: 0.0085
Epoch 46/100
0s - loss: 0.0021 - val_loss: 0.0084
Epoch 47/100
0s - loss: 0.0021 - val_loss: 0.0090
Epoch 48/100
0s - loss: 0.0021 - val_loss: 0.0088
Epoch 49/100
0s - loss: 0.0020 - val_loss: 0.0086
Epoch 50/100
0s - loss: 0.0021 - val_loss: 0.0084
Epoch 51/100
0s - loss: 0.0021 - val_loss: 0.0097
Epoch 52/100
0s - loss: 0.0021 - val_loss: 0.0086
Epoch 53/100
0s - loss: 0.0021 - val_loss: 0.0086
Epoch 54/100
0s - loss: 0.0021 - val_loss: 0.0086
Epoch 55/100
0s - loss: 0.0021 - val_loss: 0.0086
Epoch 56/100
0s - loss: 0.0021 - val_loss: 0.0084
Epoch 57/100
0s - loss: 0.0021 - val_loss: 0.0083
Epoch 58/100
0s - loss: 0.0022 - val_loss: 0.0087
Epoch 59/100
0s - loss: 0.0021 - val_loss: 0.0086
Epoch 60/100
0s - loss: 0.0020 - val_loss: 0.0083
Epoch 61/100
0s - loss: 0.0021 - val_loss: 0.0085
Epoch 62/100
0s - loss: 0.0020 - val_loss: 0.0086
Epoch 63/100
1s - loss: 0.0020 - val_loss: 0.0091
Epoch 64/100
0s - loss: 0.0020 - val_loss: 0.0088
Epoch 65/100
0s - loss: 0.0021 - val_loss: 0.0087
Epoch 66/100
0s - loss: 0.0021 - val_loss: 0.0083
Epoch 67/100
0s - loss: 0.0021 - val_loss: 0.0085
Epoch 68/100
0s - loss: 0.0021 - val_loss: 0.0090
Epoch 69/100
0s - loss: 0.0021 - val_loss: 0.0089
Epoch 70/100
0s - loss: 0.0021 - val_loss: 0.0091
Epoch 71/100
0s - loss: 0.0021 - val_loss: 0.0086
Epoch 72/100
0s - loss: 0.0020 - val_loss: 0.0091
Epoch 73/100
0s - loss: 0.0021 - val_loss: 0.0088
Epoch 74/100
0s - loss: 0.0021 - val_loss: 0.0082
Epoch 75/100
0s - loss: 0.0020 - val_loss: 0.0090
Epoch 76/100
0s - loss: 0.0020 - val_loss: 0.0082
Epoch 77/100
0s - loss: 0.0020 - val_loss: 0.0087
Epoch 78/100
0s - loss: 0.0020 - val_loss: 0.0091
Epoch 79/100
0s - loss: 0.0021 - val_loss: 0.0090
Epoch 80/100
0s - loss: 0.0021 - val_loss: 0.0087
Epoch 81/100
0s - loss: 0.0021 - val_loss: 0.0087
Epoch 82/100
0s - loss: 0.0021 - val_loss: 0.0090
Epoch 83/100
0s - loss: 0.0021 - val_loss: 0.0085
Epoch 84/100
0s - loss: 0.0021 - val_loss: 0.0085
Epoch 85/100
0s - loss: 0.0020 - val_loss: 0.0082
Epoch 86/100
0s - loss: 0.0021 - val_loss: 0.0089
Epoch 87/100
0s - loss: 0.0020 - val_loss: 0.0083
Epoch 88/100
0s - loss: 0.0020 - val_loss: 0.0086
Epoch 89/100
0s - loss: 0.0021 - val_loss: 0.0091
Epoch 90/100
0s - loss: 0.0021 - val_loss: 0.0085
Epoch 91/100
0s - loss: 0.0021 - val_loss: 0.0088
Epoch 92/100
0s - loss: 0.0020 - val_loss: 0.0086
Epoch 93/100
0s - loss: 0.0020 - val_loss: 0.0082
Epoch 94/100
0s - loss: 0.0021 - val_loss: 0.0085
Epoch 95/100
0s - loss: 0.0022 - val_loss: 0.0091
Epoch 96/100
0s - loss: 0.0021 - val_loss: 0.0088
Epoch 97/100
0s - loss: 0.0021 - val_loss: 0.0090
Epoch 98/100
0s - loss: 0.0021 - val_loss: 0.0082
Epoch 99/100
0s - loss: 0.0021 - val_loss: 0.0083
Epoch 100/100
0s - loss: 0.0020 - val_loss: 0.0092
Out[48]:
<keras.callbacks.History at 0x12779bd90>
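Out[48] above is the Keras History object returned by fit. As a minimal sketch (assuming the fit call in In [48] were assigned to a hypothetical variable named history), the learning curves could be plotted directly from it:

history = model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=2,
                    validation_data=(testX, testY))
plt.plot(history.history['loss'], label='train loss')
plt.plot(history.history['val_loss'], label='validation loss')
plt.legend()
plt.show()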

In [50]:
# make prediction

train_predict = model.predict(trainX)
test_predict = model.predict(testX)

train_RMSE = math.sqrt(mean_squared_error(trainY[:], train_predict[:,0]))
print 'Train RMSE: %.7f' % (train_RMSE)
test_RMSE = math.sqrt(mean_squared_error(testY[:], test_predict[:,0]))
print 'Test RMSE: %.7f' % (test_RMSE)


Train RMSE: 0.0448690
Test RMSE: 0.0961093
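Note that the RMSE values above are on the normalized 0-1 scale. A minimal sketch (not part of the original run; the *_inv names are introduced here) of using the fitted scaler to undo the normalization and report the error in thousands of passengers:

train_predict_inv = scaler.inverse_transform(train_predict)
trainY_inv = scaler.inverse_transform(trainY.reshape(-1, 1))
test_predict_inv = scaler.inverse_transform(test_predict)
testY_inv = scaler.inverse_transform(testY.reshape(-1, 1))

print 'Train RMSE (passengers, thousands): %.3f' % math.sqrt(mean_squared_error(trainY_inv, train_predict_inv))
print 'Test RMSE (passengers, thousands): %.3f' % math.sqrt(mean_squared_error(testY_inv, test_predict_inv))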

In [51]:
print train_predict[4:10]
print origin_values.shape
print train_predict.shape
print test_predict.shape


[[ 0.04352565]
 [ 0.06731328]
 [ 0.08974933]
 [ 0.08974933]
 [ 0.0690277 ]
 [ 0.04016139]]
(144, 1)
(95, 1)
(47, 1)

In [52]:
# plot the prediction

train_predict_plot = np.empty_like(origin_values)
train_predict_plot[:, :] = np.nan
train_predict_plot[0:len(train_predict), :] = train_predict

test_predict_plot = np.empty_like(origin_values)
test_predict_plot[:, :] = np.nan
test_predict_plot[len(train_predict)+look_back:len(origin_values)-1, :] = test_predict

plt.plot(origin_values)
plt.plot(train_predict_plot)
plt.plot(test_predict_plot)
plt.show()



In [13]:
# METHOD 2 - WINDOW method
## use times t-2, t-1, t to predict time t+1
## Compared with Method 1, you only change the look_back value,
## and with a look_back of n your features (trainX, testX) have n columns

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

In [19]:
np.random.seed(410)
series = pd.read_csv('international-airline-passengers.csv', header=0, parse_dates=[0], index_col=0, squeeze=True)

origin_values = series.values.astype('float32')

# normalize the values
scaler = MinMaxScaler(feature_range=(0,1))
origin_values = scaler.fit_transform(origin_values.reshape(-1,1))   # reshape(-1,1) here is important

train_size = int(len(origin_values)*0.67)
train, test = origin_values[0:train_size,:], origin_values[train_size:len(origin_values),:]

print len(train), len(test)
print train[4:10]
print test[4:10]


96 48
[[ 0.03281853]
 [ 0.05984557]
 [ 0.08494207]
 [ 0.08494207]
 [ 0.06177607]
 [ 0.02895753]]
[[ 0.48455599]
 [ 0.61389959]
 [ 0.69691122]
 [ 0.70077217]
 [ 0.57915056]
 [ 0.46911195]]

In [20]:
# dataX holds the window from time t-look_back+1 through t, while dataY holds the value at time t+1
def shift_time(dataset, look_back):
    dataX = []
    dataY = []
    for i in range(len(dataset)-look_back):
        dataX.append(dataset[i:(i+look_back),0])
        dataY.append(dataset[i+look_back,0])
    return np.array(dataX), np.array(dataY)

In [21]:
time_step = 1
look_back = 3
trainX, trainY = shift_time(train, look_back)
testX, testY = shift_time(test, look_back)
print trainX[4:10]
print trainY[4:10]
print trainX.shape


[[ 0.03281853  0.05984557  0.08494207]
 [ 0.05984557  0.08494207  0.08494207]
 [ 0.08494207  0.08494207  0.06177607]
 [ 0.08494207  0.06177607  0.02895753]
 [ 0.06177607  0.02895753  0.        ]
 [ 0.02895753  0.          0.02702703]]
[ 0.08494207  0.06177607  0.02895753  0.          0.02702703  0.02123553]
(93, 3)

In [22]:
# reshape data into [samples, time_step, dimension] so that the LSTM layer can read it
## X and Y keep the same number of samples, since each row of Y is the value predicted from the corresponding window in X
trainX = np.reshape(trainX, (trainX.shape[0], time_step, trainX.shape[1]))
testX = np.reshape(testX, (testX.shape[0], time_step, testX.shape[1]))

print trainX[4:10]
print testX[4:10]


[[[ 0.03281853  0.05984557  0.08494207]]

 [[ 0.05984557  0.08494207  0.08494207]]

 [[ 0.08494207  0.08494207  0.06177607]]

 [[ 0.08494207  0.06177607  0.02895753]]

 [[ 0.06177607  0.02895753  0.        ]]

 [[ 0.02895753  0.          0.02702703]]]
[[[ 0.48455599  0.61389959  0.69691122]]

 [[ 0.61389959  0.69691122  0.70077217]]

 [[ 0.69691122  0.70077217  0.57915056]]

 [[ 0.70077217  0.57915056  0.46911195]]

 [[ 0.57915056  0.46911195  0.38803086]]

 [[ 0.46911195  0.38803086  0.44787642]]]

In [23]:
# LSTM model

model = Sequential()
model.add(LSTM(4, input_shape=trainX.shape[1:]))  # the LSTM layer only needs (time_step, dimension) as input_shape
model.add(Dense(1))  # single output
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=2,
         validation_data=(testX, testY))


Train on 93 samples, validate on 45 samples
Epoch 1/100
1s - loss: 0.0235 - val_loss: 0.1215
Epoch 2/100
0s - loss: 0.0097 - val_loss: 0.0744
Epoch 3/100
0s - loss: 0.0082 - val_loss: 0.0641
Epoch 4/100
0s - loss: 0.0074 - val_loss: 0.0518
Epoch 5/100
0s - loss: 0.0066 - val_loss: 0.0435
Epoch 6/100
0s - loss: 0.0059 - val_loss: 0.0398
Epoch 7/100
0s - loss: 0.0054 - val_loss: 0.0338
Epoch 8/100
0s - loss: 0.0051 - val_loss: 0.0284
Epoch 9/100
0s - loss: 0.0048 - val_loss: 0.0270
Epoch 10/100
0s - loss: 0.0045 - val_loss: 0.0245
Epoch 11/100
0s - loss: 0.0044 - val_loss: 0.0248
Epoch 12/100
0s - loss: 0.0043 - val_loss: 0.0235
Epoch 13/100
0s - loss: 0.0042 - val_loss: 0.0225
Epoch 14/100
0s - loss: 0.0041 - val_loss: 0.0201
Epoch 15/100
0s - loss: 0.0040 - val_loss: 0.0213
Epoch 16/100
0s - loss: 0.0040 - val_loss: 0.0201
Epoch 17/100
0s - loss: 0.0039 - val_loss: 0.0196
Epoch 18/100
0s - loss: 0.0038 - val_loss: 0.0190
Epoch 19/100
0s - loss: 0.0039 - val_loss: 0.0183
Epoch 20/100
0s - loss: 0.0039 - val_loss: 0.0205
Epoch 21/100
0s - loss: 0.0038 - val_loss: 0.0206
Epoch 22/100
0s - loss: 0.0039 - val_loss: 0.0207
Epoch 23/100
0s - loss: 0.0037 - val_loss: 0.0186
Epoch 24/100
0s - loss: 0.0036 - val_loss: 0.0197
Epoch 25/100
0s - loss: 0.0036 - val_loss: 0.0182
Epoch 26/100
0s - loss: 0.0036 - val_loss: 0.0193
Epoch 27/100
0s - loss: 0.0035 - val_loss: 0.0186
Epoch 28/100
0s - loss: 0.0035 - val_loss: 0.0190
Epoch 29/100
0s - loss: 0.0035 - val_loss: 0.0167
Epoch 30/100
0s - loss: 0.0034 - val_loss: 0.0170
Epoch 31/100
0s - loss: 0.0034 - val_loss: 0.0169
Epoch 32/100
0s - loss: 0.0034 - val_loss: 0.0167
Epoch 33/100
0s - loss: 0.0034 - val_loss: 0.0175
Epoch 34/100
0s - loss: 0.0032 - val_loss: 0.0151
Epoch 35/100
0s - loss: 0.0033 - val_loss: 0.0164
Epoch 36/100
0s - loss: 0.0033 - val_loss: 0.0172
Epoch 37/100
0s - loss: 0.0032 - val_loss: 0.0157
Epoch 38/100
0s - loss: 0.0031 - val_loss: 0.0156
Epoch 39/100
0s - loss: 0.0031 - val_loss: 0.0173
Epoch 40/100
0s - loss: 0.0032 - val_loss: 0.0166
Epoch 41/100
0s - loss: 0.0031 - val_loss: 0.0169
Epoch 42/100
0s - loss: 0.0031 - val_loss: 0.0172
Epoch 43/100
0s - loss: 0.0031 - val_loss: 0.0184
Epoch 44/100
0s - loss: 0.0031 - val_loss: 0.0156
Epoch 45/100
0s - loss: 0.0030 - val_loss: 0.0158
Epoch 46/100
0s - loss: 0.0030 - val_loss: 0.0163
Epoch 47/100
0s - loss: 0.0030 - val_loss: 0.0155
Epoch 48/100
0s - loss: 0.0030 - val_loss: 0.0158
Epoch 49/100
0s - loss: 0.0030 - val_loss: 0.0155
Epoch 50/100
0s - loss: 0.0029 - val_loss: 0.0147
Epoch 51/100
0s - loss: 0.0029 - val_loss: 0.0156
Epoch 52/100
0s - loss: 0.0029 - val_loss: 0.0144
Epoch 53/100
0s - loss: 0.0028 - val_loss: 0.0134
Epoch 54/100
0s - loss: 0.0030 - val_loss: 0.0129
Epoch 55/100
0s - loss: 0.0028 - val_loss: 0.0154
Epoch 56/100
0s - loss: 0.0028 - val_loss: 0.0165
Epoch 57/100
0s - loss: 0.0028 - val_loss: 0.0145
Epoch 58/100
0s - loss: 0.0027 - val_loss: 0.0147
Epoch 59/100
0s - loss: 0.0027 - val_loss: 0.0174
Epoch 60/100
0s - loss: 0.0027 - val_loss: 0.0131
Epoch 61/100
0s - loss: 0.0028 - val_loss: 0.0139
Epoch 62/100
0s - loss: 0.0027 - val_loss: 0.0157
Epoch 63/100
0s - loss: 0.0027 - val_loss: 0.0130
Epoch 64/100
0s - loss: 0.0026 - val_loss: 0.0133
Epoch 65/100
0s - loss: 0.0026 - val_loss: 0.0142
Epoch 66/100
0s - loss: 0.0026 - val_loss: 0.0154
Epoch 67/100
0s - loss: 0.0026 - val_loss: 0.0134
Epoch 68/100
0s - loss: 0.0025 - val_loss: 0.0136
Epoch 69/100
0s - loss: 0.0025 - val_loss: 0.0160
Epoch 70/100
0s - loss: 0.0026 - val_loss: 0.0138
Epoch 71/100
0s - loss: 0.0024 - val_loss: 0.0130
Epoch 72/100
0s - loss: 0.0024 - val_loss: 0.0126
Epoch 73/100
0s - loss: 0.0025 - val_loss: 0.0136
Epoch 74/100
0s - loss: 0.0024 - val_loss: 0.0153
Epoch 75/100
0s - loss: 0.0024 - val_loss: 0.0123
Epoch 76/100
0s - loss: 0.0025 - val_loss: 0.0143
Epoch 77/100
0s - loss: 0.0024 - val_loss: 0.0128
Epoch 78/100
0s - loss: 0.0024 - val_loss: 0.0140
Epoch 79/100
0s - loss: 0.0024 - val_loss: 0.0139
Epoch 80/100
0s - loss: 0.0024 - val_loss: 0.0132
Epoch 81/100
0s - loss: 0.0023 - val_loss: 0.0111
Epoch 82/100
0s - loss: 0.0025 - val_loss: 0.0146
Epoch 83/100
0s - loss: 0.0024 - val_loss: 0.0120
Epoch 84/100
0s - loss: 0.0023 - val_loss: 0.0133
Epoch 85/100
0s - loss: 0.0023 - val_loss: 0.0117
Epoch 86/100
0s - loss: 0.0023 - val_loss: 0.0123
Epoch 87/100
0s - loss: 0.0023 - val_loss: 0.0111
Epoch 88/100
0s - loss: 0.0024 - val_loss: 0.0130
Epoch 89/100
0s - loss: 0.0023 - val_loss: 0.0131
Epoch 90/100
0s - loss: 0.0023 - val_loss: 0.0134
Epoch 91/100
0s - loss: 0.0023 - val_loss: 0.0139
Epoch 92/100
0s - loss: 0.0022 - val_loss: 0.0125
Epoch 93/100
0s - loss: 0.0023 - val_loss: 0.0121
Epoch 94/100
0s - loss: 0.0022 - val_loss: 0.0110
Epoch 95/100
0s - loss: 0.0023 - val_loss: 0.0138
Epoch 96/100
0s - loss: 0.0022 - val_loss: 0.0142
Epoch 97/100
0s - loss: 0.0022 - val_loss: 0.0114
Epoch 98/100
0s - loss: 0.0023 - val_loss: 0.0135
Epoch 99/100
0s - loss: 0.0022 - val_loss: 0.0132
Epoch 100/100
0s - loss: 0.0022 - val_loss: 0.0128
Out[23]:
<keras.callbacks.History at 0x10abd2c10>

In [29]:
# make prediction

train_predict = model.predict(trainX)
test_predict = model.predict(testX)

train_RMSE = math.sqrt(mean_squared_error(trainY, train_predict[:,0]))
print 'Train RMSE: %.7f' % (train_RMSE)
test_RMSE = math.sqrt(mean_squared_error(testY, test_predict[:,0]))
print 'Test RMSE: %.7f' % (test_RMSE)


Train RMSE: 0.0459457
Test RMSE: 0.1132370

In [53]:
# plot the prediction

train_predict_plot = np.empty_like(origin_values)
train_predict_plot[:, :] = np.nan
train_predict_plot[0:len(train_predict), :] = train_predict

test_predict_plot = np.empty_like(origin_values)
test_predict_plot[:, :] = np.nan
test_predict_plot[len(train_predict)+look_back*2-1:len(origin_values)-1, :] = test_predict

plt.plot(origin_values)
plt.plot(train_predict_plot)
plt.plot(test_predict_plot)
plt.show()



In [14]:
# METHOD 3 - exchange time_step and dimension in reshaped data

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

In [15]:
np.random.seed(410)
series = pd.read_csv('international-airline-passengers.csv', header=0, parse_dates=[0], index_col=0, squeeze=True)

origin_values = series.values.astype('float32')

# normalize the values
scaler = MinMaxScaler(feature_range=(0,1))
origin_values = scaler.fit_transform(origin_values.reshape(-1,1))   # reshape(-1,1) here is important

train_size = int(len(origin_values)*0.67)
train, test = origin_values[0:train_size,:], origin_values[train_size:len(origin_values),:]

print len(train), len(test)
print train[4:10]
print test[4:10]


96 48
[[ 0.03281853]
 [ 0.05984557]
 [ 0.08494207]
 [ 0.08494207]
 [ 0.06177607]
 [ 0.02895753]]
[[ 0.48455599]
 [ 0.61389959]
 [ 0.69691122]
 [ 0.70077217]
 [ 0.57915056]
 [ 0.46911195]]

In [16]:
# dataX holds the window from time t-look_back+1 through t, while dataY holds the value at time t+1
def shift_time(dataset, look_back):
    dataX = []
    dataY = []
    for i in range(len(dataset)-look_back):
        dataX.append(dataset[i:(i+look_back),0])
        dataY.append(dataset[i+look_back,0])
    return np.array(dataX), np.array(dataY)

In [17]:
time_step = 1
look_back = 1
trainX, trainY = shift_time(train, look_back)
testX, testY = shift_time(test, look_back)

print trainX[4:10]
print trainY[4:10]
print trainX.shape


[[ 0.03281853]
 [ 0.05984557]
 [ 0.08494207]
 [ 0.08494207]
 [ 0.06177607]
 [ 0.02895753]]
[ 0.05984557  0.08494207  0.08494207  0.06177607  0.02895753  0.        ]
(95, 1)

In [19]:
# reshape data into [samples, dimension, time_step] so that the LSTM layer can read it
## Compared with Methods 1 and 2, the only difference is that the positions of dimension and time_step are swapped

trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], time_step))
testX = np.reshape(testX, (testX.shape[0], testX.shape[1], time_step))

print trainX[4:10]
print testX[4:10]


[[[ 0.03281853]]

 [[ 0.05984557]]

 [[ 0.08494207]]

 [[ 0.08494207]]

 [[ 0.06177607]]

 [[ 0.02895753]]]
[[[ 0.48455599]]

 [[ 0.61389959]]

 [[ 0.69691122]]

 [[ 0.70077217]]

 [[ 0.57915056]]

 [[ 0.46911195]]]

In [20]:
# LSTM model

model = Sequential()
model.add(LSTM(4, input_shape=trainX.shape[1:]))  # the LSTM layer only needs (time_step, dimension) as input_shape
model.add(Dense(1))  # single output
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=2,
         validation_data=(testX, testY))


Train on 95 samples, validate on 47 samples
Epoch 1/100
1s - loss: 0.0328 - val_loss: 0.1836
Epoch 2/100
0s - loss: 0.0150 - val_loss: 0.1219
Epoch 3/100
0s - loss: 0.0123 - val_loss: 0.1048
Epoch 4/100
0s - loss: 0.0115 - val_loss: 0.0894
Epoch 5/100
0s - loss: 0.0107 - val_loss: 0.0869
Epoch 6/100
0s - loss: 0.0100 - val_loss: 0.0771
Epoch 7/100
0s - loss: 0.0091 - val_loss: 0.0699
Epoch 8/100
0s - loss: 0.0083 - val_loss: 0.0627
Epoch 9/100
0s - loss: 0.0074 - val_loss: 0.0569
Epoch 10/100
0s - loss: 0.0066 - val_loss: 0.0438
Epoch 11/100
0s - loss: 0.0059 - val_loss: 0.0374
Epoch 12/100
0s - loss: 0.0051 - val_loss: 0.0309
Epoch 13/100
0s - loss: 0.0045 - val_loss: 0.0297
Epoch 14/100
0s - loss: 0.0039 - val_loss: 0.0224
Epoch 15/100
0s - loss: 0.0035 - val_loss: 0.0197
Epoch 16/100
0s - loss: 0.0031 - val_loss: 0.0159
Epoch 17/100
0s - loss: 0.0028 - val_loss: 0.0137
Epoch 18/100
0s - loss: 0.0025 - val_loss: 0.0120
Epoch 19/100
0s - loss: 0.0024 - val_loss: 0.0116
Epoch 20/100
0s - loss: 0.0022 - val_loss: 0.0092
Epoch 21/100
0s - loss: 0.0022 - val_loss: 0.0090
Epoch 22/100
0s - loss: 0.0022 - val_loss: 0.0088
Epoch 23/100
0s - loss: 0.0021 - val_loss: 0.0089
Epoch 24/100
0s - loss: 0.0021 - val_loss: 0.0087
Epoch 25/100
0s - loss: 0.0021 - val_loss: 0.0089
Epoch 26/100
0s - loss: 0.0021 - val_loss: 0.0087
Epoch 27/100
0s - loss: 0.0021 - val_loss: 0.0088
Epoch 28/100
0s - loss: 0.0021 - val_loss: 0.0087
Epoch 29/100
0s - loss: 0.0021 - val_loss: 0.0089
Epoch 30/100
0s - loss: 0.0021 - val_loss: 0.0086
Epoch 31/100
0s - loss: 0.0021 - val_loss: 0.0087
Epoch 32/100
0s - loss: 0.0021 - val_loss: 0.0088
Epoch 33/100
0s - loss: 0.0021 - val_loss: 0.0087
Epoch 34/100
0s - loss: 0.0021 - val_loss: 0.0085
Epoch 35/100
0s - loss: 0.0021 - val_loss: 0.0087
Epoch 36/100
0s - loss: 0.0021 - val_loss: 0.0086
Epoch 37/100
0s - loss: 0.0021 - val_loss: 0.0085
Epoch 38/100
0s - loss: 0.0021 - val_loss: 0.0085
Epoch 39/100
0s - loss: 0.0022 - val_loss: 0.0086
Epoch 40/100
0s - loss: 0.0021 - val_loss: 0.0088
Epoch 41/100
0s - loss: 0.0021 - val_loss: 0.0086
Epoch 42/100
0s - loss: 0.0021 - val_loss: 0.0086
Epoch 43/100
0s - loss: 0.0020 - val_loss: 0.0085
Epoch 44/100
0s - loss: 0.0021 - val_loss: 0.0087
Epoch 45/100
0s - loss: 0.0021 - val_loss: 0.0085
Epoch 46/100
0s - loss: 0.0021 - val_loss: 0.0084
Epoch 47/100
0s - loss: 0.0021 - val_loss: 0.0090
Epoch 48/100
0s - loss: 0.0021 - val_loss: 0.0088
Epoch 49/100
0s - loss: 0.0020 - val_loss: 0.0086
Epoch 50/100
0s - loss: 0.0021 - val_loss: 0.0084
Epoch 51/100
0s - loss: 0.0021 - val_loss: 0.0097
Epoch 52/100
0s - loss: 0.0021 - val_loss: 0.0086
Epoch 53/100
0s - loss: 0.0021 - val_loss: 0.0086
Epoch 54/100
0s - loss: 0.0021 - val_loss: 0.0086
Epoch 55/100
0s - loss: 0.0021 - val_loss: 0.0086
Epoch 56/100
0s - loss: 0.0021 - val_loss: 0.0084
Epoch 57/100
0s - loss: 0.0021 - val_loss: 0.0083
Epoch 58/100
0s - loss: 0.0022 - val_loss: 0.0087
Epoch 59/100
0s - loss: 0.0021 - val_loss: 0.0086
Epoch 60/100
0s - loss: 0.0020 - val_loss: 0.0083
Epoch 61/100
0s - loss: 0.0021 - val_loss: 0.0085
Epoch 62/100
0s - loss: 0.0020 - val_loss: 0.0086
Epoch 63/100
0s - loss: 0.0020 - val_loss: 0.0091
Epoch 64/100
0s - loss: 0.0020 - val_loss: 0.0088
Epoch 65/100
0s - loss: 0.0021 - val_loss: 0.0087
Epoch 66/100
0s - loss: 0.0021 - val_loss: 0.0083
Epoch 67/100
0s - loss: 0.0021 - val_loss: 0.0085
Epoch 68/100
0s - loss: 0.0021 - val_loss: 0.0090
Epoch 69/100
0s - loss: 0.0021 - val_loss: 0.0089
Epoch 70/100
0s - loss: 0.0021 - val_loss: 0.0091
Epoch 71/100
0s - loss: 0.0021 - val_loss: 0.0086
Epoch 72/100
0s - loss: 0.0020 - val_loss: 0.0091
Epoch 73/100
0s - loss: 0.0021 - val_loss: 0.0088
Epoch 74/100
0s - loss: 0.0021 - val_loss: 0.0082
Epoch 75/100
0s - loss: 0.0020 - val_loss: 0.0090
Epoch 76/100
0s - loss: 0.0020 - val_loss: 0.0082
Epoch 77/100
0s - loss: 0.0020 - val_loss: 0.0087
Epoch 78/100
0s - loss: 0.0020 - val_loss: 0.0091
Epoch 79/100
0s - loss: 0.0021 - val_loss: 0.0090
Epoch 80/100
0s - loss: 0.0021 - val_loss: 0.0087
Epoch 81/100
0s - loss: 0.0021 - val_loss: 0.0087
Epoch 82/100
0s - loss: 0.0021 - val_loss: 0.0090
Epoch 83/100
0s - loss: 0.0021 - val_loss: 0.0085
Epoch 84/100
0s - loss: 0.0021 - val_loss: 0.0085
Epoch 85/100
0s - loss: 0.0020 - val_loss: 0.0082
Epoch 86/100
0s - loss: 0.0021 - val_loss: 0.0089
Epoch 87/100
0s - loss: 0.0020 - val_loss: 0.0083
Epoch 88/100
0s - loss: 0.0020 - val_loss: 0.0086
Epoch 89/100
0s - loss: 0.0021 - val_loss: 0.0091
Epoch 90/100
0s - loss: 0.0021 - val_loss: 0.0085
Epoch 91/100
0s - loss: 0.0021 - val_loss: 0.0088
Epoch 92/100
0s - loss: 0.0020 - val_loss: 0.0086
Epoch 93/100
0s - loss: 0.0020 - val_loss: 0.0082
Epoch 94/100
0s - loss: 0.0021 - val_loss: 0.0085
Epoch 95/100
0s - loss: 0.0022 - val_loss: 0.0091
Epoch 96/100
0s - loss: 0.0021 - val_loss: 0.0088
Epoch 97/100
0s - loss: 0.0021 - val_loss: 0.0090
Epoch 98/100
0s - loss: 0.0021 - val_loss: 0.0082
Epoch 99/100
0s - loss: 0.0021 - val_loss: 0.0083
Epoch 100/100
0s - loss: 0.0020 - val_loss: 0.0092
Out[20]:
<keras.callbacks.History at 0x12341ac50>

In [21]:
# make prediction

train_predict = model.predict(trainX)
test_predict = model.predict(testX)

train_RMSE = math.sqrt(mean_squared_error(trainY[:], train_predict[:,0]))
print 'Train RMSE: %.7f' % (train_RMSE)
test_RMSE = math.sqrt(mean_squared_error(testY[:], test_predict[:,0]))
print 'Test RMSE: %.7f' % (test_RMSE)
print

print train_predict[4:10]
print origin_values.shape
print train_predict.shape
print test_predict.shape


Train RMSE: 0.0448691
Test RMSE: 0.0961101

[[ 0.04352463]
 [ 0.0673124 ]
 [ 0.08974855]
 [ 0.08974855]
 [ 0.06902682]
 [ 0.04016035]]
(144, 1)
(95, 1)
(47, 1)

In [24]:
# plot the prediction

train_predict_plot = np.empty_like(origin_values)
train_predict_plot[:, :] = np.nan
train_predict_plot[0:len(train_predict), :] = train_predict

test_predict_plot = np.empty_like(origin_values)
test_predict_plot[:, :] = np.nan
test_predict_plot[len(train_predict)+look_back:len(origin_values)-1, :] = test_predict

plt.plot(origin_values)
plt.plot(train_predict_plot)
plt.plot(test_predict_plot)
plt.show()



In [25]:
# METHOD 4 - LSTM with memory between batches

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

In [26]:
np.random.seed(410)
series = pd.read_csv('international-airline-passengers.csv', header=0, parse_dates=[0], index_col=0, squeeze=True)

origin_values = series.values.astype('float32')

# normalize the values
scaler = MinMaxScaler(feature_range=(0,1))
origin_values = scaler.fit_transform(origin_values.reshape(-1,1))   # reshape(-1,1) here is important

train_size = int(len(origin_values)*0.67)
train, test = origin_values[0:train_size,:], origin_values[train_size:len(origin_values),:]

print len(train), len(test)
print train[4:10]
print test[4:10]


96 48
[[ 0.03281853]
 [ 0.05984557]
 [ 0.08494207]
 [ 0.08494207]
 [ 0.06177607]
 [ 0.02895753]]
[[ 0.48455599]
 [ 0.61389959]
 [ 0.69691122]
 [ 0.70077217]
 [ 0.57915056]
 [ 0.46911195]]

In [27]:
# dataX holds the window from time t-look_back+1 through t, while dataY holds the value at time t+1
def shift_time(dataset, look_back):
    dataX = []
    dataY = []
    for i in range(len(dataset)-look_back):
        dataX.append(dataset[i:(i+look_back),0])
        dataY.append(dataset[i+look_back,0])
    return np.array(dataX), np.array(dataY)

In [28]:
time_step = 1
look_back = 3
trainX, trainY = shift_time(train, look_back)
testX, testY = shift_time(test, look_back)

print trainX[4:10]
print trainY[4:10]
print trainX.shape


[[ 0.03281853  0.05984557  0.08494207]
 [ 0.05984557  0.08494207  0.08494207]
 [ 0.08494207  0.08494207  0.06177607]
 [ 0.08494207  0.06177607  0.02895753]
 [ 0.06177607  0.02895753  0.        ]
 [ 0.02895753  0.          0.02702703]]
[ 0.08494207  0.06177607  0.02895753  0.          0.02702703  0.02123553]
(93, 3)

In [29]:
trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], time_step))
testX = np.reshape(testX, (testX.shape[0], testX.shape[1], time_step))

print trainX[4:10]
print testX[4:10]


[[[ 0.03281853]
  [ 0.05984557]
  [ 0.08494207]]

 [[ 0.05984557]
  [ 0.08494207]
  [ 0.08494207]]

 [[ 0.08494207]
  [ 0.08494207]
  [ 0.06177607]]

 [[ 0.08494207]
  [ 0.06177607]
  [ 0.02895753]]

 [[ 0.06177607]
  [ 0.02895753]
  [ 0.        ]]

 [[ 0.02895753]
  [ 0.        ]
  [ 0.02702703]]]
[[[ 0.48455599]
  [ 0.61389959]
  [ 0.69691122]]

 [[ 0.61389959]
  [ 0.69691122]
  [ 0.70077217]]

 [[ 0.69691122]
  [ 0.70077217]
  [ 0.57915056]]

 [[ 0.70077217]
  [ 0.57915056]
  [ 0.46911195]]

 [[ 0.57915056]
  [ 0.46911195]
  [ 0.38803086]]

 [[ 0.46911195]
  [ 0.38803086]
  [ 0.44787642]]]

In [30]:
# Here are the details that make Method 4 different from the methods above

## first of all, set stateful=True in the LSTM layer
## you also need to hard-code batch_input_shape, which specifies:
### the number of samples in a batch
### the number of time steps in a sample
### the number of features in a time step
batch_size = 1
model = Sequential()
model.add(LSTM(4, batch_input_shape=(batch_size, look_back, time_step), stateful=True))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')

In [31]:
## secondly, you need to loop over the epochs yourself, fitting the model and resetting its state in each iteration
### This is because the training data must not be shuffled when fitting a stateful model,
### and you need to explicitly reset the network state after each epoch (and again after predicting on the training set)

for i in range(100):
    model.fit(trainX, trainY, epochs=1, batch_size=batch_size, verbose=2, shuffle=False)
    model.reset_states()


Epoch 1/1
2s - loss: 0.0043
Epoch 1/1
1s - loss: 0.0096
Epoch 1/1
1s - loss: 0.0073
Epoch 1/1
0s - loss: 0.0059
Epoch 1/1
0s - loss: 0.0051
Epoch 1/1
1s - loss: 0.0046
Epoch 1/1
1s - loss: 0.0044
Epoch 1/1
1s - loss: 0.0042
Epoch 1/1
1s - loss: 0.0041
Epoch 1/1
0s - loss: 0.0041
Epoch 1/1
0s - loss: 0.0040
Epoch 1/1
1s - loss: 0.0040
Epoch 1/1
0s - loss: 0.0039
Epoch 1/1
0s - loss: 0.0039
Epoch 1/1
1s - loss: 0.0039
Epoch 1/1
1s - loss: 0.0039
Epoch 1/1
0s - loss: 0.0038
Epoch 1/1
0s - loss: 0.0038
Epoch 1/1
0s - loss: 0.0038
Epoch 1/1
0s - loss: 0.0038
Epoch 1/1
0s - loss: 0.0037
Epoch 1/1
1s - loss: 0.0037
Epoch 1/1
0s - loss: 0.0037
Epoch 1/1
0s - loss: 0.0037
Epoch 1/1
0s - loss: 0.0036
Epoch 1/1
1s - loss: 0.0036
Epoch 1/1
0s - loss: 0.0036
Epoch 1/1
0s - loss: 0.0036
Epoch 1/1
0s - loss: 0.0036
Epoch 1/1
1s - loss: 0.0035
Epoch 1/1
0s - loss: 0.0035
Epoch 1/1
0s - loss: 0.0035
Epoch 1/1
1s - loss: 0.0035
Epoch 1/1
1s - loss: 0.0035
Epoch 1/1
0s - loss: 0.0034
Epoch 1/1
1s - loss: 0.0034
Epoch 1/1
0s - loss: 0.0034
Epoch 1/1
1s - loss: 0.0034
Epoch 1/1
0s - loss: 0.0034
Epoch 1/1
0s - loss: 0.0034
Epoch 1/1
0s - loss: 0.0033
Epoch 1/1
0s - loss: 0.0033
Epoch 1/1
0s - loss: 0.0033
Epoch 1/1
0s - loss: 0.0033
Epoch 1/1
0s - loss: 0.0033
Epoch 1/1
0s - loss: 0.0032
Epoch 1/1
1s - loss: 0.0032
Epoch 1/1
1s - loss: 0.0032
Epoch 1/1
1s - loss: 0.0032
Epoch 1/1
0s - loss: 0.0031
Epoch 1/1
0s - loss: 0.0031
Epoch 1/1
1s - loss: 0.0031
Epoch 1/1
0s - loss: 0.0031
Epoch 1/1
1s - loss: 0.0031
Epoch 1/1
0s - loss: 0.0030
Epoch 1/1
1s - loss: 0.0030
Epoch 1/1
1s - loss: 0.0030
Epoch 1/1
0s - loss: 0.0030
Epoch 1/1
1s - loss: 0.0030
Epoch 1/1
1s - loss: 0.0029
Epoch 1/1
0s - loss: 0.0029
Epoch 1/1
0s - loss: 0.0029
Epoch 1/1
1s - loss: 0.0029
Epoch 1/1
1s - loss: 0.0028
Epoch 1/1
0s - loss: 0.0028
Epoch 1/1
0s - loss: 0.0028
Epoch 1/1
1s - loss: 0.0027
Epoch 1/1
1s - loss: 0.0027
Epoch 1/1
1s - loss: 0.0027
Epoch 1/1
1s - loss: 0.0027
Epoch 1/1
0s - loss: 0.0026
Epoch 1/1
1s - loss: 0.0026
Epoch 1/1
0s - loss: 0.0026
Epoch 1/1
1s - loss: 0.0025
Epoch 1/1
0s - loss: 0.0025
Epoch 1/1
1s - loss: 0.0025
Epoch 1/1
0s - loss: 0.0025
Epoch 1/1
0s - loss: 0.0024
Epoch 1/1
0s - loss: 0.0024
Epoch 1/1
0s - loss: 0.0024
Epoch 1/1
1s - loss: 0.0023
Epoch 1/1
1s - loss: 0.0023
Epoch 1/1
0s - loss: 0.0023
Epoch 1/1
1s - loss: 0.0022
Epoch 1/1
1s - loss: 0.0022
Epoch 1/1
0s - loss: 0.0022
Epoch 1/1
0s - loss: 0.0021
Epoch 1/1
1s - loss: 0.0021
Epoch 1/1
1s - loss: 0.0021
Epoch 1/1
0s - loss: 0.0021
Epoch 1/1
0s - loss: 0.0020
Epoch 1/1
0s - loss: 0.0020
Epoch 1/1
1s - loss: 0.0020
Epoch 1/1
0s - loss: 0.0020
Epoch 1/1
0s - loss: 0.0020
Epoch 1/1
0s - loss: 0.0020
Epoch 1/1
1s - loss: 0.0019
Epoch 1/1
1s - loss: 0.0019
Epoch 1/1
1s - loss: 0.0019
Epoch 1/1
0s - loss: 0.0019

In [33]:
# make predictions

## here, you have to specify the same batch_size used above
train_predict = model.predict(trainX, batch_size=batch_size)
model.reset_states()   # clear the state built up during the training-set prediction before predicting on the test set
test_predict = model.predict(testX, batch_size=batch_size)

train_RMSE = math.sqrt(mean_squared_error(trainY[:], train_predict[:,0]))
print 'Train RMSE: %.7f' % (train_RMSE)
test_RMSE = math.sqrt(mean_squared_error(testY[:], test_predict[:,0]))
print 'Test RMSE: %.7f' % (test_RMSE)
print

print train_predict[4:10]
print origin_values.shape
print train_predict.shape
print test_predict.shape


Train RMSE: 0.0440787
Test RMSE: 0.0971128

[[ 0.08172657]
 [ 0.08843055]
 [ 0.07361747]
 [ 0.04675714]
 [ 0.01867417]
 [ 0.02978996]]
(144, 1)
(93, 1)
(45, 1)

In [44]:
train_predict_plot = np.empty_like(origin_values)
train_predict_plot[:, :] = np.nan
train_predict_plot[0:len(train_predict), :] = train_predict

test_predict_plot = np.empty_like(origin_values)
test_predict_plot[:, :] = np.nan
test_predict_plot[len(train_predict)+look_back*2:len(origin_values), :] = test_predict

plt.plot(origin_values)
plt.plot(train_predict_plot)
plt.plot(test_predict_plot)
plt.show()



In [45]:
# Method 5 - Stacked LSTMs with Memory Between Batches
## You simply stack multiple LSTM layers together

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

In [57]:
np.random.seed(410)
series = pd.read_csv('international-airline-passengers.csv', header=0, parse_dates=[0], index_col=0, squeeze=True)

origin_values = series.values.astype('float32')

# normalize the values
scaler = MinMaxScaler(feature_range=(0,1))
origin_values = scaler.fit_transform(origin_values.reshape(-1,1))   # reshape(-1,1) here is important

train_size = int(len(origin_values)*0.67)
train, test = origin_values[0:train_size,:], origin_values[train_size:len(origin_values),:]

print len(train), len(test)
print train[4:10]
print test[4:10]


96 48
[[ 0.03281853]
 [ 0.05984557]
 [ 0.08494207]
 [ 0.08494207]
 [ 0.06177607]
 [ 0.02895753]]
[[ 0.48455599]
 [ 0.61389959]
 [ 0.69691122]
 [ 0.70077217]
 [ 0.57915056]
 [ 0.46911195]]

In [58]:
# dataX holds the window from time t-look_back+1 through t, while dataY holds the value at time t+1
def shift_time(dataset, look_back):
    dataX = []
    dataY = []
    for i in range(len(dataset)-look_back):
        dataX.append(dataset[i:(i+look_back),0])
        dataY.append(dataset[i+look_back,0])
    return np.array(dataX), np.array(dataY)

In [64]:
time_step = 1
look_back = 3
trainX, trainY = shift_time(train, look_back)
testX, testY = shift_time(test, look_back)

print trainX[4:10]
print trainY[4:10]
print trainX.shape


[[ 0.03281853  0.05984557  0.08494207]
 [ 0.05984557  0.08494207  0.08494207]
 [ 0.08494207  0.08494207  0.06177607]
 [ 0.08494207  0.06177607  0.02895753]
 [ 0.06177607  0.02895753  0.        ]
 [ 0.02895753  0.          0.02702703]]
[ 0.08494207  0.06177607  0.02895753  0.          0.02702703  0.02123553]
(93, 3)

In [65]:
trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], time_step))
testX = np.reshape(testX, (testX.shape[0], testX.shape[1], time_step))

print trainX[4:10]
print testX[4:10]


[[[ 0.03281853]
  [ 0.05984557]
  [ 0.08494207]]

 [[ 0.05984557]
  [ 0.08494207]
  [ 0.08494207]]

 [[ 0.08494207]
  [ 0.08494207]
  [ 0.06177607]]

 [[ 0.08494207]
  [ 0.06177607]
  [ 0.02895753]]

 [[ 0.06177607]
  [ 0.02895753]
  [ 0.        ]]

 [[ 0.02895753]
  [ 0.        ]
  [ 0.02702703]]]
[[[ 0.48455599]
  [ 0.61389959]
  [ 0.69691122]]

 [[ 0.61389959]
  [ 0.69691122]
  [ 0.70077217]]

 [[ 0.69691122]
  [ 0.70077217]
  [ 0.57915056]]

 [[ 0.70077217]
  [ 0.57915056]
  [ 0.46911195]]

 [[ 0.57915056]
  [ 0.46911195]
  [ 0.38803086]]

 [[ 0.46911195]
  [ 0.38803086]
  [ 0.44787642]]]

In [66]:
batch_size = 1
model = Sequential()
model.add(LSTM(4, batch_input_shape=(batch_size, look_back, 1), stateful=True, return_sequences=True))  # return the full sequence so the next LSTM layer receives 3D input
model.add(LSTM(4, batch_input_shape=(batch_size, look_back, 1), stateful=True))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
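For reference (a sketch only, not the notebook's code): Keras infers the input shape of a non-first layer from the layer above it, so the second LSTM does not strictly need its own batch_input_shape; what matters is return_sequences=True on the first layer so the second LSTM still receives a 3D sequence. An equivalent definition would be:

model = Sequential()
model.add(LSTM(4, batch_input_shape=(batch_size, look_back, 1),
               stateful=True, return_sequences=True))  # pass the full sequence to the next layer
model.add(LSTM(4, stateful=True))                      # input shape inferred from the layer above
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')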

In [67]:
for i in range(100):
    model.fit(trainX, trainY, epochs=1, batch_size=batch_size, verbose=2, shuffle=False)
    model.reset_states()


Epoch 1/1
4s - loss: 0.0047
Epoch 1/1
1s - loss: 0.0132
Epoch 1/1
1s - loss: 0.0088
Epoch 1/1
1s - loss: 0.0070
Epoch 1/1
1s - loss: 0.0061
Epoch 1/1
1s - loss: 0.0058
Epoch 1/1
1s - loss: 0.0057
Epoch 1/1
1s - loss: 0.0056
Epoch 1/1
1s - loss: 0.0055
Epoch 1/1
1s - loss: 0.0055
Epoch 1/1
1s - loss: 0.0055
Epoch 1/1
1s - loss: 0.0055
Epoch 1/1
1s - loss: 0.0054
Epoch 1/1
1s - loss: 0.0054
Epoch 1/1
1s - loss: 0.0054
Epoch 1/1
1s - loss: 0.0054
Epoch 1/1
1s - loss: 0.0054
Epoch 1/1
1s - loss: 0.0054
Epoch 1/1
1s - loss: 0.0053
Epoch 1/1
1s - loss: 0.0053
Epoch 1/1
1s - loss: 0.0053
Epoch 1/1
1s - loss: 0.0053
Epoch 1/1
1s - loss: 0.0052
Epoch 1/1
1s - loss: 0.0052
Epoch 1/1
1s - loss: 0.0052
Epoch 1/1
1s - loss: 0.0051
Epoch 1/1
1s - loss: 0.0051
Epoch 1/1
1s - loss: 0.0050
Epoch 1/1
1s - loss: 0.0050
Epoch 1/1
1s - loss: 0.0049
Epoch 1/1
1s - loss: 0.0048
Epoch 1/1
1s - loss: 0.0047
Epoch 1/1
1s - loss: 0.0047
Epoch 1/1
1s - loss: 0.0046
Epoch 1/1
1s - loss: 0.0045
Epoch 1/1
1s - loss: 0.0045
Epoch 1/1
1s - loss: 0.0044
Epoch 1/1
1s - loss: 0.0043
Epoch 1/1
1s - loss: 0.0043
Epoch 1/1
1s - loss: 0.0042
Epoch 1/1
1s - loss: 0.0041
Epoch 1/1
1s - loss: 0.0041
Epoch 1/1
1s - loss: 0.0040
Epoch 1/1
1s - loss: 0.0039
Epoch 1/1
1s - loss: 0.0038
Epoch 1/1
1s - loss: 0.0037
Epoch 1/1
1s - loss: 0.0036
Epoch 1/1
1s - loss: 0.0035
Epoch 1/1
1s - loss: 0.0035
Epoch 1/1
1s - loss: 0.0034
Epoch 1/1
1s - loss: 0.0033
Epoch 1/1
1s - loss: 0.0032
Epoch 1/1
1s - loss: 0.0031
Epoch 1/1
1s - loss: 0.0030
Epoch 1/1
1s - loss: 0.0029
Epoch 1/1
1s - loss: 0.0029
Epoch 1/1
1s - loss: 0.0028
Epoch 1/1
1s - loss: 0.0027
Epoch 1/1
1s - loss: 0.0026
Epoch 1/1
1s - loss: 0.0025
Epoch 1/1
1s - loss: 0.0025
Epoch 1/1
1s - loss: 0.0024
Epoch 1/1
1s - loss: 0.0023
Epoch 1/1
1s - loss: 0.0023
Epoch 1/1
1s - loss: 0.0022
Epoch 1/1
1s - loss: 0.0022
Epoch 1/1
1s - loss: 0.0021
Epoch 1/1
1s - loss: 0.0021
Epoch 1/1
1s - loss: 0.0020
Epoch 1/1
1s - loss: 0.0020
Epoch 1/1
1s - loss: 0.0020
Epoch 1/1
1s - loss: 0.0019
Epoch 1/1
1s - loss: 0.0019
Epoch 1/1
1s - loss: 0.0019
Epoch 1/1
1s - loss: 0.0019
Epoch 1/1
1s - loss: 0.0018
Epoch 1/1
1s - loss: 0.0018
Epoch 1/1
1s - loss: 0.0018
Epoch 1/1
1s - loss: 0.0018
Epoch 1/1
1s - loss: 0.0018
Epoch 1/1
1s - loss: 0.0018
Epoch 1/1
1s - loss: 0.0018
Epoch 1/1
1s - loss: 0.0017
Epoch 1/1
1s - loss: 0.0017
Epoch 1/1
1s - loss: 0.0017
Epoch 1/1
1s - loss: 0.0017
Epoch 1/1
1s - loss: 0.0017
Epoch 1/1
1s - loss: 0.0017
Epoch 1/1
1s - loss: 0.0017
Epoch 1/1
1s - loss: 0.0017
Epoch 1/1
1s - loss: 0.0017
Epoch 1/1
1s - loss: 0.0017
Epoch 1/1
1s - loss: 0.0017
Epoch 1/1
1s - loss: 0.0017
Epoch 1/1
1s - loss: 0.0016
Epoch 1/1
1s - loss: 0.0016
Epoch 1/1
1s - loss: 0.0016
Epoch 1/1
1s - loss: 0.0016
Epoch 1/1
1s - loss: 0.0016
Epoch 1/1
1s - loss: 0.0016

In [68]:
# make predictions

## here, you have to specify the same batch_size used above
train_predict = model.predict(trainX, batch_size=batch_size)
model.reset_states()   # clear the state built up during the training-set prediction before predicting on the test set
test_predict = model.predict(testX, batch_size=batch_size)

train_RMSE = math.sqrt(mean_squared_error(trainY[:], train_predict[:,0]))
print 'Train RMSE: %.7f' % (train_RMSE)
test_RMSE = math.sqrt(mean_squared_error(testY[:], test_predict[:,0]))
print 'Test RMSE: %.7f' % (test_RMSE)
print

print train_predict[4:10]
print origin_values.shape
print train_predict.shape
print test_predict.shape


Train RMSE: 0.0400681
Test RMSE: 0.1387102

[[ 0.08066928]
 [ 0.0845684 ]
 [ 0.06806657]
 [ 0.04468185]
 [ 0.02371259]
 [ 0.03402945]]
(144, 1)
(93, 1)
(45, 1)

In [69]:
train_predict_plot = np.empty_like(origin_values)
train_predict_plot[:, :] = np.nan
train_predict_plot[0:len(train_predict), :] = train_predict

test_predict_plot = np.empty_like(origin_values)
test_predict_plot[:, :] = np.nan
test_predict_plot[len(train_predict)+look_back*2:len(origin_values), :] = test_predict

plt.plot(origin_values)
plt.plot(train_predict_plot)
plt.plot(test_predict_plot)
plt.show()