In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
# download the dataset here:
## https://datamarket.com/data/set/22u3/international-airline-passengers-monthly-totals-in-thousands-jan-49-dec-60#!ds=22u3&display=line
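## (editor's aside, a hedged alternative) the DataMarket link may no longer resolve;
## the same 144-month series also ships with seaborn as the 'flights' dataset, e.g.:
## import seaborn as sns
## series = sns.load_dataset('flights')['passengers'].astype('float32')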
In [3]:
# METHOD 1 - use time t to predict time t+1
## look_back is 1, time_step is 1
np.random.seed(410)
In [4]:
series = pd.read_csv('international-airline-passengers.csv', header=0, parse_dates=[0], index_col=0, squeeze=True)
plt.plot(series)
plt.show()
In [5]:
series.head()
Out[5]:
In [6]:
origin_values = series.values.astype('float32')
# normalize the values to the [0, 1] range
scaler = MinMaxScaler(feature_range=(0,1))
origin_values = scaler.fit_transform(origin_values.reshape(-1,1)) # reshape(-1,1) matters: MinMaxScaler expects a 2D array of shape (n_samples, n_features)
origin_values[4:10]
Out[6]:
In [7]:
train_size = int(len(origin_values)*0.67)
train, test = origin_values[0:train_size,:], origin_values[train_size:len(origin_values),:]
print(len(train), len(test))
print(train[4:10])
In [8]:
# dataX holds the window of values from time t-look_back+1 through t,
## while the matching entry of dataY is the value at time t+1
def shift_time(dataset, look_back):
    dataX = []
    dataY = []
    for i in range(len(dataset)-look_back):
        dataX.append(dataset[i:(i+look_back), 0])
        dataY.append(dataset[i+look_back, 0])
    return np.array(dataX), np.array(dataY)
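# a quick sanity check of shift_time on a toy column vector (editor's sketch):
## with look_back=1, row i of X is the value at time i and y[i] is the value at time i+1
toy = np.arange(6, dtype='float32').reshape(-1, 1)
toyX, toyY = shift_time(toy, 1)
print(toyX.shape, toyY.shape) # (5, 1) (5,)
print(toyX[:, 0]) # [0. 1. 2. 3. 4.]
print(toyY) # [1. 2. 3. 4. 5.]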
In [9]:
time_step = 1
look_back = 1
trainX, trainY = shift_time(train, look_back)
testX, testY = shift_time(test, look_back)
print(trainX[4:10])
print(trainY[4:10])
print(trainX.shape)
In [10]:
# reshape the input into [samples, time_step, features] so the LSTM layer can read it
## here each sample is a single time step whose features are the look_back window values
trainX = np.reshape(trainX, (trainX.shape[0], time_step, trainX.shape[1]))
testX = np.reshape(testX, (testX.shape[0], time_step, testX.shape[1]))
In [11]:
print(trainX[4:10])
print(testX[4:10])
In [47]:
trainX.shape[1:]
Out[47]:
In [48]:
# LSTM model
model = Sequential()
model.add(LSTM(4, input_shape=trainX.shape[1:])) # the LSTM layer only needs (time_step, features) as input_shape
model.add(Dense(1)) # single output
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=2,
          validation_data=(testX, testY))
Out[48]:
In [50]:
# make prediction
train_predict = model.predict(trainX)
test_predict = model.predict(testX)
train_RMSE = math.sqrt(mean_squared_error(trainY, train_predict[:,0]))
print('Train RMSE: %.7f' % train_RMSE)
test_RMSE = math.sqrt(mean_squared_error(testY, test_predict[:,0]))
print('Test RMSE: %.7f' % test_RMSE)
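# (optional sketch) the RMSE above is in normalized [0, 1] units; to report it in the
## original passenger counts, invert the MinMax scaling first
train_predict_inv = scaler.inverse_transform(train_predict)
trainY_inv = scaler.inverse_transform(trainY.reshape(-1, 1))
print('Train RMSE (passengers): %.2f' % math.sqrt(mean_squared_error(trainY_inv, train_predict_inv)))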
In [51]:
print(train_predict[4:10])
print(origin_values.shape)
print(train_predict.shape)
print(test_predict.shape)
In [52]:
# plot the prediction
train_predict_plot = np.empty_like(origin_values)
train_predict_plot[:, :] = np.nan
train_predict_plot[look_back:len(train_predict)+look_back, :] = train_predict # align: prediction i corresponds to original index i+look_back
test_predict_plot = np.empty_like(origin_values)
test_predict_plot[:, :] = np.nan
test_predict_plot[train_size+look_back:len(origin_values), :] = test_predict # test predictions start look_back steps into the test split
plt.plot(origin_values)
plt.plot(train_predict_plot)
plt.plot(test_predict_plot)
plt.show()
In [13]:
# METHOD 2 - WINDOW method
## use times t-2, t-1, t to predict time t+1
## compared with Method 1, you only change the look_back value,
## and with a look_back of n, your features (trainX, testX) have n columns
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
In [19]:
np.random.seed(410)
series = pd.read_csv('international-airline-passengers.csv', header=0, parse_dates=[0], index_col=0, squeeze=True)
origin_values = series.values.astype('float32')
# normalize the values
scaler = MinMaxScaler(feature_range=(0,1))
origin_values = scaler.fit_transform(origin_values.reshape(-1,1)) # reshape(-1,1) here is important
train_size = int(len(origin_values)*0.67)
train, test = origin_values[0:train_size,:], origin_values[train_size:len(origin_values),:]
print(len(train), len(test))
print(train[4:10])
print(test[4:10])
In [20]:
# dataX holds the window of values from time t-look_back+1 through t,
## while the matching entry of dataY is the value at time t+1
def shift_time(dataset, look_back):
    dataX = []
    dataY = []
    for i in range(len(dataset)-look_back):
        dataX.append(dataset[i:(i+look_back), 0])
        dataY.append(dataset[i+look_back, 0])
    return np.array(dataX), np.array(dataY)
In [21]:
time_step = 1
look_back = 3
trainX, trainY = shift_time(train, look_back)
testX, testY = shift_time(test, look_back)
print(trainX[4:10])
print(trainY[4:10])
print(trainX.shape)
In [22]:
# reshape the input into [samples, time_step, features] so the LSTM layer can read it
## with look_back=3 each sample is a single time step carrying three features (the window values)
trainX = np.reshape(trainX, (trainX.shape[0], time_step, trainX.shape[1]))
testX = np.reshape(testX, (testX.shape[0], time_step, testX.shape[1]))
print(trainX[4:10])
print(testX[4:10])
In [23]:
# LSTM model
model = Sequential()
model.add(LSTM(4, input_shape=trainX.shape[1:])) # the LSTM layer only needs (time_step, features) as input_shape
model.add(Dense(1)) # single output
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=2,
          validation_data=(testX, testY))
Out[23]:
In [29]:
# make prediction
train_predict = model.predict(trainX)
test_predict = model.predict(testX)
train_RMSE = math.sqrt(mean_squared_error(trainY, train_predict[:,0]))
print('Train RMSE: %.7f' % train_RMSE)
test_RMSE = math.sqrt(mean_squared_error(testY, test_predict[:,0]))
print('Test RMSE: %.7f' % test_RMSE)
In [53]:
# plot the prediction
train_predict_plot = np.empty_like(origin_values)
train_predict_plot[:, :] = np.nan
train_predict_plot[look_back:len(train_predict)+look_back, :] = train_predict # align: prediction i corresponds to original index i+look_back
test_predict_plot = np.empty_like(origin_values)
test_predict_plot[:, :] = np.nan
test_predict_plot[train_size+look_back:len(origin_values), :] = test_predict # test predictions start look_back steps into the test split
plt.plot(origin_values)
plt.plot(train_predict_plot)
plt.plot(test_predict_plot)
plt.show()
In [14]:
# METHOD 3 - swap time_step and features in the reshaped data
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
In [15]:
np.random.seed(410)
series = pd.read_csv('international-airline-passengers.csv', header=0, parse_dates=[0], index_col=0, squeeze=True)
origin_values = series.values.astype('float32')
# normalize the values
scaler = MinMaxScaler(feature_range=(0,1))
origin_values = scaler.fit_transform(origin_values.reshape(-1,1)) # reshape(-1,1) here is important
train_size = int(len(origin_values)*0.67)
train, test = origin_values[0:train_size,:], origin_values[train_size:len(origin_values),:]
print(len(train), len(test))
print(train[4:10])
print(test[4:10])
In [16]:
# dataX holds the window of values from time t-look_back+1 through t,
## while the matching entry of dataY is the value at time t+1
def shift_time(dataset, look_back):
    dataX = []
    dataY = []
    for i in range(len(dataset)-look_back):
        dataX.append(dataset[i:(i+look_back), 0])
        dataY.append(dataset[i+look_back, 0])
    return np.array(dataX), np.array(dataY)
In [17]:
time_step = 1
look_back = 1
trainX, trainY = shift_time(train, look_back)
testX, testY = shift_time(test, look_back)
print(trainX[4:10])
print(trainY[4:10])
print(trainX.shape)
In [19]:
# reshape the data into [samples, look_back, time_step] so the window values sit on the time-step axis
## compared with Methods 1 and 2, the positions of time_step and features are simply swapped;
## with look_back=1 the resulting shapes coincide, so the distinction only matters for longer windows
trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], time_step))
testX = np.reshape(testX, (testX.shape[0], testX.shape[1], time_step))
print(trainX[4:10])
print(testX[4:10])
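# (editor's sketch) contrasting the two layouts for a single look_back=3 window;
## here look_back=1 so both layouts coincide, but a longer window shows the difference:
w = np.array([[0.1, 0.2, 0.3]], dtype='float32') # one sample, window of 3 values
print(np.reshape(w, (1, 1, 3)).shape) # Method 2 layout: (samples, time_step=1, features=3)
print(np.reshape(w, (1, 3, 1)).shape) # Method 3 layout: (samples, time_steps=3, features=1)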
In [20]:
# LSTM model
model = Sequential()
model.add(LSTM(4, input_shape=trainX.shape[1:])) # the LSTM layer only needs (time_steps, features) as input_shape
model.add(Dense(1)) # single output
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=2,
          validation_data=(testX, testY))
Out[20]:
In [21]:
# make prediction
train_predict = model.predict(trainX)
test_predict = model.predict(testX)
train_RMSE = math.sqrt(mean_squared_error(trainY, train_predict[:,0]))
print('Train RMSE: %.7f' % train_RMSE)
test_RMSE = math.sqrt(mean_squared_error(testY, test_predict[:,0]))
print('Test RMSE: %.7f' % test_RMSE)
print()
print(train_predict[4:10])
print(origin_values.shape)
print(train_predict.shape)
print(test_predict.shape)
In [24]:
# plot the prediction
train_predict_plot = np.empty_like(origin_values)
train_predict_plot[:, :] = np.nan
train_predict_plot[look_back:len(train_predict)+look_back, :] = train_predict # align: prediction i corresponds to original index i+look_back
test_predict_plot = np.empty_like(origin_values)
test_predict_plot[:, :] = np.nan
test_predict_plot[train_size+look_back:len(origin_values), :] = test_predict # test predictions start look_back steps into the test split
plt.plot(origin_values)
plt.plot(train_predict_plot)
plt.plot(test_predict_plot)
plt.show()
In [25]:
# METHOD 4 - LSTM with memory between batches
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
In [26]:
np.random.seed(410)
series = pd.read_csv('international-airline-passengers.csv', header=0, parse_dates=[0], index_col=0, squeeze=True)
origin_values = series.values.astype('float32')
# normalize the values
scaler = MinMaxScaler(feature_range=(0,1))
origin_values = scaler.fit_transform(origin_values.reshape(-1,1)) # reshape(-1,1) here is important
train_size = int(len(origin_values)*0.67)
train, test = origin_values[0:train_size,:], origin_values[train_size:len(origin_values),:]
print(len(train), len(test))
print(train[4:10])
print(test[4:10])
In [27]:
# dataX holds the window of values from time t-look_back+1 through t,
## while the matching entry of dataY is the value at time t+1
def shift_time(dataset, look_back):
    dataX = []
    dataY = []
    for i in range(len(dataset)-look_back):
        dataX.append(dataset[i:(i+look_back), 0])
        dataY.append(dataset[i+look_back, 0])
    return np.array(dataX), np.array(dataY)
In [28]:
time_step = 1
look_back = 3
trainX, trainY = shift_time(train, look_back)
testX, testY = shift_time(test, look_back)
print(trainX[4:10])
print(trainY[4:10])
print(trainX.shape)
In [29]:
trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], time_step))
testX = np.reshape(testX, (testX.shape[0], testX.shape[1], time_step))
print(trainX[4:10])
print(testX[4:10])
In [30]:
# Here are the details that make Method 4 different from the methods above
## first, set stateful=True in the LSTM layer
## you also need to hard-code batch_input_shape, which specifies:
### the number of samples in a batch
### the number of time steps in a sample
### the number of features in a time step
batch_size = 1
model = Sequential()
model.add(LSTM(4, batch_input_shape=(batch_size, look_back, time_step), stateful=True))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
In [31]:
## secondly, loop over the epochs yourself: fit the model and reset state in each epoch
### this is because the training data must not be shuffled when fitting a stateful model,
### and you need to explicitly reset the network state after each epoch (and again after predicting on the training set)
for i in range(100):
    model.fit(trainX, trainY, epochs=1, batch_size=batch_size, verbose=2, shuffle=False)
    model.reset_states()
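## (alternative sketch, assuming keras.callbacks is available) the manual epoch loop
## can also be written as a single fit() call with a state-resetting callback:
# from keras.callbacks import LambdaCallback
# reset_cb = LambdaCallback(on_epoch_end=lambda epoch, logs: model.reset_states())
# model.fit(trainX, trainY, epochs=100, batch_size=batch_size, verbose=2,
#           shuffle=False, callbacks=[reset_cb])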
In [33]:
# make predictions
## here, you have to specify the same batch_size used above
train_predict = model.predict(trainX, batch_size=batch_size)
model.reset_states()
test_predict = model.predict(testX, batch_size=batch_size)
train_RMSE = math.sqrt(mean_squared_error(trainY, train_predict[:,0]))
print('Train RMSE: %.7f' % train_RMSE)
test_RMSE = math.sqrt(mean_squared_error(testY, test_predict[:,0]))
print('Test RMSE: %.7f' % test_RMSE)
print()
print(train_predict[4:10])
print(origin_values.shape)
print(train_predict.shape)
print(test_predict.shape)
In [44]:
train_predict_plot = np.empty_like(origin_values)
train_predict_plot[:, :] = np.nan
train_predict_plot[look_back:len(train_predict)+look_back, :] = train_predict # align: prediction i corresponds to original index i+look_back
test_predict_plot = np.empty_like(origin_values)
test_predict_plot[:, :] = np.nan
test_predict_plot[train_size+look_back:len(origin_values), :] = test_predict # test predictions start look_back steps into the test split
plt.plot(origin_values)
plt.plot(train_predict_plot)
plt.plot(test_predict_plot)
plt.show()
In [45]:
# METHOD 5 - stacked LSTMs with memory between batches
## you simply stack multiple LSTM layers; the first must return full sequences
## (see the note after the model definition below)
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
In [57]:
np.random.seed(410)
series = pd.read_csv('international-airline-passengers.csv', header=0, parse_dates=[0], index_col=0, squeeze=True)
origin_values = series.values.astype('float32')
# normalize the values
scaler = MinMaxScaler(feature_range=(0,1))
origin_values = scaler.fit_transform(origin_values.reshape(-1,1)) # reshape(-1,1) here is important
train_size = int(len(origin_values)*0.67)
train, test = origin_values[0:train_size,:], origin_values[train_size:len(origin_values),:]
print(len(train), len(test))
print(train[4:10])
print(test[4:10])
In [58]:
# dataX holds the window of values from time t-look_back+1 through t,
## while the matching entry of dataY is the value at time t+1
def shift_time(dataset, look_back):
    dataX = []
    dataY = []
    for i in range(len(dataset)-look_back):
        dataX.append(dataset[i:(i+look_back), 0])
        dataY.append(dataset[i+look_back, 0])
    return np.array(dataX), np.array(dataY)
In [64]:
time_step = 1
look_back = 3
trainX, trainY = shift_time(train, look_back)
testX, testY = shift_time(test, look_back)
print(trainX[4:10])
print(trainY[4:10])
print(trainX.shape)
In [65]:
trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], time_step))
testX = np.reshape(testX, (testX.shape[0], testX.shape[1], time_step))
print(trainX[4:10])
print(testX[4:10])
In [66]:
batch_size = 1
model = Sequential()
model.add(LSTM(4, batch_input_shape=(batch_size, look_back, 1), stateful=True, return_sequences=True))
model.add(LSTM(4, batch_input_shape=(batch_size, look_back, 1), stateful=True))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
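## note: return_sequences=True makes the first LSTM emit its 4-unit output at every
## time step, giving the second LSTM the 3D input it expects; only the first layer
## actually needs batch_input_shape, later layers infer their input shape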
In [67]:
for i in range(100):
    model.fit(trainX, trainY, epochs=1, batch_size=batch_size, verbose=2, shuffle=False)
    model.reset_states()
In [68]:
# make predictions
## here, you have to specify the same batch_size used above
train_predict = model.predict(trainX, batch_size=batch_size)
model.reset_states()
test_predict = model.predict(testX, batch_size=batch_size)
train_RMSE = math.sqrt(mean_squared_error(trainY, train_predict[:,0]))
print('Train RMSE: %.7f' % train_RMSE)
test_RMSE = math.sqrt(mean_squared_error(testY, test_predict[:,0]))
print('Test RMSE: %.7f' % test_RMSE)
print()
print(train_predict[4:10])
print(origin_values.shape)
print(train_predict.shape)
print(test_predict.shape)
In [69]:
train_predict_plot = np.empty_like(origin_values)
train_predict_plot[:, :] = np.nan
train_predict_plot[look_back:len(train_predict)+look_back, :] = train_predict # align: prediction i corresponds to original index i+look_back
test_predict_plot = np.empty_like(origin_values)
test_predict_plot[:, :] = np.nan
test_predict_plot[train_size+look_back:len(origin_values), :] = test_predict # test predictions start look_back steps into the test split
plt.plot(origin_values)
plt.plot(train_predict_plot)
plt.plot(test_predict_plot)
plt.show()