In [56]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from timeit import default_timer as timer
# Monthly airline-passenger series (per the source filename); 144 rows total.
url = 'http://www.inf.utfsm.cl/~cvalle/international-airline-passengers.csv'
# usecols=[1]: keep only the passenger-count column; skipfooter=3 drops trailing footer rows.
dataframe = pd.read_csv(url, sep=',', usecols=[1], engine='python', skipfooter=3)
dataframe[:] = dataframe[:].astype('float32')
# First 96 months for training, the remainder for testing.
df_train, df_test = dataframe[0:96].values, dataframe[96:].values
# Fit the [0, 1] scaler on the training split only, to avoid test-set leakage.
scaler = MinMaxScaler(feature_range=(0, 1)).fit(df_train)
stream_train_scaled = scaler.transform(df_train)
stream_test_scaled = scaler.transform(df_test)
In [2]:
# Show the loaded series (rich DataFrame repr as cell output).
dataframe
Out[2]:
El dataset es generado de la siguiente forma: dados un vector de datos $X_{inicial} = \{x_1, \ldots, x_I\}$, se generan $L$ vectores $X_l$ de tamaño $I-L$ que corresponden a los inputs (configuran una matriz $X$ de $(I-L) \times L$) y un vector de targets $Y$ de largo $I-L$. Los inputs están relacionados con los targets de la forma $$[X = (X_1, \ldots, X_L),\; Y]$$
In [78]:
def create_dataset(dataset, lag=1):
    """Turn a (N, 1) time series into a supervised-learning dataset.

    Parameters
    ----------
    dataset : ndarray of shape (N, 1)
        Series values (here, the scaled passenger counts).
    lag : int, optional
        Number of previous timesteps used as input features.

    Returns
    -------
    dataX : float32 ndarray of shape (N - lag, lag)
        Row t holds the window x_t, ..., x_{t+lag-1}.
    dataY : ndarray of shape (N - lag, 1)
        Target x_{t+lag} for each window.
    """
    n_samples = dataset.shape[0] - lag
    dataX = np.zeros((n_samples, lag), dtype=np.float32)
    # Column `offset` is the series shifted by `offset` steps; note that
    # offset - lag is negative, so the slice drops the last lag-offset rows.
    for offset in range(lag):
        dataX[:, offset] = dataset[offset:offset - lag][:, 0]
    dataY = dataset[lag:]
    return dataX, dataY
In [84]:
# Build supervised datasets with a window of 3 previous timesteps.
lag = 3
trainX, TrainY = create_dataset(stream_train_scaled, lag)
testX, TestY = create_dataset(stream_test_scaled, lag)
In [86]:
# Reshape to the Keras RNN layout (samples, timesteps, features):
# here 1 timestep whose feature vector is the whole lag window.
TrainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
TestX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
In [52]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
In [72]:
# Single LSTM layer (4 blocks) + linear output, Adam on MSE.
# Legacy Keras 1.x kwargs: output_dim / input_dim / inner_activation / nb_epoch.
model = Sequential()
model.add(LSTM(output_dim=4, input_dim=lag, activation='tanh', inner_activation='sigmoid'))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
start = timer()
model.fit(TrainX, TrainY, nb_epoch=100, batch_size=1, verbose=2)
end = timer()
# Persist so later cells can reload without retraining.
model.save("LSTM_lag3.h5")
print "Elapsed training time: %s sec"%(end-start)
In [73]:
from keras.models import load_model
model = load_model("LSTM_lag3.h5")
# Predict on both splits and invert the MinMax scaling so errors are
# reported in original passenger units.
trainPredict = model.predict(TrainX)
trainPredict = scaler.inverse_transform(trainPredict)
trainY_noscale = scaler.inverse_transform(TrainY)
testPredict = model.predict(TestX)
testPredict = scaler.inverse_transform(testPredict)
testY_noscale = scaler.inverse_transform(TestY)
In [74]:
import math
from sklearn.metrics import mean_squared_error
# calculate root mean squared error
# RMSE is computed on the unscaled (original-units) targets/predictions.
trainScore = math.sqrt(mean_squared_error(trainY_noscale[:,0], trainPredict[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY_noscale[:,0], testPredict[:,0]))
print('Test Score: %.2f RMSE' % (testScore))
In [10]:
%matplotlib inline
import matplotlib.pyplot as plt
def plot(data_init, data_predicted, lag, title, train_or_test):
    """Plot original targets against predicted targets on the 144-month axis.

    `data_init` / `data_predicted` are length-144 arrays padded with NaN
    outside the region of interest. `train_or_test` selects the x-window:
    "train" shows months 0-96, "test" shows months 96-144. `lag` is kept
    for interface compatibility but is not used here.
    """
    month_axis = np.arange(144)
    plt.figure(figsize=(15, 5))
    if train_or_test == "train":
        tick_marks = np.arange(0, 102, 6)
        plt.xlim(0, 96)
    elif train_or_test == "test":
        tick_marks = np.arange(96, 150, 6)
        plt.xlim(96, 144)
    plt.title(title, fontsize=16)
    plt.plot(month_axis, data_init, "b-", lw=1.5, label="Targets")
    plt.plot(month_axis, data_predicted, "r-", lw=1.5, label=u"Predicción")
    plt.xlabel(r"$t$ (meses)", fontsize=16)
    plt.xticks(tick_marks)
    plt.ylabel(r"Personas", fontsize=16)
    plt.grid()
    plt.legend(loc='best')
    plt.show()
def plot_vs_series(series, train_predicted, test_predicted, title):
    """Overlay the original series with the train and test predictions
    across the full 144-month axis (predictions are NaN-padded)."""
    month_axis = np.arange(144)
    tick_marks = np.arange(0, 150, 6)
    plt.figure(figsize=(15, 5))
    plt.title(title, fontsize=16)
    plt.plot(month_axis, series[:, 0], 'g-', lw=1.0, label=u"Serie original")
    plt.plot(month_axis, train_predicted, 'b-', lw=1.5, label=u"Predicción train")
    plt.plot(month_axis, test_predicted, 'r-', lw=1.5, label=u"Predicción test")
    plt.xlabel(r"$t$ (meses)", fontsize=16)
    plt.xticks(tick_marks)
    plt.xlim(0, 144)
    plt.ylabel(r"Personas", fontsize=16)
    plt.grid()
    plt.legend(loc='best')
    plt.show()
In [11]:
# NOTE(review): this NaN-padding + plot cell is repeated almost verbatim
# several times below; a shared helper would remove the duplication.
# shift train predictions for plotting
# Predictions start `lag` months into the series, so pad the front with NaN.
trainPredictPlot = np.empty_like(dataframe.values)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[lag:len(trainPredict)+lag, :] = trainPredict
# shift train original targets for plotting
trainY_asoriginal = np.empty_like(dataframe.values)
trainY_asoriginal[:, :] = np.nan
trainY_asoriginal[lag:len(trainY_noscale)+lag, :] = trainY_noscale
plot(trainY_asoriginal, trainPredictPlot, lag, "Resultados conjunto de entrenamiento", "train")
# shift test predictions for plotting
# Test predictions start after the 96 training months plus the test lag:
# len(trainPredict) + 2*lag == (96 - lag) + 2*lag == 96 + lag.
testPredictPlot = np.empty_like(dataframe.values)
testPredictPlot[:, :] = np.nan
testPredictPlot[(len(trainPredict)+2*lag):, :] = testPredict
# shift test original targets for plotting
testY_asoriginal = np.empty_like(dataframe.values)
testY_asoriginal[:, :] = np.nan
testY_asoriginal[(len(trainY_noscale)+2*lag):, :] = testY_noscale
plot(testY_asoriginal, testPredictPlot, lag, "Resultados conjunto de pruebas", "test")
plot_vs_series(dataframe.values, trainPredictPlot, testPredictPlot, "Resultados sobre toda la serie original")
In [ ]:
nb = range(4,13,2)
k = 5
kf_CV = KFold(TrainY[:,0].shape[0], k, shuffle=True)
results = []
for n in nb:
print "Usando",n,"bloques LSTM"
losses = []
for i, (train, test) in enumerate(kf_CV):
print "Analizando fold", i+1, "/", k
model = None
model = Sequential()
model.add(LSTM(output_dim=n, input_dim=lag, activation='tanh', inner_activation='sigmoid'))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(TrainX[train], TrainY[train], nb_epoch=100, batch_size=1, verbose=0)
loss = model.evaluate(TrainX[test], TrainY[test])
losses.append(loss)
results.append(losses)
print losses
print "Resultados finales"
print results
In [49]:
# Display the saved cross-validation log. Using a context manager guarantees
# the file handle is closed even if read() raises (the original leaked the
# handle on error). print(...) with a single argument is Py2/Py3 compatible.
with open("pregunta1_h.txt") as f:
    print(f.read())
In [47]:
# Per-fold validation MSEs (copied from the CV run above); average each
# candidate block count over its 5 folds.
err_4b = np.mean([0.0058013456873595715, 0.0067546665668487549,
0.0039709298871457577, 0.0077248862944543362, 0.0068293004296720028])
err_6b = np.mean([0.0079037416726350784, 0.0063770841807126999, 0.0039345691911876202,
0.0053643947467207909, 0.0053532104939222336])
err_8b = np.mean([0.0060981051065027714, 0.0064719794318079948,
0.0044479849748313427, 0.0044790026731789112, 0.0056187780573964119])
err_10b = np.mean([0.0071236342191696167, 0.0046910210512578487,
0.0047823912464082241, 0.0059864339418709278, 0.0055963038466870785])
err_12b = np.mean([0.0073507814668118954, 0.0063614812679588795,
0.0038768104277551174, 0.0048401798121631145, 0.0056704352609813213])
print "Error con 4 bloques:",err_4b
print "Error con 6 bloques:",err_6b
print "Error con 8 bloques:",err_8b
print "Error con 10 bloques:",err_10b
print "Error con 12 bloques:",err_12b
Los resultados de k-fold Cross Validation muestran que obtenemos mejores resultados con 8 bloques LSTM.
In [8]:
import math
from sklearn.metrics import mean_squared_error
# Sweep the lag (input window size) with 8 LSTM blocks (the count the
# CV above selected) and report test loss plus train/test RMSE per lag.
lags = [1, 2, 3, 4]
for lag in lags:
trainX, TrainY = create_dataset(stream_train_scaled, lag)
testX, TestY = create_dataset(stream_test_scaled, lag)
TrainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
TestX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
model = Sequential()
model.add(LSTM(output_dim=8, input_dim=lag, activation='tanh', inner_activation='sigmoid'))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(TrainX, TrainY, nb_epoch=100, batch_size=1, verbose=0)
loss = model.evaluate(TestX, TestY, verbose=0)
print "Loss para lag",lag,":",loss
# Invert the scaling so RMSE is in original passenger units.
trainPredict = model.predict(TrainX)
trainPredict = scaler.inverse_transform(trainPredict)
trainY_noscale = scaler.inverse_transform(TrainY)
trainScore = math.sqrt(mean_squared_error(trainY_noscale[:,0], trainPredict[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testPredict = model.predict(TestX)
testPredict = scaler.inverse_transform(testPredict)
testY_noscale = scaler.inverse_transform(TestY)
testScore = math.sqrt(mean_squared_error(testY_noscale[:,0], testPredict[:,0]))
print('Test Score: %.2f RMSE' % (testScore))
# Free the model before the next lag iteration.
del model
Es posible ver que valores de lag bajos dan menor error en el conjunto de pruebas, en especial con lag = 1. Respecto al error cuadrático medio de los datos reconstruidos los menores valores en el conjunto de pruebas se encuentran con lag = 1. No parece haber una diferencia significativa entre utilizar lag = 2 y lag = 3
In [24]:
from keras.layers import GRU
from keras.layers import SimpleRNN
# Rebuild the lag-3 datasets, then train 10 independent GRU models
# (different random initializations) and save each run to disk.
lag = 3
trainX, TrainY = create_dataset(stream_train_scaled, lag)
testX, TestY = create_dataset(stream_test_scaled, lag)
TrainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
TestX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
for i in range(10):
model = Sequential()
model.add(GRU(output_dim=8, input_dim=lag, inner_init='orthogonal', activation='tanh'))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(TrainX, TrainY, nb_epoch=100, batch_size=1, verbose=0)
model.save("GRU_lag3_exec_"+str(i)+".h5")
In [25]:
# Same protocol for 10 vanilla SimpleRNN models.
for i in range(10):
model = Sequential()
model.add(SimpleRNN(output_dim=8, input_dim=lag, inner_init='orthogonal',activation='tanh'))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(TrainX, TrainY, nb_epoch=100, batch_size=1, verbose=0)
model.save("RNN_lag3_exec_"+str(i)+".h5")
In [27]:
# Same protocol for 10 LSTM models with 8 blocks (input_dim=3 equals lag).
for i in range(10):
model = Sequential()
model.add(LSTM(output_dim=8, input_dim=3, activation='tanh', inner_activation='sigmoid'))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(TrainX, TrainY, nb_epoch=100, batch_size=1, verbose=0)
model.save("LSTM_lag3_8block_exec_"+str(i)+".h5")
In [28]:
from keras.models import load_model
# For each architecture, reload the 10 saved runs and store unscaled
# predictions/targets per run, keyed by split.
# Load GRU results
gru_data = {}
gru_data['trainPredict'] = []
gru_data['trainY_noscale'] = []
gru_data['testPredict'] = []
gru_data['testY_noscale'] = []
for i in range(10):
model = load_model("GRU_lag3_exec_"+str(i)+".h5")
gru_data['trainPredict'].append(scaler.inverse_transform(model.predict(TrainX)))
gru_data['trainY_noscale'].append(scaler.inverse_transform(TrainY))
gru_data['testPredict'].append(scaler.inverse_transform(model.predict(TestX)))
gru_data['testY_noscale'].append(scaler.inverse_transform(TestY))
del model
# Load RNN results
rnn_data = {}
rnn_data['trainPredict'] = []
rnn_data['trainY_noscale'] = []
rnn_data['testPredict'] = []
rnn_data['testY_noscale'] = []
for i in range(10):
model = load_model("RNN_lag3_exec_"+str(i)+".h5")
rnn_data['trainPredict'].append(scaler.inverse_transform(model.predict(TrainX)))
rnn_data['trainY_noscale'].append(scaler.inverse_transform(TrainY))
rnn_data['testPredict'].append(scaler.inverse_transform(model.predict(TestX)))
rnn_data['testY_noscale'].append(scaler.inverse_transform(TestY))
del model
# Load results
# (LSTM, 8 blocks)
lstm_data = {}
lstm_data['trainPredict'] = []
lstm_data['trainY_noscale'] = []
lstm_data['testPredict'] = []
lstm_data['testY_noscale'] = []
for i in range(10):
model = load_model("LSTM_lag3_8block_exec_"+str(i)+".h5")
lstm_data['trainPredict'].append(scaler.inverse_transform(model.predict(TrainX)))
lstm_data['trainY_noscale'].append(scaler.inverse_transform(TrainY))
lstm_data['testPredict'].append(scaler.inverse_transform(model.predict(TestX)))
lstm_data['testY_noscale'].append(scaler.inverse_transform(TestY))
del model
In [40]:
import math
from sklearn.metrics import mean_squared_error

def _rmse(pred, target):
    """RMSE between one (n, 1) prediction array and its target array."""
    return math.sqrt(mean_squared_error(target[:, 0], pred[:, 0]))

# Average the RMSE of the 10 repetitions for each architecture.
gru_train_scores = [_rmse(p, t) for p, t in zip(gru_data['trainPredict'], gru_data['trainY_noscale'])]
gru_test_scores = [_rmse(p, t) for p, t in zip(gru_data['testPredict'], gru_data['testY_noscale'])]
trainScore = np.mean(gru_train_scores)
testScore = np.mean(gru_test_scores)
print('GRU Train Score: %.2f RMSE' % (trainScore))
print('GRU Test Score: %.2f RMSE' % (testScore))

rnn_train_scores = [_rmse(p, t) for p, t in zip(rnn_data['trainPredict'], rnn_data['trainY_noscale'])]
rnn_test_scores = [_rmse(p, t) for p, t in zip(rnn_data['testPredict'], rnn_data['testY_noscale'])]
trainScore = np.mean(rnn_train_scores)
testScore = np.mean(rnn_test_scores)
print('RNN Train Score: %.2f RMSE' % (trainScore))
print('RNN Test Score: %.2f RMSE' % (testScore))

lstm_train_scores = [_rmse(p, t) for p, t in zip(lstm_data['trainPredict'], lstm_data['trainY_noscale'])]
lstm_test_scores = [_rmse(p, t) for p, t in zip(lstm_data['testPredict'], lstm_data['testY_noscale'])]
trainScore = np.mean(lstm_train_scores)
testScore = np.mean(lstm_test_scores)
print('LSTM Train Score: %.2f RMSE' % (trainScore))
print('LSTM Test Score: %.2f RMSE' % (testScore))
Es posible apreciar que no hay mucha diferencia entre los resultados de las máquinas. Computacionalmente es más rápido entrenar GRU y RNN que una LSTM. Respecto de los experimentos, la GRU en promedio posee mejor RMSE que las otras dos redes.
La idea del uso de timesteps es interpretar los datos de manera distinta. En la versión original, formateamos el dataset de forma tal que asumiéramos un timestep por cada ejemplo. Puede ser que en la realidad exista más de un timestep por ejemplo. Modificamos el código para reflejar este cambio.
In [172]:
# Timesteps variant: reshape to (samples, lag timesteps, 1 feature) so the
# LSTM unrolls over the lag window instead of seeing it as one feature vector.
TrainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
model = Sequential()
model.add(LSTM(8, input_dim=1, activation='tanh', inner_activation='sigmoid'))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
start = timer()
model.fit(TrainX, TrainY, nb_epoch=100, batch_size=1, verbose=2)
end = timer()
model.save("LSTM_lag3_timestep3.h5")
print "Elapsed training time: %s"%(end - start)
In [173]:
model = load_model("LSTM_lag3_timestep3.h5")
# Test inputs get the same (samples, timesteps, 1) layout before predicting;
# predictions/targets are mapped back to the original scale.
TestX = np.reshape(testX, (testX.shape[0], testX.shape[1], 1))
trainPredict = model.predict(TrainX)
trainPredict = scaler.inverse_transform(trainPredict)
trainY_noscale = scaler.inverse_transform(TrainY)
testPredict = model.predict(TestX)
testPredict = scaler.inverse_transform(testPredict)
testY_noscale = scaler.inverse_transform(TestY)
In [174]:
# calculate root mean squared error
# (on the unscaled values, as in the earlier RMSE cell)
trainScore = math.sqrt(mean_squared_error(trainY_noscale[:,0], trainPredict[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY_noscale[:,0], testPredict[:,0]))
print('Test Score: %.2f RMSE' % (testScore))
In [175]:
# Same NaN-padding/plot procedure as before, for the timesteps model.
# shift train predictions for plotting
trainPredictPlot = np.empty_like(dataframe.values)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[lag:len(trainPredict)+lag, :] = trainPredict
# shift train original targets for plotting
trainY_asoriginal = np.empty_like(dataframe.values)
trainY_asoriginal[:, :] = np.nan
trainY_asoriginal[lag:len(trainY_noscale)+lag, :] = trainY_noscale
plot(trainY_asoriginal, trainPredictPlot, lag, "Resultados conjunto de entrenamiento", "train")
# shift test predictions for plotting
testPredictPlot = np.empty_like(dataframe.values)
testPredictPlot[:, :] = np.nan
testPredictPlot[(len(trainPredict)+2*lag):, :] = testPredict
# shift test original targets for plotting
testY_asoriginal = np.empty_like(dataframe.values)
testY_asoriginal[:, :] = np.nan
testY_asoriginal[(len(trainY_noscale)+2*lag):, :] = testY_noscale
plot(testY_asoriginal, testPredictPlot, lag, "Resultados conjunto de pruebas", "test")
plot_vs_series(dataframe.values, trainPredictPlot, testPredictPlot, "Resultados sobre toda la serie original")
Los resultados son similares a la primera red encontrada, mejorando un poco el RMSE.
Respecto a los tiempos de ejecución, estos son comparables, aproximadamente de 30 segundos por red.
In [176]:
# (samples, lag timesteps, 1 feature) layout for the stateful experiments below.
TrainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
TestX = np.reshape(testX, (testX.shape[0], testX.shape[1], 1))
In [177]:
# Stateful LSTM (memory kept between batches). Stateful layers need a fixed
# batch_input_shape; training loops one epoch at a time with shuffle=False
# and resets the cell state only between epochs.
lag = 3
batch_size = 1
model = Sequential()
model.add(LSTM(8, batch_input_shape=(batch_size, lag, 1), stateful=True))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
for i in range(100):
model.fit(TrainX, TrainY, nb_epoch=1, batch_size=batch_size, verbose=2, shuffle=False)
model.reset_states()
model.save("LSTM_lag3_batch1.h5")
In [178]:
model = load_model("LSTM_lag3_batch1.h5")
# Stateful models must predict with the same batch_size they were built with.
trainPredict = model.predict(TrainX, batch_size=batch_size)
trainPredict = scaler.inverse_transform(trainPredict)
trainY_noscale = scaler.inverse_transform(TrainY)
testPredict = model.predict(TestX, batch_size=batch_size)
testPredict = scaler.inverse_transform(testPredict)
testY_noscale = scaler.inverse_transform(TestY)
In [179]:
# calculate root mean squared error
# (unscaled values, same procedure as earlier RMSE cells)
trainScore = math.sqrt(mean_squared_error(trainY_noscale[:,0], trainPredict[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY_noscale[:,0], testPredict[:,0]))
print('Test Score: %.2f RMSE' % (testScore))
In [180]:
# Same NaN-padding/plot procedure, stateful batch-size-1 model.
# shift train predictions for plotting
trainPredictPlot = np.empty_like(dataframe.values)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[lag:len(trainPredict)+lag, :] = trainPredict
# shift train original targets for plotting
trainY_asoriginal = np.empty_like(dataframe.values)
trainY_asoriginal[:, :] = np.nan
trainY_asoriginal[lag:len(trainY_noscale)+lag, :] = trainY_noscale
plot(trainY_asoriginal, trainPredictPlot, lag, "Resultados conjunto de entrenamiento LSTM memoria entre batches (1)", "train")
# shift test predictions for plotting
testPredictPlot = np.empty_like(dataframe.values)
testPredictPlot[:, :] = np.nan
testPredictPlot[(len(trainPredict)+2*lag):, :] = testPredict
# shift test original targets for plotting
testY_asoriginal = np.empty_like(dataframe.values)
testY_asoriginal[:, :] = np.nan
testY_asoriginal[(len(trainY_noscale)+2*lag):, :] = testY_noscale
plot(testY_asoriginal, testPredictPlot, lag, "Resultados conjunto de pruebas LSTM memoria entre batches (1)", "test")
plot_vs_series(dataframe.values, trainPredictPlot, testPredictPlot, "Resultados sobre toda la serie original LSTM memoria entre batches (1)")
Se aprecian mejores resultados. El RMSE se encuentra en el mismo orden que las LSTM básicas.
In [181]:
# Same stateful setup with batch_size=3. NOTE(review): stateful Keras models
# require the sample count to be divisible by batch_size — 93 train and 45
# test samples are both divisible by 3, so this holds here.
lag = 3
batch_size = 3
model = Sequential()
model.add(LSTM(8, batch_input_shape=(batch_size, lag, 1), stateful=True))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
for i in range(100):
model.fit(TrainX, TrainY, nb_epoch=1, batch_size=batch_size, verbose=2, shuffle=False)
model.reset_states()
model.save("LSTM_lag3_batch3.h5")
In [182]:
model = load_model("LSTM_lag3_batch3.h5")
# Predict with the fixed batch_size required by the stateful layer.
trainPredict = model.predict(TrainX, batch_size=batch_size)
trainPredict = scaler.inverse_transform(trainPredict)
trainY_noscale = scaler.inverse_transform(TrainY)
testPredict = model.predict(TestX, batch_size=batch_size)
testPredict = scaler.inverse_transform(testPredict)
testY_noscale = scaler.inverse_transform(TestY)
In [183]:
# calculate root mean squared error
# (unscaled values, same procedure as earlier RMSE cells)
trainScore = math.sqrt(mean_squared_error(trainY_noscale[:,0], trainPredict[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY_noscale[:,0], testPredict[:,0]))
print('Test Score: %.2f RMSE' % (testScore))
In [184]:
# Same NaN-padding/plot procedure, stateful batch-size-3 model.
# shift train predictions for plotting
trainPredictPlot = np.empty_like(dataframe.values)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[lag:len(trainPredict)+lag, :] = trainPredict
# shift train original targets for plotting
trainY_asoriginal = np.empty_like(dataframe.values)
trainY_asoriginal[:, :] = np.nan
trainY_asoriginal[lag:len(trainY_noscale)+lag, :] = trainY_noscale
plot(trainY_asoriginal, trainPredictPlot, lag, "Resultados conjunto de entrenamiento LSTM memoria entre batches (3)", "train")
# shift test predictions for plotting
testPredictPlot = np.empty_like(dataframe.values)
testPredictPlot[:, :] = np.nan
testPredictPlot[(len(trainPredict)+2*lag):, :] = testPredict
# shift test original targets for plotting
testY_asoriginal = np.empty_like(dataframe.values)
testY_asoriginal[:, :] = np.nan
testY_asoriginal[(len(trainY_noscale)+2*lag):, :] = testY_noscale
plot(testY_asoriginal, testPredictPlot, lag, "Resultados conjunto de pruebas LSTM memoria entre batches (3)", "test")
plot_vs_series(dataframe.values, trainPredictPlot, testPredictPlot, "Resultados sobre toda la serie original LSTM memoria entre batches (3)")
El error RMSE aumenta bajo esta nueva red. La memoria entre batches de tamaño 3 induce una suavización de la función predictiva.
In [185]:
# Stacked stateful LSTMs: the first layer returns the full sequence so the
# second LSTM can consume it; both keep state between batches.
lag = 3
batch_size = 1
model = Sequential()
model.add(LSTM(8, batch_input_shape=(batch_size, lag, 1), stateful=True, return_sequences=True))
model.add(LSTM(8, batch_input_shape=(batch_size, lag, 1), stateful=True))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
for i in range(100):
model.fit(TrainX, TrainY, nb_epoch=1, batch_size=batch_size, verbose=2, shuffle=False)
model.reset_states()
model.save("LSTM_lag3_stacked_batch_size1.h5")
In [186]:
model = load_model("LSTM_lag3_stacked_batch_size1.h5")
# Predict with the fixed batch_size required by the stateful layers.
trainPredict = model.predict(TrainX, batch_size=batch_size)
trainPredict = scaler.inverse_transform(trainPredict)
trainY_noscale = scaler.inverse_transform(TrainY)
testPredict = model.predict(TestX, batch_size=batch_size)
testPredict = scaler.inverse_transform(testPredict)
testY_noscale = scaler.inverse_transform(TestY)
In [187]:
# calculate root mean squared error
# (unscaled values, same procedure as earlier RMSE cells)
trainScore = math.sqrt(mean_squared_error(trainY_noscale[:,0], trainPredict[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY_noscale[:,0], testPredict[:,0]))
print('Test Score: %.2f RMSE' % (testScore))
In [188]:
# Same NaN-padding/plot procedure, stacked stateful model.
# shift train predictions for plotting
trainPredictPlot = np.empty_like(dataframe.values)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[lag:len(trainPredict)+lag, :] = trainPredict
# shift train original targets for plotting
trainY_asoriginal = np.empty_like(dataframe.values)
trainY_asoriginal[:, :] = np.nan
trainY_asoriginal[lag:len(trainY_noscale)+lag, :] = trainY_noscale
plot(trainY_asoriginal, trainPredictPlot, lag, "Resultados conjunto de entrenamiento LSTM apilada con memoria entre batches (1)", "train")
# shift test predictions for plotting
testPredictPlot = np.empty_like(dataframe.values)
testPredictPlot[:, :] = np.nan
testPredictPlot[(len(trainPredict)+2*lag):, :] = testPredict
# shift test original targets for plotting
testY_asoriginal = np.empty_like(dataframe.values)
testY_asoriginal[:, :] = np.nan
testY_asoriginal[(len(trainY_noscale)+2*lag):, :] = testY_noscale
plot(testY_asoriginal, testPredictPlot, lag, "Resultados conjunto de pruebas LSTM apilada con memoria entre batches (1)", "test")
plot_vs_series(dataframe.values, trainPredictPlot, testPredictPlot, "Resultados sobre toda la serie original LSTM apilada con memoria entre batches (1)")
En este caso la red apilada tiene tendencia a overfitting, lo que se aprecia con la gran disminución en el error RMSE de entrenamiento.
In [ ]: