In [76]:
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
In [33]:
def normalize_windows(win_data):
    """Rescale every window relative to its own first value.

    Each value ``p`` in a window becomes ``p / window[0] - 1``, i.e. the
    fractional change from the window's first element (which therefore
    maps to 0.0).

    Args:
        win_data: Iterable of indexable, sized windows whose elements are
            accepted by ``float()``.

    Returns:
        list: One list of floats per input window, in the same order.

    Raises:
        ZeroDivisionError: If a non-empty window starts with 0.
    """
    normalized_data = []
    for window in win_data:
        if len(window) == 0:
            # Nothing to scale; keep the slot so outputs line up with inputs.
            normalized_data.append([])
            continue
        # The base is identical for every element, so convert it only once.
        base = float(window[0])
        normalized_data.append([(float(p) / base) - 1 for p in window])
    return normalized_data
In [137]:
def load_data(filename, seq_len, normalize_window, seed=None):
    """Read a one-value-per-line file and build sliding-window train/test sets.

    The series is cut into overlapping windows of ``seq_len + 1`` values.
    The first ``seq_len`` values of each window are the model input and the
    last value is the target. The first 90% of the windows (rounded) form
    the training set, which is shuffled; the rest form the test set and
    keep their original order.

    Args:
        filename: Path of the file to read; parsed with ``pd.read_csv``
            without a header row.
        seq_len: Number of input time steps per sample.
        normalize_window: If truthy, every window is passed through
            ``normalize_windows`` before splitting.
        seed: Optional seed for the training-set shuffle. ``None`` (the
            default) shuffles with NumPy's global random state.

    Returns:
        list: ``[x_train, y_train, x_test, y_test]`` where the ``x`` arrays
        have shape ``(samples, seq_len, 1)`` and the ``y`` arrays have
        shape ``(samples,)``.

    Raises:
        ValueError: If the file holds fewer than ``seq_len + 1`` values, so
            not even one window can be built.
    """
    # The default parser already yields one value per row for a
    # single-column file; passing a newline as `sep` is rejected by
    # current pandas versions.
    data = pd.read_csv(filename, index_col=False, header=None).values.flatten()

    sequence_length = seq_len + 1
    # +1 so that the window ending on the very last data point is included.
    n_windows = len(data) - sequence_length + 1
    if n_windows < 1:
        raise ValueError(
            "need at least %d values to build one window, got %d"
            % (sequence_length, len(data)))

    result = [data[index: index + sequence_length] for index in range(n_windows)]
    if normalize_window:
        result = normalize_windows(result)
    result = np.array(result)

    row = int(round(0.9 * result.shape[0]))
    train = result[:row, :]
    # Shuffles rows in place; `train` is a view, so only the training part
    # of `result` is reordered and the test rows stay chronological.
    if seed is None:
        np.random.shuffle(train)
    else:
        np.random.RandomState(seed).shuffle(train)

    x_train = train[:, :-1]
    y_train = train[:, -1]
    x_test = result[row:, :-1]
    y_test = result[row:, -1]

    # Add a trailing feature axis: (samples, time steps, 1).
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
    return [x_train, y_train, x_test, y_test]
In [138]:
# Load the S&P 500 series as normalized 50-step windows, split into train/test.
x_train, y_train, x_test, y_test = load_data(
    filename='sp500.csv',
    seq_len=50,
    normalize_window=True,
)
In [139]:
# Build our model: three stacked 50-unit LSTM layers that pass their full
# output sequence on to the next layer, each followed by 20% dropout.
model = Sequential()
for layer_input_dim in (1, 50, 50):
    model.add(LSTM(
        input_dim=layer_input_dim,
        output_dim=50,
        return_sequences=True))
    model.add(Dropout(0.2))

# Last recurrent layer: 100 units, return_sequences=False.
model.add(LSTM(100, return_sequences=False))
model.add(Dropout(0.2))

# Aggregate to 1 single value
model.add(Dense(output_dim=1))
model.add(Activation('linear'))
In [140]:
# Wall-clock timestamp (seconds since the epoch) taken when this cell runs.
start = time.time()
In [141]:
# Restart the timer right before compiling so the figure printed below
# covers only compile(), not the idle time between running the previous
# cell and this one.
start = time.time()
model.compile(loss='mse', optimizer='rmsprop')
print('compile time: ', time.time() - start)
In [142]:
# Train our model: one pass over the training windows in batches of 512,
# with 5% of the training samples held out for validation.
fit_options = dict(batch_size=512, nb_epoch=1, validation_split=0.05)
model.fit(x_train, y_train, **fit_options)
Out[142]:
In [143]:
#Pulled from Siraj's project
def predict_sequence_full(model, data, window_size):
    """Roll a single forecast forward from the first window of ``data``.

    Starting from ``data[0]``, the model predicts one value; the oldest
    step of the window is dropped and the prediction is inserted at index
    ``window_size - 1``, and the new window is fed back in. This repeats
    ``len(data)`` times, so after the first step the model is predicting
    from its own earlier outputs.

    Args:
        model: Object with a ``predict(batch)`` method whose result can be
            indexed as ``[0, 0]``.
        data: Sequence of windows; each window is a 2-D array (indexed as
            ``[:, :]`` after a batch axis is prepended).
        window_size: Length of each window; the new prediction is inserted
            at index ``window_size - 1`` of the shortened window.

    Returns:
        list: ``len(data)`` predicted values, in order. Empty if ``data``
        is empty.
    """
    if len(data) == 0:
        # No seed window to start from, and zero steps were requested.
        return []

    curr_frame = data[0]
    predicted = []
    for _ in range(len(data)):
        next_value = model.predict(curr_frame[np.newaxis, :, :])[0, 0]
        predicted.append(next_value)
        # Shift the window by one: drop the oldest step, add the prediction.
        curr_frame = np.insert(curr_frame[1:], [window_size - 1], next_value, axis=0)
    return predicted
def predict_sequences_multiple(model, data, window_size, prediction_len):
    """Produce several independent rolling forecasts across ``data``.

    ``data`` is covered in ``int(len(data) / prediction_len)`` runs. Run
    ``k`` starts from the window ``data[k * prediction_len]`` and predicts
    ``prediction_len`` steps, each time dropping the oldest step of the
    window and inserting the newest prediction at index ``window_size - 1``.

    Args:
        model: Object with a ``predict(batch)`` method whose result can be
            indexed as ``[0, 0]``.
        data: Sequence of 2-D windows.
        window_size: Length of each window.
        prediction_len: Number of steps predicted per run, and the stride
            between the starting windows of consecutive runs.

    Returns:
        list: One list of ``prediction_len`` predicted values per run.
    """
    run_count = int(len(data) / prediction_len)
    all_runs = []
    for run in range(run_count):
        # Every run restarts from a real window rather than earlier predictions.
        frame = data[run * prediction_len]
        run_predictions = []
        for _ in range(prediction_len):
            step = model.predict(frame[np.newaxis, :, :])[0, 0]
            run_predictions.append(step)
            frame = np.insert(frame[1:], [window_size - 1], step, axis=0)
        all_runs.append(run_predictions)
    return all_runs
def plot_results_multiple(predicted_data, true_data, prediction_len):
    """Plot the true series with each predicted run drawn at its start offset.

    Args:
        predicted_data: Iterable of prediction sequences; sequence ``i`` is
            drawn starting at x position ``i * prediction_len``.
        true_data: The reference series, plotted from x = 0.
        prediction_len: Spacing, in samples, between the starts of
            consecutive prediction sequences.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    fig = plt.figure(facecolor='white', figsize=(14, 10))
    ax = fig.add_subplot(111)
    ax.plot(true_data, label='True Data')
    # Pad the list of predictions to shift it in the graph to its correct start
    for i, data in enumerate(predicted_data):
        padding = [None] * (i * prediction_len)
        # list() lets array-like sequences be concatenated with the padding.
        ax.plot(padding + list(data), label='Prediction')
    # Build the legend once, after all lines exist; this also labels the
    # true series when there are no predictions to draw.
    ax.legend()
    plt.show()
In [144]:
# Forecast the test set in 50-step runs and overlay them on the true targets.
predictions = predict_sequences_multiple(
    model, x_test, window_size=50, prediction_len=50)
plot_results_multiple(predictions, y_test, prediction_len=50)
In [ ]: