In [76]:
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [33]:
def normalize_windows(win_data):
    normalized_data = []
    for window in win_data:
        normalized_window = [((float(p) / float(window[0])) - 1) for p in window]
        normalized_data.append(normalized_window)
    return normalized_data

In [137]:
def load_data(filename, seq_len, normalize_window):
    data = pd.read_csv(filename, sep='\n', index_col=False, header=None).values.flatten()
    
    sequence_length = seq_len + 1
    result = []
    for index in range(len(data) - sequence_length):
        result.append(data[index: index + sequence_length])
    
    if normalize_window:
        result = normalize_windows(result)
    
    result = np.array(result)
    
    row = round(0.9 * result.shape[0])
    train = result[:int(row), :]
    np.random.shuffle(train)
    x_train = train[:, :-1]
    y_train = train[:, -1]
    x_test = result[int(row):, :-1]
    y_test = result[int(row):, -1]
    
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
    return [x_train, y_train, x_test, y_test]

In [138]:
#load in our data
x_train, y_train, x_test, y_test = load_data('sp500.csv', 50, True)

In [139]:
#Buidl our model
model = Sequential()
model.add(LSTM(
    input_dim=1,
    output_dim=50,
    return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(
    input_dim=50,
    output_dim=50,
    return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(
    input_dim=50,
    output_dim=50,
    return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(
    100,
    return_sequences=False))
model.add(Dropout(0.2))

#Aggregate to 1 single value
model.add(Dense(
    output_dim=1))
model.add(Activation('linear'))

In [140]:
start = time.time()

In [141]:
model.compile(loss='mse', optimizer='rmsprop')
print ('compile time: ', time.time() - start)


compile time:  2.749022960662842

In [142]:
#train our model
model.fit(x_train, y_train, batch_size=512, nb_epoch=1, validation_split=0.05)


Train on 3522 samples, validate on 186 samples
Epoch 1/1
3522/3522 [==============================] - 15s - loss: 0.0409 - val_loss: 0.0030
Out[142]:
<keras.callbacks.History at 0x11c187710>

In [143]:
#Pulled from Siraj's project
def predict_sequence_full(model, data, window_size):
    #Shift the window by 1 new prediction each time, re-run predictions on new window
    curr_frame = data[0]
    predicted = []
    for i in range(len(data)):
        predicted.append(model.predict(curr_frame[np.newaxis,:,:])[0,0])
        curr_frame = curr_frame[1:]
        curr_frame = np.insert(curr_frame, [window_size-1], predicted[-1], axis=0)
    return predicted

def predict_sequences_multiple(model, data, window_size, prediction_len):
    #Predict sequence of 50 steps before shifting prediction run forward by 50 steps
    prediction_seqs = []
    for i in range(int(len(data)/prediction_len)):
        curr_frame = data[i*prediction_len]
        predicted = []
        for j in range(prediction_len):
            predicted.append(model.predict(curr_frame[np.newaxis,:,:])[0,0])
            curr_frame = curr_frame[1:]
            curr_frame = np.insert(curr_frame, [window_size-1], predicted[-1], axis=0)
        prediction_seqs.append(predicted)
    return prediction_seqs

def plot_results_multiple(predicted_data, true_data, prediction_len):
    fig = plt.figure(facecolor='white', figsize=(14, 10))
    ax = fig.add_subplot(111)
    ax.plot(true_data, label='True Data')
    #Pad the list of predictions to shift it in the graph to it's correct start
    for i, data in enumerate(predicted_data):
        padding = [None for p in range(i * prediction_len)]
        plt.plot(padding + data, label='Prediction')
        plt.legend()
    plt.show()

In [144]:
predictions = predict_sequences_multiple(model, x_test, 50, 50)
plot_results_multiple(predictions, y_test, 50)



In [ ]: