In [1]:
dpath = 'data/basic/'
#path_to_dataset = dpath + 'household_power_consumption.txt'

In [2]:
!mkdir -p $dpath
!wget -P $dpath https://raw.githubusercontent.com/jaungiers/LSTM-Neural-Network-for-Time-Series-Prediction/master/sinwave.csv
!wget -P $dpath https://raw.githubusercontent.com/jaungiers/LSTM-Neural-Network-for-Time-Series-Prediction/master/sp500.csv


wget: /home/tw/anaconda3/lib/libcrypto.so.1.0.0: no version information available (required by wget)
wget: /home/tw/anaconda3/lib/libssl.so.1.0.0: no version information available (required by wget)
wget: /home/tw/anaconda3/lib/libssl.so.1.0.0: no version information available (required by wget)
--2017-02-04 06:49:31--  https://raw.githubusercontent.com/jaungiers/LSTM-Neural-Network-for-Time-Series-Prediction/master/sinwave.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.112.133
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.112.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 61711 (60K) [text/plain]
Saving to: ‘data/basic/sinwave.csv.1’

sinwave.csv.1       100%[===================>]  60,26K  --.-KB/s    in 0,07s   

2017-02-04 06:49:32 (849 KB/s) - ‘data/basic/sinwave.csv.1’ saved [61711/61711]

wget: /home/tw/anaconda3/lib/libcrypto.so.1.0.0: no version information available (required by wget)
wget: /home/tw/anaconda3/lib/libssl.so.1.0.0: no version information available (required by wget)
wget: /home/tw/anaconda3/lib/libssl.so.1.0.0: no version information available (required by wget)
--2017-02-04 06:49:32--  https://raw.githubusercontent.com/jaungiers/LSTM-Neural-Network-for-Time-Series-Prediction/master/sp500.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.112.133
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.112.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 48119 (47K) [text/plain]
Saving to: ‘data/basic/sp500.csv.1’

sp500.csv.1         100%[===================>]  46,99K  --.-KB/s    in 0,06s   

2017-02-04 06:49:32 (743 KB/s) - ‘data/basic/sp500.csv.1’ saved [48119/48119]


In [3]:
import matplotlib.pyplot as plt
%matplotlib inline

import time
import warnings
import numpy as np
from numpy import newaxis
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential

warnings.filterwarnings("ignore")


Using TensorFlow backend.

In [83]:
import pandas as pd

def load_ts(filename):
    df = pd.read_csv(filename, header=None)
    data = df[0].tolist()
    return data

#filename = 'sinwave.csv'
filename = 'sp500.csv'
print('> Loading data...: ', dpath+filename)
#X_train, y_train, X_test, y_test = load_data(dpath+'sp500.csv', seq_len, True)
ts = load_ts(dpath + filename)
ts[:10]


> Loading data...:  data/basic/sp500.csv
Out[83]:
[1455.219971,
 1399.420044,
 1402.109985,
 1403.449951,
 1441.469971,
 1457.599976,
 1438.560059,
 1432.25,
 1449.680054,
 1465.150024]

In [84]:
plt.plot(ts)


Out[84]:
[<matplotlib.lines.Line2D at 0x7f3557eda828>]

In [85]:
seq_len = 50 

def load_data(ts, seq_len, normalise_window):

    sequence_length = seq_len + 1
    result = []
    # create sliding windows of length seq_len + 1
    for index in range(len(ts) - sequence_length):
        result.append(ts[index: index + sequence_length])
    
    if normalise_window:
        result = normalise_windows(result)

    result = np.array(result)

    print("Data shape: ", result.shape)
    print(result[:4, :])
    row = round(0.9 * result.shape[0])
    train = result[:row, :]
    test = result[row:, :]
    print("Test shape: ", test.shape)
    #np.random.shuffle(train)
    x_train = train[:, :-1]
    y_train = train[:, -1]
    x_test = test[:, :-1]
    print("xtest shape: ", x_test.shape)
    y_test = test[:, -1]

    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))  

    return [x_train, y_train, x_test, y_test]

def normalise_windows(window_data):
    normalised_data = []
    for window in window_data:
        normalised_window = [((float(p) / float(window[0])) - 1) for p in window]
        normalised_data.append(normalised_window)
    return normalised_data

print('> Loading data...')

#X_train, y_train, X_test, y_test = load_data(ts, seq_len, False)
X_train, y_train, X_test, y_test = load_data(ts, seq_len, True)
X_train.shape, y_train.shape, X_test.shape, y_test.shape
X_train[0,:seq_len, 0]
X_train[1,:seq_len, 0]
y_train[:5]


> Loading data...
Data shape:  (4120, 51)
[[  0.00000000e+00  -3.83446682e-02  -3.64961910e-02  -3.55753914e-02
   -9.44874333e-03   1.63549501e-03  -1.14483805e-02  -1.57845353e-02
   -3.80692755e-03   6.82374706e-03  -5.49442707e-05   4.67319727e-04
   -6.63131705e-03  -9.52432366e-03  -3.68947259e-02  -3.10536846e-02
   -3.51355850e-02  -3.89356339e-02  -6.53234143e-02  -4.17531447e-02
   -3.15690706e-02  -3.16790430e-02  -2.07872353e-02  -2.11995276e-02
   -2.12888647e-02  -9.27694800e-03  -2.98992667e-02  -2.63809017e-02
   -4.67970323e-02  -4.48592181e-02  -3.65373779e-02  -4.64190489e-02
   -4.60136353e-02  -7.49921023e-02  -7.08139862e-02  -6.49592721e-02
   -6.99481309e-02  -8.37399077e-02  -7.36451699e-02  -6.10216522e-02
   -5.22464174e-02  -5.04803139e-02  -3.16446502e-02  -4.39383346e-02
   -6.84432443e-02  -6.08293054e-02  -3.67848374e-02  -4.13339744e-02
   -4.92021670e-02  -6.60174743e-02  -4.33473683e-02]
 [  0.00000000e+00   1.92218270e-03   2.87969793e-03   3.00481097e-02
    4.15743166e-02   2.79687397e-02   2.34596868e-02   3.59148850e-02
    4.69694430e-02   3.98164734e-02   4.03595620e-02   3.29778769e-02
    2.99695157e-02   1.50775674e-03   7.58170147e-03   3.33704095e-03
   -6.14529572e-04  -2.80544860e-02  -3.54438471e-03   7.04576517e-03
    6.93140779e-03   1.82575111e-02   1.78287792e-02   1.77358800e-02
    3.02267551e-02   8.78215019e-03   1.24408051e-02  -8.78939033e-03
   -6.77430843e-03   1.87935353e-03  -8.39633536e-03  -7.97475643e-03
   -3.81086995e-02  -3.37639869e-02  -2.76758241e-02  -3.28636067e-02
   -4.72053114e-02  -3.67080600e-02  -2.35811972e-02  -1.44560621e-02
   -1.26195377e-02   6.96717190e-03  -5.81670602e-03  -3.12987149e-02
   -2.33811808e-02   1.62202693e-03  -3.10850057e-03  -1.12904264e-02
   -2.87762207e-02  -5.20217574e-03   4.21959992e-02]
 [  0.00000000e+00   9.55678238e-04   2.80719675e-02   3.95760615e-02
    2.59965869e-02   2.14961846e-02   3.39274875e-02   4.49608374e-02
    3.78215907e-02   3.83636374e-02   3.09961140e-02   2.79935243e-02
   -4.13630889e-04   5.64866101e-03   1.41214386e-03  -2.53184560e-03
   -2.99191586e-02  -5.45607982e-03   5.11375290e-03   4.99961492e-03
    1.63039892e-02   1.58760798e-02   1.57833588e-02   2.82502703e-02
    6.84680667e-03   1.04984425e-02  -1.06910229e-02  -8.67980696e-03
   -4.27470032e-05  -1.02987220e-02  -9.87795191e-03  -3.99540832e-02
   -3.56177058e-02  -2.95412232e-02  -3.47190531e-02  -4.90332433e-02
   -3.85561308e-02  -2.54544518e-02  -1.63468232e-02  -1.45138222e-02
    5.03531041e-03  -7.72404171e-03  -3.31571635e-02  -2.52548191e-02
   -2.99579922e-04  -5.02103193e-03  -1.31872608e-02  -3.06395086e-02
   -7.11069039e-03   4.01965513e-02   4.44758162e-02]
 [  0.00000000e+00   2.70903996e-02   3.85835098e-02   2.50170004e-02
    2.05208949e-02   3.29403289e-02   4.39631445e-02   3.68307142e-02
    3.73722433e-02   3.00117542e-02   2.70120313e-02  -1.36800176e-03
    4.68850207e-03   4.56029800e-04  -3.48419407e-03  -3.08453586e-02
   -6.40563633e-03   4.15410467e-03   4.04007567e-03   1.53336569e-02
    1.49061561e-02   1.48135236e-02   2.72685321e-02   5.88550379e-03
    9.53365312e-03  -1.16355813e-02  -9.62628556e-03  -9.97471979e-04
   -1.12436550e-02  -1.08232866e-02  -4.08707022e-02  -3.65384651e-02
   -3.04677840e-02  -3.56406703e-02  -4.99411938e-02  -3.94740845e-02
   -2.63849145e-02  -1.72859816e-02  -1.54547307e-02   4.07573708e-03
   -8.67143284e-03  -3.40802720e-02  -2.61854724e-02  -1.25405968e-03
   -5.97100381e-03  -1.41294358e-02  -3.15650209e-02  -8.05866714e-03
    3.92034073e-02   4.34785864e-02   3.78923765e-02]]
Test shape:  (412, 51)
xtest shape:  (412, 50)
Out[85]:
((3708, 50, 1), (3708,), (412, 50, 1), (412,))
Out[85]:
array([  0.00000000e+00,  -3.83446682e-02,  -3.64961910e-02,
        -3.55753914e-02,  -9.44874333e-03,   1.63549501e-03,
        -1.14483805e-02,  -1.57845353e-02,  -3.80692755e-03,
         6.82374706e-03,  -5.49442707e-05,   4.67319727e-04,
        -6.63131705e-03,  -9.52432366e-03,  -3.68947259e-02,
        -3.10536846e-02,  -3.51355850e-02,  -3.89356339e-02,
        -6.53234143e-02,  -4.17531447e-02,  -3.15690706e-02,
        -3.16790430e-02,  -2.07872353e-02,  -2.11995276e-02,
        -2.12888647e-02,  -9.27694800e-03,  -2.98992667e-02,
        -2.63809017e-02,  -4.67970323e-02,  -4.48592181e-02,
        -3.65373779e-02,  -4.64190489e-02,  -4.60136353e-02,
        -7.49921023e-02,  -7.08139862e-02,  -6.49592721e-02,
        -6.99481309e-02,  -8.37399077e-02,  -7.36451699e-02,
        -6.10216522e-02,  -5.22464174e-02,  -5.04803139e-02,
        -3.16446502e-02,  -4.39383346e-02,  -6.84432443e-02,
        -6.08293054e-02,  -3.67848374e-02,  -4.13339744e-02,
        -4.92021670e-02,  -6.60174743e-02])
Out[85]:
array([ 0.        ,  0.00192218,  0.0028797 ,  0.03004811,  0.04157432,
        0.02796874,  0.02345969,  0.03591489,  0.04696944,  0.03981647,
        0.04035956,  0.03297788,  0.02996952,  0.00150776,  0.0075817 ,
        0.00333704, -0.00061453, -0.02805449, -0.00354438,  0.00704577,
        0.00693141,  0.01825751,  0.01782878,  0.01773588,  0.03022676,
        0.00878215,  0.01244081, -0.00878939, -0.00677431,  0.00187935,
       -0.00839634, -0.00797476, -0.0381087 , -0.03376399, -0.02767582,
       -0.03286361, -0.04720531, -0.03670806, -0.0235812 , -0.01445606,
       -0.01261954,  0.00696717, -0.00581671, -0.03129871, -0.02338118,
        0.00162203, -0.0031085 , -0.01129043, -0.02877622, -0.00520218])
Out[85]:
array([-0.04334737,  0.042196  ,  0.04447582,  0.03789238,  0.0363518 ])

In [86]:
#plt.plot(X_train[0,:,0])
#plt.plot(y_train)
fig = plt.figure(facecolor='white')
ax = fig.add_subplot(211)
ax.plot(X_train[0, :, 0], label='X')
ax.legend()
ax = fig.add_subplot(212)
ax.plot(y_train, label='y')
ax.legend()


Out[86]:
[<matplotlib.lines.Line2D at 0x7f3557de3748>]
Out[86]:
<matplotlib.legend.Legend at 0x7f3557de36d8>
Out[86]:
[<matplotlib.lines.Line2D at 0x7f3557da5588>]
Out[86]:
<matplotlib.legend.Legend at 0x7f3557dd3668>

In [66]:
def build_model(layers):
    model = Sequential()

    model.add(LSTM(
        input_dim=layers[0],
        output_dim=layers[1],
        return_sequences=True))
    model.add(Dropout(0.2))

    model.add(LSTM(
        layers[2],
        return_sequences=False))
    model.add(Dropout(0.2))

    model.add(Dense(
        output_dim=layers[3]))
    model.add(Activation("linear"))

    start = time.time()
    model.compile(loss="mse", optimizer="rmsprop")
    print("Compilation Time : ", time.time() - start)
    return model

print('> Data Loaded. Compiling...')
model = build_model([1, 50, 100, 1])
model.summary()


> Data Loaded. Compiling...
Compilation Time :  3.6532278060913086
____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
====================================================================================================
lstm_11 (LSTM)                   (None, None, 50)      10400       lstm_input_6[0][0]               
____________________________________________________________________________________________________
dropout_11 (Dropout)             (None, None, 50)      0           lstm_11[0][0]                    
____________________________________________________________________________________________________
lstm_12 (LSTM)                   (None, 100)           60400       dropout_11[0][0]                 
____________________________________________________________________________________________________
dropout_12 (Dropout)             (None, 100)           0           lstm_12[0][0]                    
____________________________________________________________________________________________________
dense_6 (Dense)                  (None, 1)             101         dropout_12[0][0]                 
____________________________________________________________________________________________________
activation_6 (Activation)        (None, 1)             0           dense_6[0][0]                    
====================================================================================================
Total params: 70901
____________________________________________________________________________________________________

In [87]:
start_time = time.time()
epochs  = 50

model.fit(
    X_train,
    y_train,
    batch_size=512,
    nb_epoch=epochs,
    validation_split=0.05)

print("Training time: ", time.time() - start_time)


Train on 3522 samples, validate on 186 samples
Epoch 1/50
3522/3522 [==============================] - 1s - loss: 0.0057 - val_loss: 5.4800e-04
Epoch 2/50
3522/3522 [==============================] - 1s - loss: 0.0018 - val_loss: 4.3844e-04
Epoch 3/50
3522/3522 [==============================] - 1s - loss: 0.0014 - val_loss: 4.3329e-04
Epoch 4/50
3522/3522 [==============================] - 1s - loss: 0.0013 - val_loss: 4.3263e-04
Epoch 5/50
3522/3522 [==============================] - 1s - loss: 0.0012 - val_loss: 4.1390e-04
Epoch 6/50
3522/3522 [==============================] - 1s - loss: 0.0010 - val_loss: 3.4870e-04
Epoch 7/50
3522/3522 [==============================] - 1s - loss: 9.9863e-04 - val_loss: 3.3748e-04
Epoch 8/50
3522/3522 [==============================] - 1s - loss: 8.7663e-04 - val_loss: 4.5490e-04
Epoch 9/50
3522/3522 [==============================] - 1s - loss: 8.3708e-04 - val_loss: 2.3891e-04
Epoch 10/50
3522/3522 [==============================] - 1s - loss: 8.5208e-04 - val_loss: 4.5102e-04
Epoch 11/50
3522/3522 [==============================] - 1s - loss: 7.4192e-04 - val_loss: 1.9033e-04
Epoch 12/50
3522/3522 [==============================] - 1s - loss: 7.1124e-04 - val_loss: 2.8672e-04
Epoch 13/50
3522/3522 [==============================] - 1s - loss: 6.6047e-04 - val_loss: 2.1140e-04
Epoch 14/50
3522/3522 [==============================] - 1s - loss: 5.6060e-04 - val_loss: 1.6691e-04
Epoch 15/50
3522/3522 [==============================] - 1s - loss: 6.2278e-04 - val_loss: 2.5480e-04
Epoch 16/50
3522/3522 [==============================] - 1s - loss: 6.5873e-04 - val_loss: 1.6551e-04
Epoch 17/50
3522/3522 [==============================] - 1s - loss: 4.4757e-04 - val_loss: 1.2327e-04
Epoch 18/50
3522/3522 [==============================] - 1s - loss: 4.1725e-04 - val_loss: 2.0048e-04
Epoch 19/50
3522/3522 [==============================] - 1s - loss: 4.7712e-04 - val_loss: 1.4705e-04
Epoch 20/50
3522/3522 [==============================] - 1s - loss: 5.2034e-04 - val_loss: 1.3178e-04
Epoch 21/50
3522/3522 [==============================] - 1s - loss: 4.0419e-04 - val_loss: 1.0871e-04
Epoch 22/50
3522/3522 [==============================] - 1s - loss: 3.6117e-04 - val_loss: 1.0188e-04
Epoch 23/50
3522/3522 [==============================] - 1s - loss: 4.0884e-04 - val_loss: 2.7626e-04
Epoch 24/50
3522/3522 [==============================] - 1s - loss: 5.3789e-04 - val_loss: 1.0429e-04
Epoch 25/50
3522/3522 [==============================] - 1s - loss: 3.3739e-04 - val_loss: 1.0608e-04
Epoch 26/50
3522/3522 [==============================] - 1s - loss: 3.5465e-04 - val_loss: 1.0753e-04
Epoch 27/50
3522/3522 [==============================] - 1s - loss: 3.7657e-04 - val_loss: 1.4538e-04
Epoch 28/50
3522/3522 [==============================] - 1s - loss: 3.1452e-04 - val_loss: 8.5578e-05
Epoch 29/50
3522/3522 [==============================] - 1s - loss: 3.5238e-04 - val_loss: 2.4326e-04
Epoch 30/50
3522/3522 [==============================] - 1s - loss: 3.5266e-04 - val_loss: 8.2901e-05
Epoch 31/50
3522/3522 [==============================] - 1s - loss: 2.9801e-04 - val_loss: 9.2643e-05
Epoch 32/50
3522/3522 [==============================] - 1s - loss: 3.0176e-04 - val_loss: 1.4616e-04
Epoch 33/50
3522/3522 [==============================] - 1s - loss: 3.7631e-04 - val_loss: 7.6111e-05
Epoch 34/50
3522/3522 [==============================] - 1s - loss: 2.9481e-04 - val_loss: 1.3112e-04
Epoch 35/50
3522/3522 [==============================] - 1s - loss: 2.9081e-04 - val_loss: 8.6631e-05
Epoch 36/50
3522/3522 [==============================] - 1s - loss: 2.8179e-04 - val_loss: 1.4647e-04
Epoch 37/50
3522/3522 [==============================] - 1s - loss: 2.9196e-04 - val_loss: 7.1296e-05
Epoch 38/50
3522/3522 [==============================] - 1s - loss: 2.5280e-04 - val_loss: 8.3958e-05
Epoch 39/50
3522/3522 [==============================] - 1s - loss: 3.0130e-04 - val_loss: 7.6845e-05
Epoch 40/50
3522/3522 [==============================] - 1s - loss: 2.8943e-04 - val_loss: 9.3091e-05
Epoch 41/50
3522/3522 [==============================] - 1s - loss: 2.5526e-04 - val_loss: 1.3136e-04
Epoch 42/50
3522/3522 [==============================] - 1s - loss: 2.8079e-04 - val_loss: 6.2884e-05
Epoch 43/50
3522/3522 [==============================] - 1s - loss: 2.5507e-04 - val_loss: 6.7334e-05
Epoch 44/50
3522/3522 [==============================] - 1s - loss: 2.8394e-04 - val_loss: 9.4167e-05
Epoch 45/50
3522/3522 [==============================] - 1s - loss: 2.6279e-04 - val_loss: 9.3866e-05
Epoch 46/50
3522/3522 [==============================] - 1s - loss: 2.5222e-04 - val_loss: 7.4048e-05
Epoch 47/50
3522/3522 [==============================] - 1s - loss: 2.7000e-04 - val_loss: 8.7919e-05
Epoch 48/50
3522/3522 [==============================] - 1s - loss: 2.4998e-04 - val_loss: 7.4581e-05
Epoch 49/50
3522/3522 [==============================] - 1s - loss: 2.3174e-04 - val_loss: 7.0322e-05
Epoch 50/50
3522/3522 [==============================] - 1s - loss: 2.7604e-04 - val_loss: 6.2809e-05
Out[87]:
<keras.callbacks.History at 0x7f355bba45f8>
Training time:  78.43250107765198

In [ ]:
def plot_results(predicted_data, true_data, figsize=(12,6)):
    fig = plt.figure(facecolor='white', figsize=figsize)
    ax = fig.add_subplot(111)
    ax.plot(true_data, label='True Data')
    plt.plot(predicted_data, label='Prediction')
    plt.legend()
    plt.show()

If you’re observant you’ll have noticed that in our load_data() function above we split the data into train/test sets, as is standard practice for machine learning problems. However, what we need to watch out for here is what we actually want to achieve in the prediction of the time series.

If we were to use the test set as it is, we would be running each window full of true data to predict the next time step. That is fine if we are only looking to predict one time step ahead. However, if we’re looking to predict more than one time step ahead, perhaps to capture emergent trends or functions (e.g. the sin function in this case), using the full test set would mean we predict the next time step but then disregard that prediction when it comes to subsequent time steps, falling back on the true data at every step. You can see below the graph of using this approach to predict only one time step ahead at each step in time:


In [73]:
def predict_point_by_point(model, data):
    #Predict each timestep given the last sequence of true data, in effect only predicting 1 step ahead each time
    predicted = model.predict(data)
    predicted = np.reshape(predicted, (predicted.size,))
    return predicted

start_time = time.time()
predicted = predict_point_by_point(model, X_test)        
predicted[:5]
print("Prediction time: ", time.time() - start_time)

plot_results(predicted, y_test)


Out[73]:
array([ 1.01226306,  1.01924646,  1.0207845 ,  1.01761591,  1.01051164], dtype=float32)
Prediction time:  0.7436399459838867

If however we want to do real magic and predict many time steps ahead, we use only the first window from the test data as an initiation window. At each time step we pop the oldest entry off the window and append the prediction for the next time step, in essence shifting the window along so it slowly fills itself up with predictions, until the window contains only predicted values (in our case, as our window is of size 50, this occurs after 50 time steps). We then keep this up indefinitely, predicting each next time step from the predictions of the previous time steps, in the hope of seeing an emerging trend.


In [71]:
def predict_sequence_full(model, data, window_size):
    #Shift the window by 1 new prediction each time, re-run predictions on new window
    curr_frame = data[0]
    predicted = []
    
    # loop over entire testdata
    for i in range(len(data)):
        predicted.append(model.predict(curr_frame[newaxis,:,:])[0,0])  #get element from shape(1,1)
        curr_frame = curr_frame[1:]  #move window
        curr_frame = np.insert(curr_frame, [window_size-1], predicted[-1], axis=0)  #fill frame with prediction
    return predicted

start_time = time.time()
predicted = predict_sequence_full(model, X_test, seq_len)
predicted[:5]
print("Prediction time: ", time.time() - start_time)

plot_results(predicted, y_test)


Out[71]:
[1.0122631, 1.0192991, 1.0212502, 1.0188733, 1.0127699]
Prediction time:  23.757328987121582

Overlaid with the true data, we can see that with just 1 epoch and a reasonably small training set the LSTM has already done a pretty damn good job of predicting the sin function. You can see that as we predict further and further into the future, the error margin increases, since errors in the earlier predictions are amplified when they are reused for later predictions. As such, the LSTM hasn’t got the frequency quite right, and it drifts the more we try to predict. However, as the sin function is a very easy oscillating function with zero noise, the model can predict it to a good degree.

A NOT-SO-SIMPLE STOCK MARKET

We predicted several hundred time steps of a sin wave on an accurate point-by-point basis. So we can now just do the same on a stock market time series and make a shit load of money, right?

Well, no.

A stock time series is unfortunately not a function that can be mapped. It is best described as a random walk, which makes the whole prediction thing considerably harder. But what about the LSTM identifying any underlying hidden trends? Well, let’s take a look.

Here is a CSV file where I have taken the adjusted daily closing price of the S&P 500 equity index from January 2000 – August 2016. I’ve stripped out everything to put it in the exact same format as our sin wave data, and we will now run it through the same model we used on the sin wave, with the same train/test split.

There is one slight change we need to make to our data, however. Because a sin wave is already a nicely normalised repeating pattern, it works well to run the raw data points through the network. Running the raw prices of a stock index through the network, however, would make the optimisation process shit itself and never converge to any sort of optimum for such large numbers.

So to combat this we will take each n-sized window of training/testing data and normalize each one to reflect percentage changes from the start of that window (so the data at point i=0 will always be 0). We’ll use the following equations to normalise and subsequently de-normalise at the end of the prediction process to get a real world number out of the prediction:

n = normalised list [window] of price changes
p = raw list [window] of adjusted daily closing prices

Normalisation:    n_i = (p_i / p_0) - 1
De-normalisation: p_i = p_0 * (n_i + 1)
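
The normalise_windows() function above already implements the first equation; the inverse is never defined in this notebook, so here is a minimal sketch of a hypothetical de_normalise_window() helper based on the second equation. The helper name and the round-trip check are illustrative assumptions, not part of the original post.

In [ ]:
def de_normalise_window(normalised_window, p0):
    # inverse of normalise_windows(): p_i = p_0 * (n_i + 1)
    return [float(p0) * (float(n) + 1.0) for n in normalised_window]

# round-trip check on the first raw window of the series (should print True)
window = ts[:seq_len + 1]
restored = de_normalise_window(normalise_windows([window])[0], window[0])
print(np.allclose(window, restored))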


In [ ]:
start_time = time.time()
predicted = predict_point_by_point(model, X_test)        
predicted[:5]
print("Prediction time: ", time.time() - start_time)

In [100]:
plot_results(predicted, y_test, figsize=(20,10))


Running the data on a single point-by-point prediction as mentioned above gives something that matches the returns pretty closely. But this is deceptive! Why? Well if you look more closely, the prediction line is made up of singular prediction points that have had the whole prior true history window behind them. Because of that, the network doesn’t need to know much about the time series itself other than that each next point most likely won’t be too far from the last point. So even if it gets the prediction for the point wrong, the next prediction will then factor in the true history and disregard the incorrect prediction, yet again allowing for an error to be made.
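
To make that concrete, a quick sanity check (a hypothetical sketch, not from the original post) is to compare the LSTM’s point-by-point error against a naive persistence baseline that simply repeats the last value of each input window; if the two numbers are close, the network is largely echoing the previous point.

In [ ]:
# assumes `predicted` still holds the point-by-point predictions from the cell above
naive = X_test[:, -1, 0]  # last observed (normalised) value of each window

lstm_mse = np.mean((predicted - y_test) ** 2)
naive_mse = np.mean((naive - y_test) ** 2)
print("LSTM point-by-point MSE: ", lstm_mse)
print("Naive persistence MSE:   ", naive_mse)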

We can’t see what is happening inside the brain of the LSTM, but I would make a strong case that for this prediction of what is essentially a random walk (and, as a matter of fact, I have made a completely random walk of data that mimics the look of a stock index, and the exact same thing holds true there as well!), the network is “predicting” the next point as essentially a narrow Gaussian distribution around the last point, which keeps the essentially random prediction from straying too wildly from the true data.
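
As a rough illustration of that random-walk point (a sketch with arbitrary, assumed parameters, not the data the author actually generated), one could synthesise a random walk with an index-like scale and push it through the same windowing pipeline; the point-by-point behaviour described above looks the same on it.

In [ ]:
# synthetic random walk with an index-like scale; parameters are arbitrary
np.random.seed(0)
random_walk = list(1400.0 * np.cumprod(1.0 + np.random.normal(0.0, 0.01, size=len(ts))))
plt.plot(random_walk)

Xrw_train, yrw_train, Xrw_test, yrw_test = load_data(random_walk, seq_len, True)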

So what would we look at if we wanted to see whether there truly was some underlying pattern discernible in just the price movements? Well, we would do the same as for the sin wave problem and let the network predict a sequence of points rather than just the next one.

Doing that, we can now see that, unlike the sin wave, which carried on as a sequence almost identical to the true data, our stock data predictions converge very quickly to some sort of equilibrium.


In [ ]:
start_time = time.time()
predicted = predict_sequence_full(model, X_test, seq_len)
predicted[:5]
print("Prediction time: ", time.time() - start_time)

In [101]:
plot_results(predicted, y_test, figsize=(20,10))



In [102]:
def plot_results_multiple(predicted_data, true_data, prediction_len, figsize=(12,6)):
    fig = plt.figure(facecolor='white', figsize=figsize)
    ax = fig.add_subplot(111)
    ax.plot(true_data, label='True Data')
    # Pad each list of predictions so it starts at the correct offset in the graph
    for i, data in enumerate(predicted_data):
        padding = [None for p in range(i * prediction_len)]
        ax.plot(padding + data, label='Prediction ' + str(i))
    ax.legend()
    plt.show()

In [ ]:
def predict_sequences_multiple(model, data, window_size, prediction_len):
    #Predict sequence of 50 steps before shifting prediction run forward by 50 steps
    prediction_seqs = []
    for i in range(len(data)//prediction_len):
        curr_frame = data[i*prediction_len]
        predicted = []
        for j in range(prediction_len):
            predicted.append(model.predict(curr_frame[newaxis,:,:])[0,0])
            curr_frame = curr_frame[1:]
            curr_frame = np.insert(curr_frame, [window_size-1], predicted[-1], axis=0)
        prediction_seqs.append(predicted)
    return prediction_seqs

start_time = time.time()
predictions = predict_sequences_multiple(model, X_test, seq_len, 50)
#predicted = predict_sequence_full(model, X_test, seq_len)
#predicted = predict_point_by_point(model, X_test)        
print("Prediction time: ", time.time() - start_time)

In [103]:
plot_results_multiple(predictions, y_test, 50, figsize=(20,10))


In fact, when we take a look at the graph above for the same run but with the epochs increased to 400 (which should make the model more accurate), we see that it now just tries to predict an upwards momentum for almost every time period!
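
For reference, here is a hypothetical sketch of how that 400-epoch run could be reproduced with the cells already defined above; the run itself and its output are not included in this notebook.

In [ ]:
# rebuild the model, train for 400 epochs, then repeat the multi-sequence prediction
model = build_model([1, 50, 100, 1])
model.fit(X_train, y_train, batch_size=512, nb_epoch=400, validation_split=0.05)

predictions = predict_sequences_multiple(model, X_test, seq_len, 50)
plot_results_multiple(predictions, y_test, 50, figsize=(20,10))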

