Phased LSTM (p-LSTM) cells were introduced in https://github.com/dannyneil/public_plstm; the Keras implementation used here comes from https://github.com/fferroni/PhasedLSTM-Keras
The idea of the notebook is the following: we have a collection of values for several stocks and, given the evolution of all but one of them, we want to predict the value of the remaining one at the last (think of it as the current) point in time.
In [1]:
import numpy as np
import pandas as pd
from keras.layers.core import Dense
from keras.models import Sequential
from keras.layers.recurrent import LSTM
import matplotlib.pyplot as plt
try: # For downloading the dataset
from urllib2 import Request, urlopen
except ImportError:
from urllib.request import Request, urlopen
import zipfile # For unzipping the dataset
from phased_lstm_keras.PhasedLSTM import PhasedLSTM #Phased LSTM implementation
In [2]:
# Download the Dow Jones Index archive and unpack it into ./dow_jones_index.
# The archive is fetched only when no valid local copy exists, so re-running
# the notebook does not hit the network again.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00312/dow_jones_index.zip"
archive_path = "dow_jones_index.zip"
# is_zipfile() returns False both for a missing file and for a truncated or
# corrupt one, so a failed earlier download is retried automatically.
if not zipfile.is_zipfile(archive_path):
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    response = urlopen(req)
    try:
        content = response.read()
    finally:
        # Release the connection even if the read fails.
        response.close()
    with open(archive_path, 'wb') as f:
        f.write(content)
with zipfile.ZipFile(archive_path, "r") as zip_ref:
    zip_ref.extractall("dow_jones_index")
In [3]:
# Load the extracted CSV; header=0 takes the column names from the first row.
prices_dataset = pd.read_csv('dow_jones_index/dow_jones_index.data', header=0)
# NOTE: a helper that used to sit here (commented out) stripped "$" from the
# price columns 'open', 'high', 'low', 'close', 'next_weeks_open' and
# 'next_weeks_close'. The cells below only read the percent-change columns,
# so no such cleaning is applied.
# Preview the first rows rather than rendering the whole frame.
prices_dataset.head()
Out[3]:
In [4]:
# Collect the ticker symbols in order of first appearance.
# drop_duplicates() keeps the first occurrence of each symbol, so the result
# does not depend on every ticker showing up in exactly two contiguous runs
# (the assumption behind run-deduplicating the column and keeping half of it).
stocks = prices_dataset['stock'].drop_duplicates().tolist()
In [5]:
# For every ticker, gather its weekly percent price change as float32;
# stacking the per-ticker arrays yields one row per stock.
prices = np.array([
    prices_dataset[prices_dataset['stock'] == ticker]
    .percent_change_price.values.astype('float32')
    for ticker in stocks
])
# Same layout, holding the percent change recorded for the following week.
nw_prices = np.array([
    prices_dataset[prices_dataset['stock'] == ticker]
    .percent_change_next_weeks_price.values.astype('float32')
    for ticker in stocks
])
In [6]:
# Draw every stock's weekly percent price change on one shared set of axes.
fig, ax = plt.subplots()
for price in prices:
    ax.plot(price)
# Label the figure so it can be read without the surrounding code.
ax.set(
    title='Weekly percent change in price, one line per stock',
    xlabel='Week index',
    ylabel='Percent change in price',
)
plt.show()
In [7]:
# Hold out the last stock for testing; all the others form the training set.
trainX = prices[:-1, :]
testX = prices[-1, :]
# Targets: the next-week percent change taken at the final time step.
trainY = nw_prices[:-1, -1]
testY = nw_prices[-1, -1]
# Add a trailing feature axis (plus a leading batch axis for the single test
# series) so both inputs are laid out as (samples, timesteps, 1).
trainX = trainX[:, :, np.newaxis]
testX = testX[np.newaxis, :, np.newaxis]
In [8]:
def build_recurrent_model(recurrent_layer, input_shape, units=100, dropout=0.2):
    """Return a compiled one-layer recurrent regressor.

    Parameters
    ----------
    recurrent_layer : callable
        Layer class to instantiate as the recurrent layer (e.g. ``LSTM`` or
        ``PhasedLSTM``); it is called as
        ``recurrent_layer(units, input_shape=..., dropout=...)``.
    input_shape : tuple
        Shape of one input sample, passed to the recurrent layer.
    units : int, optional
        Number of units in the recurrent layer.
    dropout : float, optional
        Dropout value forwarded to the recurrent layer.

    Returns
    -------
    Sequential
        Recurrent layer followed by a single linear output unit, compiled
        with mean-squared-error loss and the Adam optimizer.
    """
    model = Sequential()
    model.add(recurrent_layer(units, input_shape=input_shape, dropout=dropout))
    model.add(Dense(1, activation='linear'))
    model.compile(loss='mse', optimizer='adam')
    return model


# Per-sample input shape taken from the training data instead of a hard-coded
# (25, 1), so the models follow the data if the series length changes.
series_shape = trainX.shape[1:]
# Phased LSTM model
model_plstm = build_recurrent_model(PhasedLSTM, series_shape)
# Plain LSTM model with the same configuration, for comparison
model_lstm = build_recurrent_model(LSTM, series_shape)
In [9]:
# Train both networks with identical settings so the comparison is like for like.
fit_options = {'epochs': 20, 'batch_size': 1}
model_plstm.fit(trainX, trainY, **fit_options)
model_lstm.fit(trainX, trainY, **fit_options)
Out[9]:
In [10]:
def relative_error(predicted, actual):
    """Return |predicted - actual| / |actual|.

    The denominator is the magnitude of ``actual`` so the result is
    non-negative even when the target is negative. The ratio is undefined
    when ``actual`` is zero.
    """
    return abs(predicted - actual) / abs(actual)


# Predict the held-out series with each model.
pred_plstm = model_plstm.predict(testX)
pred_lstm = model_lstm.predict(testX)
# predict() output is indexed [0][0] to pull out the single scalar prediction.
err_plstm = relative_error(pred_plstm[0][0], testY)
err_lstm = relative_error(pred_lstm[0][0], testY)
print('The error with Phased LSTM is {}, while with LSTM is {}'.format(err_plstm, err_lstm))