In [2]:
import pandas as pd
import numpy as np
import os
In [3]:
# Load the observation table stored under key 'o'.
# The store is opened read-only (mode='r'): HDFStore's default append mode
# would create a new, empty file when the path is wrong and would open the
# data file for writing even though this cell only reads from it.
with pd.HDFStore(os.path.join('/', 'root', 'fin_lstm', 'observations.h5'), mode='r') as store:
    observations = store['o']
In [4]:
# Cols to drop:
ctd = ['Open', 'High', 'Low', 'Close',  # stock price. Leave volume
       'o_id',  # id
       'COMLND']  # commercial lending index - many NaNs
# Remove the unused columns, then discard every row that still contains a NaN.
observations = observations.drop(ctd, axis=1).dropna()

# Min-max normalize Volume (minimum maps to 0, maximum maps to 1) with one
# vectorized expression instead of a per-element Python callback.
# NOTE(review): min and max are taken over all rows, including the rows that
# the later train/validate split assigns to validation.
v = observations['Volume']
mi = v.min()
di = v.max() - mi
observations['Volume'] = (v - mi) / di

# Rescale the target by 100, keep it as the label series and write the scaled
# values back into the frame; the lagged target column gets the same scaling.
labels = observations['sig^2'] * 100
observations['sig^2'] = labels
observations['lag_sig^2'] *= 100

# NOTE(review): the drop below is disabled, so 'sig^2' remains a column of
# `observations` as well as being the label series - confirm this is intended.
# observations.drop('sig^2', axis=1, inplace=True)
In [5]:
# Positional 70/30 partition: the leading 70% of rows become the training
# set (tX, tY) and the remaining rows become the validation set (vX, vY).
mark = int(len(observations) * .7)
_train_rows, _validate_rows = slice(None, mark), slice(mark, None)
tX, vX = observations[_train_rows], observations[_validate_rows]
tY, vY = labels[_train_rows], labels[_validate_rows]
In [6]:
# Window length: number of consecutive rows in each input sample. It is used
# as the first dimension of the LSTM input_shape and as the look_back passed
# to create_dataset when building the training windows.
timesteps = 10
In [51]:
# Preview the windows and labels that create_dataset builds with look_back=2.
# NOTE(review): create_dataset is defined in a later cell, so on a fresh
# top-to-bottom run this cell raises NameError - move it below the definition.
create_dataset(dataset=observations.values, Y=labels.values, look_back=2)
Out[51]:
In [11]:
def create_dataset(dataset, Y, look_back=1):
    """Slice a 2-D feature array into overlapping windows with next-step labels.

    Sample ``i`` consists of the ``look_back`` consecutive rows
    ``dataset[i:i + look_back]``; its target is ``Y[i + look_back]``, i.e. the
    label of the row immediately after the window.

    Parameters
    ----------
    dataset : array-like, shape (n_rows, n_features)
        Feature rows, one row per time step.
    Y : array-like, length n_rows
        Labels aligned with the rows of ``dataset``; indexed positionally.
    look_back : int, default 1
        Number of rows in each window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(dataX, dataY)`` where ``dataX`` has shape
        ``(n_samples, look_back, n_features)`` and ``dataY`` has shape
        ``(n_samples,)``, with ``n_samples = len(dataset) - look_back``.
        Both arrays are empty when ``dataset`` has ``look_back`` rows or fewer.
    """
    dataset = np.asarray(dataset)
    Y = np.asarray(Y)
    # A window starting at row i needs a label at row i + look_back, so valid
    # starts are 0 .. len(dataset) - look_back - 1 inclusive. Iterating over
    # range(len(dataset) - look_back) includes the final window, whose label is
    # the last element of Y.
    n_samples = max(len(dataset) - look_back, 0)
    dataX = [dataset[i:i + look_back, :] for i in range(n_samples)]
    dataY = [Y[i + look_back] for i in range(n_samples)]
    return np.array(dataX), np.array(dataY)
In [8]:
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.optimizers import RMSprop, SGD, Nadam
from keras.callbacks import TensorBoard
In [9]:
# expected input data shape: (batch_size, timesteps, data_dim)
data_dim = tX.values.shape[1]  # number of feature columns per time step

# Two stacked 40-unit LSTM layers followed by a single-unit dense output.
model = Sequential()
# The first LSTM returns its full output sequence so the second LSTM receives
# one vector per time step.
model.add(LSTM(40, return_sequences=True,
               input_shape=(timesteps, data_dim)))
# The second LSTM returns only its final output, which feeds the dense layer.
model.add(LSTM(40, return_sequences=False))
model.add(Dense(1))
# The network outputs a single continuous value trained with MSLE, so mean
# absolute error is tracked as the metric; classification accuracy is not
# meaningful for a continuous target.
model.compile(loss='msle', optimizer=RMSprop(), metrics=['mae'])

# TB callback
TB = TensorBoard(log_dir='./logs', histogram_freq=0, write_graph=True, write_images=False)
In [ ]:
# Build `timesteps`-row windows over the whole data set, then train on them.
# validation_split=.2 makes Keras set aside 20% of the samples for validation.
train_windows, train_targets = create_dataset(observations.values, labels.values, timesteps)
model.fit(train_windows, train_targets,
          batch_size=50, nb_epoch=500, verbose=2,
          validation_split=.2,
          shuffle=True,
          callbacks=[TB])
In [ ]:
In [ ]:
# Evaluate on windows of `timesteps` rows: the model's first layer is declared
# with input_shape=(timesteps, data_dim), so windows built with any other
# look_back have a shape the network rejects.
# NOTE(review): this scores the same rows that were passed to model.fit -
# confirm whether the held-out vX / vY split was meant to be used here.
model.evaluate(*create_dataset(observations.values, labels.values, timesteps), batch_size=10, verbose=1)
In [ ]: