In [1]:
import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline
In [2]:
# read the train batches from the files created by the Preprocessing_NN_Data notebook
def read_batches(filepath, include_cl_kadij_input=True):
    batches = []
    number_read = -1
    while True:
        number_read += 1
        try:
            batch = pd.read_csv(filepath + '_' + str(number_read) + '.csv', index_col=0, parse_dates=True)
            if not include_cl_kadij_input:
                batch = batch.drop(columns='cl_kadij_1')
            batches.append(batch)
        except FileNotFoundError:
            break
    return batches

include_cl_kadij_input = False  # toggles whether cl_kadij_1 (yesterday's value) is included in the input
train_batches = read_batches('data/train/batch', include_cl_kadij_input)
In [3]:
# print an example batch to see what the batches look like
batch = train_batches[0]
batch.head()
Out[3]:
In [4]:
# convert the dataframes in train_batches to numpy arrays with the input and output shapes Keras expects
def convert_to_keras_input(batches):
    input_batches = []
    output_batches = []
    for batch in batches:
        input_df = batch.iloc[:, 1:]
        output_df = batch['cl_kadij_out']
        input_batches.append(input_df.to_numpy()[:, np.newaxis, :])
        output_batches.append(output_df.to_numpy())
    return (input_batches, output_batches)

(input_train_batches, output_train_batches) = convert_to_keras_input(train_batches)
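A quick sanity check of the converted arrays can catch shape mistakes early; this sketch only prints the shapes, which should be (samples, 1, features) for the inputs and (samples,) for the outputs.

# sanity check (sketch): inputs should be 3D (samples, timesteps=1, features),
# outputs 1D (samples,), which is the shape the LSTM layers below expect
print(input_train_batches[0].shape)
print(output_train_batches[0].shape)
print(len(input_train_batches), 'training batches')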
In [5]:
# build the Keras model: two stacked LSTM layers followed by a single dense output
n_cells = 4
input_shape = input_train_batches[0].shape[1:]
model = keras.models.Sequential()
model.add(keras.layers.LSTM(n_cells, input_shape=input_shape, return_sequences=True))
model.add(keras.layers.LSTM(n_cells))
model.add(keras.layers.Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.summary()
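As a cross-check of the counts reported by model.summary(): a Keras LSTM layer with bias has 4 * units * (units + input_dim + 1) trainable weights (four gates, each with a kernel, a recurrent kernel and a bias), and the final Dense layer has n_cells weights plus one bias. A sketch that recomputes these from input_shape:

# recompute the expected parameter counts (sketch; assumes the default LSTM settings used above)
n_features = input_shape[-1]
print('LSTM layer 1:', 4 * n_cells * (n_cells + n_features + 1))
print('LSTM layer 2:', 4 * n_cells * (n_cells + n_cells + 1))
print('Dense layer: ', n_cells + 1)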
In [6]:
%%time
# train the model (this will take hours!)
n_epochs = 1000
report_at = 25
for i in range(0, n_epochs):
    first_batch = 0
    if i % report_at == 0:
        # every report_at epochs, fit the first batch with verbose output to monitor the loss
        print('At epoch', i)
        model.fit(input_train_batches[0], output_train_batches[0], epochs=1, batch_size=len(output_train_batches[0]), verbose=2)
        first_batch = 1
    for j in range(first_batch, len(input_train_batches)):
        model.fit(input_train_batches[j], output_train_batches[j], epochs=1, batch_size=len(output_train_batches[j]), verbose=0)
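Each fit call above performs a single gradient update on one full batch (batch_size equals the batch length), so the loop could equivalently be written with train_on_batch, which also returns the loss for lightweight progress reporting; a sketch of that alternative:

# alternative sketch using train_on_batch: one gradient update per full batch,
# equivalent to fit(..., epochs=1, batch_size=len(batch)) but returning the loss
for i in range(n_epochs):
    for x, y in zip(input_train_batches, output_train_batches):
        loss = model.train_on_batch(x, y)
    if i % report_at == 0:
        print('At epoch', i, 'last batch loss:', loss)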
In [7]:
# write the model to a file; pick a new output_filename for each model, otherwise nothing is written
output_filename = 'models/2layer_lstm_4cells_noKadij'
from pathlib import Path
output_file = Path(output_filename + '.h5')
if output_file.is_file():
    print('Please choose a different filename, this one already exists!')
else:
    model.save(output_filename + '.h5')
    info_file = open(output_filename + '_info.txt', 'w')
    info_file.write('number of epochs: ' + str(n_epochs) + '\n')
    info_file.write('number of cells per layer: ' + str(n_cells) + '\n')
    info_file.write('including cl_kadij in input: ' + str(include_cl_kadij_input) + '\n\n')
    model.summary(print_fn=lambda x: info_file.write(x + '\n'))
    info_file.close()
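To use the trained network later, the saved HDF5 file can be restored with load_model, which brings back the architecture, weights and compile settings; a minimal sketch:

# reload the saved model later (sketch)
model = keras.models.load_model(output_filename + '.h5')
model.summary()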
In [ ]: