In [1]:
import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline


Using TensorFlow backend.

In [2]:
# read the train batches from files created by the Preprocessing_NN_Data notebook
def read_batches(filepath, include_cl_kadij_input=True):
    """Read consecutively numbered batch files '<filepath>_<i>.csv' (i = 0, 1, ...)
    until the first missing file, returning a list of DataFrames.

    filepath: path prefix of the batch files (without '_<i>.csv').
    include_cl_kadij_input: when False, drop the 'cl_kadij_1' column
        (yesterday's value) from every batch.
    """
    batches = []
    number_read = 0
    while True:
        path = filepath + '_' + str(number_read) + '.csv'
        try:
            batch = pd.read_csv(path, index_col=0, parse_dates=True)
        except FileNotFoundError:
            # first gap in the numbering ends the sequence
            break
        if not include_cl_kadij_input:
            # drop(columns=...) replaces the positional axis argument
            # drop(label, 1), which was removed in pandas 2.0
            batch = batch.drop(columns='cl_kadij_1')
        batches.append(batch)
        number_read += 1

    return batches

# NOTE: this flag is recorded in the model's info file at save time,
# so keep it in sync with the model actually trained below
include_cl_kadij_input = False #used to toggle whether cl_kadij_1 (yesterday's value) is included in input
train_batches = read_batches('data/train/batch', include_cl_kadij_input)

In [3]:
#print an example batch to see what they look like
# (rows are daily timestamps; 'cl_kadij_out' is the prediction target,
# the remaining columns are the input features)
batch = train_batches[0]
batch.head()


Out[3]:
cl_kadij_out cl_lobith_3 cl_lobith_4 cl_lobith_5 cl_lobith_6 afv_lobith_6 afv_lobith_7 afv_lobith_8 afv_lobith_9 cum_stand_1 cum_stand_2
1987-11-21 21:00:00 0.471389 0.600541 0.617482 0.676992 0.730888 0.203889 0.176697 0.156819 0.146625 0.754001 0.491503
1987-11-22 21:00:00 0.407675 0.634160 0.600541 0.617482 0.676992 0.238779 0.203889 0.176697 0.156819 0.605016 0.754001
1987-11-23 21:00:00 0.379931 0.625995 0.634160 0.600541 0.617482 0.274423 0.238779 0.203889 0.176697 0.540835 0.605016
1987-11-24 21:00:00 0.370729 0.565758 0.625995 0.634160 0.600541 0.292554 0.274423 0.238779 0.203889 0.501897 0.540835
1987-11-25 21:00:00 0.345503 0.445461 0.565758 0.625995 0.634160 0.306019 0.292554 0.274423 0.238779 0.387065 0.501897

In [4]:
# convert the dataframes in train_batches to the correct input and output shape (and make it arrays)
def convert_to_keras_input(batches):
    """Split each batch DataFrame into Keras-ready numpy arrays.

    batches: list of DataFrames whose 'cl_kadij_out' column is the target
        and whose remaining columns are input features.

    Returns (input_batches, output_batches): each input array has shape
    (n_samples, 1, n_features) -- the middle axis is the single LSTM
    timestep -- and each output array has shape (n_samples,).
    """
    input_batches = []
    output_batches = []
    for batch in batches:
        # select the features by dropping the target column by name
        # (the original positional iloc[:, 1:] assumed it was first)
        input_df = batch.drop(columns='cl_kadij_out')
        output_df = batch['cl_kadij_out']
        # .to_numpy() replaces .as_matrix(), which was removed in pandas 1.0
        input_batches.append(input_df.to_numpy()[:, np.newaxis, :])
        output_batches.append(output_df.to_numpy())
    return (input_batches, output_batches)

(input_train_batches, output_train_batches) = convert_to_keras_input(train_batches)

In [5]:
# create a Keras model of the NN
n_cells = 4  # LSTM units per layer (also written to the model's info file)
# per-sample shape: (timesteps, features) = (1, n_features)
input_shape = input_train_batches[0].shape[1:]

# two stacked LSTM layers feeding a single linear output unit;
# the first layer returns its full sequence so the second can consume it
model = keras.models.Sequential([
    keras.layers.LSTM(n_cells, input_shape=input_shape, return_sequences=True),
    keras.layers.LSTM(n_cells),
    keras.layers.Dense(1),
])
model.compile(loss='mean_squared_error', optimizer='adam')

model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
lstm_1 (LSTM)                (None, 1, 4)              240       
_________________________________________________________________
lstm_2 (LSTM)                (None, 4)                 144       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 5         
=================================================================
Total params: 389
Trainable params: 389
Non-trainable params: 0
_________________________________________________________________

In [6]:
%%time
# train the model (this will take hours!)

n_epochs = 1000   # total passes over all batches (recorded in the info file)
report_at = 25    # print the loss of the first batch every this many epochs

for epoch in range(n_epochs):
    reporting = (epoch % report_at == 0)
    if reporting:
        print('At epoch', epoch)
    for j, (x, y) in enumerate(zip(input_train_batches, output_train_batches)):
        # each batch is fit in one full-batch gradient step; only the first
        # batch of a reporting epoch prints its loss (verbose=2)
        verbosity = 2 if (reporting and j == 0) else 0
        model.fit(x, y, epochs=1, batch_size=len(y), verbose=verbosity)


At epoch 0
Epoch 1/1
1s - loss: 0.1580
At epoch 25
Epoch 1/1
0s - loss: 9.2738e-04
At epoch 50
Epoch 1/1
0s - loss: 9.1536e-04
At epoch 75
Epoch 1/1
0s - loss: 8.7842e-04
At epoch 100
Epoch 1/1
0s - loss: 8.4336e-04
At epoch 125
Epoch 1/1
0s - loss: 8.3015e-04
At epoch 150
Epoch 1/1
0s - loss: 8.5183e-04
At epoch 175
Epoch 1/1
0s - loss: 8.5729e-04
At epoch 200
Epoch 1/1
0s - loss: 8.5354e-04
At epoch 225
Epoch 1/1
0s - loss: 8.4674e-04
At epoch 250
Epoch 1/1
0s - loss: 8.4350e-04
At epoch 275
Epoch 1/1
0s - loss: 8.3171e-04
At epoch 300
Epoch 1/1
0s - loss: 8.3943e-04
At epoch 325
Epoch 1/1
0s - loss: 8.0809e-04
At epoch 350
Epoch 1/1
0s - loss: 7.8491e-04
At epoch 375
Epoch 1/1
0s - loss: 7.9298e-04
At epoch 400
Epoch 1/1
0s - loss: 7.9735e-04
At epoch 425
Epoch 1/1
0s - loss: 7.9130e-04
At epoch 450
Epoch 1/1
0s - loss: 7.9296e-04
At epoch 475
Epoch 1/1
0s - loss: 7.9487e-04
At epoch 500
Epoch 1/1
0s - loss: 7.8669e-04
At epoch 525
Epoch 1/1
0s - loss: 7.8290e-04
At epoch 550
Epoch 1/1
0s - loss: 7.7617e-04
At epoch 575
Epoch 1/1
0s - loss: 7.7052e-04
At epoch 600
Epoch 1/1
0s - loss: 7.5806e-04
At epoch 625
Epoch 1/1
0s - loss: 7.4854e-04
At epoch 650
Epoch 1/1
0s - loss: 7.4485e-04
At epoch 675
Epoch 1/1
0s - loss: 7.3673e-04
At epoch 700
Epoch 1/1
0s - loss: 7.3419e-04
At epoch 725
Epoch 1/1
0s - loss: 7.2986e-04
At epoch 750
Epoch 1/1
0s - loss: 7.2416e-04
At epoch 775
Epoch 1/1
0s - loss: 7.2258e-04
At epoch 800
Epoch 1/1
0s - loss: 7.2318e-04
At epoch 825
Epoch 1/1
0s - loss: 7.1825e-04
At epoch 850
Epoch 1/1
0s - loss: 7.1810e-04
At epoch 875
Epoch 1/1
0s - loss: 7.1601e-04
At epoch 900
Epoch 1/1
0s - loss: 7.1230e-04
At epoch 925
Epoch 1/1
0s - loss: 7.1012e-04
At epoch 950
Epoch 1/1
0s - loss: 7.1202e-04
At epoch 975
Epoch 1/1
0s - loss: 7.1357e-04
Wall time: 2h 18min 42s

In [7]:
# write the model to a file, don't forget to change the output filename otherwise it won't write!
output_filename = 'models/2layer_lstm_4cells_noKadij'

from pathlib import Path
output_file = Path(output_filename + '.h5')
if output_file.is_file():
    # refuse to overwrite an existing model; pick a fresh name instead
    print('Please choose a different filename, this one already exists!')
else:
    model.save(output_filename + '.h5')
    # record the training configuration alongside the model; the context
    # manager guarantees the file is closed even if a write raises
    with open(output_filename + '_info.txt', 'w') as info_file:
        info_file.write('number of epochs: ' + str(n_epochs) + '\n')
        info_file.write('number of cells per layer: ' + str(n_cells) + '\n')
        info_file.write('including cl_kadij in input:' + str(include_cl_kadij_input) + '\n\n')
        # redirect the keras summary line-by-line into the info file
        model.summary(print_fn=lambda line: info_file.write(line + '\n'))

In [ ]: