In [1]:
import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline


Using TensorFlow backend.

In [2]:
# read the train batches from files created by the Preprocessing_NN_Data notebook
def read_batches(filepath, include_cl_kadij_input=True):
    """Read consecutively numbered batch files '<filepath>_<i>.csv' (i = 0, 1, ...)
    until the first missing file, returning a list of DataFrames.

    filepath: path prefix of the batch files (without '_<i>.csv').
    include_cl_kadij_input: when False, drop the 'cl_kadij_1' column
        (yesterday's value) from every batch.
    """
    batches = []
    number_read = 0
    while True:
        path = filepath + '_' + str(number_read) + '.csv'
        try:
            batch = pd.read_csv(path, index_col=0, parse_dates=True)
        except FileNotFoundError:
            # first gap in the numbering ends the sequence
            break
        if not include_cl_kadij_input:
            # drop(columns=...) replaces the positional axis argument
            # drop(label, 1), which was removed in pandas 2.0
            batch = batch.drop(columns='cl_kadij_1')
        batches.append(batch)
        number_read += 1

    return batches

# NOTE: this flag is recorded in the model's info file at save time,
# so keep it in sync with the model actually trained below
include_cl_kadij_input = False #used to toggle whether cl_kadij_1 (yesterday's value) is included in input
train_batches = read_batches('data/train/batch', include_cl_kadij_input)

In [3]:
#print an example batch to see what they look like
# (rows are daily timestamps; 'cl_kadij_out' is the prediction target,
# the remaining columns are the input features)
batch = train_batches[0]
batch.head()


Out[3]:
cl_kadij_out cl_lobith_3 cl_lobith_4 cl_lobith_5 cl_lobith_6 afv_lobith_6 afv_lobith_7 afv_lobith_8 afv_lobith_9 cum_stand_1 cum_stand_2
1987-11-21 21:00:00 0.471389 0.600541 0.617482 0.676992 0.730888 0.203889 0.176697 0.156819 0.146625 0.754001 0.491503
1987-11-22 21:00:00 0.407675 0.634160 0.600541 0.617482 0.676992 0.238779 0.203889 0.176697 0.156819 0.605016 0.754001
1987-11-23 21:00:00 0.379931 0.625995 0.634160 0.600541 0.617482 0.274423 0.238779 0.203889 0.176697 0.540835 0.605016
1987-11-24 21:00:00 0.370729 0.565758 0.625995 0.634160 0.600541 0.292554 0.274423 0.238779 0.203889 0.501897 0.540835
1987-11-25 21:00:00 0.345503 0.445461 0.565758 0.625995 0.634160 0.306019 0.292554 0.274423 0.238779 0.387065 0.501897

In [4]:
# convert the dataframes in train_batches to the correct input and output shape (and make it arrays)
def convert_to_keras_input(batches):
    """Split each batch DataFrame into Keras-ready numpy arrays.

    batches: list of DataFrames whose 'cl_kadij_out' column is the target
        and whose remaining columns are input features.

    Returns (input_batches, output_batches): each input array has shape
    (n_samples, 1, n_features) -- the middle axis is the single LSTM
    timestep -- and each output array has shape (n_samples,).
    """
    input_batches = []
    output_batches = []
    for batch in batches:
        # select the features by dropping the target column by name
        # (the original positional iloc[:, 1:] assumed it was first)
        input_df = batch.drop(columns='cl_kadij_out')
        output_df = batch['cl_kadij_out']
        # .to_numpy() replaces .as_matrix(), which was removed in pandas 1.0
        input_batches.append(input_df.to_numpy()[:, np.newaxis, :])
        output_batches.append(output_df.to_numpy())
    return (input_batches, output_batches)

(input_train_batches, output_train_batches) = convert_to_keras_input(train_batches)

In [5]:
# create a Keras model of the NN
n_cells = 4  # LSTM units per layer (also written to the model's info file)
# per-sample shape: (timesteps, features) = (1, n_features)
input_shape = input_train_batches[0].shape[1:]

# two stacked LSTM layers feeding a single linear output unit;
# the first layer returns its full sequence so the second can consume it
model = keras.models.Sequential([
    keras.layers.LSTM(n_cells, input_shape=input_shape, return_sequences=True),
    keras.layers.LSTM(n_cells),
    keras.layers.Dense(1),
])
model.compile(loss='mean_squared_error', optimizer='adam')

model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
lstm_1 (LSTM)                (None, 1, 4)              240       
_________________________________________________________________
lstm_2 (LSTM)                (None, 4)                 144       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 5         
=================================================================
Total params: 389
Trainable params: 389
Non-trainable params: 0
_________________________________________________________________

In [6]:
%%time
# train the model (this will take hours!)

n_epochs = 1000   # total passes over all batches (recorded in the info file)
report_at = 25    # print the loss of the first batch every this many epochs

for epoch in range(n_epochs):
    reporting = (epoch % report_at == 0)
    if reporting:
        print('At epoch', epoch)
    for j, (x, y) in enumerate(zip(input_train_batches, output_train_batches)):
        # each batch is fit in one full-batch gradient step; only the first
        # batch of a reporting epoch prints its loss (verbose=2)
        verbosity = 2 if (reporting and j == 0) else 0
        model.fit(x, y, epochs=1, batch_size=len(y), verbose=verbosity)


At epoch 0
Epoch 1/1
1s - loss: 0.1580
At epoch 25
Epoch 1/1
0s - loss: 9.2738e-04
At epoch 50
Epoch 1/1
0s - loss: 9.1536e-04
At epoch 75
Epoch 1/1
0s - loss: 8.7842e-04
At epoch 100
Epoch 1/1
0s - loss: 8.4336e-04
At epoch 125
Epoch 1/1
0s - loss: 8.3015e-04
At epoch 150
Epoch 1/1
0s - loss: 8.5183e-04
At epoch 175
Epoch 1/1
0s - loss: 8.5729e-04
At epoch 200
Epoch 1/1
0s - loss: 8.5354e-04
At epoch 225
Epoch 1/1
0s - loss: 8.4674e-04
At epoch 250
Epoch 1/1
0s - loss: 8.4350e-04
At epoch 275
Epoch 1/1
0s - loss: 8.3171e-04
At epoch 300
Epoch 1/1
0s - loss: 8.3943e-04
At epoch 325
Epoch 1/1
0s - loss: 8.0809e-04
At epoch 350
Epoch 1/1
0s - loss: 7.8491e-04
At epoch 375
Epoch 1/1
0s - loss: 7.9298e-04
At epoch 400
Epoch 1/1
0s - loss: 7.9735e-04
At epoch 425
Epoch 1/1
0s - loss: 7.9130e-04
At epoch 450
Epoch 1/1
0s - loss: 7.9296e-04
At epoch 475
Epoch 1/1
0s - loss: 7.9487e-04
At epoch 500
Epoch 1/1
0s - loss: 7.8669e-04
At epoch 525
Epoch 1/1
0s - loss: 7.8290e-04
At epoch 550
Epoch 1/1
0s - loss: 7.7617e-04
At epoch 575
Epoch 1/1
0s - loss: 7.7052e-04
At epoch 600
Epoch 1/1
0s - loss: 7.5806e-04
At epoch 625
Epoch 1/1
0s - loss: 7.4854e-04
At epoch 650
Epoch 1/1
0s - loss: 7.4485e-04
At epoch 675
Epoch 1/1
0s - loss: 7.3673e-04
At epoch 700
Epoch 1/1
0s - loss: 7.3419e-04
At epoch 725
Epoch 1/1
0s - loss: 7.2986e-04
At epoch 750
Epoch 1/1
0s - loss: 7.2416e-04
At epoch 775
Epoch 1/1
0s - loss: 7.2258e-04
At epoch 800
Epoch 1/1
0s - loss: 7.2318e-04
At epoch 825
Epoch 1/1
0s - loss: 7.1825e-04
At epoch 850
Epoch 1/1
0s - loss: 7.1810e-04
At epoch 875
Epoch 1/1
0s - loss: 7.1601e-04
At epoch 900
Epoch 1/1
0s - loss: 7.1230e-04
At epoch 925
Epoch 1/1
0s - loss: 7.1012e-04
At epoch 950
Epoch 1/1
0s - loss: 7.1202e-04
At epoch 975
Epoch 1/1
0s - loss: 7.1357e-04
Wall time: 2h 18min 42s

In [7]:
# write the model to a file, don't forget to change the output filename otherwise it won't write!
output_filename = 'models/2layer_lstm_4cells_noKadij'

from pathlib import Path
output_file = Path(output_filename + '.h5')
if output_file.is_file():
    # refuse to overwrite an existing model; pick a fresh name instead
    print('Please choose a different filename, this one already exists!')
else:
    model.save(output_filename + '.h5')
    # record the training configuration alongside the model; the context
    # manager guarantees the file is closed even if a write raises
    with open(output_filename + '_info.txt', 'w') as info_file:
        info_file.write('number of epochs: ' + str(n_epochs) + '\n')
        info_file.write('number of cells per layer: ' + str(n_cells) + '\n')
        info_file.write('including cl_kadij in input:' + str(include_cl_kadij_input) + '\n\n')
        # redirect the keras summary line-by-line into the info file
        model.summary(print_fn=lambda line: info_file.write(line + '\n'))

In [ ]: