In [1]:

    
# -*- coding: UTF-8 -*-
# Notebook setup: enable IPython's autoreload extension so that edits to
# imported modules are picked up without restarting the kernel.
#%load_ext autoreload
# %reload_ext is used instead of the %load_ext line above so that this cell
# can be re-run when the extension is already loaded.
%reload_ext autoreload
# Mode 2: reload all modules before executing code.
%autoreload 2

Reference: https://r2rt.com/recurrent-neural-networks-in-tensorflow-iii-variable-length-sequences.html



In [2]:

    
from __future__ import division
import tensorflow as tf
from os import path
import numpy as np
import pandas as pd
import csv
from sklearn.model_selection import StratifiedShuffleSplit
from time import time
from matplotlib import pyplot as plt
import seaborn as sns
from mylibs.jupyter_notebook_helper import show_graph
from tensorflow.contrib import rnn
from tensorflow.contrib import learn
import shutil
from tensorflow.contrib.learn.python.learn import learn_runner
from IPython.display import Image
from IPython.core.display import HTML
from mylibs.tf_helper import getDefaultGPUconfig
from data_providers.binary_shifter_varlen_data_provider import \
    BinaryShifterVarLenDataProvider
from data_providers.price_history_varlen_data_provider import PriceHistoryVarLenDataProvider
from models.price_history_rnn_varlen import PriceHistoryRnnVarlen
from sklearn.metrics import r2_score



In [3]:

    
dtype = tf.float32
seed = 16011984
random_state = np.random.RandomState(seed=seed)
config = getDefaultGPUconfig()
%matplotlib inline

Step 0 - Hyperparameters



In [4]:

    
# --- training schedule ---
num_epochs = 10
# 58 divides the 14036 instances reported by the data provider below evenly
# (14036 / 58 = 242) -- presumably chosen for that reason; TODO confirm.
batch_size = 58

# --- sequence geometry ---
series_max_len = 60  # length of every (padded) input series
target_len = 30      # number of target steps per instance
num_features = 1  #just one here, the function we are predicting is one-dimensional
# NOTE(review): num_features is not referenced by any other cell visible here.

# --- model capacity ---
state_size = 400     # last dimension of the rnn_outputs tensor printed by getGraph



In [5]:

    
# num_batches = series_max_len // batch_size
# print series_max_len / batch_size
# num_batches

Step 1 - Collect the data (and/or generate it)



In [6]:

    
# Locations of the dataset files, relative to the notebook's working directory.
# csv_in is only referenced by the commented-out dataset-creation cell below;
# npz_path is what the data provider and model.run actually receive.
csv_in = '../price_history_02a_fixed_width.csv'
npz_path = '../price_history_02_dp_60to30_from_fixed_len.npz'



In [7]:

    
# XX, YY, sequence_lens, seq_mask = PriceHistoryVarLenDataProvider.createAndSaveDataset(
#     csv_in=csv_in,
#     npz_out=npz_path,
#     input_seq_len=60, target_seq_len=30)
# XX.shape, YY.shape, sequence_lens.shape, seq_mask.shape



In [8]:

    
# Build the data provider from the saved npz file. The predicate passed as
# filteringSeqLens accepts sequence lengths of at least target_len
# (presumably the provider drops the instances that fail it -- TODO confirm).
dp = PriceHistoryVarLenDataProvider(
    npz_path=npz_path,
    filteringSeqLens=lambda xx: xx >= target_len)

# Show the shapes of the four arrays the provider exposes.
tuple(arr.shape for arr in (dp.inputs, dp.targets,
                            dp.sequence_lengths, dp.sequence_masks))









    Out[8]:





((14036, 60, 1), (14036, 30), (14036,), (14036, 60))

Step 2 - Build model



In [10]:

    
# Instantiate the model wrapper with the shared RNG, dtype and session config
# defined in the configuration cell.
model = PriceHistoryRnnVarlen(
    config=config,
    dtype=dtype,
    rng=random_state)



In [11]:

    
# Build the TensorFlow graph for the chosen hyperparameters; the call prints
# the learning rate and the intermediate tensors shown in the cell output.
graph = model.getGraph(
    series_max_len=series_max_len,
    target_len=target_len,
    state_size=state_size,
    batch_size=batch_size)









    



learning rate: 0.001000
rnn_outputs:
Tensor("rnn_layer/rnn/transpose:0", shape=(58, 60, 400), dtype=float32)

Tensor("gathering/GatherNd:0", shape=(58, 30, 400), dtype=float32)

Tensor("flattening:0", shape=(1740, 400), dtype=float32)
Tensor("readout_layer/add:0", shape=(1740, 1), dtype=float32)

Tensor("predictions/Reshape:0", shape=(58, 30), dtype=float32)

Tensor("huber_loss/Select:0", shape=(58, 30), dtype=float32)

Tensor("huber_loss/Mean:0", shape=(), dtype=float32)



In [12]:

    
# Visualise the graph built above with the project helper imported from
# mylibs.jupyter_notebook_helper (presumably renders it inline -- TODO confirm).
show_graph(graph)

Step 3 - Train the network



In [14]:

    
# Echo the hyperparameters that the training run below will use.
num_epochs, state_size, batch_size









    Out[14]:





(10, 400, 58)



In [15]:

    
# Train for num_epochs epochs. The call returns the collected training
# statistics and a collection of predictions that later cells index by
# instance number.
# NOTE(review): model.run receives npz_path rather than `dp`, so it presumably
# loads the data itself -- confirm its instance ordering/filtering matches `dp`,
# since later cells compare predictions_dict[i] against dp.targets[i].
dynStats, predictions_dict = model.run(
    npz_path=npz_path,
    epochs=num_epochs,
    batch_size=batch_size,
    state_size=state_size,
    series_max_len=series_max_len,
    target_len=target_len)









    



epochs: 10
End Epoch 01 (18.055 secs): err(train) = 13.4618
End Epoch 02 (17.666 secs): err(train) = 12.8156
End Epoch 03 (17.653 secs): err(train) = 12.6635
End Epoch 04 (17.656 secs): err(train) = 12.5709
End Epoch 05 (17.669 secs): err(train) = 12.5482
End Epoch 06 (17.913 secs): err(train) = 12.4407
End Epoch 07 (17.700 secs): err(train) = 12.8802
End Epoch 08 (17.684 secs): err(train) = 12.5011
End Epoch 09 (17.671 secs): err(train) = 12.3853
End Epoch 10 (17.669 secs): err(train) = 12.3945



In [16]:

    
# Plot the statistics gathered during training. Per the saved output, the call
# returns a pair of lists: the matplotlib figures and their axes.
dynStats.plotStats()









    Out[16]:





([<matplotlib.figure.Figure at 0x7fe2ac4b71d0>],
 [<matplotlib.axes._subplots.AxesSubplot at 0x7fe2ac4358d0>])



In [17]:

    
# Sanity check: shape of a single input instance -- (60, 1) per the saved
# output, i.e. series_max_len steps by num_features.
dp.inputs[0].shape









    Out[17]:





(60, 1)



In [62]:

    
# Plot the input series of instance 2629 -- the index that
# np.argmin(r2_scores) reports at the end of this notebook, i.e. the instance
# with the lowest R^2.
# sns.tsplot was deprecated and later removed from seaborn; for a single 1-D
# series a plain matplotlib line plot draws the same curve and works on every
# seaborn version.
plt.plot(dp.inputs[2629].flatten())
plt.show()









    Out[62]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fe27572a350>



In [49]:

    
#data = np.hstack( (dp.targets[0][np.newaxis].T, predictions_dict[0][np.newaxis].T) ).shape
#data = {'real': dp.targets[0], 'preds': predictions_dict[0] }
# tsdf = pd.DataFrame(data)
# tsdf.shape



In [59]:

    
# Index of the instance inspected in the following cells. 2629 is the value
# np.argmin(r2_scores) reports at the end of this notebook (lowest R^2); the
# inline alternative, 14036 - 1, is presumably the last of the 14036 instances.
ind = 2629 #14036 - 1



In [60]:

    
# Pull out the ground-truth targets and the model's predictions for the
# instance selected by `ind`.
reals, preds = dp.targets[ind], predictions_dict[ind]



In [61]:

    
# Overlay real targets (blue) and predictions (green) for the selected
# instance; the legend entries come from the per-line labels.
fig = plt.figure(figsize=(15, 6))
plt.plot(reals, 'b', label='reals')
plt.plot(preds, 'g', label='preds')
plt.legend()
plt.show()



In [51]:

    
# R^2 of the predictions against the real targets for the selected instance
# (sklearn's argument order is y_true, y_pred).
# NOTE(review): the saved output carries execution count 51, i.e. it was
# produced before `ind` was set at count 59, so it may belong to a different
# instance -- re-run to confirm.
r2_score(reals, preds)









    Out[51]:





-2.0562176569109112



In [52]:

    
# Per-instance R^2 between the real targets and the model's predictions, in
# instance order, so that np.argmin / np.argmax yield instance indices.
#
# The loop variable is deliberately NOT called `ind`: in Python 2 a list
# comprehension leaks its loop variable into the enclosing scope, which would
# silently overwrite the notebook-level `ind` with len(dp.targets) - 1.
# A generator expression never leaks its variable, in Python 2 or 3.
r2_scores = list(
    r2_score(y_true=dp.targets[ii], y_pred=predictions_dict[ii])
    for ii in range(len(dp.targets))
)



In [58]:

    
# Index of the instance with the lowest R^2, i.e. the worst prediction
# according to this metric.
np.argmin(r2_scores)









    Out[58]:





2629



In [ ]: