In [1]:
# -*- coding: UTF-8 -*-
#%load_ext autoreload
%reload_ext autoreload
%autoreload 2
In [2]:
from __future__ import division
import tensorflow as tf
from os import path
import numpy as np
import pandas as pd
import csv
from sklearn.model_selection import StratifiedShuffleSplit
from time import time
from matplotlib import pyplot as plt
import seaborn as sns
from mylibs.jupyter_notebook_helper import show_graph
from tensorflow.contrib import rnn
from tensorflow.contrib import learn
import shutil
from tensorflow.contrib.learn.python.learn import learn_runner
from mylibs.tf_helper import getDefaultGPUconfig
from sklearn.metrics import r2_score
from mylibs.py_helper import factors
from fastdtw import fastdtw
from scipy.spatial.distance import euclidean
from statsmodels.tsa.stattools import coint
from common import get_or_run_nn
from data_providers.price_history_dummy_seq2seq_data_provider \
import PriceHistoryDummySeq2SeqDataProvider
from models.price_history_dummy_seq2seq import PriceHistoryDummySeq2Seq
In [3]:
dtype = tf.float32  # floating-point precision used for all model tensors
seed = 16011984  # fixed seed for reproducibility
random_state = np.random.RandomState(seed=seed)  # seeded RNG handed to the model
config = getDefaultGPUconfig()  # TF session config (GPU options) from helper lib
%matplotlib inline
Note: in this setup, `vocab_size` and the maximum sequence length are the SAME thing.
In translation models the decoder's RNN hidden-unit count usually matches the encoder's, but for our problem there does not appear to be such a relationship; we can experiment with that later — it is not a priority right now.
In [4]:
num_epochs = 10
num_features = 1  # univariate series: one value per time step
num_units = 400 #state size
input_len = 60  # input (encoder) sequence length
target_len = 30  # target (decoder) sequence length
batch_size = 47
#trunc_backprop_len = ??
In [5]:
npz_path = '../price_history_03_dp_60to30_from_fixed_len.npz'  # preprocessed 60->30 dataset
In [6]:
# Data provider yielding (input, target) mini-batches from the npz file
dp = PriceHistoryDummySeq2SeqDataProvider(npz_path=npz_path, batch_size=batch_size,
)
dp.inputs.shape, dp.targets.shape
Out[6]:
In [7]:
# Peek at one mini-batch; presumably (inputs, targets, decoder-side extra) — TODO confirm
aa, bb, cc = dp.next()
aa.shape, bb.shape, cc.shape
Out[7]:
In [10]:
# Instantiate the dummy seq2seq model with the seeded RNG and GPU config
model = PriceHistoryDummySeq2Seq(rng=random_state, dtype=dtype, config=config)
In [11]:
# Build the static TF graph for the configured lengths / state size
graph = model.getGraph(batch_size=batch_size,
num_units=num_units,
input_len=input_len,
target_len=target_len)
In [12]:
#show_graph(graph)
RECALL: the baseline Huber loss for the current problem is around 4; anything above 4 should be considered a major error.
In [15]:
# BUG FIX: `PriceHistorySeq2Seq` is never imported in this notebook and raises
# a NameError; the class imported at the top is `PriceHistoryDummySeq2Seq`.
rnn_cell = PriceHistoryDummySeq2Seq.RNN_CELLS.BASIC_RNN  # plain (vanilla) RNN cell
num_epochs = 50  # raised from the initial 10 for the real training run
num_epochs, num_units, batch_size  # echoed as the cell's output
Out[15]:
In [16]:
def experiment():
    """Train/evaluate the dummy seq2seq model with the hyper-parameters currently in scope."""
    run_kwargs = dict(
        npz_path=npz_path,
        epochs=num_epochs,
        batch_size=batch_size,
        num_units=num_units,
        input_len=input_len,
        target_len=target_len,
        rnn_cell=rnn_cell,
    )
    return model.run(**run_kwargs)
In [17]:
# Direct (uncached) training run; the cached variant via get_or_run_nn is commented out
dyn_stats = experiment()
#dyn_stats, preds_dict = get_or_run_nn(experiment, filename='003_rnn_slidewin_60to1')
In [18]:
dyn_stats.plotStats()
plt.show()
In [15]:
# BUG FIX: `PriceHistorySeq2Seq` is undefined here (NameError); the imported
# class is `PriceHistoryDummySeq2Seq`. (This cell duplicates the earlier config cell.)
rnn_cell = PriceHistoryDummySeq2Seq.RNN_CELLS.BASIC_RNN
num_epochs = 50
num_epochs, num_units, batch_size  # echoed as the cell's output
Out[15]:
In [21]:
def experiment():
    """Launch model.run for the 60->30 seq2seq task using the notebook-level settings."""
    return model.run(npz_path=npz_path,
                     epochs=num_epochs,
                     batch_size=batch_size,
                     num_units=num_units,
                     input_len=input_len,
                     target_len=target_len,
                     rnn_cell=rnn_cell)
In [22]:
# Run the experiment or load its cached results, keyed by filename
dyn_stats, preds_dict = get_or_run_nn(experiment, filename='004_rnn_dummy_seq2seq_60to30')
In [23]:
dyn_stats.plotStats()
plt.show()
In [24]:
# Per-sequence R^2 between true targets and predictions
r2_scores = [r2_score(y_true=dp.targets[ind], y_pred=preds_dict[ind])
for ind in range(len(dp.targets))]
In [25]:
# Index of the WORST-predicted sequence (lowest R^2)
ind = np.argmin(r2_scores)
ind
Out[25]:
In [26]:
reals = dp.targets[ind]
preds = preds_dict[ind]
In [27]:
r2_score(y_true=reals, y_pred=preds)
Out[27]:
In [28]:
# Input history of the worst case (NOTE: sns.tsplot is deprecated/removed in newer seaborn)
sns.tsplot(data=dp.inputs[ind].flatten())
Out[28]:
In [29]:
# Worst-case true vs predicted curves, using the explicit axes interface
fig, ax = plt.subplots(figsize=(15, 6))
ax.plot(reals, 'b', label='reals')
ax.plot(preds, 'g', label='preds')
ax.legend()
plt.show()
In [30]:
%%time
# Dynamic-time-warping distance per sequence (fastdtw returns (distance, path))
dtw_scores = [fastdtw(dp.targets[ind], preds_dict[ind])[0]
for ind in range(len(dp.targets))]
In [31]:
np.mean(dtw_scores)
Out[31]:
In [32]:
# Engle-Granger cointegration test between the worst-case preds and reals
coint(preds, reals)
Out[32]:
In [37]:
# Qualitative check: plot a randomly chosen target sequence against its prediction
sample_idx = np.random.randint(len(dp.targets))
reals = dp.targets[sample_idx]
preds = preds_dict[sample_idx]
fig, ax = plt.subplots(figsize=(15, 6))
ax.plot(reals, 'b', label='reals')
ax.plot(preds, 'g', label='preds')
ax.legend()
plt.show()
In [39]:
# BUG FIX: `PriceHistorySeq2Seq` is undefined (NameError); use the imported
# `PriceHistoryDummySeq2Seq`. This run switches to GRU cells with fewer epochs.
rnn_cell = PriceHistoryDummySeq2Seq.RNN_CELLS.GRU
num_epochs = 20
num_epochs, num_units, batch_size  # echoed as the cell's output
Out[39]:
In [42]:
def experiment():
    """Run the model; the GRU configuration is picked up from the `rnn_cell` global."""
    kwargs = {
        'npz_path': npz_path,
        'epochs': num_epochs,
        'batch_size': batch_size,
        'num_units': num_units,
        'input_len': input_len,
        'target_len': target_len,
        'rnn_cell': rnn_cell,
    }
    return model.run(**kwargs)
In [43]:
#dyn_stats = experiment()
# Cached run for the GRU variant (separate cache file from the basic-RNN run)
dyn_stats, preds_dict = get_or_run_nn(experiment, filename='004_rnn_dummy_seq2seq_gru_60to30')
In [44]:
dyn_stats.plotStats()
plt.show()
In [45]:
# Per-sequence R^2 for the GRU run
r2_scores = [r2_score(y_true=dp.targets[ind], y_pred=preds_dict[ind])
for ind in range(len(dp.targets))]
In [46]:
# Worst-predicted sequence (lowest R^2) for the GRU run
ind = np.argmin(r2_scores)
ind
Out[46]:
In [47]:
reals = dp.targets[ind]
preds = preds_dict[ind]
In [48]:
r2_score(y_true=reals, y_pred=preds)
Out[48]:
In [49]:
# Input history of the worst case
sns.tsplot(data=dp.inputs[ind].flatten())
Out[49]:
In [50]:
# Worst-case true vs predicted curves
fig = plt.figure(figsize=(15,6))
plt.plot(reals, 'b')
plt.plot(preds, 'g')
plt.legend(['reals','preds'])
plt.show()
In [51]:
%%time
# DTW distance per sequence for the GRU run
dtw_scores = [fastdtw(dp.targets[ind], preds_dict[ind])[0]
for ind in range(len(dp.targets))]
In [52]:
np.mean(dtw_scores)
Out[52]:
In [53]:
# Cointegration test between the GRU worst-case preds and reals
coint(preds, reals)
Out[53]:
In [59]:
# Qualitative check: random target sequence vs its GRU prediction
cur_ind = np.random.randint(len(dp.targets))
reals = dp.targets[cur_ind]
preds = preds_dict[cur_ind]
fig = plt.figure(figsize=(15,6))
plt.plot(reals, 'b')
plt.plot(preds, 'g')
plt.legend(['reals','preds'])
plt.show()
In [ ]: