In [1]:
# -*- coding: UTF-8 -*-
# Enable IPython's autoreload extension in mode 2 so that edits to the imported
# project modules are picked up without restarting the kernel.
#%load_ext autoreload
%reload_ext autoreload
%autoreload 2
In [3]:
from __future__ import division
import tensorflow as tf
from os import path
import numpy as np
import pandas as pd
import csv
from sklearn.model_selection import StratifiedShuffleSplit
from time import time
from matplotlib import pyplot as plt
import seaborn as sns
from mylibs.jupyter_notebook_helper import show_graph
from tensorflow.contrib import rnn
from tensorflow.contrib import learn
import shutil
from tensorflow.contrib.learn.python.learn import learn_runner
from IPython.display import Image
from IPython.core.display import HTML
from mylibs.tf_helper import getDefaultGPUconfig
from data_providers.binary_shifter_varlen_data_provider import \
BinaryShifterVarLenDataProvider
from data_providers.price_history_varlen_data_provider import PriceHistoryVarLenDataProvider
from models.model_05_price_history_rnn_varlen import PriceHistoryRnnVarlen
from sklearn.metrics import r2_score
from mylibs.py_helper import factors
from fastdtw import fastdtw
from scipy.spatial.distance import euclidean
from statsmodels.tsa.stattools import coint
In [4]:
# Notebook-wide settings shared by the cells below.
dtype = tf.float32  # passed to the model constructor as `dtype`
seed = 16011984  # fixed seed for the RandomState created on the next line
random_state = np.random.RandomState(seed=seed)  # passed to the model constructor as `rng`
config = getDefaultGPUconfig()  # project helper; presumably a TF session/GPU config -- TODO confirm
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
In [5]:
from common import get_or_run_nn
In [6]:
# Hyper-parameters for the runs below. experiment() forwards num_epochs, state_size,
# series_max_len, target_len and batch_size to model.run(); num_features is not
# referenced by any other visible cell.
num_epochs = 10  # epochs for the first run (rebound to 50 further down the notebook)
series_max_len = 60  # same value as input_seq_len in the commented-out dataset-creation call
num_features = 1 #just one here, the function we are predicting is one-dimensional
state_size = 400
target_len = 30  # same value as target_seq_len in the commented-out dataset-creation call
batch_size = 47  # NOTE(review): unexplained magic number -- presumably chosen to divide the dataset size; confirm
In [7]:
# Data file locations, relative to the notebook's working directory.
csv_in = '../price_history_03a_fixed_width.csv'  # only referenced by the commented-out dataset-creation call
npz_path = '../price_history_03_dp_60to30_from_fixed_len.npz'  # passed to the data provider and to model.run()
In [8]:
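# One-off dataset creation, kept commented out for reference. Presumably it builds the
# .npz file at `npz_path` from `csv_in`; uncomment to regenerate that file.
# XX, YY, sequence_lens, seq_mask = PriceHistoryVarLenDataProvider.createAndSaveDataset(
# csv_in=csv_in,
# npz_out=npz_path,
# input_seq_len=60, target_seq_len=30)
# XX.shape, YY.shape, sequence_lens.shape, seq_mask.shape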
In [9]:
# Load the dataset stored at `npz_path`. The predicate given as `filteringSeqLens`
# is true for values >= target_len; presumably the provider applies it to each
# sequence length to decide which samples to keep -- TODO confirm in the provider.
dp = PriceHistoryVarLenDataProvider(
    npz_path=npz_path,
    filteringSeqLens=lambda seq_len: seq_len >= target_len,
)
# Echo the shapes of the loaded arrays as the cell output.
tuple(arr.shape for arr in (dp.inputs, dp.targets, dp.sequence_lengths, dp.sequence_masks))
Out[9]:
In [10]:
# Instantiate the project model wrapper with the shared RandomState, dtype and GPU config.
model = PriceHistoryRnnVarlen(rng=random_state, dtype=dtype, config=config)
In [11]:
# Build the model graph for a GRU cell with the hyper-parameters defined above.
# In the visible cells the result is only passed to show_graph().
graph = model.getGraph(
    batch_size=batch_size,
    state_size=state_size,
    rnn_cell=PriceHistoryRnnVarlen.RNN_CELLS.GRU,
    target_len=target_len,
    series_max_len=series_max_len,
)
In [12]:
# Display the graph through the project's notebook helper (rendering details live in mylibs).
show_graph(graph)
In [12]:
# Cell type that experiment() forwards to model.run().
rnn_cell = PriceHistoryRnnVarlen.RNN_CELLS.GRU
# Echo the settings of this run as the cell output.
num_epochs, state_size, batch_size
Out[12]:
In [13]:
def experiment():
    """Call ``model.run`` with the notebook's current global settings.

    Every argument is read from module-level names at call time (num_epochs,
    rnn_cell, state_size, series_max_len, target_len, npz_path, batch_size),
    so rebinding any of them changes what the next call does.

    Returns:
        The 2-tuple ``(stats, predictions)`` unpacked from ``model.run(...)``.
    """
    run_kwargs = dict(
        epochs=num_epochs,
        rnn_cell=rnn_cell,
        state_size=state_size,
        series_max_len=series_max_len,
        target_len=target_len,
        npz_path=npz_path,
        batch_size=batch_size,
    )
    stats, predictions = model.run(**run_kwargs)
    return stats, predictions
In [14]:
# Obtain the results of the 10-epoch run. Judging by its name, get_or_run_nn either
# loads results stored under `filename` or calls experiment() -- confirm in common.py.
dyn_stats, preds_dict = get_or_run_nn(
    experiment,
    filename='002_rnn_gru_60to30',
)
In [15]:
# Plot the statistics object returned by the run (plotStats is a project method;
# what exactly it draws is not visible from this notebook).
dyn_stats.plotStats()
plt.show()
In [16]:
# Per-sample R^2 between each target series and its prediction.
# The loop variable is deliberately NOT called `ind`: under Python 2 a list-comprehension
# variable leaks into the notebook namespace, so re-running this cell would silently
# overwrite the global `ind` (the worst-sample index) with the last loop index.
# NOTE(review): assumes preds_dict is keyed by the same positions as dp.targets -- confirm.
r2_scores = [r2_score(y_true=dp.targets[sample_ind], y_pred=preds_dict[sample_ind])
             for sample_ind in range(len(dp.targets))]
In [17]:
# Index of the sample with the lowest (worst) R^2 score.
ind = np.argmin(r2_scores)
ind
Out[17]:
In [18]:
# Targets and predictions for that worst-scoring sample.
reals = dp.targets[ind]
preds = preds_dict[ind]
In [19]:
# R^2 of the selected sample, recomputed from `reals`/`preds`.
r2_score(y_true=reals, y_pred=preds)
Out[19]:
In [20]:
# Plot the flattened input series of the sample selected by `ind`.
# `sns.tsplot` was deprecated in seaborn 0.9 and later removed; for a single 1-D
# series it only drew a line over the index, which a plain matplotlib plot reproduces
# on every seaborn version.
fig, ax = plt.subplots()
ax.plot(dp.inputs[ind].flatten())
ax.set(title='Input series of sample %d' % ind, xlabel='time step', ylabel='input value')
plt.show()
Out[20]:
In [21]:
# Overlay targets (blue) and predictions (green) of the currently selected sample.
fig = plt.figure(figsize=(15, 6))
for series, colour in ((reals, 'b'), (preds, 'g')):
    plt.plot(series, colour)
plt.legend(['reals', 'preds'])
plt.show()
In [22]:
%%time
dtw_scores = [fastdtw(dp.targets[ind], preds_dict[ind])[0]
for ind in range(len(dp.targets))]
In [23]:
# Mean DTW distance over all samples.
np.mean(dtw_scores)
Out[23]:
In [24]:
# statsmodels cointegration test between predictions and targets.
# NOTE: `preds`/`reals` are the single worst-R^2 sample selected earlier, not the whole set.
coint(preds, reals)
Out[24]:
In [25]:
# Pick a sample for a visual spot check and overlay its targets (blue) and predictions (green).
# The index comes from a dedicated generator seeded with the notebook-wide `seed` rather
# than the unseeded global np.random, so Restart & Run All always shows the same sample.
# It is kept separate from `random_state` so that drawing here cannot perturb the model's RNG.
sample_rng = np.random.RandomState(seed=seed)
cur_ind = sample_rng.randint(len(dp.targets))
# NOTE: this rebinds `reals`/`preds`, which previously held the worst-R^2 sample.
reals = dp.targets[cur_ind]
preds = preds_dict[cur_ind]
fig = plt.figure(figsize=(15,6))
plt.plot(reals, 'b')
plt.plot(preds, 'g')
plt.legend(['reals','preds'])
plt.title('sample %d' % cur_ind)
plt.show()
In [27]:
# Settings for the second run: same GRU cell, 50 epochs.
# NOTE: this rebinds the global `num_epochs`, which experiment() reads at call time,
# so any experiment() call made after this cell uses 50 epochs.
rnn_cell = PriceHistoryRnnVarlen.RNN_CELLS.GRU
num_epochs = 50
# Echo the unchanged settings as the cell output.
state_size, batch_size
Out[27]:
In [28]:
def experiment():
    """Call ``model.run`` with the notebook's current global settings.

    This re-declares the same wrapper that is defined earlier in the notebook.
    Because the settings (num_epochs, rnn_cell, state_size, series_max_len,
    target_len, npz_path, batch_size) are looked up at call time, the call made
    after ``num_epochs`` was rebound to 50 picks up the new value.

    Returns:
        The 2-tuple ``(run_stats, run_predictions)`` unpacked from ``model.run(...)``.
    """
    outcome = model.run(
        epochs=num_epochs,
        rnn_cell=rnn_cell,
        state_size=state_size,
        series_max_len=series_max_len,
        target_len=target_len,
        npz_path=npz_path,
        batch_size=batch_size,
    )
    run_stats, run_predictions = outcome
    return run_stats, run_predictions
In [29]:
# Obtain the results of the 50-epoch run under its own filename, so they are kept apart
# from the 10-epoch results. NOTE: this rebinds `dyn_stats` and `preds_dict`.
dyn_stats, preds_dict = get_or_run_nn(
    experiment,
    filename='002_rnn_gru_60to30_50epochs',
)
In [30]:
# Plot the statistics object returned by the 50-epoch run (plotStats is a project method;
# what exactly it draws is not visible from this notebook).
dyn_stats.plotStats()
plt.show()
In [31]:
# Per-sample R^2 between each target series and its prediction for the 50-epoch run.
# The loop variable is deliberately NOT called `ind`: under Python 2 a list-comprehension
# variable leaks into the notebook namespace, so re-running this cell would silently
# overwrite the global `ind` (the worst-sample index) with the last loop index.
# NOTE(review): assumes preds_dict is keyed by the same positions as dp.targets -- confirm.
r2_scores = [r2_score(y_true=dp.targets[sample_ind], y_pred=preds_dict[sample_ind])
             for sample_ind in range(len(dp.targets))]
In [32]:
# Index of the sample with the lowest (worst) R^2 score in the 50-epoch run.
ind = np.argmin(r2_scores)
ind
Out[32]:
In [33]:
# Targets and predictions for that worst-scoring sample.
reals = dp.targets[ind]
preds = preds_dict[ind]
In [34]:
# R^2 of the selected sample, recomputed from `reals`/`preds`.
r2_score(y_true=reals, y_pred=preds)
Out[34]:
In [35]:
# Plot the flattened input series of the sample selected by `ind`.
# `sns.tsplot` was deprecated in seaborn 0.9 and later removed; for a single 1-D
# series it only drew a line over the index, which a plain matplotlib plot reproduces
# on every seaborn version.
fig, ax = plt.subplots()
ax.plot(dp.inputs[ind].flatten())
ax.set(title='Input series of sample %d' % ind, xlabel='time step', ylabel='input value')
plt.show()
Out[35]:
In [36]:
# Overlay targets (blue) and predictions (green) of the currently selected sample.
fig = plt.figure(figsize=(15, 6))
for series, colour in ((reals, 'b'), (preds, 'g')):
    plt.plot(series, colour)
plt.legend(['reals', 'preds'])
plt.show()
In [37]:
%%time
dtw_scores = [fastdtw(dp.targets[ind], preds_dict[ind])[0]
for ind in range(len(dp.targets))]
In [38]:
# Mean DTW distance over all samples for the 50-epoch run.
np.mean(dtw_scores)
Out[38]:
In [39]:
# statsmodels cointegration test between predictions and targets.
# NOTE: `preds`/`reals` are the single worst-R^2 sample selected earlier, not the whole set.
coint(preds, reals)
Out[39]:
In [42]:
# Pick a sample for a visual spot check and overlay its targets (blue) and predictions (green).
# The index comes from a dedicated generator seeded with the notebook-wide `seed` rather
# than the unseeded global np.random, so Restart & Run All always shows the same sample.
# It is kept separate from `random_state` so that drawing here cannot perturb the model's RNG.
sample_rng = np.random.RandomState(seed=seed)
cur_ind = sample_rng.randint(len(dp.targets))
# NOTE: this rebinds `reals`/`preds`, which previously held the worst-R^2 sample.
reals = dp.targets[cur_ind]
preds = preds_dict[cur_ind]
fig = plt.figure(figsize=(15,6))
plt.plot(reals, 'b')
plt.plot(preds, 'g')
plt.legend(['reals','preds'])
plt.title('sample %d' % cur_ind)
plt.show()
In [ ]: