In [1]:
# -*- coding: UTF-8 -*-
#%load_ext autoreload
# Reload the autoreload extension (safe if already loaded) and use mode 2:
# re-import every module before executing code, so edits to the local .py
# packages (mylibs, models, data_providers) are picked up without a kernel restart.
%reload_ext autoreload
%autoreload 2
In [2]:
from __future__ import division
import tensorflow as tf
from os import path, remove
import numpy as np
import pandas as pd
import csv
from sklearn.model_selection import StratifiedShuffleSplit
from time import time
from matplotlib import pyplot as plt
import seaborn as sns
from mylibs.jupyter_notebook_helper import show_graph, renderStatsList, renderStatsCollection, \
renderStatsListWithLabels, renderStatsCollectionOfCrossValids, plot_res_gp, my_plot_convergence
from tensorflow.contrib import rnn
from tensorflow.contrib import learn
import shutil
from tensorflow.contrib.learn.python.learn import learn_runner
from mylibs.tf_helper import getDefaultGPUconfig
from sklearn.metrics import r2_score
from mylibs.py_helper import factors
from fastdtw import fastdtw
from collections import OrderedDict
from scipy.spatial.distance import euclidean
from statsmodels.tsa.stattools import coint
from common_33 import get_or_run_nn
from skopt.space.space import Integer, Real
from skopt import gp_minimize
from skopt.plots import plot_convergence
import pickle
import inspect
import dill
import sys
from models.model_33_price_history_autoencoder import PriceHistoryAutoencoder
from data_providers.data_provider_33_price_history_autoencoder import PriceHistoryAutoEncDataProvider
#from gp_opt.price_history_27_gp_opt import PriceHistoryGpOpt
In [3]:
# Notebook-wide configuration: floating-point precision, reproducible RNG,
# GPU session config, and inline matplotlib rendering.
dtype = tf.float32
seed = 16011984  # fixed seed (looks like a date, 16/01/1984) for reproducibility
random_state = np.random.RandomState(seed=seed)
config = getDefaultGPUconfig()  # TF session config from mylibs.tf_helper
n_jobs = 1
%matplotlib inline
In [4]:
# Integer factors of 689 (= 13 * 53) — presumably the number of sequences in
# the dataset, used to pick batch sizes that divide it evenly (13 and 53 are
# the batch sizes used further down). TODO confirm dataset size.
factors(689)
Out[4]:
In [5]:
# Fixed length of every price-history time series fed to the autoencoder
# (matches the 682-wide tensors in the graph printout below).
max_seq_len = 682
In [6]:
#full_train_size = 55820
#train_size = 55800
#small_train_size = 6000 #just because of performance reasons, no statistics behind this decision
#test_size = 6200
In [7]:
# Base data folder, kept outside the repository (Dropbox).
data_path = '../../../../Dropbox/data'
In [8]:
# Subfolder holding the autoencoder-specific price-history arrays.
phae_path = data_path + '/price_hist_autoencoder'
In [9]:
# Raw CSV of trimmed price-history sequences; assert existence early so a
# missing file fails here rather than deep inside the data provider.
csv_in = '../price_history_03_seq_start_suddens_trimmed.csv'
assert path.isfile(csv_in)
In [10]:
# Unprocessed full sequences.
npz_unprocessed = phae_path + '/price_history_full_seqs.npz'
assert path.isfile(npz_unprocessed)
In [11]:
# Per-sequence date arrays.
npz_dates = phae_path + '/price_history_full_seqs_dates.npz'
assert path.isfile(npz_dates)
In [12]:
# Normalized train split produced by earlier preprocessing.
npz_train = phae_path + '/price_history_seqs_dates_normed_train.npz'
assert path.isfile(npz_train)
In [13]:
# Normalized test split.
npz_test = phae_path + '/price_history_seqs_dates_normed_test.npz'
assert path.isfile(npz_test)
In [14]:
# Common prefix of the train/test npz files; '_train.npz' / '_test.npz' is
# appended where needed (see the npz_test reconstruction further down).
npz_path = npz_train[:-len('_train.npz')]
In [15]:
# Sanity check: list every array stored in the train npz with its shape.
# (Python 2 notebook: print statement and dict-style .iteritems().)
for key, val in np.load(npz_train).iteritems():
    print key, ",", val.shape
In [16]:
# Instantiate the data provider (batch_size=1 only to inspect shapes) and
# print the shape of each array it serves. with_EOS=False — presumably no
# end-of-sequence marker is appended; TODO confirm in the provider class.
dp = PriceHistoryAutoEncDataProvider(npz_path=npz_path, batch_size=1, with_EOS=False)
for data in dp.datalist:
    print data.shape
In [17]:
# for item in dp.next():
# print item.shape
In [18]:
# Build the autoencoder model wrapper with the seeded RNG, dtype and GPU config.
model = PriceHistoryAutoencoder(rng=random_state, dtype=dtype, config=config)
In [19]:
# graph = model.getGraph(batch_size=batch_size,
# enc_num_units = 10,
# dec_num_units = 10,
# ts_len=max_seq_len)
targets Tensor("data/strided_slice:0", shape=(50, 682), dtype=float32)
Tensor("encoder_rnn_layer/rnn/while/Exit_2:0", shape=(?, 10), dtype=float32)
Tensor("encoder_state_out_process/Elu:0", shape=(?, 2), dtype=float32)
Tensor("decoder_state_in_process/Elu:0", shape=(?, 10), dtype=float32)
Tensor("decoder_rnn_layer/rnn/transpose:0", shape=(50, 682, 10), dtype=float32)
Tensor("decoder_outs/Reshape:0", shape=(34100, 10), dtype=float32)
Tensor("readout_affine/Identity:0", shape=(34100, 1), dtype=float32) Tensor("readout_affine/Reshape:0", shape=(50, 682), dtype=float32)
Tensor("error/mul_1:0", shape=(50, 682), dtype=float32)
Tensor("error/Mean:0", shape=(), dtype=float32) Tensor("error/Mean:0", shape=(), dtype=float32)
In [20]:
#show_graph(graph)
In [21]:
def experiment(epochs=100, batch_size=13, enc_num_units=400,
               dec_num_units=400, learning_rate=1e-4):
    """Train the price-history autoencoder without gathering predictions.

    Hyperparameters are exposed as keyword arguments (defaults reproduce the
    original hard-coded run) so variations can be tried without editing the
    function body.  batch_size=13 divides the 689 sequences evenly.

    Returns whatever model.run returns when preds_gather_enabled=False
    (the training statistics object used below).
    """
    return model.run(npz_path=npz_path,
                     epochs=epochs,
                     batch_size=batch_size,
                     enc_num_units=enc_num_units,
                     dec_num_units=dec_num_units,
                     ts_len=max_seq_len,
                     learning_rate=learning_rate,
                     preds_gather_enabled=False)

dyn_stats = experiment()
In [22]:
# Plot the statistics collected during the 100-epoch training run above.
dyn_stats.plotStats()
Out[22]:
In [21]:
# Fresh model instance for the evaluation run below.
model = PriceHistoryAutoencoder(rng=random_state, dtype=dtype, config=config)
In [22]:
# Rebuild the test-split filename from the common prefix, check it exists,
# and display its absolute path as the cell output.
npz_test = npz_path + '_test.npz'
assert path.isfile(npz_test)
path.abspath(npz_test)
Out[22]:
In [23]:
def experiment(epochs=1, batch_size=53, enc_num_units=50,
               dec_num_units=50, learning_rate=1e-4,
               preds_gather_enabled=True):
    """Short evaluation run of the autoencoder with prediction gathering on.

    Hyperparameters are keyword arguments (defaults reproduce the original
    hard-coded run).  batch_size=53 divides the 689 sequences evenly.

    Returns whatever model.run returns when preds_gather_enabled=True
    (stats, predictions dict, targets, and the low-dimensional codes).
    """
    return model.run(npz_path=npz_path,
                     epochs=epochs,
                     batch_size=batch_size,
                     enc_num_units=enc_num_units,
                     dec_num_units=dec_num_units,
                     ts_len=max_seq_len,
                     learning_rate=learning_rate,
                     preds_gather_enabled=preds_gather_enabled)
In [24]:
#%%time
# Run the 1-epoch evaluation experiment.  Returns training stats, a dict of
# per-sequence predictions, the target sequences, and `twods` — presumably
# the 2-D latent encodings (the encoder state is processed down to shape
# (?, 2) in the graph printout above); TODO confirm in model.run.
dyn_stats, preds_dict, targets, twods = experiment()
# Cached alternative:
# dyn_stats, preds_dict, targets, twods = get_or_run_nn(experiment, filename='033_autoencoder_000',
#                                                       nn_runs_folder = data_path + "/nn_runs")
In [27]:
# Plot the statistics of the 1-epoch evaluation run.
dyn_stats.plotStats()
plt.show()
In [28]:
# Per-sequence R^2 between each target series and its reconstruction.
r2_scores = []
for seq_idx in range(len(targets)):
    r2_scores.append(r2_score(y_true=targets[seq_idx], y_pred=preds_dict[seq_idx]))
In [29]:
# Index of the worst-reconstructed sequence according to R^2.
ind = np.argmin(r2_scores)
ind
Out[29]:
In [30]:
# Target series and its reconstruction for the worst case.
reals = targets[ind]
preds = preds_dict[ind]
In [31]:
# R^2 of the single worst reconstruction (the minimum over all sequences).
r2_score(y_true=reals, y_pred=preds)
Out[31]:
In [32]:
#sns.tsplot(data=dp.inputs[ind].flatten())
In [33]:
# Overlay the worst-case target (blue) and its reconstruction (green).
fig, ax = plt.subplots(figsize=(15, 6))
ax.plot(reals, 'b')
ax.plot(preds, 'g')
ax.legend(['reals', 'preds'])
plt.show()
In [34]:
%%time
# Dynamic-time-warping distance between every target sequence and its
# reconstruction; fastdtw returns (distance, path), keep only the distance.
dtw_scores = [fastdtw(targets[ind], preds_dict[ind])[0]
              for ind in range(len(targets))]
In [35]:
# Mean DTW distance across all sequences (lower = better reconstructions).
np.mean(dtw_scores)
Out[35]:
In [36]:
# Engle-Granger cointegration test between the worst-case prediction and its
# target (statsmodels coint: t-statistic, p-value, critical values).
coint(preds, reals)
Out[36]:
In [75]:
# Visually inspect a randomly chosen sequence against its reconstruction.
# Fixes two inconsistencies of the original cell: (1) it mixed label= on one
# plot call with an explicit plt.legend([...]) list that overrode it — now
# both lines carry labels and legend() reads them; (2) it drew the index from
# the global np.random instead of the notebook's seeded random_state, which
# broke reproducibility of the pick.
cur_ind = random_state.randint(len(targets))
reals = targets[cur_ind]
preds = preds_dict[cur_ind]
fig, ax = plt.subplots(figsize=(15, 6))
ax.plot(reals, 'b', label='reals')
ax.plot(preds, 'g', label='preds')
ax.legend()
plt.show()
In [ ]: