In [1]:
# -*- coding: UTF-8 -*-
# Auto-reload imported modules so edits to the local .py modules
# (mylibs, models, data_providers) are picked up without a kernel restart.
#%load_ext autoreload
%reload_ext autoreload
%autoreload 2
In [2]:
from __future__ import division
import tensorflow as tf
from os import path, remove
import numpy as np
import pandas as pd
import csv
from sklearn.model_selection import StratifiedShuffleSplit
from time import time
from matplotlib import pyplot as plt
import seaborn as sns
from mylibs.jupyter_notebook_helper import show_graph, renderStatsList, renderStatsCollection, \
renderStatsListWithLabels, renderStatsCollectionOfCrossValids, plot_res_gp, my_plot_convergence
from tensorflow.contrib import rnn
from tensorflow.contrib import learn
import shutil
from tensorflow.contrib.learn.python.learn import learn_runner
from mylibs.tf_helper import getDefaultGPUconfig
from sklearn.metrics import r2_score
from mylibs.py_helper import factors
from fastdtw import fastdtw
from collections import OrderedDict
from scipy.spatial.distance import euclidean
from statsmodels.tsa.stattools import coint
from common_33 import get_or_run_nn
from skopt.space.space import Integer, Real
from skopt import gp_minimize
from skopt.plots import plot_convergence
import pickle
import inspect
import dill
import sys
from models.model_36_price_history_autoencoder import PriceHistoryAutoencoder
from data_providers.data_provider_33_price_history_autoencoder import PriceHistoryAutoEncDataProvider
#from gp_opt.price_history_27_gp_opt import PriceHistoryGpOpt
In [3]:
# Global configuration for this run.
dtype = tf.float32
# Fixed seed so the RNG (and hence weight init / shuffling that uses it)
# is reproducible across runs.
seed = 16011984
random_state = np.random.RandomState(seed=seed)
config = getDefaultGPUconfig()
n_jobs = 1
%matplotlib inline
In [4]:
# Factorize the dataset size (689 instances) to choose batch sizes that
# divide it evenly (689 = 13 * 53; batch_size=13 is used below).
factors(689)
Out[4]:
In [5]:
# Fixed (padded) length of every price-history sequence.
max_seq_len = 682
In [6]:
# NOTE(review): machine-specific relative path -- consider making this
# configurable rather than hardcoding a Dropbox location.
data_path = '../../../../Dropbox/data'
In [7]:
phae_path = data_path + '/price_hist_autoencoder'
In [8]:
npz_dates = phae_path + '/price_history_full_seqs_dates.npz'
assert path.isfile(npz_dates)
In [9]:
npz_train = phae_path + '/price_history_seqs_dates_normed_train.npz'
assert path.isfile(npz_train)
In [10]:
npz_test = phae_path + '/price_history_seqs_dates_normed_test.npz'
assert path.isfile(npz_test)
In [11]:
# Common prefix shared by the train/test archives; downstream code
# appends '_train.npz' / '_test.npz' to it.
npz_path = npz_train[:-len('_train.npz')]
In [12]:
for key, val in np.load(npz_train).iteritems():
print key, ",", val.shape
In [13]:
# Data-provider smoke test (kept commented); the tuples below are the
# recorded shapes of dp.datalist from a previous run:
# inputs (N, T, 7), decoder extras (N, T, 6), sequence lengths (N,), targets (N, T).
# dp = PriceHistoryAutoEncDataProvider(npz_path=npz_path, batch_size=53, with_EOS=False)
# for data in dp.datalist:
# print data.shape
(689, 682, 7)
(689, 682, 6)
(689,)
(689, 682)
In [14]:
# for item in dp.next():
# print item.shape
In [15]:
# One-off graph construction to eyeball the architecture (kept commented);
# mirrors the real run below but with tiny unit counts.
# model = PriceHistoryAutoencoder(rng=random_state, dtype=dtype, config=config)
# graph = model.getGraph(batch_size=53,
# #the way we have it these two must be equal for now
# enc_num_units = 10,
# hidden_enc_num_units = 10,
# hidden_enc_dim = 12,
# hidden_dec_dim = 13,
# #the way we have it these two must be equal for now
# hidden_dec_num_units = 14,
# dec_num_units = 14,
# ts_len=max_seq_len)
In [16]:
#show_graph(graph)
In [17]:
# The autoencoder instance used for the real run below.
model = PriceHistoryAutoencoder(rng=random_state, dtype=dtype, config=config)
In [18]:
# NOTE(review): npz_test was already assigned in cell 10; this recomputes
# the exact same path from npz_path (redundant but harmless).
npz_test = npz_path + '_test.npz'
assert path.isfile(npz_test)
path.abspath(npz_test)
Out[18]:
Out[18]:
In [19]:
def experiment():
return model.run(npz_path=npz_path,
epochs=50,
batch_size = 13,
enc_num_units = 250,
hidden_enc_num_units = 250,
hidden_enc_dim = 101,
hidden_dec_dim = 101,
hidden_dec_num_units = 250,
dec_num_units = 250,
ts_len=max_seq_len,
learning_rate = 1e-3,
learning_rate_diff = 1e-5,
preds_gather_enabled = True,
)
In [20]:
#%%time
# Run the experiment, or load cached results from a previous run:
# get_or_run_nn persists results under nn_runs_folder keyed by filename.
# dyn_stats_dic, preds_dict, targets, twods = experiment()
dyn_stats_dic, preds_dict, targets, twods = get_or_run_nn(experiment, filename='036_autoencoder_000',
nn_runs_folder = data_path + "/nn_runs")
In [21]:
# Training curves: raw statistics and the differenced variant.
dyn_stats_dic['dyn_stats'].plotStats()
plt.show()
dyn_stats_dic['dyn_stats_diff'].plotStats()
plt.show()
In [22]:
# Per-series goodness of fit of the reconstructions
# (assumes preds_dict is keyed by the same integer indices as targets).
r2_scores = [r2_score(y_true=targets[ind], y_pred=preds_dict[ind])
for ind in range(len(targets))]
In [23]:
# Index of the worst-reconstructed series (lowest R^2).
ind = np.argmin(r2_scores)
ind
Out[23]:
In [24]:
reals = targets[ind]
preds = preds_dict[ind]
In [25]:
# R^2 of the worst case, shown on its own.
r2_score(y_true=reals, y_pred=preds)
Out[25]:
In [26]:
#sns.tsplot(data=dp.inputs[ind].flatten())
In [27]:
# Worst-case series: target (blue) vs reconstruction (green).
fig = plt.figure(figsize=(15,6))
plt.plot(reals, 'b')
plt.plot(preds, 'g')
plt.legend(['reals','preds'])
plt.show()
In [28]:
%%time
# Dynamic-time-warping distance per series (lower is better);
# fastdtw returns (distance, path) -- keep only the distance.
dtw_scores = [fastdtw(targets[ind], preds_dict[ind])[0]
for ind in range(len(targets))]
In [29]:
# Average DTW distance across all series.
np.mean(dtw_scores)
Out[29]:
In [30]:
# Engle-Granger cointegration test (statsmodels) between the worst-case
# prediction and its target; uses preds/reals from cell 24.
coint(preds, reals)
Out[30]:
In [45]:
cur_ind = np.random.randint(len(targets))
reals = targets[cur_ind]
preds = preds_dict[cur_ind]
fig = plt.figure(figsize=(15,6))
plt.plot(reals, 'b', label='reals')
plt.plot(preds, 'g')
plt.legend(['reals','preds'])
plt.show()
In [32]:
# 2-D bottleneck representation of every series.
# NOTE(review): relies on twods.values() -- presumably insertion/stable
# ordering is not needed since only the scatter is drawn; confirm if the
# per-series identity ever matters here.
twod_arr = np.array(twods.values())
twod_arr.shape
Out[32]:
In [33]:
# Scatter of the learned 2-D embedding.
plt.figure(figsize=(16,7))
plt.plot(twod_arr[:, 0], twod_arr[:, 1], 'r.')
plt.title('two dimensional representation of our time series after dimensionality reduction')
plt.xlabel('first dimension')
plt.ylabel('second dimension')
plt.show()
In [ ]: