In [1]:
import os
import operator
import random
import pandas as pd
import numpy as np
import tensorflow.contrib.keras as keras
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
from vixstructure.utils import parse_model_repr
from vixstructure.data import LongPricesDataset
In [2]:
# Collect training statistics for experiment 04: each CSV in the model
# directory holds one training run's per-epoch metrics; the hyperparameters
# of the run are encoded in the filename and parsed by parse_model_repr.
directory, _, files = next(os.walk("models/experiment04"))
h5_files = tuple(file for file in files if os.path.splitext(file)[1] == ".h5")
csv_files = tuple(file for file in files if os.path.splitext(file)[1] == ".csv")
stats = [(parse_model_repr(os.path.splitext(file)[0]),
          pd.read_csv(os.path.join(directory, file), header=0, index_col=0,
                      dtype={"epoch": int, "loss": np.float32, "val_loss": np.float32,
                             "denorm_mse": np.float32, "val_denorm_mse": np.float32}))
         for file in csv_files]
parameters, stats_data = zip(*stats)
# Some runs logged 4 metric columns, others only 2; harmonize them so they
# can be concatenated. Use the 2-column layout as the canonical column labels.
# Initialize to None so the loop below cannot hit a NameError when no
# 2-column frame exists (the original code referenced it unconditionally).
use_this_as_columns = None
for d in stats_data:
    if len(d.columns) == 2:
        use_this_as_columns = d.columns
        print(use_this_as_columns)
        break
for d in stats_data:
    if len(d.columns) == 4:
        d.drop(["loss", "val_loss"], axis=1, inplace=True)
        if use_this_as_columns is not None:  # guard: only rename if a template was found
            d.columns = use_this_as_columns
# Stack all runs into one frame keyed by (depth, width, normalized);
# the per-file epoch index becomes the innermost level.
dataframe = pd.concat(stats_data, keys=[(p.depth, p.width, p.normalized) for p in parameters],
                      names=["depth", "width", "normalized"])
In [3]:
# Sort the MultiIndex so the label-based slicing below is fast and lexsort-safe.
dataframe = dataframe.sort_index()
In [4]:
def reduce_mean_std(dataframe):
    """Aggregate repeated training runs of the same configuration.

    Groups by the (depth, width, normalized, epoch) index levels and returns
    a frame with a two-level column index: ("mean", <metric>) and
    ("std", <metric>) for every metric column of the input.
    """
    # Pass the keys as a list: the original tuple form is interpreted by
    # newer pandas as a single composite key and fails.
    groups = dataframe.groupby(["depth", "width", "normalized", "epoch"])
    mean = groups.mean()
    std = groups.std()
    return pd.concat((mean, std), axis=1, keys=("mean", "std"))
# Collapse repeated runs into per-configuration mean/std for every epoch.
dataframe_reduced = reduce_mean_std(dataframe)
In [5]:
# Best (minimum) value of each metric, and the index label where it occurs.
print(dataframe_reduced.min())
print(dataframe_reduced.idxmin())
In [6]:
# Depth-1 networks only: runs with input normalization vs. without.
normal = dataframe_reduced.loc[(1, slice(None), True)]
basic = dataframe_reduced.loc[(1, slice(None), False)]
In [7]:
# Learning curves without normalization; clip the y-axis to hide outliers.
ax = basic.plot(figsize=(16, 9))
ax.set_ylim(0, 0.25)
plt.show()
In [8]:
# Learning curves with normalization, same axis limits for comparability.
ax = normal.plot(figsize=(16, 9))
ax.set_ylim(0, 0.25)
plt.show()
In [9]:
# Best validation loss per network width, with vs. without normalization.
best_basic = basic.groupby("width").min()["mean", "val_loss"]
best_normal = normal.groupby("width").min()["mean", "val_loss"]
comparison = pd.concat((best_basic, best_normal),
                       axis=1, keys=("without normalization", "normalized"))
comparison.plot(figsize=(10, 5))
plt.title("Validation loss of best epoch")
plt.show()
In [10]:
# Re-stack the raw runs, this time also keeping the hostname of the machine
# each run was trained on as an extra index level.
run_keys = [(p.depth, p.width, p.normalized, p.hostname) for p in parameters]
hostnamedate = pd.concat(stats_data, keys=run_keys,
                         names=["depth", "width", "normalized", "hostname"])
hostnamedate = hostnamedate.sort_index()
In [12]:
# Minimum validation loss per host for the depth-1 / width-50 / unnormalized runs.
hostnamedate.loc[1,50,False]["val_loss"].groupby("hostname").min()
Out[12]:
In [19]:
# Rows at epoch 999 for every host of the depth-1 / width-50 / unnormalized
# configuration — presumably the last epoch of training; verify against the CSVs.
hostnamedate.loc[1,50,False,:,999]
Out[19]:
In [21]:
# Raw (un-aggregated) per-metric minima, for comparison with the reduced frame.
print(dataframe.min())
print(dataframe.idxmin())