In [109]:
import os
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
from vixstructure.utils import parse_whole_directory_old, parse_whole_directory
from vixstructure.data import LongPricesDataset
from vixstructure.models import term_structure_to_spread_price_v2
In [2]:
def plot3d_loss(dataframe, zlim=None, rotation=225):
    # Build a dense (depth, width) grid and look up the loss for every
    # combination, falling back to 0.45 where a run has no recorded value.
    X = dataframe.index.levels[0]
    Y = dataframe.index.levels[1]
    X, Y = np.meshgrid(X, Y)
    Z = np.reshape(
        np.array(list(map(lambda x: dataframe.get((x[0], x[1]), default=0.45),
                          np.reshape(np.dstack((X, Y)), (X.shape[0] * X.shape[1], 2))))),
        X.shape)
    fig = plt.figure(figsize=(5, 5))
    ax = fig.add_subplot(projection="3d")
    ax.plot_surface(X, Y, Z, linewidth=None, antialiased=True, cmap=cm.coolwarm_r)
    ax.view_init(azim=rotation)
    ax.set_xlabel("Depth")
    ax.set_ylabel("Width")
    ax.set_xlim(X[0, 0], X[-1, -1])
    ax.set_ylim(Y[0, 0], Y[-1, -1])
    ax.set_zlim(zlim)
    #ax.set_zlabel("Loss", rotation=90)
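plot3d_loss expects a Series with a two-level (depth, width) MultiIndex; the dstack/reshape pass enumerates every grid point and falls back to 0.45 wherever a combination is missing, so the surface stays rectangular. A minimal sketch of that lookup pattern on a toy Series (index and values here are made up):

toy_index = pd.MultiIndex.from_product([[1, 2], [10, 20]], names=["depth", "width"])
toy = pd.Series([0.10, 0.20, 0.30, 0.40], index=toy_index)
X, Y = np.meshgrid(toy.index.levels[0], toy.index.levels[1])
pairs = np.reshape(np.dstack((X, Y)), (-1, 2))  # every (depth, width) pair
Z = np.array([toy.get((d, w), default=0.45) for d, w in pairs]).reshape(X.shape)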
In [3]:
experiment1 = parse_whole_directory_old("models/experiment01/")
experiment2 = parse_whole_directory_old("models/experiment02/")
experiment3 = parse_whole_directory_old("models/experiment03/")
experiment4 = parse_whole_directory_old("models/experiment04/")
In [4]:
# Epoch of the best validation loss for each (depth, width, datetime) run.
argmin_epochs_basic = np.array([v[3] for v in experiment1["val_loss"].loc[:, :, False].groupby(["depth", "width", "datetime"]).idxmin()])
argmin_epochs_normal = np.array([v[3] for v in experiment1["val_loss"].loc[:, :, True].groupby(["depth", "width", "datetime"]).idxmin()])
print(argmin_epochs_basic.mean(), argmin_epochs_basic.std(), np.median(argmin_epochs_basic))
print(argmin_epochs_normal.mean(), argmin_epochs_normal.std(), np.median(argmin_epochs_normal))
In [5]:
ex1_min = experiment1.groupby(["depth", "width", "normalized", "datetime"]).min()
ex1_min_basic = ex1_min.loc(axis=0)[:, :, False]
ex1_min_normal = ex1_min.loc(axis=0)[:, :, True]
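The same cross-sections can also be taken with DataFrame.xs, selecting on the named "normalized" level; with drop_level=False the result matches the .loc version above:

ex1_min_basic = ex1_min.xs(False, level="normalized", drop_level=False)
ex1_min_normal = ex1_min.xs(True, level="normalized", drop_level=False)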
In [6]:
plot3d_loss(ex1_min_basic["loss"].groupby(("depth", "width")).mean(), rotation=150)
plt.savefig("approach1-ex1-loss-basic.pdf", format="pdf", bbox_inches="tight", dpi=300)
plt.show()
In [7]:
plot3d_loss(ex1_min_basic["val_loss"].groupby(("depth", "width")).mean(), rotation=150)
plt.savefig("approach1-ex1-val-basic.pdf", format="pdf", bbox_inches="tight", dpi=300)
plt.show()
In [8]:
plot3d_loss(ex1_min_normal["loss"].groupby(("depth", "width")).mean(), rotation=150)
plt.savefig("approach1-ex1-loss-normal.pdf", format="pdf", bbox_inches="tight", dpi=300)
plt.show()
In [9]:
plot3d_loss(ex1_min_normal["val_loss"].groupby(("depth", "width")).mean(), rotation=150)
plt.savefig("approach1-ex1-val-normal.pdf", format="pdf", bbox_inches="tight", dpi=300)
plt.show()
In [10]:
# Loss progression for the depth-1, width-24 configuration.
d1w24_basic = experiment1["val_loss"].loc[1, 24, False]
d1w24_normal = experiment1["val_loss"].loc[1, 24, True]
plt.figure(figsize=(8, 3))
ax2 = d1w24_basic.groupby("epoch").std().plot(color="red", linewidth=0.8)
d1w24_normal.groupby("epoch").std().plot(color="darkorange", linewidth=0.8)
ax2.tick_params(axis="y", colors="red")
plt.legend(("std.", "std. (normalized)"), title="")
plt.ylim(0, 0.04)
ax1 = d1w24_basic.groupby("epoch").mean().plot(secondary_y=True, color="blue", linewidth=0.8)
d1w24_normal.groupby("epoch").mean().plot(secondary_y=True, color="violet", linewidth=0.8)
ax1.tick_params(axis="y", colors="blue")
ax1.set_xlabel("")
ax2.set_xlabel("")
plt.legend(("mean", "mean (normalized)"), loc="upper center", title="")
plt.ylim(0.095,0.115)
plt.savefig("approach1-ex1-progression.pdf", format="pdf", bbox_inches="tight", dpi=300)
plt.show()
In [11]:
print(experiment1.min(), experiment1.idxmin(), sep="\n")
In [12]:
ex2_min = experiment2["val_loss"].loc[1, :, False].groupby(["width", "datetime"]).min()
In [13]:
ex2_min.groupby("width").mean().plot(figsize=(10,5))
plt.title("Validation loss of best training epoch")
plt.show()
In [14]:
experiment2.loc[1, 9, False, :, 999]
In [15]:
ex3_min = experiment3["val_loss"].loc[:, :, False].groupby(["depth", "width", "datetime"]).min()
experiment3.groupby(["depth", "width", "normalized", "datetime"]).min()
In [16]:
plot3d_loss(ex3_min.groupby(["depth", "width"]).mean())
plt.xticks(np.arange(10, 100, 20))
plt.savefig("approach1-ex3-val-selu.pdf", format="pdf", bbox_inches="tight", dpi=300)
plt.show()
In [17]:
experiment35 = parse_whole_directory_old("models/experiment03.5/")
In [18]:
ex35_min = experiment35["val_loss"].loc[:, :, False].groupby(["depth", "width", "datetime"]).min()
experiment35.groupby(["depth", "width", "normalized", "datetime"]).min()
plot3d_loss(ex35_min.groupby(["depth", "width"]).mean())
plt.xticks(np.arange(10, 100, 20))
plt.savefig("approach1-ex3-val-dropout.pdf", format="pdf", bbox_inches="tight", dpi=300)
plt.show()
In [20]:
ex4_min = experiment4["val_loss"].loc[1].groupby(["width", "normalized", "datetime"]).min()
ex4_min_basic = ex4_min.loc[:, False]
ex4_min_normal = ex4_min.loc[:, True]
In [21]:
ex4_min_basic.groupby("width").mean().plot(figsize=(10,5), label="not normalized")
ex4_min_normal.groupby("width").mean().plot(label="normalized")
plt.legend()
plt.title("Validation loss of best training epoch")
plt.show()
In [22]:
# One row per width (11 widths), one column per run (10 runs).
ex4_per_width = pd.DataFrame(ex4_min_basic.values.reshape((11, 10)), index=ex4_min_basic.index.levels[0])
mean_loss = ex4_per_width.T.mean()
std = ex4_per_width.T.std()
ax = ex4_per_width.plot(colormap=cm.winter, legend=False, figsize=(8, 4), alpha=0.8)
mean_loss.plot(linewidth=5)
plt.xticks(np.arange(20, 51, 3))
plt.grid()
plt.ylim(0.080, 0.110)
plt.xlabel("")
# Shade a band of plus/minus one standard deviation around the mean.
x = np.arange(20, 51, 3)
xx = np.concatenate((x, x[::-1]))
yy = np.concatenate((mean_loss.values + std.values, (mean_loss.values - std.values)[::-1]))
plt.fill(xx, yy, color="cyan", alpha=0.5)
#ax.xaxis.set_ticklabels(())
plt.savefig("approach1-ex4-val-basic.pdf", format="pdf", bbox_inches="tight", dpi=300)
plt.show()
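The xx/yy polygon traces the mean-plus-std curve forward and the mean-minus-std curve back. An equivalent, arguably more direct way to shade the band, reusing x, mean_loss and std from the cell above:

plt.fill_between(x, mean_loss.values - std.values, mean_loss.values + std.values, color="cyan", alpha=0.5)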
In [23]:
ex4_per_width = pd.DataFrame(ex4_min_normal.values.reshape((11, 10)), index=ex4_min_normal.index.levels[0])
mean_loss = ex4_per_width.T.mean()
std = ex4_per_width.T.std()
ex4_per_width.plot(colormap=cm.winter, legend=False, figsize=(8, 3), alpha=0.8)
mean_loss.plot(linewidth=5)
plt.xticks(np.arange(20, 51, 3))
plt.ylim(0.155, 0.185)
plt.grid()
plt.xlabel("")
x = np.arange(20, 51, 3)
xx = np.concatenate((x, x[::-1]))
yy = np.concatenate((mean_loss.values + std.values, (mean_loss.values - std.values)[::-1]))
plt.fill(xx, yy, color="cyan", alpha=0.5)
plt.savefig("approach1-ex4-val-normal.pdf", format="pdf", bbox_inches="tight", dpi=300)
plt.show()
In [24]:
ex4_min_basic
In [25]:
def mse(y1, y2):
    return np.mean(np.square(y1 - y2))

def mse_prediction(model, splitted_datasets):
    return [mse(model.predict(x), y) for x, y in splitted_datasets]
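A quick sanity check of mse on made-up vectors: the squared errors are (0, 4), so the mean is 2.

assert mse(np.array([1.0, 2.0]), np.array([1.0, 4.0])) == 2.0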
In [107]:
longprices = LongPricesDataset("data/8_m_settle.csv", "data/expirations.csv")
(x_train, y_train), (x_val, y_val), (x_test, y_test) = longprices.splitted_dataset()
In [28]:
# Naive baseline: predict each day's spread prices with the previous day's.
naive_mse = [mse(y_train[:-1], y_train[1:]), mse(y_val[:-1], y_val[1:]), mse(y_test[:-1], y_test[1:])]
In [29]:
comp_data = parse_whole_directory("models/comparison1-4/")
In [56]:
#pd.Series(comp_data.index.droplevel([0,1,2,3]), index=comp_data.index).groupby("datetime").max()
epochs = pd.concat([pd.Series([""]),
                    pd.Series(comp_data.index.droplevel([0, 1, 2, 3]), index=comp_data.index).groupby("datetime").max()])
epochs.index = ["Naive", "Basic", "Dropout", "SNN", "AddIn"]
epochs["Minutely"] = 70
epochs = epochs.iloc[[0, 1, 2, 3, 5, 4]]
In [53]:
model1 = term_structure_to_spread_price_v2(1, 30, input_data_length=9)
model1.load_weights("models/comparison1-4/20170826150648_schumpeter_depth1_width30_days1_dropout0e+00_optimAdam_lr1e-03.h5")
model2 = term_structure_to_spread_price_v2(1, 30, input_data_length=9, dropout=0.5)
model2.load_weights("models/comparison1-4/20170826150734_schumpeter_depth1_width30_days1_dropout5e-01_optimAdam_lr1e-03.h5")
model3 = term_structure_to_spread_price_v2(1, 30, input_data_length=9, activation_function="selu")
model3.load_weights("models/comparison1-4/20170826151132_schumpeter_depth1_width30_days1_dropout0e+00_optimAdam_lr1e-03.h5")
model4 = term_structure_to_spread_price_v2(1, 30, input_data_length=11)
model4.load_weights("models/comparison1-4/20170826151354_schumpeter_depth1_width30_days1_dropout0e+00_optimAdam_lr1e-03.h5")
model_minutely = term_structure_to_spread_price_v2(1, 30, input_data_length=9)
model_minutely.load_weights("models/experiment12/20170829131147_tfpool42_depth1_width30_days1_dropout0e+00_optimAdam_lr1e-03.h5")
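The architecture handed to term_structure_to_spread_price_v2 has to match the checkpoint being loaded; the relevant hyperparameters are encoded in the weight filenames. A small hypothetical helper (not part of vixstructure) that recovers depth and width with a regex:

import re

def hyperparams_from_filename(name):
    # Filenames look like "..._depth1_width30_days1_...h5".
    m = re.search(r"depth(\d+)_width(\d+)", name)
    return int(m.group(1)), int(m.group(2))

assert hyperparams_from_filename("20170826150648_schumpeter_depth1_width30_days1_dropout0e+00_optimAdam_lr1e-03.h5") == (1, 30)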
In [54]:
longprices = LongPricesDataset("data/8_m_settle.csv", "data/expirations.csv")
splitted_dataset = longprices.splitted_dataset()
summary = pd.DataFrame([naive_mse,
                        mse_prediction(model1, splitted_dataset),
                        mse_prediction(model2, splitted_dataset),
                        mse_prediction(model3, splitted_dataset),
                        mse_prediction(model_minutely, splitted_dataset),
                        mse_prediction(model4, longprices.splitted_dataset(with_days=True, with_months=True))],
                       index=["Naive", "Basic", "Dropout", "SNN", "Minutely", "AddIn"],
                       columns=["Training MSE", "Validation MSE", "Test MSE"])
In [105]:
print(pd.DataFrame(epochs, columns=["Epoch"]).join(summary).to_latex(float_format="%.4f"))
In [128]:
fig, axs = plt.subplots(6, 3, sharex=True, figsize=(10,15))
for idx, axh in enumerate(axs):
    # Plot the true spread prices against the model's prediction for one
    # randomly chosen sample from each data split.
    i1 = random.randrange(len(x_train))
    axh[0].plot(np.arange(6), y_train[i1],
                np.arange(6), np.squeeze(model_minutely.predict(np.expand_dims(x_train[i1], axis=0))))
    i2 = random.randrange(len(x_val))
    axh[1].plot(np.arange(6), y_val[i2],
                np.arange(6), np.squeeze(model_minutely.predict(np.expand_dims(x_val[i2], axis=0))))
    i3 = random.randrange(len(x_test))
    axh[2].plot(np.arange(6), y_test[i3],
                np.arange(6), np.squeeze(model_minutely.predict(np.expand_dims(x_test[i3], axis=0))))
    if idx == 0:
        axh[0].set_title("Training Set")
        axh[1].set_title("Validation Set")
        axh[2].set_title("Test Set")
plt.tight_layout()
plt.savefig("minutely-prediction-samples.pdf", format="pdf", dpi=300, bbox_inches="tight")
plt.show()
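Keras predict expects a leading batch dimension, hence the expand_dims/squeeze pair around every single-sample call above. The shape bookkeeping in isolation, with NumPy stand-ins for the model input and output:

x_sample = np.zeros(9)                      # one nine-leg term-structure input
x_batch = np.expand_dims(x_sample, axis=0)  # shape (1, 9): a batch of one
y_batch = np.zeros((1, 6))                  # stand-in for model.predict's output
y_hat = np.squeeze(y_batch)                 # back to shape (6,) for plotting
assert x_batch.shape == (1, 9) and y_hat.shape == (6,)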
In [64]:
data13 = parse_whole_directory("models/experiment13/")
In [65]:
loss_min13 = data13.groupby(["depth", "width", "datetime"]).min().groupby(["depth", "width"]).mean()
In [82]:
plot3d_loss(loss_min13.val_loss, rotation=150)
plt.savefig("approach1-ex3-val-minutely.pdf", format="pdf", bbox_inches="tight", dpi=300)
plt.show()
In [85]:
plot3d_loss(data13.groupby(["depth", "width", "datetime"]).min().groupby(["depth", "width"]).std().val_loss, rotation=150)
plt.show()
In [94]:
loss_min13.val_loss.sort_values().head(10)
In [104]:
deep_minutely = term_structure_to_spread_price_v2(4, 30, input_data_length=9)
deep_minutely.load_weights("models/experiment13/20170829142957_tfpool36_depth4_width30_days1_dropout0e+00_optimAdam_lr1e-03.h5")
print(*mse_prediction(deep_minutely, splitted_dataset))