In [1]:
import os
import operator
import random
import calendar

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D

from vixstructure.data import FuturesByMonth
from vixstructure.utils import parse_whole_directory_monthwise

# Default size (width, height in inches) for every figure created in this notebook.
mpl.rcParams["figure.figsize"] = (16, 9)

In [68]:
def plot3d_loss(dataframe, zlim=None, rotation=225, figsize=(5, 5)):
    """Draw a 3-D surface of loss values over a two-level index grid.

    Parameters
    ----------
    dataframe : pandas.Series
        Values indexed by a two-level MultiIndex. Level 0 is drawn on the
        x axis (labelled "Depth"), level 1 on the y axis (labelled "Width").
    zlim : tuple, optional
        Limits passed to ``ax.set_zlim``. ``None`` keeps Matplotlib's
        automatic limits.
    rotation : float, optional
        Azimuth angle (degrees) handed to ``ax.view_init``.
    figsize : tuple, optional
        Figure size in inches; defaults to the previously hard-coded (5, 5).

    Returns
    -------
    None
        A new figure is created as a side effect; call ``plt.show()`` or
        ``plt.savefig`` afterwards.
    """
    # Pivot the two index levels into a 2-D table: rows = level 0, columns = level 1.
    # This replaces a per-cell Python lookup; combinations missing from the
    # index become NaN instead of raising KeyError.
    grid = dataframe.unstack()
    X, Y = np.meshgrid(grid.index, grid.columns)
    # meshgrid lays level 1 along the rows, so the pivot table must be transposed.
    Z = grid.values.T

    fig = plt.figure(figsize=figsize)
    # fig.gca(projection='3d') no longer accepts keyword arguments in
    # Matplotlib >= 3.6; add_subplot works on old and new versions alike.
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_surface(X, Y, Z, linewidth=None, antialiased=True, cmap=cm.coolwarm_r)
    ax.view_init(azim=rotation)
    ax.set_xlabel("Depth")
    ax.set_ylabel("Width")
    ax.set_xlim(X[0, 0], X[-1, -1])
    ax.set_ylim(Y[0, 0], Y[-1, -1])
    if zlim is not None:
        ax.set_zlim(zlim)
    ax.set_zlabel("Loss", rotation=90)

Diff with common structure


In [3]:
# Load every result file found under models/experiment08/.
# NOTE(review): presumably returns a DataFrame with `loss`/`val_loss` columns and
# `depth`, `width`, `month` index levels (later cells group by those names) - confirm
# against vixstructure.utils.parse_whole_directory_monthwise.
data = parse_whole_directory_monthwise("models/experiment08/")

In [4]:
# Column-wise minimum of loss / val_loss within each (depth, width, month) group,
# then the mean of those minima over the months for each (depth, width).
# The keys are passed as lists: pandas >= 1.0 treats a tuple as ONE key and
# raises KeyError.
min_loss = data.groupby(["depth", "width", "month"]).min()
monthly_mean = min_loss.groupby(["depth", "width"]).mean()

In [5]:
# One surface per metric: training loss first, then validation loss.
for loss_column in ("loss", "val_loss"):
    plot3d_loss(monthly_mean[loss_column], rotation=60)
plt.show()


Spreads with common structure


In [6]:
# Load experiment08.1, take the column-wise minimum within each
# (depth, width, month) group and average those minima over the months.
# List keys are required: pandas >= 1.0 treats a tuple as a single key (KeyError).
data1 = parse_whole_directory_monthwise("models/experiment08.1/")
min_loss1 = data1.groupby(["depth", "width", "month"]).min()
monthly_mean1 = min_loss1.groupby(["depth", "width"]).mean()

In [7]:
# One surface per metric: training loss first, then validation loss.
for loss_column in ("loss", "val_loss"):
    plot3d_loss(monthly_mean1[loss_column], rotation=60)
plt.show()



In [8]:
# Smallest value of each column, followed by the index entry where it occurs.
print(data.min(), data.idxmin(), sep="\n")


loss        0.000643
val_loss    0.029266
dtype: float64
loss        (16, 30, 3, 2017-08-19 15:39:17, 930)
val_loss     (13, 27, 9, 2017-08-19 15:37:30, 42)
dtype: object

In [9]:
# Smallest value of each column, followed by the index entry where it occurs.
print(data1.min(), data1.idxmin(), sep="\n")


loss        0.000175
val_loss    0.029992
dtype: float64
loss        (16, 30, 3, 2017-08-19 16:20:35, 963)
val_loss     (22, 21, 8, 2017-08-19 16:23:54, 27)
dtype: object

In [10]:
# Lowest loss and val_loss per month over everything in data1. Each column is
# minimised independently, so the two values in a row may come from different entries.
data1.groupby("month").min()


Out[10]:
loss val_loss
month
1 0.002058 0.111360
2 0.000645 0.103101
3 0.000175 0.108866
4 0.001229 0.037394
5 0.000451 0.034245
6 0.000321 0.052696
7 0.003181 0.066345
8 0.000216 0.029992
9 0.000476 0.030029
10 0.002991 0.070187
11 0.001911 0.058548
12 0.000717 0.138313

Spreads with yearly structure


In [11]:
# Load experiment08.5, take the column-wise minimum within each
# (depth, width, month) group and average those minima over the months.
# List keys are required: pandas >= 1.0 treats a tuple as a single key (KeyError).
data5 = parse_whole_directory_monthwise("models/experiment08.5/")
min_loss5 = data5.groupby(["depth", "width", "month"]).min()
monthly_mean5 = min_loss5.groupby(["depth", "width"]).mean()

In [12]:
# One surface per metric: training loss first, then validation loss.
for loss_column in ("loss", "val_loss"):
    plot3d_loss(monthly_mean5[loss_column], rotation=60)
plt.show()



In [13]:
# Smallest month-averaged loss / val_loss over all (depth, width) combinations;
# each column is minimised independently.
# List keys: pandas >= 1.0 treats a tuple as a single key and raises KeyError.
min_loss5.groupby(["depth", "width"]).mean().min()


Out[13]:
loss        0.001522
val_loss    0.044664
dtype: float64

Diff with yearly structure


In [14]:
# Load experiment08.6, take the column-wise minimum within each
# (depth, width, month) group and average those minima over the months.
# List keys are required: pandas >= 1.0 treats a tuple as a single key (KeyError).
data6 = parse_whole_directory_monthwise("models/experiment08.6/")
min_loss6 = data6.groupby(["depth", "width", "month"]).min()
monthly_mean6 = min_loss6.groupby(["depth", "width"]).mean()

In [69]:
# Only the validation-loss surface is drawn here; it is exported as a PDF
# before being shown.
plot3d_loss(monthly_mean6["val_loss"], rotation=135)
plt.savefig(
    "diff_yearly.pdf",
    format="pdf",
    dpi=300,
    bbox_inches="tight",
)
plt.show()



In [34]:
# Smallest month-averaged loss / val_loss over all (depth, width) combinations;
# each column is minimised independently.
# List keys: pandas >= 1.0 treats a tuple as a single key and raises KeyError.
min_loss6.groupby(["depth", "width"]).mean().min()


Out[34]:
loss        0.002032
val_loss    0.049671
dtype: float64

In [38]:
# (depth, width) combinations whose month-averaged validation loss is below 0.055.
# List keys replace the tuple (pandas >= 1.0 reads a tuple as one key), and the
# callable passed to .loc filters the grouped mean without computing it twice.
min_loss6.groupby(["depth", "width"]).mean().loc[lambda frame: frame["val_loss"] < 0.055]


Out[38]:
loss val_loss
depth width
1 3 0.106809 0.049671
18 0.054954 0.051805
21 0.046323 0.054658
24 0.044503 0.051942
27 0.042852 0.050985
30 0.037773 0.052977
7 30 0.002046 0.053665
25 27 0.003731 0.053549
30 0.003040 0.052124

In [49]:
# Lowest loss / val_loss per month over everything in data6.
# The cell previously drew this exact figure twice; one copy is enough.
data6.groupby("month").min().plot(grid=True)
plt.show()



In [54]:
# Month-averaged validation loss per (depth, width), then the best value reached
# for each width across all depths.
# List keys: pandas >= 1.0 treats a tuple as a single key and raises KeyError.
min_loss6.val_loss.groupby(["depth", "width"]).mean().groupby("width").min().plot()
plt.show()



In [76]:
# Sum of training and validation loss per entry, minimised within each
# (depth, width, month) group and then averaged over the months.
# List keys: pandas >= 1.0 treats a tuple as a single key and raises KeyError.
(data6.loss + data6.val_loss).groupby(["depth", "width", "month"]).min().groupby(["depth", "width"]).mean()


Out[76]:
depth  width
1      3        0.165376
       6        0.158803
       9        0.149033
       12       0.147186
       15       0.158647
       18       0.133678
       21       0.127436
       24       0.127344
       27       0.137323
       30       0.117969
4      3        0.328131
       6        0.153752
       9        0.111434
       12       0.116587
       15       0.105731
       18       0.110465
       21       0.098429
       24       0.114850
       27       0.098404
       30       0.092016
7      3        0.502421
       6        0.147649
       9        0.124753
       12       0.106704
       15       0.108419
       18       0.115860
       21       0.105159
       24       0.097907
       27       0.103891
       30       0.084355
                  ...   
22     3        0.783170
       6        0.357804
       9        0.190452
       12       0.156785
       15       0.156893
       18       0.105137
       21       0.101491
       24       0.104068
       27       0.092067
       30       0.096545
25     3        0.783256
       6        0.570163
       9        0.478458
       12       0.106868
       15       0.225993
       18       0.120149
       21       0.102966
       24       0.102949
       27       0.093464
       30       0.088058
28     3        0.711846
       6        0.572667
       9        0.581077
       12       0.193074
       15       0.104873
       18       0.262846
       21       0.136658
       24       0.097659
       27       0.091962
       30       0.108516
Length: 100, dtype: float64

Using spread prices on yearly structure with dropout


In [18]:
# Load experiment08.7, take the column-wise minimum within each
# (depth, width, month) group and average those minima over the months.
# List keys are required: pandas >= 1.0 treats a tuple as a single key (KeyError).
data7 = parse_whole_directory_monthwise("models/experiment08.7/")
min_loss7 = data7.groupby(["depth", "width", "month"]).min()
monthly_mean7 = min_loss7.groupby(["depth", "width"]).mean()

In [19]:
# One surface per metric: training loss first, then validation loss.
for loss_column in ("loss", "val_loss"):
    plot3d_loss(monthly_mean7[loss_column], rotation=60)
plt.show()



In [20]:
# Smallest month-averaged loss / val_loss over all (depth, width) combinations;
# each column is minimised independently.
# List keys: pandas >= 1.0 treats a tuple as a single key and raises KeyError.
min_loss7.groupby(["depth", "width"]).mean().min()


Out[20]:
loss        0.089845
val_loss    0.045695
dtype: float64

Now the same as above but with SELU


In [21]:
# Load experiment08.8, take the column-wise minimum within each
# (depth, width, month) group and average those minima over the months.
# List keys are required: pandas >= 1.0 treats a tuple as a single key (KeyError).
data8 = parse_whole_directory_monthwise("models/experiment08.8/")
min_loss8 = data8.groupby(["depth", "width", "month"]).min()
monthly_mean8 = min_loss8.groupby(["depth", "width"]).mean()

In [22]:
# One surface per metric: training loss first, then validation loss.
for loss_column in ("loss", "val_loss"):
    plot3d_loss(monthly_mean8[loss_column], rotation=60)
plt.show()



In [23]:
# Smallest month-averaged loss / val_loss over all (depth, width) combinations;
# each column is minimised independently.
# List keys: pandas >= 1.0 treats a tuple as a single key and raises KeyError.
min_loss8.groupby(["depth", "width"]).mean().min()


Out[23]:
loss        0.004781
val_loss    0.037159
dtype: float64

Simple model with spread prices and yearly structure but decreasing hidden layer width


In [24]:
# Load experiment08.9, take the column-wise minimum within each
# (depth, width, month) group and average those minima over the months.
# List keys are required: pandas >= 1.0 treats a tuple as a single key (KeyError).
data9 = parse_whole_directory_monthwise("models/experiment08.9/")
min_loss9 = data9.groupby(["depth", "width", "month"]).min()
monthly_mean9 = min_loss9.groupby(["depth", "width"]).mean()

In [25]:
# One surface per metric: training loss first, then validation loss.
for loss_column in ("loss", "val_loss"):
    plot3d_loss(monthly_mean9[loss_column], rotation=120)
plt.show()



In [26]:
# Smallest month-averaged loss / val_loss over all (depth, width) combinations;
# each column is minimised independently.
# List keys: pandas >= 1.0 treats a tuple as a single key and raises KeyError.
min_loss9.groupby(["depth", "width"]).mean().min()


Out[26]:
loss        0.003698
val_loss    0.046991
dtype: float64

Naive prediction

When using spreads as input


In [41]:
# Naive baseline: for each month, use input column `month - 1` as the prediction
# for target column 0 and record the mean squared error on every data split.
naive_prediction_train, naive_prediction, naive_prediction_test = [], [], []
for month in range(1, 13):
    futures = FuturesByMonth(
        "data/futures_per_year_and_month.h5",
        month,
        yearly=True,
        spreads=True,
    )
    (x_train, y_train), (x_val, y_val), (x_test, y_test) = futures.splitted_dataset()

    naive_prediction_train.append(np.mean(np.square(x_train[:, month - 1] - y_train[:, 0])))
    mse = np.mean(np.square(x_val[:, month - 1] - y_val[:, 0]))
    naive_prediction.append(mse)
    naive_prediction_test.append(np.mean(np.square(x_test[:, month - 1] - y_test[:, 0])))

    # Only the validation error is reported while looping.
    print(calendar.month_name[month], mse)


January 0.0341291666667
February 0.0354402173913
March 0.0255793478261
April 0.015698989899
May 0.0156776041667
June 0.0176815789474
July 0.0547474747475
August 0.0193851648352
September 0.0183391304348
October 0.0537640625
November 0.031189893617
December 0.043425

In [77]:
# One row per month, one column per data split. `columns=` pins the column
# order explicitly so it does not depend on dict ordering.
naive = pd.DataFrame(
    {
        "Training": naive_prediction_train,
        "Validation": naive_prediction,
        "Test": naive_prediction_test,
    },
    index=calendar.month_name[1:],
    columns=["Training", "Validation", "Test"],
)
naive.plot(figsize=(8, 4))
plt.title("Naive prediction MSE")
# Replace the twelve tick positions with abbreviated month names.
plt.xticks(np.arange(12), calendar.month_abbr[1:])
plt.grid()
plt.show()



In [78]:
# Test-split MSE of the naive predictor, one entry per month (months 1..12 in order).
naive_prediction_test


Out[78]:
[0.021595312499999936,
 0.021047282608695522,
 0.01777717391304345,
 0.034429292929293163,
 0.033567708333333258,
 0.021206315789473747,
 0.013938888888888975,
 0.018261538461538499,
 0.12314456521739138,
 0.073775000000000132,
 0.038410106382978808,
 0.088537777777777812]

Now compare all the models


In [45]:
# Side-by-side table of the best validation loss per month for each enabled
# experiment plus the naive validation MSE (indexed 1..12 to line up with `month`).
# Experiments are toggled by (un)commenting their line; currently only data6 is enabled.
# NOTE(review): the legend in the plotting cell is written by hand and must match
# the entries enabled here.
conct = pd.concat([#data.groupby("month").min().val_loss,
                   #data1.groupby("month").min().val_loss,
                   #data5.groupby("month").min().val_loss,
                   data6.groupby("month").min().val_loss,
                   #data7.groupby("month").min().val_loss,
                   #data8.groupby("month").min().val_loss,
                   #data9.groupby("month").min().val_loss,
                   pd.Series(naive_prediction, index=range(1, 13))], axis=1)

In [46]:
# Regardless of the model, some months are easier and some are harder to predict.
conct.plot(linewidth=2)
# `conct` holds exactly two columns: experiment 6 (data6) and the naive baseline.
# The old eight-entry label tuple was matched to the lines in order, so they were
# mislabelled "0" and "1". Update these labels if more experiments are enabled.
plt.legend(("6", "naive"))
plt.show()



In [ ]: