In [1]:

    
import os
import operator
import random
import calendar

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D

from vixstructure.data import FuturesByMonth
from vixstructure.utils import parse_whole_directory_monthwise

# Default size (width, height in inches) for every figure created in this notebook.
mpl.rcParams["figure.figsize"] = (16, 9)



In [68]:

    
def plot3d_loss(dataframe, zlim=None, rotation=225):
    """Draw a 3D surface of a loss value over a two-level index grid.

    Parameters
    ----------
    dataframe : pandas.Series
        Despite the name, a Series (e.g. ``monthly_mean.loss``) indexed by a
        two-level MultiIndex. The first level is drawn on the x axis
        (labelled "Depth"), the second on the y axis (labelled "Width").
    zlim : optional
        Limits for the z axis, forwarded to ``Axes3D.set_zlim``.
        ``None`` (default) keeps the automatically chosen limits.
    rotation : float, default 225
        Azimuth of the camera, forwarded to ``Axes3D.view_init``.

    Returns
    -------
    None
        A new 5x5 inch figure is created as a side effect; call
        ``plt.show()`` or ``plt.savefig()`` afterwards.
    """
    depths = dataframe.index.levels[0]
    widths = dataframe.index.levels[1]
    X, Y = np.meshgrid(depths, widths)
    # Pivot the Series into a (depth x width) table in one step instead of
    # looking up every grid point individually. reindex() aligns the table
    # with the index levels; combinations without data become NaN.
    # The transpose matches meshgrid's (len(widths), len(depths)) layout.
    Z = (dataframe.unstack(level=1)
         .reindex(index=depths, columns=widths)
         .values
         .T)
    fig = plt.figure(figsize=(5, 5))
    # Figure.gca(projection=...) was removed in Matplotlib 3.6;
    # add_subplot works on old and new versions alike.
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_surface(X, Y, Z, linewidth=None, antialiased=True, cmap=cm.coolwarm_r)
    ax.view_init(azim=rotation)
    ax.set_xlabel("Depth")
    ax.set_ylabel("Width")
    ax.set_xlim(X[0, 0], X[-1, -1])
    ax.set_ylim(Y[0, 0], Y[-1, -1])
    if zlim is not None:
        ax.set_zlim(zlim)
    ax.set_zlabel("Loss", rotation=90)

Diff with common structure



In [3]:

    
# Load the results stored under models/experiment08/.
# NOTE(review): judging from the outputs further down, the result has the
# columns `loss` and `val_loss` and an index whose levels include
# "depth", "width" and "month" -- confirm against vixstructure.utils.
data = parse_whole_directory_monthwise("models/experiment08/")



In [4]:

    
# Minimum of each column per (depth, width, month) group, then the mean of
# those minima over the months for every (depth, width) pair.
# The keys are passed as lists: pandas >= 1.0 treats a tuple as ONE key
# and would raise a KeyError.
min_loss = data.groupby(["depth", "width", "month"]).min()
monthly_mean = min_loss.groupby(["depth", "width"]).mean()



In [5]:

    
# One surface per metric: training loss first, then validation loss.
for metric in ("loss", "val_loss"):
    plot3d_loss(monthly_mean[metric], rotation=60)
plt.show()

Spreads with common structure



In [6]:

    
# Same aggregation as for experiment 08, applied to models/experiment08.1/:
# per-(depth, width, month) minimum, then the mean over the months.
# Lists (not tuples) are required as multi-key groupers in pandas >= 1.0.
data1 = parse_whole_directory_monthwise("models/experiment08.1/")
min_loss1 = data1.groupby(["depth", "width", "month"]).min()
monthly_mean1 = min_loss1.groupby(["depth", "width"]).mean()



In [7]:

    
# One surface per metric: training loss first, then validation loss.
for metric in ("loss", "val_loss"):
    plot3d_loss(monthly_mean1[metric], rotation=60)
plt.show()



In [8]:

    
# Column-wise minimum, followed by the index entry at which each minimum occurs.
for reducer in (data.min, data.idxmin):
    print(reducer())









    



loss        0.000643
val_loss    0.029266
dtype: float64
loss        (16, 30, 3, 2017-08-19 15:39:17, 930)
val_loss     (13, 27, 9, 2017-08-19 15:37:30, 42)
dtype: object



In [9]:

    
# Column-wise minimum, followed by the index entry at which each minimum occurs.
for reducer in (data1.min, data1.idxmin):
    print(reducer())









    



loss        0.000175
val_loss    0.029992
dtype: float64
loss        (16, 30, 3, 2017-08-19 16:20:35, 963)
val_loss     (22, 21, 8, 2017-08-19 16:23:54, 27)
dtype: object



In [10]:

    
# Lowest value of every column within each month group of experiment 08.1.
data1.groupby("month").min()

Spreads with yearly structure



In [11]:

    
# Per-(depth, width, month) minimum for models/experiment08.5/, then the
# mean over the months for every (depth, width) pair.
# Lists (not tuples) are required as multi-key groupers in pandas >= 1.0.
data5 = parse_whole_directory_monthwise("models/experiment08.5/")
min_loss5 = data5.groupby(["depth", "width", "month"]).min()
monthly_mean5 = min_loss5.groupby(["depth", "width"]).mean()



In [12]:

    
# One surface per metric: training loss first, then validation loss.
for metric in ("loss", "val_loss"):
    plot3d_loss(monthly_mean5[metric], rotation=60)
plt.show()



In [13]:

    
# Smallest month-averaged value of each column over all (depth, width) pairs.
# A list is used as grouper: pandas >= 1.0 treats a tuple as a single key.
min_loss5.groupby(["depth", "width"]).mean().min()









    Out[13]:





loss        0.001522
val_loss    0.044664
dtype: float64

Diff with yearly structure



In [14]:

    
# Per-(depth, width, month) minimum for models/experiment08.6/, then the
# mean over the months for every (depth, width) pair.
# Lists (not tuples) are required as multi-key groupers in pandas >= 1.0.
data6 = parse_whole_directory_monthwise("models/experiment08.6/")
min_loss6 = data6.groupby(["depth", "width", "month"]).min()
monthly_mean6 = min_loss6.groupby(["depth", "width"]).mean()



In [69]:

    
# Only the validation-loss surface is drawn here; the training-loss call is disabled.
#plot3d_loss(monthly_mean6.loss, rotation=135)
plot3d_loss(monthly_mean6.val_loss, rotation=135)
# Save the current figure (the surface created just above) as a PDF in the
# working directory before it is shown.
plt.savefig("diff_yearly.pdf", format="pdf", dpi=300, bbox_inches="tight")
plt.show()



In [34]:

    
# Smallest month-averaged value of each column over all (depth, width) pairs.
# A list is used as grouper: pandas >= 1.0 treats a tuple as a single key.
min_loss6.groupby(["depth", "width"]).mean().min()









    Out[34]:





loss        0.002032
val_loss    0.049671
dtype: float64



In [38]:

    
# (depth, width) pairs whose month-averaged validation loss is below 0.055.
# The aggregation is computed once and reused for the mask and the selection;
# a list is used as grouper because pandas >= 1.0 treats a tuple as one key.
mean_loss6 = min_loss6.groupby(["depth", "width"]).mean()
mean_loss6[mean_loss6.val_loss < 0.055]



In [49]:

    
# Lowest value of every column within each month group of experiment 08.6,
# drawn once as a line plot with a grid.
monthly_best6 = data6.groupby("month").min()
monthly_best6.plot()
plt.grid()
plt.show()



In [54]:

    
# Month-averaged validation loss per (depth, width), reduced to the minimum
# for every width and plotted against width.
# A list is used as grouper: pandas >= 1.0 treats a tuple as a single key.
min_loss6.val_loss.groupby(["depth", "width"]).mean().groupby("width").min().plot()
plt.show()



In [76]:

    
# Sum of training and validation loss: minimum per (depth, width, month),
# then the mean over the months for every (depth, width) pair.
# Lists (not tuples) are required as multi-key groupers in pandas >= 1.0.
(data6.loss + data6.val_loss).groupby(["depth", "width", "month"]).min().groupby(["depth", "width"]).mean()









    Out[76]:





depth  width
1      3        0.165376
       6        0.158803
       9        0.149033
       12       0.147186
       15       0.158647
       18       0.133678
       21       0.127436
       24       0.127344
       27       0.137323
       30       0.117969
4      3        0.328131
       6        0.153752
       9        0.111434
       12       0.116587
       15       0.105731
       18       0.110465
       21       0.098429
       24       0.114850
       27       0.098404
       30       0.092016
7      3        0.502421
       6        0.147649
       9        0.124753
       12       0.106704
       15       0.108419
       18       0.115860
       21       0.105159
       24       0.097907
       27       0.103891
       30       0.084355
                  ...   
22     3        0.783170
       6        0.357804
       9        0.190452
       12       0.156785
       15       0.156893
       18       0.105137
       21       0.101491
       24       0.104068
       27       0.092067
       30       0.096545
25     3        0.783256
       6        0.570163
       9        0.478458
       12       0.106868
       15       0.225993
       18       0.120149
       21       0.102966
       24       0.102949
       27       0.093464
       30       0.088058
28     3        0.711846
       6        0.572667
       9        0.581077
       12       0.193074
       15       0.104873
       18       0.262846
       21       0.136658
       24       0.097659
       27       0.091962
       30       0.108516
Length: 100, dtype: float64

Using spread prices on yearly structure with dropout



In [18]:

    
# Per-(depth, width, month) minimum for models/experiment08.7/, then the
# mean over the months for every (depth, width) pair.
# Lists (not tuples) are required as multi-key groupers in pandas >= 1.0.
data7 = parse_whole_directory_monthwise("models/experiment08.7/")
min_loss7 = data7.groupby(["depth", "width", "month"]).min()
monthly_mean7 = min_loss7.groupby(["depth", "width"]).mean()



In [19]:

    
# One surface per metric: training loss first, then validation loss.
for metric in ("loss", "val_loss"):
    plot3d_loss(monthly_mean7[metric], rotation=60)
plt.show()



In [20]:

    
# Smallest month-averaged value of each column over all (depth, width) pairs.
# A list is used as grouper: pandas >= 1.0 treats a tuple as a single key.
min_loss7.groupby(["depth", "width"]).mean().min()









    Out[20]:





loss        0.089845
val_loss    0.045695
dtype: float64

Now the same as above, but with SELU



In [21]:

    
# Per-(depth, width, month) minimum for models/experiment08.8/, then the
# mean over the months for every (depth, width) pair.
# Lists (not tuples) are required as multi-key groupers in pandas >= 1.0.
data8 = parse_whole_directory_monthwise("models/experiment08.8/")
min_loss8 = data8.groupby(["depth", "width", "month"]).min()
monthly_mean8 = min_loss8.groupby(["depth", "width"]).mean()



In [22]:

    
# One surface per metric: training loss first, then validation loss.
for metric in ("loss", "val_loss"):
    plot3d_loss(monthly_mean8[metric], rotation=60)
plt.show()



In [23]:

    
# Smallest month-averaged value of each column over all (depth, width) pairs.
# A list is used as grouper: pandas >= 1.0 treats a tuple as a single key.
min_loss8.groupby(["depth", "width"]).mean().min()









    Out[23]:





loss        0.004781
val_loss    0.037159
dtype: float64

Simple model with spread prices and yearly structure but decreasing hidden layer width



In [24]:

    
# Per-(depth, width, month) minimum for models/experiment08.9/, then the
# mean over the months for every (depth, width) pair.
# Lists (not tuples) are required as multi-key groupers in pandas >= 1.0.
data9 = parse_whole_directory_monthwise("models/experiment08.9/")
min_loss9 = data9.groupby(["depth", "width", "month"]).min()
monthly_mean9 = min_loss9.groupby(["depth", "width"]).mean()



In [25]:

    
# One surface per metric: training loss first, then validation loss.
for metric in ("loss", "val_loss"):
    plot3d_loss(monthly_mean9[metric], rotation=120)
plt.show()



In [26]:

    
# Smallest month-averaged value of each column over all (depth, width) pairs.
# A list is used as grouper: pandas >= 1.0 treats a tuple as a single key.
min_loss9.groupby(["depth", "width"]).mean().min()









    Out[26]:





loss        0.003698
val_loss    0.046991
dtype: float64

Naive prediction

When using spreads as input



In [41]:

    
def _naive_mse(inputs, targets, column):
    """Mean squared error between one input column and the first target column."""
    return np.mean(np.square(inputs[:, column] - targets[:, 0]))


# One entry per calendar month (January first) for each dataset split.
naive_prediction = []        # validation split
naive_prediction_test = []   # test split
naive_prediction_train = []  # training split
for month in range(1, 13):
    futures = FuturesByMonth("data/futures_per_year_and_month.h5", month, yearly=True, spreads=True)
    (x_train, y_train), (x_val, y_val), (x_test, y_test) = futures.splitted_dataset()
    # The input column belonging to this month is compared with the target.
    column = month - 1
    mse = _naive_mse(x_val, y_val, column)
    naive_prediction.append(mse)
    naive_prediction_test.append(_naive_mse(x_test, y_test, column))
    naive_prediction_train.append(_naive_mse(x_train, y_train, column))
    print(calendar.month_name[month], mse)









    



January 0.0341291666667
February 0.0354402173913
March 0.0255793478261
April 0.015698989899
May 0.0156776041667
June 0.0176815789474
July 0.0547474747475
August 0.0193851648352
September 0.0183391304348
October 0.0537640625
November 0.031189893617
December 0.043425



In [77]:

    
# Rows: month names (January..December); columns: one per dataset split.
naive = pd.DataFrame(
    {
        "Training": naive_prediction_train,
        "Validation": naive_prediction,
        "Test": naive_prediction_test,
    },
    index=calendar.month_name[1:],
    columns=["Training", "Validation", "Test"],
)
naive.plot(figsize=(8,4))
plt.title("Naive prediction MSE")
# Abbreviated month names as tick labels at positions 0..11.
plt.xticks(np.arange(12), calendar.month_abbr[1:])
plt.grid()
plt.show()



In [78]:

    
# Naive-prediction MSE on the test split, one value per month (January first).
naive_prediction_test









    Out[78]:





[0.021595312499999936,
 0.021047282608695522,
 0.01777717391304345,
 0.034429292929293163,
 0.033567708333333258,
 0.021206315789473747,
 0.013938888888888975,
 0.018261538461538499,
 0.12314456521739138,
 0.073775000000000132,
 0.038410106382978808,
 0.088537777777777812]

Now compare all the models



In [45]:

    
# Per-month minimum validation loss of each selected experiment. The disabled
# entries can be re-enabled to include the other experiments in the comparison.
model_val_losses = [
    #data.groupby("month").min().val_loss,
    #data1.groupby("month").min().val_loss,
    #data5.groupby("month").min().val_loss,
    data6.groupby("month").min().val_loss,
    #data7.groupby("month").min().val_loss,
    #data8.groupby("month").min().val_loss,
    #data9.groupby("month").min().val_loss,
]
# Naive validation MSE, indexed by month number 1..12.
naive_val_loss = pd.Series(naive_prediction, index=range(1, 13))
conct = pd.concat(model_val_losses + [naive_val_loss], axis=1)



In [46]:

    
# Regardless of the model, some months are easier and some are harder to predict.
conct.plot(linewidth=2)
# conct holds exactly two columns (experiment 08.6 and the naive baseline),
# so exactly two labels are passed. Extra labels would be matched
# positionally and mislabel the lines. Extend this tuple when more
# experiments are added to conct.
plt.legend(("6", "naive"))
plt.show()



In [ ]:

	loss	val_loss
month
1	0.002058	0.111360
2	0.000645	0.103101
3	0.000175	0.108866
4	0.001229	0.037394
5	0.000451	0.034245
6	0.000321	0.052696
7	0.003181	0.066345
8	0.000216	0.029992
9	0.000476	0.030029
10	0.002991	0.070187
11	0.001911	0.058548
12	0.000717	0.138313

		loss	val_loss
depth	width
1	3	0.106809	0.049671
	18	0.054954	0.051805
	21	0.046323	0.054658
	24	0.044503	0.051942
	27	0.042852	0.050985
	30	0.037773	0.052977
7	30	0.002046	0.053665
25	27	0.003731	0.053549
25	30	0.003040	0.052124