In [1]:

    
import operator
from functools import reduce
from itertools import repeat, cycle
import datetime
import calendar
from collections import Counter

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.collections import PolyCollection, LineCollection
from matplotlib import colors as mcolors

from vixstructure.data import TermStructure, Expirations



In [2]:

    
# Default figure size (width, height in inches) for plots that do not pass their own figsize.
mpl.rcParams["figure.figsize"] = (16, 9)



In [3]:

    
# Plot the rectifier max(0, x) on axes whose left/bottom spines cross at the origin
# and save it as rectifier.pdf.
x = np.linspace(-3, 3, 61)
fig = plt.figure(figsize=(3.5,2))
# Create the axes first and draw the guide lines on it explicitly. Drawing them through
# pyplot before add_subplot() can leave them on a separate, implicitly created axes.
ax = fig.add_subplot(1, 1, 1)
ax.axvline(0, color="black", lw=1)
ax.axhline(0, color="black", lw=1)
ax.spines['left'].set_position('zero')
ax.spines['right'].set_color('none')
ax.spines['bottom'].set_position('zero')
ax.spines['top'].set_color('none')
for side in ('left', 'bottom'):
    # Spine.set_smart_bounds is missing from newer matplotlib releases; call it only where available.
    if hasattr(ax.spines[side], 'set_smart_bounds'):
        ax.spines[side].set_smart_bounds(True)
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')
ax.plot(x, np.maximum(0 ,x), lw=2)
# Tick marks without labels; only the origin gets a text label.
plt.xticks(range(-3, 6), [])
plt.yticks(range(-3, 6), [])
plt.text(-0.1, -0.1, 0, ha="right", va="top", size="large")
plt.ylim(-0.5, 3.5)
plt.xlim(-3.5, 3.5)
plt.savefig("rectifier.pdf", format="pdf", bbox_inches="tight", dpi=300)
plt.show()



In [22]:

    
def selu(z, alpha=1.6732632423543772848170429916717, lam=1.0507009873554804934193349852946):
    """Scaled exponential linear unit for a single scalar ``z``.

    Returns ``lam * z`` for ``z > 0`` and ``lam * alpha * (exp(z) - 1)`` otherwise.
    The scale ``lam`` applies to both branches; without it on the negative branch
    the curve would not be the SELU function.
    """
    return lam * (z if z > 0 else alpha * (np.exp(z) - 1))
# Element-wise version of the scalar selu() so it can be applied to an array.
vselu = np.vectorize(selu)
# Plot SELU on axes whose left/bottom spines cross at the origin (uses the x grid defined earlier).
fig = plt.figure(figsize=(2.5,2))
# Create the axes first and draw the guide lines on it explicitly. Drawing them through
# pyplot before add_subplot() can leave them on a separate, implicitly created axes.
ax = fig.add_subplot(1, 1, 1)
ax.axvline(0, color="black", lw=1)
ax.axhline(0, color="black", lw=1)
ax.spines['left'].set_position('zero')
ax.spines['right'].set_color('none')
ax.spines['bottom'].set_position('zero')
ax.spines['top'].set_color('none')
for side in ('left', 'bottom'):
    # Spine.set_smart_bounds is missing from newer matplotlib releases; call it only where available.
    if hasattr(ax.spines[side], 'set_smart_bounds'):
        ax.spines[side].set_smart_bounds(True)
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')
ax.plot(x, vselu(x), lw=2)
# Tick marks without labels; only the origin gets a text label.
plt.xticks(range(-3, 6), [])
plt.yticks(range(-3, 6), [])
plt.text(-0.1, -0.1, 0, ha="right", va="top", size="large")
plt.ylim(-2.5, 3.5)
plt.xlim(-3.5, 3.5)
plt.show()



In [4]:

    
# Load the raw inputs. In the settle and symbol files a literal 0 is treated as
# missing (NaN); in the VIX file the string "null" is.
xm_settle = pd.read_csv(
    "data/8_m_settle.csv",
    usecols=range(1,10),
    dtype=np.float32,
    parse_dates=[0],
    header=0,
    index_col=0,
    na_values=0,
)
xm_symbols = pd.read_csv(
    "data/8_m_symbols.csv",
    usecols=range(1,10),
    parse_dates=[0],
    header=0,
    index_col=0,
    na_values=0,
)
expiration_months = pd.read_csv(
    "data/expiration.months.csv",
    header=0,
    usecols=range(1,13),
    dtype=np.float32,
)
vix_index = pd.read_csv(
    "data/vix.csv",
    parse_dates=[0],
    header=0,
    index_col=0,
    na_values="null",
    dtype=np.float32,
)
expirations = pd.read_csv(
    "data/expirations.csv",
    parse_dates=list(range(0,9)),
    usecols=range(1,10),
    header=0,
    index_col=0,
)



In [5]:

    
# Adjusted close of the VIX index for 2006 through 2016, saved as vix.pdf.
vix_index.loc["2006":"2016", "Adj Close"].plot(figsize=(8,4))
plt.xlabel("")
plt.savefig("vix.pdf", format="pdf", bbox_inches="tight", dpi=300)
plt.show()

Plot all the term structures in some large range

Helpful to get a general feeling.



In [6]:

    
# Plot part of the data: after the transpose every column (one date) becomes one line.
lines = xm_settle.iloc[1000:2000].T
# ffill() is the supported spelling of fillna(method='pad'): a missing value takes the
# value from the row above it.
lines.ffill().plot(legend=False, figsize=(16,8), colormap=cm.Blues)
plt.grid()
plt.show()

And this is a single term structure

One of the nice and clean ones.



In [7]:

    
# Every 252nd row between positions 1240 and 3000, without the first of them.
data_line = xm_settle.iloc[1240:3000:252].iloc[1:]
data_line.T.plot(style="-", legend=False, figsize=(4,5))
plt.grid()
# Eight tick positions need exactly eight labels. month_abbr[3:] would supply ten,
# which newer matplotlib rejects.
plt.xticks(np.arange(8), calendar.month_abbr[3:11])
# One legend entry per plotted date.
plt.legend(tuple(day.strftime("%m/%d/%Y") for day in data_line.index.date), title=False)
plt.savefig("term-structure.pdf", format="pdf", bbox_inches="tight", dpi=300)
plt.show()



In [8]:

    
# Term structures for every row dated 2008-11-20 through 2008-12-17, one line per date.
data_line = xm_settle.loc["2008-11-20":"2008-12-17"]
data_line.T.plot(style="-", legend=False, figsize=(4,5), colormap=cm.coolwarm)
plt.grid()
# Eight tick labels: the abbreviation of month 12 followed by those of months 1..7.
plt.xticks(np.arange(8), calendar.month_abbr[12:] + calendar.month_abbr[1:8])
plt.savefig("term-structure-crisis.pdf", format="pdf", bbox_inches="tight", dpi=300)
plt.show()



In [9]:

    
# Term structure of 2012-02-29 (left axis, green) together with the spread price of
# every inner leg (right axis, blue).
data_line = xm_settle.loc["2012-02-29"]
month_labels = calendar.month_abbr[3:11]
ax1 = data_line.plot(style="-o", legend=False, figsize=(8,4), color="green")
ax1.set_ylabel("futures price", color="green")
plt.xticks(np.arange(8), month_labels)
ax1.tick_params(axis='y', colors='green')
# Spread of leg i is 2*x[i] - x[i-1] - x[i+1]. The two outer legs have only one
# neighbour and stay NaN. Working on the raw values avoids integer keys on the
# label-indexed Series and the detour through Series.aggregate.
settle_values = data_line.values
inner_spreads = 2 * settle_values[1:-1] - settle_values[:-2] - settle_values[2:]
data_line_spread = pd.Series(np.concatenate(([np.nan], inner_spreads, [np.nan])),
                             index=month_labels)
ax2 = data_line_spread.plot(secondary_y=True, color="blue", style="-o")
ax2.set_ylabel("spread price", rotation=270, color="blue")
ax2.tick_params(axis='y', colors='blue')
ax1.grid(axis="x")
plt.axhline(0, color="CornflowerBlue", linestyle="--")
plt.savefig("long_spread.pdf", format="pdf", bbox_inches="tight", dpi=300)
plt.show()

Here I plot the contangos for a large range

Notice how the fluctuation of the M2-M1 contango is the largest while it steadily decreases for later expiration months.



In [10]:

    
# Relative contango between neighbouring legs for rows 2000..2999:
# (next leg - this leg) / this leg.
mxs = [xm_settle.iloc[2000:3000, leg] for leg in range(8)]
contangos = [(far - near) / near for near, far in zip(mxs, mxs[1:])]
contango_labels = ["M{}-M{}-Contango".format(leg + 1, leg) for leg in range(1, 8)]
plt.figure(figsize=(16,10))
for series, label in zip(contangos, contango_labels):
    series.plot(label=label, legend=True)
plt.axhline(0, color="black")
plt.xlabel("")
plt.grid()
plt.show()



In [11]:

    
# Only the adjusted close is needed from the index data.
vix = vix_index["Adj Close"]
# Join on the date index: the VIX close next to the futures columns (inner join by default).
trainingdata = vix.to_frame().merge(xm_settle, left_index=True, right_index=True)

Experimenting with data normalization

To make sure the training error doesn't explode, it is best to normalize the data so that it lies in the (-1, 1) range. Doing this as simply as possible is preferred; too much data wrangling beforehand might introduce an unwanted prior. When normalizing the data, all NaN values (including 0) are left out, because they stand for unavailable data and would influence the result too much.



In [12]:

    
# Per column: subtract the mean and divide by the peak-to-peak range (max - min).
mean = trainingdata.mean()
ptp = trainingdata.max().sub(trainingdata.min())
normalized = trainingdata.sub(mean).div(ptp)



In [13]:

    
# Visual check of the normalized columns.
normalized.plot(figsize=(16,10))
plt.grid()
plt.show()

Mapping data to an annual structure

At the moment the x-axis is [M1,...,M8], but I want it to be [Jan,Feb,...,Dec]. All points without a given value are NaN. The result is something like expiration_months, but with dates.



In [14]:

    
# Create a new data frame (not very efficient)
# It has the same index as xm_settle and the columns of expiration_months; no values are set yet.
xm_year = pd.DataFrame(index=xm_settle.index, columns=expiration_months.columns, dtype=np.float32)
def symbol_to_month(symbol):
    """Return the English month name for the month code in the first character of ``symbol``.

    Raises ``KeyError`` when that character is not one of the twelve codes below.
    """
    month_codes = "FGHJKMNQUVXZ"
    month_names = ("January", "February", "March", "April", "May", "June",
                   "July", "August", "September", "October", "November", "December")
    code_to_month = dict(zip(month_codes, month_names))
    first_letter = symbol[0]
    return code_to_month[first_letter]
# Fill xm_year date by date: every leg's settle price goes into the column named after
# the month of that leg's symbol.
for date in xm_year.index:
    # Leg label -> month name, for the legs that have a symbol on this date.
    symbol = xm_symbols.loc[date].dropna().map(symbol_to_month)
    settle = xm_settle.loc[date, symbol.index]
    # Write through .loc on the frame itself: rows handed out by iterrows() may be
    # copies, so assigning into them is not guaranteed to reach xm_year. The raw
    # values are used so prices are placed by position rather than aligned on the
    # leg labels, which differ from the month-name columns.
    xm_year.loc[date, symbol.values] = settle.values



In [15]:

    
def plot_xm_year(index, scale=None, save=False):
    """Plot one row of ``xm_year``, i.e. a term structure laid out by column (month) position.

    Parameters
    ----------
    index : int
        Row position used for both ``xm_year`` and ``xm_symbols``.
    scale : optional
        When truthy, the y-axis spans the minimum and maximum of the whole
        ``xm_year`` table instead of being autoscaled.
    save : bool
        When true, the figure is written to ``img_annual_structure/<index>.png``
        and closed; otherwise it is shown.
    """
    month_to_x = {month: idx for idx, month in enumerate(xm_year.columns)}
    data = xm_year.iloc[index].dropna()
    x = data.rename(month_to_x).index.values
    y = data.values
    # Column position of every listed leg, in leg order. A plain array is used so that
    # the comparisons below are positional and never an integer-key lookup on a
    # label-indexed Series.
    months = xm_symbols.iloc[index].dropna().map(symbol_to_month).map(month_to_x).values
    months_increasing = bool(np.all(np.diff(months) > 0))
    plt.figure(figsize=(16,9))
    if months_increasing:
        plt.plot(x, y, "-ob")
    else:
        # The legs wrap around the end of the column order: draw the two pieces
        # separately so no line connects the last column back to the first.
        splitindex = len(months) - months.argmin()
        plt.plot(x[:splitindex], y[:splitindex], "-ob")
        plt.plot(x[splitindex:], y[splitindex:], "-ob")
    plt.xticks(np.arange(12), xm_year.columns)
    if scale:
        y_min = xm_year.min().min()
        y_max = xm_year.max().max()
        plt.ylim(y_min, y_max)
    plt.xlim(0, 11)
    # Offset each value label by 1/200 of the axis span so it does not sit on the marker.
    ydiff = plt.ylim()[1] - plt.ylim()[0]
    xdiff = plt.xlim()[1] - plt.xlim()[0]
    for idx, idy in zip(x, y):
        plt.text(idx + xdiff/200, idy + ydiff/200, idy)
    plt.grid(axis="x")
    plt.title(xm_year.iloc[index].name.date())
    if save:
        plt.savefig("img_annual_structure/{:04d}.png".format(index))
        plt.close()
    else:
        plt.show()
# Show the annual layout for the row at position 3000.
plot_xm_year(3000)



In [16]:

    
# Create a new data frame for spread prices
long_prices = pd.DataFrame(index=xm_year.index, columns=xm_year.columns, dtype=np.float32)
for date, data in xm_year.iterrows():
    # Repeat the first and the last value once so every column has two neighbours.
    padded = np.concatenate((data[0:1], data, data[-1:]))
    # Calculate long prices (buy 2-1-1)
    long_prices.loc[date] = 2 * padded[1:-1] - padded[:-2] - padded[2:]



In [17]:

    
# Compare the long prices of one row with that row's futures prices minus their mean.
index = 3000
long_prices.iloc[index].plot(figsize=(16,9))
(xm_year.iloc[index] - xm_year.iloc[index].mean()).plot()
plt.grid()
plt.xticks(np.arange(12), long_prices.columns)
plt.legend(("Long prices", "Centered futures"), loc="upper left")
plt.show()

What to do with NaN values?

There is a high number of NaN values, especially in the early data. It is best to remove this first part because it is simply too unstable.



In [18]:

    
# There are times when no spread prices can be calculated.
# Shape after dropping the rows in which every column is NaN.
long_prices.dropna(how="all").shape









    Out[18]:





(2887, 12)



In [19]:

    
def plot_one_date(data_point, ha="left", va="bottom"):
    """Mark one row on the current plot with a red cross and its date.

    The marker sits at x = the row's name and y = the number of null entries in the
    row; ``ha`` and ``va`` set the alignment of the date label.
    """
    when = data_point.name
    missing = data_point.isnull().sum()
    plt.plot(when, missing, 'rx')
    plt.text(when, missing, when.date(), ha=ha, va=va, color="r")
# Number of missing values per row of xm_settle, plotted over the index.
xs_null = xm_settle.isnull().sum(axis=1)
xs_null.plot()
plt.ylabel("Number of missing values")
plt.xlabel("")
# The same per-row count again, used to locate the cut-off date.
count_nan = xm_settle.isnull().sum(axis=1)
# Last index entry with more than two missing values.
last_day_with_many_nans = count_nan[count_nan > 2].index[-1]
# The row directly after that entry.
first_day_with_usable_data = xm_settle.loc[last_day_with_many_nans:].iloc[1]
plot_one_date(first_day_with_usable_data, "right", "top")
plot_one_date(xm_settle.loc[last_day_with_many_nans])
plt.show()



In [20]:

    
# Bar chart per year: the maximum (red) and the mean number of missing values per row.
# The maximum is taken over rows with at least one missing value, the mean over all rows.
xs_null = xs_null[xs_null > 0]
xs_null.groupby(xs_null.index.year).max().plot.bar(figsize=(4,3), color="r", width=0.8)
xs_full = xm_settle.isnull().sum(axis=1)
xs_full.groupby(xs_full.index.year).mean().plot.bar(stacked=True, width=0.8)
plt.xlim(-0.6,7.5)
plt.legend(("Max", "Mean"))
plt.xlabel("")
plt.ylabel("Number of missing values")
plt.grid(axis="y")
plt.savefig("missing_values.pdf", format="pdf", bbox_inches="tight", dpi=300)
plt.show()



In [21]:

    
# Keep only the rows from one calendar day after last_day_with_many_nans onwards.
clip_start = last_day_with_many_nans + datetime.timedelta(days=1)
clip_settle = xm_settle.loc[clip_start:]
clip_year = xm_year.loc[clip_start:]
# Both clipped tables must cover exactly the same dates.
assert clip_settle.index.identical(clip_year.index)
print("These are the data points you can actually use:")
len(clip_settle)









    



These are the data points you can actually use:






    Out[21]:





2656



In [22]:

    
# Term structure of a single date from the clipped data.
clip_settle.loc["2006-11-15"].plot(figsize=(16,9))
plt.show()



In [23]:

    
# Number of missing values per column in the clipped data.
clip_settle.isnull().sum()









    Out[23]:





M1      0
M2      0
M3      0
M4      0
M5      0
M6     27
M7    186
M8    408
dtype: int64



In [24]:

    
# Rows whose M6 value is missing, shown after interpolate() has filled the gaps.
m6_missing = clip_settle["M6"].isnull()
clip_settle.interpolate()[m6_missing].T.plot(figsize=(16,9))
# Shade the x-range 5..8 of the plot.
plt.axvspan(5, 8, color=(0.9,0.9,1))
plt.show()

How large are the gaps in the data from holidays, weekends etc.?

A gap of one day is normal
A gap of two days means a weekend (and that is essentially every fifth day)



In [25]:

    
# The index of xm_settle as a Series, so it can be diffed and mapped below.
dates = pd.Series(xm_settle.index)



In [26]:

    
# Gap between consecutive index entries, and how often each gap length occurs.
dates_diff = dates.diff()
dates_diff.groupby(dates_diff).count()









    Out[26]:





date
1 days    2592
2 days      27
3 days     594
4 days      89
5 days       2
Name: date, dtype: int64



In [27]:

    
# Count how many index entries fall on each weekday and show the counts as a bar chart.
name_of_weekday = tuple(calendar.day_abbr)
weekdays = dates.dt.weekday
weekdays = weekdays.groupby(weekdays).count()
# Replace the weekday numbers by their abbreviated names.
weekdays.index = weekdays.index.map(lambda day: name_of_weekday[day])
weekdays.plot.bar(figsize=(4,3), width=0.6)
plt.xlabel("")
plt.savefig("weekdays.pdf", format="pdf", bbox_inches="tight", dpi=300)
plt.show()

Finding the right input data

Note that only data from 2006-10-23 is used (look above).



In [28]:

    
# Restrict both tables to the rows from 2006-10-23 onwards.
# NOTE(review): this rebinds xm_settle and expirations, so later cells see the clipped
# tables; the date is hard-coded rather than derived from the missing-value analysis.
expirations = expirations.loc["2006-10-23":]
xm_settle = xm_settle.loc["2006-10-23":]
assert expirations.shape == xm_settle.shape

1. Time to expiration



In [29]:

    
# Column V1 of the expirations table (presumably the expiration date of the first leg).
expirations_v1 = expirations["V1"]



In [30]:

    
# Difference between the value stored in V1 and the row's own index entry.
time_left = expirations_v1.values - expirations_v1.index.values
assert len(time_left) == len(expirations)
until_expiration = pd.Series(time_left, index=expirations.index, name="Expiration")



In [31]:

    
# Last row of xm_settle with the last VIX value as a horizontal reference line.
last_term = xm_settle.iloc[-1]
date = last_term.name
last_term.plot(figsize=(10,5))
# .iloc[-1] selects the last entry by position. A bare vix[-1] is an integer key on a
# date-indexed Series, which pandas no longer treats as a position.
plt.axhline(vix.iloc[-1], color="black")
plt.title("{} ({} days to expiration)".format(date.date(), until_expiration.loc[date].days))
plt.show()

2. Difference between term structure legs

This is the input data.



In [32]:

    
# First column keeps its original value; every other column becomes the difference
# to the column on its left. Column order stays M1 -> M8.
settle_diff = pd.concat(
    [
        xm_settle.iloc[:, :1],                # original first value
        xm_settle.diff(axis=1).iloc[:, 1:],   # differences (the first diff column has only NaNs)
    ],
    axis=1,
)
settle_diff









    Out[32]:







  
    
      
      M1
      M2
      M3
      M4
      M5
      M6
      M7
      M8
    
    
      date
      
      
      
      
      
      
      
      
    
  
  
    
      2006-10-23
      11.900000
      1.250000
      0.930000
      0.570000
      0.380000
      0.500000
      0.170000
      NaN
    
    
      2006-10-24
      11.770000
      1.040000
      1.250000
      0.440000
      0.660000
      0.250000
      0.190001
      NaN
    
    
      2006-10-25
      11.490000
      1.030001
      1.219999
      0.730000
      0.450000
      0.510000
      0.170000
      NaN
    
    
      2006-10-26
      11.270000
      0.950000
      1.280000
      0.660000
      0.800000
      0.290000
      0.190000
      NaN
    
    
      2006-10-27
      11.350000
      0.900000
      1.240000
      0.740000
      0.760000
      0.200000
      0.260000
      NaN
    
    
      2006-10-30
      11.300000
      0.970000
      1.080000
      0.799999
      0.510000
      0.440001
      0.365000
      NaN
    
    
      2006-10-31
      11.350000
      0.799999
      1.050000
      0.750000
      0.660000
      0.370000
      0.270000
      NaN
    
    
      2006-11-01
      11.510000
      0.940000
      0.790000
      0.710000
      0.540000
      0.390000
      0.200000
      NaN
    
    
      2006-11-02
      11.660000
      0.780000
      0.720000
      0.640000
      0.710000
      0.240000
      0.300000
      NaN
    
    
      2006-11-03
      11.500000
      0.980000
      0.560000
      0.900000
      0.520000
      0.300000
      0.240000
      NaN
    
    
      2006-11-06
      11.140000
      0.860000
      0.950000
      0.770000
      0.550000
      0.419999
      0.310000
      NaN
    
    
      2006-11-07
      11.210000
      0.860000
      0.810000
      0.809999
      0.460000
      0.380000
      0.460000
      NaN
    
    
      2006-11-08
      10.900000
      0.920000
      0.980000
      0.500000
      0.639999
      0.490001
      0.469999
      NaN
    
    
      2006-11-09
      10.950000
      0.810000
      1.040000
      0.500000
      0.630000
      0.520000
      0.380000
      NaN
    
    
      2006-11-10
      10.990000
      0.990000
      0.740001
      0.440000
      0.590000
      0.640000
      0.389999
      NaN
    
    
      2006-11-13
      10.590000
      1.360000
      0.780000
      0.450001
      0.520000
      0.630000
      0.450000
      NaN
    
    
      2006-11-14
      10.240000
      1.460000
      0.820001
      0.500000
      0.580000
      0.670000
      0.410000
      NaN
    
    
      2006-11-15
      11.600000
      0.700000
      0.650000
      0.600000
      0.550000
      0.410000
      NaN
      NaN
    
    
      2006-11-16
      11.600000
      0.679999
      0.640000
      0.510000
      0.570000
      0.530000
      NaN
      NaN
    
    
      2006-11-17
      11.650000
      0.700001
      0.570000
      0.480000
      0.560000
      0.540000
      NaN
      NaN
    
    
      2006-11-20
      11.400000
      0.950001
      0.719999
      0.280001
      0.549999
      0.660001
      0.889999
      NaN
    
    
      2006-11-21
      11.520000
      0.980000
      0.780000
      0.220000
      0.450000
      0.560000
      NaN
      NaN
    
    
      2006-11-22
      11.600000
      1.040000
      0.920000
      0.160000
      0.230000
      0.840000
      NaN
      NaN
    
    
      2006-11-24
      11.710000
      1.140000
      0.690000
      0.560000
      0.170000
      0.629999
      NaN
      NaN
    
    
      2006-11-27
      12.450000
      0.990000
      0.560000
      0.370000
      0.220000
      0.520000
      NaN
      NaN
    
    
      2006-11-28
      11.950000
      1.000000
      0.710000
      0.440001
      0.349999
      0.530000
      NaN
      NaN
    
    
      2006-11-29
      11.420000
      1.180000
      0.860000
      0.470000
      0.480000
      0.540000
      NaN
      NaN
    
    
      2006-11-30
      11.360000
      1.170000
      0.750000
      0.550000
      0.550000
      0.580000
      NaN
      NaN
    
    
      2006-12-01
      11.600000
      1.150000
      0.760000
      0.510000
      0.469999
      0.660000
      NaN
      NaN
    
    
      2006-12-04
      11.500000
      1.000000
      0.680000
      0.670000
      0.540000
      0.660000
      NaN
      NaN
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      2017-03-30
      12.820000
      0.530001
      0.820000
      0.880000
      0.440000
      0.790001
      0.469999
      0.250000
    
    
      2017-03-31
      13.280000
      0.300000
      0.670000
      0.750000
      0.500000
      0.760000
      0.440001
      0.330000
    
    
      2017-04-03
      13.480000
      0.100000
      0.690001
      0.809999
      0.490000
      0.770000
      0.410000
      0.299999
    
    
      2017-04-04
      13.240000
      0.130000
      0.760000
      0.820000
      0.450000
      0.700001
      0.400000
      0.320000
    
    
      2017-04-05
      13.900000
      -0.049999
      0.700000
      0.750000
      0.380000
      0.770000
      0.289999
      0.260000
    
    
      2017-04-06
      13.600000
      -0.050000
      0.599999
      0.810000
      0.390000
      0.699999
      0.420000
      0.310001
    
    
      2017-04-07
      14.050000
      -0.190001
      0.690001
      0.700000
      0.350000
      0.699999
      0.400002
      0.250000
    
    
      2017-04-10
      15.050000
      -0.720000
      0.460000
      0.650000
      0.320001
      0.719999
      0.320000
      0.270000
    
    
      2017-04-11
      16.000000
      -1.470000
      0.350000
      0.650000
      0.340000
      0.650001
      0.330000
      0.299999
    
    
      2017-04-12
      16.299999
      -1.349999
      0.070001
      0.570000
      0.309999
      0.660000
      0.320000
      0.300001
    
    
      2017-04-13
      16.299999
      -1.099999
      0.070001
      0.580000
      0.250000
      0.600000
      0.369999
      0.270000
    
    
      2017-04-17
      15.070000
      -0.770000
      0.250000
      0.670000
      0.330000
      0.700000
      0.350000
      0.250000
    
    
      2017-04-18
      14.700000
      -0.380000
      0.220000
      0.630000
      0.320000
      0.710001
      0.349998
      0.340000
    
    
      2017-04-19
      14.580000
      -0.050000
      0.590000
      0.310000
      0.740000
      0.400000
      0.300001
      0.080000
    
    
      2017-04-20
      14.320000
      0.020000
      0.650000
      0.390000
      0.790000
      0.400000
      0.300001
      0.029999
    
    
      2017-04-21
      14.300000
      -0.060000
      0.560000
      0.349999
      0.780001
      0.410000
      0.360001
      0.049999
    
    
      2017-04-24
      12.680000
      0.620000
      0.889999
      0.510000
      0.890000
      0.430000
      0.359999
      0.070002
    
    
      2017-04-25
      12.500000
      0.600000
      0.799999
      0.500000
      0.800000
      0.450000
      0.400000
      0.130001
    
    
      2017-04-26
      12.350000
      0.839999
      0.860001
      0.500000
      0.800000
      0.379999
      0.360001
      0.160000
    
    
      2017-04-27
      12.400000
      0.700001
      0.969999
      0.510000
      0.820000
      0.400001
      0.349999
      0.100000
    
    
      2017-04-28
      12.250000
      0.860000
      0.870000
      0.500000
      0.780001
      0.429999
      0.410001
      0.109999
    
    
      2017-05-01
      11.850000
      0.750000
      0.950000
      0.570000
      0.800000
      0.380000
      0.410000
      0.160000
    
    
      2017-05-02
      11.800000
      1.020000
      0.930000
      0.450000
      0.850000
      0.349999
      0.350000
      0.170000
    
    
      2017-05-03
      12.100000
      0.849999
      0.910000
      0.490001
      0.750000
      0.450000
      0.349999
      0.150000
    
    
      2017-05-04
      11.730000
      0.940001
      0.930000
      0.509999
      0.840000
      0.400001
      0.450000
      0.150000
    
    
      2017-05-05
      11.780000
      0.890000
      0.990000
      0.560000
      0.880000
      0.450000
      0.400000
      0.160001
    
    
      2017-05-08
      11.400000
      1.100000
      1.200000
      0.630000
      0.850000
      0.450000
      0.370000
      0.200001
    
    
      2017-05-09
      11.400000
      1.080000
      1.190001
      0.680000
      0.799999
      0.470000
      0.380000
      0.190001
    
    
      2017-05-10
      11.550000
      0.920000
      1.210000
      0.719999
      0.900001
      0.429999
      0.370001
      0.230000
    
    
      2017-05-11
      11.450000
      1.160000
      1.080000
      0.660001
      0.950000
      0.429999
      0.400000
      0.170000
    
  

2656 rows × 8 columns



In [33]:

    
# Plot all columns of settle_diff and print the overall minimum and maximum.
settle_diff.plot(figsize=(16,9))
print(settle_diff.min().min(), settle_diff.max().max())
# NOTE(review): settle_diff holds differences between term-structure legs, not spread
# prices, so this title looks inaccurate - confirm before reusing the figure.
plt.title("Differences between spread prices.")
plt.show()

Get targets (spread prices)



In [34]:

    
# Keep only the entries whose time to expiration is exactly zero; all others become missing.
expiration_indices = until_expiration.where(until_expiration == pd.Timedelta(0))
# Swap the date index for row positions so the result can be used with .iloc.
expiration_indices.index = range(len(expiration_indices))
# Row positions of the remaining (zero-time) entries.
expiration_indices = expiration_indices.dropna().index
expiration_indices









    Out[34]:





Int64Index([  17,   41,   57,   77,  101,  120,  140,  164,  183,  208,
            ...
            2471, 2495, 2515, 2535, 2559, 2576, 2596, 2620, 2640, 2655],
           dtype='int64', length=127)



In [35]:

    
# First consider the expired futures. On the next day they get value NaN.
spreads = xm_settle.copy()
# Row positions directly after each expiration row; the last expiration is skipped.
rows_after_expiration = expiration_indices[:-1] + 1
# Only M1 changes on those rows. np.nan is used because the np.NaN alias was removed in NumPy 2.0.
spreads.iloc[rows_after_expiration, spreads.columns.get_loc("M1")] = np.nan
assert spreads.M1.isnull().sum() == len(expiration_indices) - 1
def calculate_long_prices(term: pd.Series):
    """Return ``2*term[i] - term[i-1] - term[i+1]`` for every inner element of ``term``.

    The result is indexed by the labels of the inner elements (the first and last
    labels are dropped). A series with fewer than three elements gives an empty result.
    The computation runs on the raw values so it is purely positional and does not
    depend on integer keys being accepted by a label-indexed Series.
    """
    values = term.values
    longs = 2 * values[1:-1] - values[:-2] - values[2:]
    return pd.Series(longs, index=term.index[1:-1])
# Replace every row by its long prices; the result has one column per inner leg.
spreads = spreads.apply(calculate_long_prices, axis=1)



In [36]:

    
# (rows, columns) of the clipped settle table.
xm_settle.shape









    Out[36]:





(2656, 8)



In [37]:

    
# Targets (spreads) and inputs (settle_diff) must cover exactly the same dates.
assert spreads.index.identical(settle_diff.index)



In [38]:

    
# Columns M1..M7 only, and one example date with its missing values dropped.
xm_settle7 = xm_settle.loc[:,"M1":"M7"]
xm_settle7_test = xm_settle7.loc["2006-11-15"].dropna()
xm_settle7_test









    Out[38]:





M1    11.60
M2    12.30
M3    12.95
M4    13.55
M5    14.10
M6    14.51
Name: 2006-11-15 00:00:00, dtype: float32



In [39]:

    
# Replace the leg labels by 0..n-1 so the index can serve as x-values for the fit below.
xm_settle7_test.index = list(range(len(xm_settle7_test.index)))



In [40]:

    
# Fit a degree-2 polynomial to the example term structure and plot it over the data.
z = np.polyfit(xm_settle7_test.index, xm_settle7_test, 2)
p = np.poly1d(z)
xm_settle7_test.plot()
xp = np.linspace(0, 5, 100)
plt.plot(xp, p(xp))
plt.show()



In [41]:

    
# One subplot per row in which M7 is missing (rows become columns through the transpose).
xm_settle7[xm_settle7.M7.isnull()].T.plot(figsize=(10,200), legend=False, subplots=True)
plt.show()

Normalization



In [42]:

    
# Per column: subtract the mean and divide by the range (max - min), then print the
# overall minimum and maximum of the result.
spreads_norm = (spreads - spreads.mean()) / (spreads.max() - spreads.min())
print(spreads_norm.min().min(), spreads_norm.max().max())
spreads_norm.plot()
plt.show()









    



-0.79177435516 0.620398106199



In [43]:

    
# Same normalization for the inputs: per column, subtract the mean and divide by the range.
settle_norm = (settle_diff - settle_diff.mean()) / (settle_diff.max() - settle_diff.min())
print(settle_norm.min().min(), settle_norm.max().max())
settle_norm.plot()
plt.show()









    



-0.824046 0.817341



In [44]:

    
# Invert the normalization on the raw arrays: multiply by the range and add the mean back.
settle_denorm = settle_norm.values * (settle_diff.max() - settle_diff.min()).values + settle_diff.mean().values
pd.DataFrame(settle_denorm).plot()
plt.show()



In [45]:

    
# Original minus de-normalized values; presumably meant as a visual check that the
# round trip reproduces the data (values near zero).
settle_test = (settle_diff - settle_denorm)
settle_test.plot()
plt.show()

Splitting the data

We need a set for training, cross validation and testing.



In [46]:

    
# Two stacked panels sharing the x-axis; the upper one shows the input differences.
fig, axes = plt.subplots(nrows=2, sharex=True, figsize=(10,10))
settle_diff.plot(ax=axes[0])
def plot_vlines(ax):
    """Shade the validation (red) and test (green) periods of ``settle_diff`` on ``ax``.

    Each shaded span is ``int(len * 0.15 / 2)`` rows long. One validation/test pair
    ends at the middle of the index and one at its end, with the test span placed
    directly before the validation span in both cases.
    """
    idx = settle_diff.index
    total = len(settle_diff)
    span = int(total * 0.15 / 2)
    middle = int(total / 2)
    spans = (
        # Validation (red)
        (idx[middle - span], idx[middle], "r"),
        (idx[-span], idx[-1], "r"),
        # Test (green)
        (idx[middle - 2 * span], idx[middle - span], "g"),
        (idx[-2 * span], idx[-span], "g"),
    )
    for start, stop, colour in spans:
        ax.axvspan(start, stop, facecolor=colour, alpha=0.3)
# Mark the validation/test periods on both panels; the lower panel shows the targets.
plot_vlines(axes[0])
axes[0].set_title("Differences of futures' term structures")
spreads.plot(ax=axes[1])
plot_vlines(axes[1])
axes[1].set_title("Butterfly spread prices")
plt.show()

Naive prediction

What is the loss when the network always predicts the same values as the current day? (identity)



In [47]:

    
# Load the same CSV files through the project's vixstructure.data classes.
# NOTE(review): this rebinds `expirations`, which until now was a pandas DataFrame,
# to an Expirations object; cells that expect the DataFrame must run before this one.
expirations = Expirations("data/expirations.csv")
termStructure = TermStructure("data/8_m_settle.csv", expirations)



In [48]:

    
# Row-by-row difference between long_prices and the same table shifted by one row,
# i.e. the error of predicting each row by the one before it.
naive_prediction_error = termStructure.long_prices[:-1].values - termStructure.long_prices[1:].values



In [49]:

    
# Mean squared naive error, ignoring NaN entries.
print("Naive MSE. I definitely have to beat this!")
print(np.nanmean(np.square(naive_prediction_error)))









    



Naive MSE. I definitely have to beat this!
0.170281337234



In [50]:

    
# How often each value of expirations.days_to_expiration occurs, as a bar chart.
exp_counter = pd.Series(Counter(expirations.days_to_expiration), dtype=int)
# Convert the index to the smallest integer dtype that can hold its values.
exp_counter.index = pd.to_numeric(exp_counter.index, downcast="integer")
exp_counter.plot.bar(figsize=(8,2), width=0.8)
plt.ylabel("Nr. of occurences")
plt.savefig("days_to_expiration.pdf", format="pdf", bbox_inches="tight", dpi=300)
plt.show()

Playing around with visualizations



In [51]:

    
# Drop the rows, then the columns, that contain no data at all. dropna() takes a single
# axis per call (a tuple of axes is rejected by current pandas), and rebinding the
# result avoids mutating a slice of the original table in place.
xm_settle = xm_settle.dropna(how="all", axis=0).dropna(how="all", axis=1)
xm_settle.describe()









    Out[51]:







  
    
      
      M1
      M2
      M3
      M4
      M5
      M6
      M7
      M8
    
  
  
    
      count
      2656.000000
      2656.000000
      2656.000000
      2656.000000
      2656.000000
      2629.000000
      2470.000000
      2248.000000
    
    
      mean
      20.772100
      21.550528
      22.051373
      22.374050
      22.682241
      22.983538
      23.529346
      23.348955
    
    
      std
      8.747683
      7.765030
      7.115117
      6.633684
      6.301106
      6.037752
      5.825076
      5.562232
    
    
      min
      10.240000
      11.700000
      12.520000
      13.020000
      13.600000
      14.100000
      14.500000
      14.300000
    
    
      25%
      14.877500
      15.950000
      16.750000
      17.400000
      17.870001
      18.370001
      18.922500
      19.150000
    
    
      50%
      18.245000
      19.450001
      20.350000
      20.900000
      21.400000
      21.770000
      22.595000
      21.850000
    
    
      75%
      24.000000
      24.799999
      25.299999
      25.602500
      25.850000
      26.400000
      27.299999
      26.512500
    
    
      max
      67.900002
      59.520000
      54.619999
      50.430000
      47.759998
      45.990002
      44.580002
      44.000000



In [52]:

    
# Rows with five or more missing values.
xm_settle[xm_settle.isnull().sum(axis=1) >= 5]



In [53]:

    
#exp_last = expirations.loc[xm_settle.iloc[-1].name]
#exp_last.index = xm_settle.iloc[-1].index
#templol = pd.concat((xm_settle.iloc[-1], exp_last), axis=1).T
#templol.index = ["Value", "Expiration"]
#templol.loc["Expiration"] = templol.loc["Expiration"].apply(lambda x: x.strftime("%b %d"))
#print(templol.to_latex())



In [54]:

    
# All legs over time, plotted in reversed column order.
xm_settle.iloc[::, ::-1].plot(colormap=cm.winter_r, figsize=(8,4), linewidth=1.0, alpha=0.8)
plt.xlabel("")
# Saved under its own name: the 3D figure in this notebook is also written to
# "termstructures.pdf" and would otherwise overwrite this file.
plt.savefig("termstructures-2d.pdf", format="pdf", bbox_inches="tight", dpi=300)
plt.show()



In [55]:

    
# 3D view of the term structure: one filled polygon per leg, stacked along the y-axis.
fig = plt.figure(figsize=(8, 4))
# add_subplot(..., projection='3d') works across matplotlib versions; newer releases
# no longer accept keyword arguments in fig.gca().
ax = fig.add_subplot(1, 1, 1, projection='3d')
xs = np.arange(len(xm_settle))
zs = np.arange(0, 8)
# Value used for missing data and for both end points, so every polygon closes on the same level.
baseline = 10
verts = []
xm_settle_3dplot = xm_settle.copy()
xm_settle_3dplot.index = xs
for leg in zs:
    ys = xm_settle_3dplot.iloc[:,int(leg)].fillna(baseline)
    ys.iloc[0] = baseline
    ys.iloc[-1] = baseline
    verts.append(list(zip(ys.index.values, ys.values)))
poly = PolyCollection(verts, linewidth=2.0, facecolors=[cm.winter(i, 0.8) for i in  np.linspace(0, 1, 8)])
ax.add_collection3d(poly, zs=zs, zdir='y')

ax.set_xlim3d(0, len(xm_settle))
# Label every second year at the position of its first row.
years = xm_settle.index.year.unique()[1::2]
ax.set_xticks([(xm_settle.index.year == year).argmax() for year in years])
ax.set_xticklabels(years)
ax.set_ylim3d(0.0, 7.5)
# Fix the tick positions before naming them so the eight labels line up with the eight legs.
ax.set_yticks(zs)
ax.set_yticklabels(["M1", "M2", "M3", "M4", "M5", "M6", "M7", "M8"])
ax.set_zlim3d(11, 50)

plt.savefig("termstructures.pdf", format="pdf", bbox_inches="tight", dpi=300)
plt.show()



In [56]:

    
# Non-missing values per year, summed over all columns, as a bar chart.
xm_settle.groupby(xm_settle.index.year).count().sum(axis=1).plot.bar(figsize=(4,3), width=0.7)
plt.xlabel("")
plt.ylabel("Number of samples")
plt.grid(axis="y")
plt.savefig("number_of_samples.pdf", format="pdf", bbox_inches="tight", dpi=300)
plt.show()



In [57]:

    
# Summary statistics over all spread columns pooled into one long Series.
spreads_descr = pd.concat({key:spreads[key] for key in spreads.columns}).describe()



In [58]:

    
# Summary statistics over all settle columns pooled into one long Series.
termstr_descr = pd.concat({key:xm_settle[key] for key in xm_settle.columns}).describe()



In [59]:

    
# Both summaries side by side as a LaTeX table; .iloc[1:] drops the first row of describe().
print(pd.concat([termstr_descr, spreads_descr], axis=1, keys=["term structure", "spread prices"]).iloc[1:].T.to_latex(float_format='%.2f'))









    



\begin{tabular}{lrrrrrrr}
\toprule
{} &  mean &  std &    min &   25\% &   50\% &   75\% &  max \\
\midrule
term structure & 22.38 & 6.91 &  10.24 & 17.55 & 20.70 & 25.70 & 67.9 \\
spread prices  &  0.11 & 0.74 & -13.68 & -0.10 &  0.13 &  0.36 &  4.9 \\
\bottomrule
\end{tabular}



In [114]:

    
# Two stacked panels sharing the x-axis; the upper one shows the futures prices.
fig, axes = plt.subplots(nrows=2, sharex=True, figsize=(8,8))
xm_settle.plot(ax=axes[0], cmap=cm.brg, linewidth=0.8)
axes[0].grid(axis="x")
axes[0].set_ylabel("Futures price")
axes[0].legend(loc="upper left")
def plot_vlines(ax):
    l = len(xm_settle)
    s = int(l * 0.15 / 2)
    # Validation (red)
    ax.axvspan(xm_settle.index[int(l/2-s)], xm_settle.index[int(l / 2)], facecolor="r", alpha=0.3)
    ax.axvspan(xm_settle.index[-s], xm_settle.index[-1], facecolor="r", alpha=0.3)
    # Test (green)
    ax.axvspan(xm_settle.index[int(l/2-2*s)], xm_settle.index[int(l/2)-s], facecolor="g", alpha=0.3)
    ax.axvspan(xm_settle.index[-2*s], xm_settle.index[-s], facecolor="g", alpha=0.3)
plot_vlines(axes[0])
spreads.plot(ax=axes[1], cmap=cm.brg, linewidth=0.8)
axes[1].grid(axis="x")
axes[1].set_ylabel("Spread price")
plot_vlines(axes[1])
plt.xlabel("")
plt.savefig("validation-and-test-set.pdf", format="pdf", bbox_inches="tight", dpi=300)
plt.show()



In [ ]:

	M1	M2	M3	M4	M5	M6	M7	M8
date
2006-10-23	11.900000	1.250000	0.930000	0.570000	0.380000	0.500000	0.170000	NaN
2006-10-24	11.770000	1.040000	1.250000	0.440000	0.660000	0.250000	0.190001	NaN
2006-10-25	11.490000	1.030001	1.219999	0.730000	0.450000	0.510000	0.170000	NaN
2006-10-26	11.270000	0.950000	1.280000	0.660000	0.800000	0.290000	0.190000	NaN
2006-10-27	11.350000	0.900000	1.240000	0.740000	0.760000	0.200000	0.260000	NaN
2006-10-30	11.300000	0.970000	1.080000	0.799999	0.510000	0.440001	0.365000	NaN
2006-10-31	11.350000	0.799999	1.050000	0.750000	0.660000	0.370000	0.270000	NaN
2006-11-01	11.510000	0.940000	0.790000	0.710000	0.540000	0.390000	0.200000	NaN
2006-11-02	11.660000	0.780000	0.720000	0.640000	0.710000	0.240000	0.300000	NaN
2006-11-03	11.500000	0.980000	0.560000	0.900000	0.520000	0.300000	0.240000	NaN
2006-11-06	11.140000	0.860000	0.950000	0.770000	0.550000	0.419999	0.310000	NaN
2006-11-07	11.210000	0.860000	0.810000	0.809999	0.460000	0.380000	0.460000	NaN
2006-11-08	10.900000	0.920000	0.980000	0.500000	0.639999	0.490001	0.469999	NaN
2006-11-09	10.950000	0.810000	1.040000	0.500000	0.630000	0.520000	0.380000	NaN
2006-11-10	10.990000	0.990000	0.740001	0.440000	0.590000	0.640000	0.389999	NaN
2006-11-13	10.590000	1.360000	0.780000	0.450001	0.520000	0.630000	0.450000	NaN
2006-11-14	10.240000	1.460000	0.820001	0.500000	0.580000	0.670000	0.410000	NaN
2006-11-15	11.600000	0.700000	0.650000	0.600000	0.550000	0.410000	NaN	NaN
2006-11-16	11.600000	0.679999	0.640000	0.510000	0.570000	0.530000	NaN	NaN
2006-11-17	11.650000	0.700001	0.570000	0.480000	0.560000	0.540000	NaN	NaN
2006-11-20	11.400000	0.950001	0.719999	0.280001	0.549999	0.660001	0.889999	NaN
2006-11-21	11.520000	0.980000	0.780000	0.220000	0.450000	0.560000	NaN	NaN
2006-11-22	11.600000	1.040000	0.920000	0.160000	0.230000	0.840000	NaN	NaN
2006-11-24	11.710000	1.140000	0.690000	0.560000	0.170000	0.629999	NaN	NaN
2006-11-27	12.450000	0.990000	0.560000	0.370000	0.220000	0.520000	NaN	NaN
2006-11-28	11.950000	1.000000	0.710000	0.440001	0.349999	0.530000	NaN	NaN
2006-11-29	11.420000	1.180000	0.860000	0.470000	0.480000	0.540000	NaN	NaN
2006-11-30	11.360000	1.170000	0.750000	0.550000	0.550000	0.580000	NaN	NaN
2006-12-01	11.600000	1.150000	0.760000	0.510000	0.469999	0.660000	NaN	NaN
2006-12-04	11.500000	1.000000	0.680000	0.670000	0.540000	0.660000	NaN	NaN
...	...	...	...	...	...	...	...	...
2017-03-30	12.820000	0.530001	0.820000	0.880000	0.440000	0.790001	0.469999	0.250000
2017-03-31	13.280000	0.300000	0.670000	0.750000	0.500000	0.760000	0.440001	0.330000
2017-04-03	13.480000	0.100000	0.690001	0.809999	0.490000	0.770000	0.410000	0.299999
2017-04-04	13.240000	0.130000	0.760000	0.820000	0.450000	0.700001	0.400000	0.320000
2017-04-05	13.900000	-0.049999	0.700000	0.750000	0.380000	0.770000	0.289999	0.260000
2017-04-06	13.600000	-0.050000	0.599999	0.810000	0.390000	0.699999	0.420000	0.310001
2017-04-07	14.050000	-0.190001	0.690001	0.700000	0.350000	0.699999	0.400002	0.250000
2017-04-10	15.050000	-0.720000	0.460000	0.650000	0.320001	0.719999	0.320000	0.270000
2017-04-11	16.000000	-1.470000	0.350000	0.650000	0.340000	0.650001	0.330000	0.299999
2017-04-12	16.299999	-1.349999	0.070001	0.570000	0.309999	0.660000	0.320000	0.300001
2017-04-13	16.299999	-1.099999	0.070001	0.580000	0.250000	0.600000	0.369999	0.270000
2017-04-17	15.070000	-0.770000	0.250000	0.670000	0.330000	0.700000	0.350000	0.250000
2017-04-18	14.700000	-0.380000	0.220000	0.630000	0.320000	0.710001	0.349998	0.340000
2017-04-19	14.580000	-0.050000	0.590000	0.310000	0.740000	0.400000	0.300001	0.080000
2017-04-20	14.320000	0.020000	0.650000	0.390000	0.790000	0.400000	0.300001	0.029999
2017-04-21	14.300000	-0.060000	0.560000	0.349999	0.780001	0.410000	0.360001	0.049999
2017-04-24	12.680000	0.620000	0.889999	0.510000	0.890000	0.430000	0.359999	0.070002
2017-04-25	12.500000	0.600000	0.799999	0.500000	0.800000	0.450000	0.400000	0.130001
2017-04-26	12.350000	0.839999	0.860001	0.500000	0.800000	0.379999	0.360001	0.160000
2017-04-27	12.400000	0.700001	0.969999	0.510000	0.820000	0.400001	0.349999	0.100000
2017-04-28	12.250000	0.860000	0.870000	0.500000	0.780001	0.429999	0.410001	0.109999
2017-05-01	11.850000	0.750000	0.950000	0.570000	0.800000	0.380000	0.410000	0.160000
2017-05-02	11.800000	1.020000	0.930000	0.450000	0.850000	0.349999	0.350000	0.170000
2017-05-03	12.100000	0.849999	0.910000	0.490001	0.750000	0.450000	0.349999	0.150000
2017-05-04	11.730000	0.940001	0.930000	0.509999	0.840000	0.400001	0.450000	0.150000
2017-05-05	11.780000	0.890000	0.990000	0.560000	0.880000	0.450000	0.400000	0.160001
2017-05-08	11.400000	1.100000	1.200000	0.630000	0.850000	0.450000	0.370000	0.200001
2017-05-09	11.400000	1.080000	1.190001	0.680000	0.799999	0.470000	0.380000	0.190001
2017-05-10	11.550000	0.920000	1.210000	0.719999	0.900001	0.429999	0.370001	0.230000
2017-05-11	11.450000	1.160000	1.080000	0.660001	0.950000	0.429999	0.400000	0.170000

	M1	M2	M3	M4	M5	M6	M7	M8
count	2656.000000	2656.000000	2656.000000	2656.000000	2656.000000	2629.000000	2470.000000	2248.000000
mean	20.772100	21.550528	22.051373	22.374050	22.682241	22.983538	23.529346	23.348955
std	8.747683	7.765030	7.115117	6.633684	6.301106	6.037752	5.825076	5.562232
min	10.240000	11.700000	12.520000	13.020000	13.600000	14.100000	14.500000	14.300000
25%	14.877500	15.950000	16.750000	17.400000	17.870001	18.370001	18.922500	19.150000
50%	18.245000	19.450001	20.350000	20.900000	21.400000	21.770000	22.595000	21.850000
75%	24.000000	24.799999	25.299999	25.602500	25.850000	26.400000	27.299999	26.512500
max	67.900002	59.520000	54.619999	50.430000	47.759998	45.990002	44.580002	44.000000