In [5]:
from urllib import urlretrieve

url_path = 'http://www.stoxx.com/download/historical_values/'
stoxxeu600_url = url_path + 'hbrbcpe.txt'
vstoxx_url = url_path + 'h_vstoxx.txt'

data_folder = 'data/'  # Save file to local target destination.

stoxxeu600_filepath = data_folder + "stoxxeu600.txt"
vstoxx_filepath = data_folder + "vstoxx.txt"

In [6]:
urlretrieve(stoxxeu600_url, stoxxeu600_filepath)


Out[6]:
('data/stoxxeu600.txt', <httplib.HTTPMessage instance at 0x105b40b90>)

In [7]:
urlretrieve(vstoxx_url, vstoxx_filepath)


Out[7]:
('data/vstoxx.txt', <httplib.HTTPMessage instance at 0x105f6d7e8>)

In [8]:
import os.path
os.path.isfile(stoxxeu600_filepath)


Out[8]:
True

In [9]:
os.path.isfile(vstoxx_filepath)


Out[9]:
True

In [10]:
with open(stoxxeu600_filepath, 'r') as opened_file:
    for i in range(5):
        print opened_file.readline()


Price Indices - EURO Currency

Date    ;Blue-Chip;Blue-Chip;Broad    ; Broad   ;Ex UK    ;Ex Euro Zone;Blue-Chip; Broad

        ;  Europe ;Euro-Zone;Europe   ;Euro-Zone;         ;            ; Nordic  ; Nordic

        ;  SX5P   ;  SX5E   ;SXXP     ;SXXE     ; SXXF    ;    SXXA    ;    DK5F ; DKXF

31.12.1986;775.00 ;  900.82 ;   82.76 ;   98.58 ;   98.06 ;   69.06 ;  645.26  ;  65.56


In [11]:
import pandas as pd

columns = ['Date', 'SX5P', 'SX5E', 'SXXP', 'SXXE',
           'SXXF', 'SXXA', 'DK5F', 'DKXF', 'EMPTY']
stoxxeu600 = pd.read_csv(stoxxeu600_filepath,
                 index_col=0,
                 parse_dates=True,                 
                 dayfirst=True,
                 header=None,
                 skiprows=4, 
                 names=columns,
                 sep=';'
                 )  
del stoxxeu600['EMPTY']

In [12]:
print stoxxeu600.info()


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 7293 entries, 1986-12-31 00:00:00 to 2015-04-17 00:00:00
Data columns (total 8 columns):
SX5P    7293 non-null float64
SX5E    7293 non-null float64
SXXP    7293 non-null float64
SXXE    7293 non-null float64
SXXF    7293 non-null float64
SXXA    7293 non-null float64
DK5F    7293 non-null float64
DKXF    7293 non-null float64
dtypes: float64(8)None

In [13]:
with open(vstoxx_filepath, 'r') as opened_file:
    for i in range(5):
        print opened_file.readline(),


EURO STOXX 50 Volatility Indices,,,,,,,,,
 ,VSTOXX,Sub-Index 1M,Sub-Index 2M,Sub-Index 3M,Sub-Index 6M,Sub-Index 9M,Sub-Index 12M,Sub-Index 18M,Sub-Index 24M
Date,V2TX,V6I1,V6I2,V6I3,V6I4,V6I5,V6I6,V6I7,V6I8
04.01.1999,18.2033,21.2458,17.5555,31.2179,33.3124,33.7327,33.2232,31.8535,23.8209
05.01.1999,29.6912,36.6400,28.4274,32.6922,33.7326,33.1724,32.8457,32.2904,25.0532

In [14]:
vstoxx = pd.read_csv(vstoxx_filepath,
                 index_col=0, 
                 parse_dates=True, 
                 dayfirst=True,
                 header=2)

In [15]:
vstoxx.to_csv('data/vstoxx.csv')

In [16]:
print vstoxx.info()


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 4148 entries, 1999-01-04 00:00:00 to 2015-04-17 00:00:00
Data columns (total 9 columns):
V2TX    4148 non-null float64
V6I1    3715 non-null float64
V6I2    4148 non-null float64
V6I3    4094 non-null float64
V6I4    4148 non-null float64
V6I5    4148 non-null float64
V6I6    4132 non-null float64
V6I7    4148 non-null float64
V6I8    4136 non-null float64
dtypes: float64(9)None

Merging the Data


In [17]:
import datetime as dt

cutoff_date = dt.datetime(1999, 1, 4)
data = pd.DataFrame(
{'EUROSTOXX' :stoxxeu600['SX5E'][stoxxeu600.index >= cutoff_date],
 'VSTOXX':vstoxx['V2TX'][vstoxx.index >= cutoff_date]})
data.info()


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 4175 entries, 1999-01-04 00:00:00 to 2015-04-17 00:00:00
Data columns (total 2 columns):
EUROSTOXX    4174 non-null float64
VSTOXX       4148 non-null float64
dtypes: float64(2)

In [18]:
print data.info()


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 4175 entries, 1999-01-04 00:00:00 to 2015-04-17 00:00:00
Data columns (total 2 columns):
EUROSTOXX    4174 non-null float64
VSTOXX       4148 non-null float64
dtypes: float64(2)None

In [19]:
print data.head(5)


            EUROSTOXX   VSTOXX
Date                          
1999-01-04    3543.10  18.2033
1999-01-05    3604.67  29.6912
1999-01-06    3685.36  25.1670
1999-01-07    3627.87  32.5205
1999-01-08    3616.57  33.2296

[5 rows x 2 columns]

Financial Analytics of SX5E and V2TX


In [20]:
print data.describe()


         EUROSTOXX       VSTOXX
count  4174.000000  4148.000000
mean   3258.150096    25.226004
std     784.665419     9.833513
min    1809.980000    11.596600
25%    2672.825000    18.465725
50%    3053.425000    23.114950
75%    3744.512500    28.320875
max    5464.430000    87.512700

[8 rows x 2 columns]

In [21]:
%pylab inline
data.plot(subplots=True,
          figsize=(10, 8),          
          color="blue",
          grid=True)


Populating the interactive namespace from numpy and matplotlib
Out[21]:
array([<matplotlib.axes.AxesSubplot object at 0x10c88fdd0>,
       <matplotlib.axes.AxesSubplot object at 0x10c9b6990>], dtype=object)

In [22]:
data.diff().hist(figsize=(10, 5),
                 color='blue',
                 bins=100)


Out[22]:
array([[<matplotlib.axes.AxesSubplot object at 0x10cb27790>,
        <matplotlib.axes.AxesSubplot object at 0x10bb36390>]], dtype=object)

In [23]:
data.pct_change().hist(figsize=(10, 5),
                       color='blue',
                       bins=100)


Out[23]:
array([[<matplotlib.axes.AxesSubplot object at 0x10d54cf50>,
        <matplotlib.axes.AxesSubplot object at 0x10d5fd6d0>]], dtype=object)

In [24]:
%pylab inline
import numpy as np

log_returns = np.log(data / data.shift(1)).dropna()
log_returns.plot(subplots=True,
                 figsize=(10, 8),    
                 color='blue',
                 grid=True)


Populating the interactive namespace from numpy and matplotlib
Out[24]:
array([<matplotlib.axes.AxesSubplot object at 0x10df0e750>,
       <matplotlib.axes.AxesSubplot object at 0x10dfbcc50>], dtype=object)

Correlation between SX5E and V2TX


In [25]:
print log_returns.corr()


           EUROSTOXX    VSTOXX
EUROSTOXX   1.000000 -0.731764
VSTOXX     -0.731764  1.000000

[2 rows x 2 columns]

In [26]:
import statsmodels.api as sm

log_returns.plot(figsize=(10,8),
                 x="EUROSTOXX", 
                 y="VSTOXX",
                 kind='scatter')

ols_fit = sm.OLS(log_returns['VSTOXX'].values, 
            log_returns['EUROSTOXX'].values).fit()

plot(log_returns['EUROSTOXX'], ols_fit.fittedvalues, 'r')


Out[26]:
[<matplotlib.lines.Line2D at 0x10e08d150>]

In [27]:
pd.rolling_corr(log_returns['EUROSTOXX'], 
                 log_returns['VSTOXX'], 
                 window=252).plot(figsize=(10,8))
plt.ylabel('Rolling Annual Correlation')


Out[27]:
<matplotlib.text.Text at 0x110362350>

Implementation for the VSTOXX Sub-Index Value


In [28]:
import calendar as cal
import datetime as dt


class OptionUtility(object):

    def get_settlement_date(self, date):
        """ Get third friday of the month """
        day = 21 - (cal.weekday(date.year, date.month, 1) + 2) % 7
        return dt.datetime(date.year, date.month, day, 12, 0, 0)

    def get_date(self, web_date_string, date_format):
        """  Parse a date from the web to a date object """
        return dt.datetime.strptime(web_date_string, date_format)

    def fwd_expiry_date(self, current_dt, months_fws):
        return self.get_settlement_date(
            current_dt + relativedelta(months=+months_fws))



import math


class VSTOXXCalculator(object):

    def __init__(self):
        self.secs_per_day = float(60*60*24)
        self.secs_per_year = float(365*self.secs_per_day)

    def calculate_sub_index(self, df, t_calc, t_settle, r):
        T = (t_settle-t_calc).total_seconds()/self.secs_per_year
        R = math.exp(r*T)

        # Calculate dK
        df["dK"] = 0
        df["dK"][df.index[0]] = df.index[1]-df.index[0]
        df["dK"][df.index[-1]] = df.index[-1]-df.index[-2]
        df["dK"][df.index[1:-1]] = (df.index.values[2:]-
                                    df.index.values[:-2])/2
        # Calculate the forward price
        df["AbsDiffCP"] = abs(df["Call"]-df["Put"])
        min_val = min(df["AbsDiffCP"])
        f_df = df[df["AbsDiffCP"]==min_val]
        fwd_prices = f_df.index+R*(f_df["Call"]-f_df["Put"])
        F = np.mean(fwd_prices)

        # Get the strike not exceeding forward price
        K_i0 = df.index[df.index <= F][-1]

        # Calculate M(K(i,j))
        df["MK"] = 0
        df["MK"][df.index < K_i0] = df["Put"]
        df["MK"][K_i0] = (df["Call"][K_i0]+df["Put"][K_i0])/2.
        df["MK"][df.index > K_i0] = df["Call"]

        # Apply the variance formula to get the sub-index
        summation = sum(df["dK"]/(df.index.values**2)*R*df["MK"])
        variance = 2/T*summation-1/T*(F/float(K_i0)-1)**2
        subindex = 100*math.sqrt(variance)
        return subindex


import urllib
from lxml import html


class EurexWebPage(object):

    def __init__(self):
        self.url = "%s%s%s%s%s" % (
            "http://www.eurexchange.com/",
            "exchange-en/market-data/statistics/",
            "market-statistics-online/180102!",
            "onlineStats?productGroupId=846&productId=19068",
            "&viewType=3")
        self.param_url = "&cp=%s&month=%s&year=%s&busDate=%s"
        self.lastupdated_dateformat = "%b %d, %Y %H:%M:%S"
        self.web_date_format = "%Y%m%d"
        self.__strike_price_header__ = "Strike price"
        self.__prices_header__ = "Daily settlem. price"
        self.utility = OptionUtility()

    def get_available_dates(self):
        html_data = urllib.urlopen(self.url).read()
        webpage = html.fromstring(html_data)

        # Find the dates available on the website
        dates_listed = webpage.xpath(
            "//select[@name='busDate']" +
            "/option")

        return [date_element.get("value")
                for date_element in reversed(dates_listed)]

    def get_date_from_web_date(self, web_date):
        return self.utility.get_date(web_date,
                                     self.web_date_format)

    def get_option_series_data(self, is_call,
                               current_dt, option_dt):
        selected_date = current_dt.strftime(self.web_date_format)
        option_type = "Call" if is_call else "Put"
        target_url = (self.url +
                      self.param_url) % (option_type,
                                         option_dt.month,
                                         option_dt.year,
                                         selected_date)
        html_data = urllib.urlopen(target_url).read()
        webpage = html.fromstring(html_data)
        update_date = self.get_last_update_date(webpage)
        indexes = self.get_data_headers_indexes(webpage)
        data = self.__get_data_rows__(webpage,
                                      indexes,
                                      option_type)
        return data, update_date

    def __get_data_rows__(self, webpage, indexes, header):
        data = pd.DataFrame()
        for row in webpage.xpath("//table[@class='dataTable']/" +
                                 "tbody/tr"):
            columns = row.xpath("./td")
            if len(columns) > max(indexes):
                try:
                    [K, price] = \
                        [float(columns[i].text.replace(",",""))
                         for i in indexes]
                    data.set_value(K, header, price)
                except:
                    continue
        return data

    def get_data_headers_indexes(self, webpage):
        table_headers = webpage.xpath(
            "//table[@class='dataTable']" + \
            "/thead/th/text()")
        indexes_of_interest = [
            table_headers.index(
                self.__strike_price_header__),
            table_headers.index(
                self.__prices_header__)]
        return indexes_of_interest

    def get_last_update_date(self, webpage):
        return dt.datetime.strptime(webpage.
                                    xpath("//p[@class='date']/b")
                                    [-1].text,
                                    self.lastupdated_dateformat)

import pandas as pd

from dateutil.relativedelta import relativedelta
import numpy as np

class VSTOXXSubIndex:
    
    def __init__(self, path_to_subindexes):
        self.sub_index_store_path = path_to_subindexes
        self.utility = OptionUtility()
        self.webpage = EurexWebPage()
        self.calculator = VSTOXXCalculator()
        self.csv_date_format = "%m/%d/%Y"

    def start(self, months=2, r=0.015):
        # For each date available, fetch the data
        for selected_date in self.webpage.get_available_dates():
            print "Collecting historical data for %s..." % \
                  selected_date
            self.calculate_and_save_sub_indexes(
                selected_date, months, r)

        print "Completed."
    
    def calculate_and_save_sub_indexes(self, selected_date, 
                                       months_fwd, r):
        current_dt = self.webpage.get_date_from_web_date(
            selected_date)

        for i in range(1, months_fwd+1):
            # Get settlement date of the expiring month
            expiry_dt = self.utility.fwd_expiry_date(
                current_dt, i)
                
            # Get calls and puts of expiring month
            dataset, update_dt = self.get_data(current_dt, 
                                               expiry_dt)                        
            if not dataset.empty:
                sub_index = self.calculator.calculate_sub_index(
                    dataset, update_dt, expiry_dt, r)
                self.save_vstoxx_sub_index_to_csv(
                    current_dt, sub_index, i)
            
    def save_vstoxx_sub_index_to_csv(self, current_dt, 
                                     sub_index, month):
        subindex_df = None
        try:
            subindex_df = pd.read_csv(self.sub_index_store_path, 
                                      index_col=[0])
        except:
            subindex_df = pd.DataFrame()
                        
        display_date = current_dt.strftime(self.csv_date_format)
        subindex_df.set_value(display_date, 
                              "I" + str(month), 
                              sub_index)
        subindex_df.to_csv(self.sub_index_store_path)
                    
    def get_data(self, current_dt, expiry_dt):
        """ Fetch and join calls and puts option series data """
        calls, dt1 = self.webpage.get_option_series_data(
            True, current_dt, expiry_dt)
        puts, dt2 = self.webpage.get_option_series_data(
            False, current_dt, expiry_dt)
        option_series = calls.join(puts, how='inner')            
        if dt1 != dt2:           
            print "Error: 2 different underlying prices."   
            
        return option_series, dt1

Analyzing the results


In [29]:
import pandas as pd

vstoxx_sub_indexes = pd.read_csv('data/vstoxx_sub_indexes.csv', 
                                 index_col=[0], 
                                 parse_dates=True, dayfirst=False)
vstoxx = pd.read_csv('data/vstoxx.csv', index_col=[0], 
                     parse_dates=True, dayfirst=False)

start_dt = min(vstoxx_sub_indexes.index.values)
vstoxx = vstoxx[vstoxx.index >= start_dt]

%pylab inline
new_pd = pd.DataFrame(vstoxx_sub_indexes["I2"])
new_pd = new_pd.join(vstoxx["V6I2"], how='inner')
new_pd.plot(figsize=(10, 6), grid=True)


Populating the interactive namespace from numpy and matplotlib
Out[29]:
<matplotlib.axes.AxesSubplot at 0x110694fd0>

Calculating the VSTOXX Main Index


In [30]:
import math

def calculate_vstoxx_index(dataframe, col_name):    
    secs_per_day = float(60*60*24)
    utility = OptionUtility()
    
    for row_date, row in dataframe.iterrows():
        # Set each expiry date with an 
        # expiration time of 5p.m
        date = row_date.replace(hour=17)  
        
        # Ensure dates and sigmas are in legal range
        expiry_date_1 = utility.get_settlement_date(date)
        expiry_date_2 = utility.fwd_expiry_date(date, 1)
        days_diff = (expiry_date_1-date).days
        sigma_1, sigma_2 = row["V6I1"], row["V6I2"]        
        if -1 <= days_diff <= 1:
            sigma_1, sigma_2 = row["V6I2"], row["V6I3"]        
        if days_diff <= 1:
            expiry_date_1 = expiry_date_2
            expiry_date_2 = utility.fwd_expiry_date(date, 2)   
            
        # Get expiration times in terms of seconds
        Nti = (expiry_date_1-date).total_seconds()
        Nti1 = (expiry_date_2-date).total_seconds()
        
        # Calculate index as per VSTOXX formula in seconds
        first_term = \
            (Nti1-30*secs_per_day)/ \
            (Nti1-Nti)*(sigma_1**2)*Nti/ \
            (secs_per_day*365)
        second_term = \
            (30*secs_per_day-Nti)/ \
            (Nti1-Nti)*(sigma_2**2)*Nti1/ \
            (secs_per_day*365)
        sub_index = math.sqrt(365.*(first_term+second_term)/30.)    
        dataframe.set_value(row_date, col_name, sub_index)
        
    return dataframe

In [31]:
sample = vstoxx.tail(100)  # From the previous section 
sample = calculate_vstoxx_index(sample, "Calculated")

vstoxx_df = sample["V2TX"]
calculated_df = sample["Calculated"]
df = pd.DataFrame({'VSTOXX' : sample["V2TX"],
                  'Calculated' : sample["Calculated"]})
df.plot(figsize=(10, 6), grid=True, style=['ro','b'])


Out[31]:
<matplotlib.axes.AxesSubplot at 0x1108240d0>

In [ ]: