In [5]:
from urllib import urlretrieve
url_path = 'http://www.stoxx.com/download/historical_values/'
stoxxeu600_url = url_path + 'hbrbcpe.txt'
vstoxx_url = url_path + 'h_vstoxx.txt'
data_folder = 'data/' # Save file to local target destination.
stoxxeu600_filepath = data_folder + "stoxxeu600.txt"
vstoxx_filepath = data_folder + "vstoxx.txt"
In [6]:
urlretrieve(stoxxeu600_url, stoxxeu600_filepath)
Out[6]:
In [7]:
urlretrieve(vstoxx_url, vstoxx_filepath)
Out[7]:
In [8]:
import os.path
os.path.isfile(stoxxeu600_filepath)
Out[8]:
In [9]:
os.path.isfile(vstoxx_filepath)
Out[9]:
In [10]:
with open(stoxxeu600_filepath, 'r') as opened_file:
for i in range(5):
print opened_file.readline()
In [11]:
import pandas as pd
columns = ['Date', 'SX5P', 'SX5E', 'SXXP', 'SXXE',
'SXXF', 'SXXA', 'DK5F', 'DKXF', 'EMPTY']
stoxxeu600 = pd.read_csv(stoxxeu600_filepath,
index_col=0,
parse_dates=True,
dayfirst=True,
header=None,
skiprows=4,
names=columns,
sep=';'
)
del stoxxeu600['EMPTY']
In [12]:
print stoxxeu600.info()
In [13]:
with open(vstoxx_filepath, 'r') as opened_file:
for i in range(5):
print opened_file.readline(),
In [14]:
vstoxx = pd.read_csv(vstoxx_filepath,
index_col=0,
parse_dates=True,
dayfirst=True,
header=2)
In [15]:
vstoxx.to_csv('data/vstoxx.csv')
In [16]:
print vstoxx.info()
In [17]:
import datetime as dt
cutoff_date = dt.datetime(1999, 1, 4)
data = pd.DataFrame(
{'EUROSTOXX' :stoxxeu600['SX5E'][stoxxeu600.index >= cutoff_date],
'VSTOXX':vstoxx['V2TX'][vstoxx.index >= cutoff_date]})
data.info()
In [18]:
print data.info()
In [19]:
print data.head(5)
In [20]:
print data.describe()
In [21]:
%pylab inline
data.plot(subplots=True,
figsize=(10, 8),
color="blue",
grid=True)
Out[21]:
In [22]:
data.diff().hist(figsize=(10, 5),
color='blue',
bins=100)
Out[22]:
In [23]:
data.pct_change().hist(figsize=(10, 5),
color='blue',
bins=100)
Out[23]:
In [24]:
%pylab inline
import numpy as np
log_returns = np.log(data / data.shift(1)).dropna()
log_returns.plot(subplots=True,
figsize=(10, 8),
color='blue',
grid=True)
Out[24]:
In [25]:
print log_returns.corr()
In [26]:
import statsmodels.api as sm
log_returns.plot(figsize=(10,8),
x="EUROSTOXX",
y="VSTOXX",
kind='scatter')
ols_fit = sm.OLS(log_returns['VSTOXX'].values,
log_returns['EUROSTOXX'].values).fit()
plot(log_returns['EUROSTOXX'], ols_fit.fittedvalues, 'r')
Out[26]:
In [27]:
pd.rolling_corr(log_returns['EUROSTOXX'],
log_returns['VSTOXX'],
window=252).plot(figsize=(10,8))
plt.ylabel('Rolling Annual Correlation')
Out[27]:
In [28]:
import calendar as cal
import datetime as dt
class OptionUtility(object):
def get_settlement_date(self, date):
""" Get third friday of the month """
day = 21 - (cal.weekday(date.year, date.month, 1) + 2) % 7
return dt.datetime(date.year, date.month, day, 12, 0, 0)
def get_date(self, web_date_string, date_format):
""" Parse a date from the web to a date object """
return dt.datetime.strptime(web_date_string, date_format)
def fwd_expiry_date(self, current_dt, months_fws):
return self.get_settlement_date(
current_dt + relativedelta(months=+months_fws))
import math
class VSTOXXCalculator(object):
def __init__(self):
self.secs_per_day = float(60*60*24)
self.secs_per_year = float(365*self.secs_per_day)
def calculate_sub_index(self, df, t_calc, t_settle, r):
T = (t_settle-t_calc).total_seconds()/self.secs_per_year
R = math.exp(r*T)
# Calculate dK
df["dK"] = 0
df["dK"][df.index[0]] = df.index[1]-df.index[0]
df["dK"][df.index[-1]] = df.index[-1]-df.index[-2]
df["dK"][df.index[1:-1]] = (df.index.values[2:]-
df.index.values[:-2])/2
# Calculate the forward price
df["AbsDiffCP"] = abs(df["Call"]-df["Put"])
min_val = min(df["AbsDiffCP"])
f_df = df[df["AbsDiffCP"]==min_val]
fwd_prices = f_df.index+R*(f_df["Call"]-f_df["Put"])
F = np.mean(fwd_prices)
# Get the strike not exceeding forward price
K_i0 = df.index[df.index <= F][-1]
# Calculate M(K(i,j))
df["MK"] = 0
df["MK"][df.index < K_i0] = df["Put"]
df["MK"][K_i0] = (df["Call"][K_i0]+df["Put"][K_i0])/2.
df["MK"][df.index > K_i0] = df["Call"]
# Apply the variance formula to get the sub-index
summation = sum(df["dK"]/(df.index.values**2)*R*df["MK"])
variance = 2/T*summation-1/T*(F/float(K_i0)-1)**2
subindex = 100*math.sqrt(variance)
return subindex
import urllib
from lxml import html
class EurexWebPage(object):
def __init__(self):
self.url = "%s%s%s%s%s" % (
"http://www.eurexchange.com/",
"exchange-en/market-data/statistics/",
"market-statistics-online/180102!",
"onlineStats?productGroupId=846&productId=19068",
"&viewType=3")
self.param_url = "&cp=%s&month=%s&year=%s&busDate=%s"
self.lastupdated_dateformat = "%b %d, %Y %H:%M:%S"
self.web_date_format = "%Y%m%d"
self.__strike_price_header__ = "Strike price"
self.__prices_header__ = "Daily settlem. price"
self.utility = OptionUtility()
def get_available_dates(self):
html_data = urllib.urlopen(self.url).read()
webpage = html.fromstring(html_data)
# Find the dates available on the website
dates_listed = webpage.xpath(
"//select[@name='busDate']" +
"/option")
return [date_element.get("value")
for date_element in reversed(dates_listed)]
def get_date_from_web_date(self, web_date):
return self.utility.get_date(web_date,
self.web_date_format)
def get_option_series_data(self, is_call,
current_dt, option_dt):
selected_date = current_dt.strftime(self.web_date_format)
option_type = "Call" if is_call else "Put"
target_url = (self.url +
self.param_url) % (option_type,
option_dt.month,
option_dt.year,
selected_date)
html_data = urllib.urlopen(target_url).read()
webpage = html.fromstring(html_data)
update_date = self.get_last_update_date(webpage)
indexes = self.get_data_headers_indexes(webpage)
data = self.__get_data_rows__(webpage,
indexes,
option_type)
return data, update_date
def __get_data_rows__(self, webpage, indexes, header):
data = pd.DataFrame()
for row in webpage.xpath("//table[@class='dataTable']/" +
"tbody/tr"):
columns = row.xpath("./td")
if len(columns) > max(indexes):
try:
[K, price] = \
[float(columns[i].text.replace(",",""))
for i in indexes]
data.set_value(K, header, price)
except:
continue
return data
def get_data_headers_indexes(self, webpage):
table_headers = webpage.xpath(
"//table[@class='dataTable']" + \
"/thead/th/text()")
indexes_of_interest = [
table_headers.index(
self.__strike_price_header__),
table_headers.index(
self.__prices_header__)]
return indexes_of_interest
def get_last_update_date(self, webpage):
return dt.datetime.strptime(webpage.
xpath("//p[@class='date']/b")
[-1].text,
self.lastupdated_dateformat)
import pandas as pd
from dateutil.relativedelta import relativedelta
import numpy as np
class VSTOXXSubIndex:
def __init__(self, path_to_subindexes):
self.sub_index_store_path = path_to_subindexes
self.utility = OptionUtility()
self.webpage = EurexWebPage()
self.calculator = VSTOXXCalculator()
self.csv_date_format = "%m/%d/%Y"
def start(self, months=2, r=0.015):
# For each date available, fetch the data
for selected_date in self.webpage.get_available_dates():
print "Collecting historical data for %s..." % \
selected_date
self.calculate_and_save_sub_indexes(
selected_date, months, r)
print "Completed."
def calculate_and_save_sub_indexes(self, selected_date,
months_fwd, r):
current_dt = self.webpage.get_date_from_web_date(
selected_date)
for i in range(1, months_fwd+1):
# Get settlement date of the expiring month
expiry_dt = self.utility.fwd_expiry_date(
current_dt, i)
# Get calls and puts of expiring month
dataset, update_dt = self.get_data(current_dt,
expiry_dt)
if not dataset.empty:
sub_index = self.calculator.calculate_sub_index(
dataset, update_dt, expiry_dt, r)
self.save_vstoxx_sub_index_to_csv(
current_dt, sub_index, i)
def save_vstoxx_sub_index_to_csv(self, current_dt,
sub_index, month):
subindex_df = None
try:
subindex_df = pd.read_csv(self.sub_index_store_path,
index_col=[0])
except:
subindex_df = pd.DataFrame()
display_date = current_dt.strftime(self.csv_date_format)
subindex_df.set_value(display_date,
"I" + str(month),
sub_index)
subindex_df.to_csv(self.sub_index_store_path)
def get_data(self, current_dt, expiry_dt):
""" Fetch and join calls and puts option series data """
calls, dt1 = self.webpage.get_option_series_data(
True, current_dt, expiry_dt)
puts, dt2 = self.webpage.get_option_series_data(
False, current_dt, expiry_dt)
option_series = calls.join(puts, how='inner')
if dt1 != dt2:
print "Error: 2 different underlying prices."
return option_series, dt1
In [29]:
import pandas as pd
vstoxx_sub_indexes = pd.read_csv('data/vstoxx_sub_indexes.csv',
index_col=[0],
parse_dates=True, dayfirst=False)
vstoxx = pd.read_csv('data/vstoxx.csv', index_col=[0],
parse_dates=True, dayfirst=False)
start_dt = min(vstoxx_sub_indexes.index.values)
vstoxx = vstoxx[vstoxx.index >= start_dt]
%pylab inline
new_pd = pd.DataFrame(vstoxx_sub_indexes["I2"])
new_pd = new_pd.join(vstoxx["V6I2"], how='inner')
new_pd.plot(figsize=(10, 6), grid=True)
Out[29]:
In [30]:
import math
def calculate_vstoxx_index(dataframe, col_name):
secs_per_day = float(60*60*24)
utility = OptionUtility()
for row_date, row in dataframe.iterrows():
# Set each expiry date with an
# expiration time of 5p.m
date = row_date.replace(hour=17)
# Ensure dates and sigmas are in legal range
expiry_date_1 = utility.get_settlement_date(date)
expiry_date_2 = utility.fwd_expiry_date(date, 1)
days_diff = (expiry_date_1-date).days
sigma_1, sigma_2 = row["V6I1"], row["V6I2"]
if -1 <= days_diff <= 1:
sigma_1, sigma_2 = row["V6I2"], row["V6I3"]
if days_diff <= 1:
expiry_date_1 = expiry_date_2
expiry_date_2 = utility.fwd_expiry_date(date, 2)
# Get expiration times in terms of seconds
Nti = (expiry_date_1-date).total_seconds()
Nti1 = (expiry_date_2-date).total_seconds()
# Calculate index as per VSTOXX formula in seconds
first_term = \
(Nti1-30*secs_per_day)/ \
(Nti1-Nti)*(sigma_1**2)*Nti/ \
(secs_per_day*365)
second_term = \
(30*secs_per_day-Nti)/ \
(Nti1-Nti)*(sigma_2**2)*Nti1/ \
(secs_per_day*365)
sub_index = math.sqrt(365.*(first_term+second_term)/30.)
dataframe.set_value(row_date, col_name, sub_index)
return dataframe
In [31]:
sample = vstoxx.tail(100) # From the previous section
sample = calculate_vstoxx_index(sample, "Calculated")
vstoxx_df = sample["V2TX"]
calculated_df = sample["Calculated"]
df = pd.DataFrame({'VSTOXX' : sample["V2TX"],
'Calculated' : sample["Calculated"]})
df.plot(figsize=(10, 6), grid=True, style=['ro','b'])
Out[31]:
In [ ]: