In [1]:
import numpy as np
In [2]:
import pandas as pd
In [3]:
import matplotlib.pyplot as plt
In [20]:
from statsmodels.tsa.stattools import coint
In [21]:
from statsmodels.api import OLS
In [5]:
# Read the GLD workbook; later cells rely on its 'Date' and 'Adj Close' columns.
df1 = pd.read_excel('GLD.xls')
In [6]:
# Read the GDX workbook; later cells rely on its 'Date' and 'Adj Close' columns.
df2 = pd.read_excel('GDX.xls')
In [7]:
# Join the two frames on 'Date' (merge defaults to an inner join, so only dates
# present in both files survive). Overlapping column names get the
# '_GLD' / '_GDX' suffixes, e.g. 'Adj Close_GLD' and 'Adj Close_GDX'.
df = df1.merge(df2, on='Date', suffixes=('_GLD', '_GDX'))
In [8]:
# Use the 'Date' column as the row index (rebinding instead of mutating in place).
df = df.set_index('Date')
In [9]:
# Order rows by ascending index so the series run oldest-to-newest.
df = df.sort_index()
In [18]:
# Cointegration test between the two adjusted-close series.
# Unpacks the test statistic, its p-value, and the critical values.
coint_t, pvalue, crit_value = coint(
    df['Adj Close_GLD'],
    df['Adj Close_GDX'],
)
In [19]:
# Show the cointegration test output: (t-statistic, p-value, critical values).
# The original note on this cell read the result as: abs(t-stat) > critical value
# at 95%, with a p-value of about 1.8% (the rendered output is not part of this export).
# Interpretation: the p-value is the chance of seeing a test statistic at least this
# extreme *if* the null hypothesis (no cointegration) were true. It is not the
# probability that the null hypothesis itself is true.
(coint_t, pvalue, crit_value)
Out[19]:
In [22]:
# Regress GLD on GDX to estimate the hedge ratio.
# NOTE(review): no constant column is added to the regressor, so this appears to be
# a regression through the origin -- confirm that omitting the intercept is intended.
model = OLS(
    endog=df['Adj Close_GLD'],
    exog=df['Adj Close_GDX'],
)
In [23]:
# Fit the regression defined by `model`.
results = model.fit()
In [24]:
# Keep the fitted coefficient(s); the first entry is used as the hedge ratio below.
hedgeRatio = results.params
In [25]:
# Display the fitted coefficient(s) taken from results.params.
hedgeRatio
Out[25]:
In [26]:
# Hedged spread: GLD minus (hedge ratio x GDX).
# `hedgeRatio` comes from results.params, which is label-indexed (by regressor name)
# when the model is fit on pandas inputs. Use .iloc for positional access:
# `hedgeRatio[0]` relies on the deprecated integer-key fallback of Series.__getitem__.
spread = df['Adj Close_GLD'] - hedgeRatio.iloc[0] * df['Adj Close_GDX']
In [27]:
# Plot the hedged spread over time. Uses the explicit figure/axes interface and
# labels the figure so it can be read on its own; the trailing ';' suppresses
# the artist repr that would otherwise be echoed as cell output.
fig, ax = plt.subplots()
ax.plot(spread)
ax.set(title='Spread: GLD - hedgeRatio * GDX', xlabel='Date', ylabel='Spread');
Out[27]: