How to Form a Good Cointegrating (and Mean-Reverting) Pair of Stocks


In [1]:
import numpy as np

In [2]:
import pandas as pd

In [3]:
import matplotlib.pyplot as plt

In [20]:
from statsmodels.tsa.stattools import coint

In [21]:
from statsmodels.api import OLS

In [5]:
# Read the GLD workbook from the working directory; later cells rely on its
# 'Date' and 'Adj Close' columns.
df1 = pd.read_excel(io='GLD.xls')

In [6]:
# Read the GDX workbook from the working directory; later cells rely on its
# 'Date' and 'Adj Close' columns.
df2 = pd.read_excel(io='GDX.xls')

In [7]:
# Join the two price tables on their shared 'Date' column. Any other column
# name present in both tables is disambiguated with a ticker suffix, which is
# where names such as 'Adj Close_GLD' / 'Adj Close_GDX' come from.
df = pd.merge(
    left=df1,
    right=df2,
    on='Date',
    suffixes=('_GLD', '_GDX'),
)

In [8]:
# Promote the 'Date' column to the row index of the merged table.
df = df.set_index('Date')

In [9]:
# Order the rows by ascending index (the dates) before any time-series work.
df = df.sort_index()

Run cointegration (Engle-Granger) test


In [18]:
# Engle-Granger cointegration test of the GLD series against the GDX series.
# The call returns three values: the test statistic, its p-value, and an
# array of critical values.
coint_t, pvalue, crit_value = coint(
    df['Adj Close_GLD'],
    df['Adj Close_GDX'],
)

In [19]:
# Display the statistic, p-value and critical values together.
# Reading the recorded output: the statistic (about -3.70) is more negative
# than the middle critical value (about -3.35; statsmodels documents the array
# as the 1%, 5% and 10% levels), so the null hypothesis of no cointegration is
# rejected at the 95% confidence level but not at 99%. The p-value of about
# 0.018 is the chance of seeing a statistic this extreme if the null were true.
coint_t, pvalue, crit_value


Out[19]:
(-3.6981160763300593,
 0.018427835409537425,
 array([-3.92518794, -3.35208799, -3.05551324]))

Determine hedge ratio


In [22]:
# Ordinary least squares with GLD as the dependent variable and GDX as the
# only regressor. No constant column is supplied, so this is a regression
# through the origin and its single slope is used as the hedge ratio.
model = OLS(endog=df['Adj Close_GLD'], exog=df['Adj Close_GDX'])

In [23]:
# Fit the regression; the fitted results object carries the estimated slope.
results = model.fit()

In [24]:
# Estimated coefficients as a pandas Series labelled by regressor name; with
# a single regressor it has exactly one entry, keyed 'Adj Close_GDX'.
hedgeRatio = results.params

In [25]:
# Show the fitted coefficient Series (one entry: the slope of GLD on GDX).
hedgeRatio


Out[25]:
Adj Close_GDX    1.639523
dtype: float64

spread = GLD - hedgeRatio*GDX


In [26]:
# Spread of the pair: GLD minus the hedge-ratio-weighted GDX price.
# hedgeRatio is a Series whose index is the label 'Adj Close_GDX', not
# integers, so its single element is taken by position with .iloc. Plain
# hedgeRatio[0] depends on pandas treating an integer key as a position on a
# non-integer index, which is deprecated and emits a FutureWarning.
spread = df['Adj Close_GLD'] - hedgeRatio.iloc[0] * df['Adj Close_GDX']

In [27]:
# Line plot of the spread series, to inspect visually whether it hovers
# around a stable level.
plt.plot(spread)


Out[27]:
[<matplotlib.lines.Line2D at 0x1ae016bccc0>]