How to Form a Good Cointegrating (and Mean-Reverting) Pair of Stocks



In [1]:

    
import numpy as np



In [2]:

    
import pandas as pd



In [3]:

    
import matplotlib.pyplot as plt



In [20]:

    
from statsmodels.tsa.stattools import coint



In [21]:

    
from statsmodels.api import OLS



In [5]:

    
# Load the GLD workbook from the working directory; later cells rely on its
# 'Date' and 'Adj Close' columns.
df1 = pd.read_excel('GLD.xls')



In [6]:

    
# Load the GDX workbook from the working directory; later cells rely on its
# 'Date' and 'Adj Close' columns.
df2 = pd.read_excel('GDX.xls')



In [7]:

    
# Inner-join the two frames on 'Date' (pd.merge's default join type), keeping
# only dates present in both. Column names shared by both frames receive a
# _GLD / _GDX suffix so each ticker's columns stay distinguishable.
df = pd.merge(
    df1,
    df2,
    on='Date',
    suffixes=('_GLD', '_GDX'),
)



In [8]:

    
# Make 'Date' the index. Rebinding the returned frame (instead of mutating with
# inplace=True) is the form recommended by pandas and yields the same `df`.
df = df.set_index('Date')



In [9]:

    
# Order the rows by ascending index. Rebinding the returned frame (instead of
# mutating with inplace=True) is the form recommended by pandas and yields the
# same `df`.
df = df.sort_index()

Run cointegration (Engle-Granger) test



In [18]:

    
# Engle-Granger cointegration test of GLD against GDX. The call returns the
# test statistic, its p-value, and an array of critical values.
coint_t, pvalue, crit_value = coint(
    df['Adj Close_GLD'],
    df['Adj Close_GDX'],
)



In [19]:

    
# Display the test statistic, p-value and critical values (1%, 5%, 10% levels).
# The statistic (about -3.70) is more negative than the 5% critical value
# (about -3.35) but not the 1% one (about -3.93), so the null hypothesis of no
# cointegration is rejected at the 95% confidence level but not at 99%.
# The p-value (about 1.8%) is the probability of seeing a statistic at least
# this extreme if the null hypothesis were true; it is not the probability
# that the null hypothesis itself is true.
(coint_t, pvalue, crit_value)









    Out[19]:





(-3.6981160763300593,
 0.018427835409537425,
 array([-3.92518794, -3.35208799, -3.05551324]))

Determine hedge ratio



In [22]:

    
# Regress GLD (dependent variable) on GDX (sole regressor). No constant column
# is supplied, so the fitted line has no intercept term and the single slope
# coefficient serves as the hedge ratio.
model = OLS(
    df['Adj Close_GLD'],
    df['Adj Close_GDX'],
)



In [23]:

    
# Estimate the regression and keep the fitted-results object.
results = model.fit()



In [24]:

    
# The fitted coefficients: a one-element Series labelled by the regressor
# column ('Adj Close_GDX'), not a bare float.
hedgeRatio = results.params



In [25]:

    
# Show the estimated hedge ratio (the slope of GLD on GDX).
hedgeRatio









    Out[25]:





Adj Close_GDX    1.639523
dtype: float64

spread = GLD - hedgeRatio*GDX



In [26]:

    
# Spread = GLD minus the hedge-ratio-weighted GDX.
# hedgeRatio is a Series indexed by the label 'Adj Close_GDX', so its single
# coefficient is taken by position with .iloc[0]. Plain hedgeRatio[0] relies on
# pandas' deprecated "integer key treated as position" fallback.
spread = df['Adj Close_GLD'] - hedgeRatio.iloc[0] * df['Adj Close_GDX']



In [27]:

    
# Plot the spread against its date index to inspect it visually for mean reversion.
plt.plot(spread)









    Out[27]:





[<matplotlib.lines.Line2D at 0x1ae016bccc0>]