Pair Trading of GLD and GDX


In [1]:
import numpy as np

In [2]:
import pandas as pd

In [3]:
import matplotlib.pyplot as plt

In [4]:
import statsmodels.api as sm

In [5]:
df1=pd.read_excel('GLD.xls')

In [6]:
df2=pd.read_excel('GDX.xls')

In [7]:
df=pd.merge(df1, df2, on='Date', suffixes=('_GLD', '_GDX'))

In [8]:
df.set_index('Date', inplace=True)

In [9]:
df.sort_index(inplace=True)

In [10]:
trainset=np.arange(0, 252)

In [11]:
testset=np.arange(trainset.shape[0], df.shape[0])

Determine hedge ratio on trainset


In [12]:
model=sm.OLS(df.loc[:, 'Adj Close_GLD'].iloc[trainset], df.loc[:, 'Adj Close_GDX'].iloc[trainset])

In [13]:
results=model.fit()

In [14]:
hedgeRatio=results.params

In [15]:
hedgeRatio


Out[15]:
Adj Close_GDX    1.631009
dtype: float64

spread = GLD - hedgeRatio*GDX


In [16]:
spread=df.loc[:, 'Adj Close_GLD']-hedgeRatio[0]*df.loc[:, 'Adj Close_GDX']

In [17]:
plt.plot(spread.iloc[trainset])


Out[17]:
[<matplotlib.lines.Line2D at 0x23ce082ec18>]

In [18]:
plt.plot(spread.iloc[testset])


Out[18]:
[<matplotlib.lines.Line2D at 0x23ce06a7240>]

In [19]:
spreadMean=np.mean(spread.iloc[trainset])

In [20]:
spreadMean


Out[20]:
0.05219623850035999

In [21]:
spreadStd=np.std(spread.iloc[trainset])

In [22]:
spreadStd


Out[22]:
1.944860873496509

In [23]:
df['zscore']=(spread-spreadMean)/spreadStd

In [24]:
df['positions_GLD']=np.nan

In [25]:
df['positions_GDX']=np.nan

In [26]:
df.loc[df.zscore>=2, ('positions_GLD', 'positions_GDX')]=[-1, 1] # Buy spread

In [27]:
df.loc[df.zscore<=-2, ('positions_GLD', 'positions_GDX')]=[1, -1] # Short spread

In [28]:
df.loc[abs(df.zscore)<=1, ('positions_GLD', 'positions_GDX')]=0 # Exit spread

In [29]:
df.fillna(method='ffill', inplace=True) # ensure existing positions are carried forward unless there is an exit signal

In [30]:
positions=df.loc[:, ('positions_GLD', 'positions_GDX')]

In [31]:
dailyret=df.loc[:, ('Adj Close_GLD', 'Adj Close_GDX')].pct_change()

In [32]:
pnl=(np.array(positions.shift())*np.array(dailyret)).sum(axis=1)

In [33]:
sharpeTrainset=np.sqrt(252)*np.mean(pnl[trainset[1:]])/np.std(pnl[trainset[1:]])

In [34]:
sharpeTrainset


Out[34]:
2.3324949367127323

In [35]:
sharpeTestset=np.sqrt(252)*np.mean(pnl[testset])/np.std(pnl[testset])

In [36]:
sharpeTestset


Out[36]:
1.5139137120213677

In [37]:
plt.plot(np.cumsum(pnl[testset]))


Out[37]:
[<matplotlib.lines.Line2D at 0x23ce0b844a8>]

In [38]:
positions.to_pickle('example3_6_positions')