Pairs trading is a strategy that uses two stocks that are highly correlated. We can then use the difference in price between the two stocks as a signal if one moves out of correlation with the other. It is an older strategy that is classically used as an introduction to algorithmic trading. There is a fantastic full guide and write-up on Investopedia that you can find here! I highly recommend reading the article in full before continuing; it is entertaining and informative!
Let's create our first basic trading algorithm! This is an exercise in using Quantopian, NOT a realistic representation of what a good algorithm is! Never use something as simple as this in the real world! This is an extremely simplified version of pairs trading; we won't be considering factors such as cointegration!
In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import quandl
In [2]:
# Date window for the price download, written as ISO 8601 (YYYY-MM-DD).
# The previous 'MM-DD-YYYY' spelling is ambiguous (1 July vs. 7 January) and
# is not the format the Quandl API documents for start_date / end_date.
# NOTE(review): assumes the intended window is 1 July 2015 - 1 July 2017.
start = '2015-07-01'
end = '2017-07-01'
In [3]:
# Both downloads share the same date window, so build the keyword
# arguments once and reuse them for each ticker.
date_range_kwargs = dict(start_date=start, end_date=end)

# United Airlines (UAL) and American Airlines (AAL) from the WIKI dataset.
united = quandl.get('WIKI/UAL', **date_range_kwargs)
american = quandl.get('WIKI/AAL', **date_range_kwargs)
In [4]:
# Preview the first five rows of the United Airlines data.
united.head(n=5)
Out[4]:
In [5]:
# Preview the first five rows of the American Airlines data.
american.head(n=5)
Out[5]:
In [6]:
# Overlay the adjusted closing prices of both airlines on a single axes.
ax = american['Adj. Close'].plot(figsize=(12, 8), label='American Airlines')
united['Adj. Close'].plot(ax=ax, label='United Airlines')
ax.legend()
Out[6]:
In [7]:
# 2x2 correlation-coefficient matrix of the two adjusted-close series;
# the off-diagonal entries are the correlation between the two stocks.
np.corrcoef(x=american['Adj. Close'], y=united['Adj. Close'])
Out[7]:
In [8]:
# Price spread: American Airlines minus United Airlines (adjusted close).
spread = american['Adj. Close'].sub(united['Adj. Close'])

# Plot the spread with a red horizontal line at its overall mean.
ax = spread.plot(figsize=(12, 8), label='Spread')
ax.axhline(spread.mean(), color='r')
ax.legend()
Out[8]:
In [9]:
def zscore(stocks):
    """Return *stocks* standardised to zero mean and unit standard deviation.

    The mean is taken with ``stocks.mean()`` and the spread with
    ``np.std(stocks)``; each value is expressed as its distance from the
    mean in units of that standard deviation.
    """
    centered = stocks - stocks.mean()
    scale = np.std(stocks)
    return centered / scale
In [10]:
# Standardise the spread once and reuse it for the curve and its mean line.
spread_z = zscore(spread)
spread_z.plot(figsize=(14, 8))
plt.axhline(spread_z.mean(), color='black')

# Dashed reference lines one standard deviation above and below the mean.
for level, colour in ((1.0, 'r'), (-1.0, 'g')):
    plt.axhline(level, c=colour, ls='--')

plt.legend(['Spread z-score', 'Mean', '+1', '-1']);
In [11]:
# 1-day moving average of the price spread (i.e. the spread itself); kept as
# a named series so the short window can be compared with the long window.
spread_mavg1 = spread.rolling(1).mean()

# 30-day moving average of the price spread. The first 29 rows are NaN
# because a full 30-observation window is not yet available.
spread_mavg30 = spread.rolling(30).mean()

# Rolling 30-day standard deviation of the spread.
std_30 = spread.rolling(30).std()

# Rolling z-score: how far the latest spread sits from its 30-day average,
# measured in 30-day standard deviations.
zscore_30_1 = (spread_mavg1 - spread_mavg30) / std_30

zscore_30_1.plot(figsize=(12, 8), label='Rolling 30 day Z score')
plt.axhline(0, color='black', label='Zero')
# Show both thresholds, matching the +1 / -1 lines on the full-sample
# z-score plot, and draw the legend so the labels are actually displayed.
plt.axhline(1.0, color='red', linestyle='--', label='+1')
plt.axhline(-1.0, color='green', linestyle='--', label='-1')
plt.legend();
In [12]:
import numpy as np
def initialize(context):
    """
    Called once at the start of the algorithm.

    Stores the two airline securities and the trade-state flags on
    ``context``, then schedules ``check_pairs`` to run every trading day
    using the ``market_close(minutes=60)`` time rule.
    """
    # Not in any spread trade when the algorithm starts.
    context.long_on_spread = False
    context.shorting_spread = False

    # The pair being traded.
    context.aa = sid(45971)   # American Airlines (AAL)
    context.ual = sid(28051)  # United Airlines (UAL)

    # Re-evaluate the pair once per trading day.
    schedule_function(
        check_pairs,
        date_rules.every_day(),
        time_rules.market_close(minutes = 60),
    )
def check_pairs(context, data):
    """
    Evaluate the AAL - UAL spread and open, flip or close the pair trade.

    A z-score is formed from the latest spread, the mean of the 30-bar
    spread window and that window's standard deviation. Above 0.5 the
    spread is shorted (short AAL, long UAL); below -0.5 it is bought
    (long AAL, short UAL); within 0.1 of zero both positions are closed.
    Nothing is ordered or recorded unless the window's standard deviation
    is greater than zero.
    """
    aa, ual = context.aa, context.ual

    # 30 daily prices for both securities.
    prices = data.history([aa, ual], "price", 30, '1d')
    window_spread = prices[aa] - prices[ual]

    # .iloc[-1:] keeps a one-row DataFrame rather than collapsing to a Series.
    latest = prices.iloc[-1:]

    mavg_30 = np.mean(window_spread)
    std_30 = np.std(window_spread)
    mavg_1 = np.mean(latest[aa] - latest[ual])

    # Written as "not (> 0)" so a NaN deviation is skipped as well.
    if not std_30 > 0:
        return

    z = (mavg_1 - mavg_30) / std_30

    enter_short = z > 0.5 and not context.shorting_spread
    enter_long = z < -0.5 and not context.long_on_spread

    if enter_short:
        # spread = aa - ual is high: short the top leg, buy the bottom leg.
        order_target_percent(aa, -0.5)
        order_target_percent(ual, 0.5)
        context.shorting_spread, context.long_on_spread = True, False
    elif enter_long:
        # spread = aa - ual is low: buy the top leg, short the bottom leg.
        order_target_percent(aa, 0.5)
        order_target_percent(ual, -0.5)
        context.shorting_spread, context.long_on_spread = False, True
    elif abs(z) < 0.1:
        # Spread is back near its average: flatten both legs.
        order_target_percent(aa, 0)
        order_target_percent(ual, 0)
        context.shorting_spread, context.long_on_spread = False, False

    record('zscore', z)