Testing Suite for Pairs-Trading Project


In [1]:
import pandas as pd
import scipy.stats as stats
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as stat
import statsmodels.tsa.stattools as ts
from itertools import combinations
import multiprocessing
from multiprocessing import Pool
import cPickle as pkl

Distance Test


In [2]:
# Build every unordered pair of the 10 column indices
pairList = list(combinations(range(10), 2))
# Synthetic distance-test data: 10 identical rows of n**2 + 1
row = np.arange(10) ** 2 + 1
distDF = pd.DataFrame(np.tile(row, (10, 1)))
distDF.head()


Out[2]:
0 1 2 3 4 5 6 7 8 9
0 1 2 5 10 17 26 37 50 65 82
1 1 2 5 10 17 26 37 50 65 82
2 1 2 5 10 17 26 37 50 65 82
3 1 2 5 10 17 26 37 50 65 82
4 1 2 5 10 17 26 37 50 65 82

5 rows × 10 columns


In [3]:
# Define distance function
def dist(tlist, df=None):
    """Sum-of-squared-differences distance for one candidate pair.

    Parameters
    ----------
    tlist : sequence of two column labels identifying the pair.
    df : optional DataFrame to read the columns from; defaults to the
        module-level ``distDF`` (backward compatible with ``Pool.map``).

    Returns
    -------
    tuple
        ``([(xname, yname)], ssd)`` — the pair wrapped in a list plus the
        sum of squared differences, matching the two-column DataFrame the
        caller builds from the pooled results.
    """
    frame = distDF if df is None else df
    xname = tlist[0]
    yname = tlist[1]
    # Don't standardize since the test columns hold constant values.
    diff = (frame[xname] - frame[yname]).dropna()  # ignore rows missing either value
    # Vectorized sum of squares (the old map/lambda shadowed the variable `z`).
    return ([(xname, yname)], (diff ** 2).sum())

In [4]:
# Define distance test function
def test_distFun():
    """Map dist() over all pairs with a worker pool and check pair (0, 5).

    pairList[4] is (0, 5); every one of the 10 rows differs by 26 - 1 = 25,
    so the expected sum of squared differences is 10 * 25**2 == 10 * 5**4.

    The old ``if __name__ == '__main__'`` guard inside this function left
    ``trainDistPool``/``trainDistResult`` undefined whenever the guard was
    false, and the pool leaked if the map raised — both fixed here.
    """
    trainDistPool = Pool(processes=4)
    try:
        trainDistResult = pd.DataFrame(trainDistPool.map(dist, pairList))
        print(trainDistResult[1][4])  # print() form works on py2 and py3
    finally:
        # Always release the worker processes, even on failure.
        trainDistPool.close()
        trainDistPool.join()
    assert trainDistResult[1][4] == 10 * (5 ** 4)

Cointegration Test


In [21]:
## Successful test data
#
# Draw one standard-normal series, then add uniform(-1, 1) noise to it:
# the two series share the same underlying values, so they are cointegrated.

np.random.seed(42)  # seed the generator so the test data (and tests) are reproducible
normalrand = np.random.normal(0, 1, 100)                # N(0, 1)
cointnorm = normalrand + np.random.uniform(-1, 1, 100)  # N(0, 1) + U(-1, 1)

In [22]:
## Fail test data
# Example 1: two independent random walks — not cointegrated.
# NOTE(review): these draws are unseeded, so the ADF-based "fail" tests
# below are stochastic and may occasionally flake — consider seeding.
x = np.random.normal(0, 1, 1000).cumsum()
y = np.random.normal(0, 1, 1000).cumsum()

# Example 2: cumulative sums of unrelated uniform / exponential draws.
x1 = np.random.uniform(1000, 10000, 1000).cumsum()
y1 = np.random.exponential(10, 1000).cumsum()

In [23]:
# Define cointegration function (Engle–Granger style)
def cointegration(y, x):
    """Regress y on x (with an intercept) and ADF-test the residuals.

    Parameters
    ----------
    y, x : array-like series of equal length.

    Returns
    -------
    The statsmodels ``adfuller`` result tuple for the OLS residuals;
    element [1] is the p-value (small => stationary residuals => cointegrated).
    """
    # add_constant supplies the intercept the Engle–Granger first-step
    # regression requires; the original fit had no constant term.
    ctresult = stat.OLS(y, stat.add_constant(x)).fit()
    return ts.adfuller(ctresult.resid)

In [24]:
# Cointegration tests: the pair built from shared noise should pass,
# while the pairs of independent series should not.
def test_cointFun():
    p_value = cointegration(normalrand, cointnorm)[1]
    assert p_value <= 0.05  # stationary residuals => cointegrated

def test_cointFail():
    p_value = cointegration(y, x)[1]
    assert p_value >= 0.05  # independent random walks => not cointegrated

def test_cointFail2():
    p_value = cointegration(y1, x1)[1]
    assert p_value >= 0.05  # unrelated uniform/exponential sums => not cointegrated

Correlation Test


In [9]:
# For correlated data, draw standard-normal X and build Y with the
# bivariate-normal construction Y = rho*X + sqrt(1 - rho^2)*Z, which
# gives corr(X, Y) = rho in the population.

rho = .5  # target correlation

np.random.seed(7)  # seed so the correlation test is reproducible
xCorr = np.random.normal(0, 1, 100)
# NOTE: the noise scale must be sqrt(1 - rho**2); the original used
# (1 - rho**2), which yields a correlation above rho (~0.55 for rho=0.5).
yCorr = rho * xCorr + np.sqrt(1 - rho ** 2) * np.random.normal(0, 1, 100)

In [10]:
# Define correlation function
def correlate(tlist):
    """Pearson correlation for one candidate pair.

    Parameters
    ----------
    tlist : sequence of two equal-length 1-D numeric series.

    Returns
    -------
    The correlation coefficient as a float, or the empty tuple ``()`` when
    the inputs cannot be correlated (mismatched lengths, non-numeric data).
    Callers treat ``()`` as the "no result" sentinel, so it is preserved.
    """
    try:
        x = tlist[0]
        y = tlist[1]
        return np.corrcoef(x, y)[0, 1]
    except (ValueError, TypeError):
        # Sentinel kept for backward compatibility with existing callers;
        # the original had two identical except clauses and dead
        # commented-out code, merged/removed here.
        return ()

In [11]:
def test_corrFun():
    """The sample correlation should land within 0.2 of the target rho."""
    observed = correlate([xCorr, yCorr])
    assert abs(observed - rho) <= 0.2

Execute Tests


In [12]:
%load_ext ipython_nose

In [25]:
%nose -v


Out[25]:
 
 
5/5 tests passed

In [ ]: