In [ ]:
# Parallel processing for sum-of-squared-distances between normalized price series
from multiprocessing import Pool
import pandas as pd

def dist(tlist):
    xname, yname = tlist
    # Normalize each price series by its first observation so the pair is comparable
    x = wrdsData.xs(xname, level='ticker').PRC / wrdsData.xs(xname, level='ticker').PRC.iloc[0]
    y = wrdsData.xs(yname, level='ticker').PRC / wrdsData.xs(yname, level='ticker').PRC.iloc[0]
    # Sum of squared distances; pandas aligns the two series on date and .sum() skips NaNs
    return ((xname, yname), ((x - y) ** 2).sum())

if __name__ == '__main__':
    pool = Pool(processes=4)
    # Test just the first 1,000 pairs - remove [0:1000] for the full run
    result = pd.DataFrame(pool.map(dist, tickerpairs[0:1000]))
    # Column 0 holds the pair, column 1 its SSD; keep the five closest pairs
    smallssd = result.sort_values(by=1)[0:5]
    print smallssd
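
Both cells assume that `wrdsData` is a DataFrame with a `ticker` index level and that `tickerpairs` is an iterable of `(xname, yname)` tuples. A minimal sketch of how `tickerpairs` could be built from the price panel; the construction itself is an assumption, not part of the original:
In [ ]:
# Hypothetical construction of tickerpairs: every unordered pair of tickers in the panel
from itertools import combinations
tickers = wrdsData.index.get_level_values('ticker').unique()
tickerpairs = list(combinations(tickers, 2))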
In [ ]:
# Parallel processing for the Engle-Granger style cointegration test:
import statsmodels.api as stat
import statsmodels.tsa.stattools as ts

def cointegration_test(y, x):
    # Regress y on x, then run an augmented Dickey-Fuller test on the residuals
    result = stat.OLS(y, x).fit()
    return ts.adfuller(result.resid)

def coint(tlist):
    xname, yname = tlist
    x = wrdsData.xs(xname, level='ticker').PRC
    y = wrdsData.xs(yname, level='ticker').PRC
    # Keep only the dates on which both series have a price
    mask = x.notnull() & y.notnull()
    xp = list(x[mask])
    yp = list(y[mask])
    try:
        # adfuller returns (statistic, p-value, ...); keep the p-value for each pair
        return ((xname, yname), cointegration_test(xp, yp)[1])
    except (ValueError, TypeError, IndexError):
        # e.g. too few overlapping observations for the OLS or ADF step
        return ()

if __name__ == '__main__':
    pool = Pool(processes=4)
    # Test just the first 1,000 pairs - remove [0:1000] for the full run
    cointResult = pool.map(coint, tickerpairs[0:1000])
    print cointResult
    # Drop failed pairs, then keep the five pairs with the smallest ADF p-values
    smallCoint = pd.DataFrame([r for r in cointResult if r]).sort_values(by=1)[0:5]
    print smallCoint
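
For downstream pair selection, one might keep only pairs whose regression residuals look stationary at a conventional significance level. A minimal sketch over the `cointResult` list built above; the 0.05 cutoff is an illustrative choice, not from the original:
In [ ]:
# Keep pairs whose ADF p-value clears an illustrative 5% threshold
ALPHA = 0.05
stationaryPairs = [r for r in cointResult if r and r[1] < ALPHA]
print len(stationaryPairs), 'of', len(cointResult), 'pairs pass at the 5% level'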