In [ ]:
# Parallel processing for the sum-of-squared-distances between normalized price pairs
from multiprocessing import Pool  # may already be imported earlier in the notebook
import pandas as pd

def dist(tlist):
    xname, yname = tlist[0], tlist[1]
    px = wrdsData.xs(xname, level='ticker').PRC
    py = wrdsData.xs(yname, level='ticker').PRC
    # Normalize each price series by its first observation so the pair is comparable
    x = px / px.iloc[0]
    y = py / py.iloc[0]
    # Sum of squared differences between the normalized series (pandas .sum() skips NaNs)
    return ((xname, yname), ((x - y) ** 2).sum())

if __name__ == '__main__':
    pool = Pool(processes=4)
    # Test just the first 1000 pairs - remove [0:1000] for the full run
    result = pd.DataFrame(pool.map(dist, tickerpairs[0:1000]))
    pool.close()
    pool.join()

    # Column 1 holds the SSD; the five smallest distances are the closest pairs
    smallssd = result.sort(columns=1)[0:5]   # sort_values(by=1) in newer pandas
    print smallssd
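
The cell above assumes tickerpairs, a list of (ticker, ticker) tuples, was built earlier in the notebook. If not, a minimal sketch of one way to construct it from the tickers present in wrdsData (the construction itself is an assumption, not part of the original analysis):

In [ ]:
# Hypothetical construction of tickerpairs: every unordered pair of tickers in wrdsData
from itertools import combinations

tickers = wrdsData.index.get_level_values('ticker').unique()
tickerpairs = list(combinations(tickers, 2))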

In [ ]:
# Parallel processing for the Engle-Granger cointegration test:
import statsmodels.api as stat           # provides stat.OLS (may already be imported above)
import statsmodels.tsa.stattools as ts   # provides ts.adfuller (may already be imported above)

def cointegration_test(y, x):
    # Step 1: regress y on x; step 2: ADF test on the residuals.
    # Note: no constant is included in the regression; wrap x in stat.add_constant(x) to add one.
    try:
        result = stat.OLS(y, x).fit()
        return ts.adfuller(result.resid)
    except (ValueError, TypeError):
        return ()

def coint(tlist):
    xname, yname = tlist[0], tlist[1]
    x = wrdsData.xs(xname, level='ticker').PRC
    y = wrdsData.xs(yname, level='ticker').PRC
    # Keep only the dates on which both series have a price
    mask = x.notnull() & y.notnull()
    xp = list(x[mask])
    yp = list(y[mask])
    try:
        # Return the p-value of the ADF test on the residuals for each pair
        return ((xname, yname), cointegration_test(xp, yp)[1])
    except (ValueError, TypeError, IndexError):
        # IndexError covers the empty tuple cointegration_test returns on failure
        return ()



if __name__ == '__main__':
    pool = Pool(processes=4)
    # Test just the first 1000 pairs - remove [0:1000] for the full run
    cointResult = pool.map(coint, tickerpairs[0:1000])
    pool.close()
    pool.join()
    print cointResult


# cointResult is a plain list, so wrap it in a DataFrame before sorting by p-value:
#smallCoint = pd.DataFrame(cointResult).sort(columns=1)[0:5]
#print smallCoint
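
statsmodels also ships a ready-made Engle-Granger test, ts.coint (statsmodels.tsa.stattools.coint), which bundles the OLS regression and the ADF test on the residuals into one call. A minimal sketch of coint rewritten on top of it, as an alternative to the hand-rolled version above; note that ts.coint includes a constant in the regression by default, so its p-values will not exactly match the no-constant OLS used here:

In [ ]:
# Alternative pair test using statsmodels' built-in Engle-Granger cointegration test
def coint_builtin(tlist):
    xname, yname = tlist[0], tlist[1]
    x = wrdsData.xs(xname, level='ticker').PRC
    y = wrdsData.xs(yname, level='ticker').PRC
    # Keep only the dates on which both series have a price
    mask = x.notnull() & y.notnull()
    try:
        # ts.coint returns (t-statistic, p-value, critical values); keep the p-value
        return ((xname, yname), ts.coint(list(y[mask]), list(x[mask]))[1])
    except (ValueError, TypeError):
        return ()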