In [ ]:
#nasdaq = pd.io.parsers.read_csv("http://www.nasdaq.com/screening/companies-by-industry.aspx?exchange=NASDAQ&render=download")
#nasdaq = nasdaq.sort(columns = "Symbol")
#print nasdaq
#symbols = nasdaq.Symbol[nasdaq['Symbol'].map(lambda x: len(x) < 5)]
#print symbols
#from pandas.io.data import DataReader
#from datetime import datetime
#aapl = DataReader("AAPL", "google", datetime(2011,1,1), datetime(2014,1,1))
#goog = DataReader("GOOG", "google", datetime(2011,1,1), datetime(2014,1,1))
#stocks = DataReader(symbols[0:10], "yahoo", datetime(2011,1,1), datetime(2014,1,1))
#print stocks
#stocks.to_pickle("stockdirty.pkl")
#stocktest = pd.read_pickle("stockdirty.pkl")
#print stocktest #yay, we have a storage solution
#stockDF = stocks.to_frame()
#stockDF.head()
#stockBack = stockDF.to_panel()
#print stockBack #OK good, we're back to our original panel format
#print stockDF
#stockDF.to_json(path_or_buf="~/stockfile.json", orient="index") #exporting Panels and MultiIndex DataFrames to JSON is still not supported
#avg = pd.concat([goog.Close, aapl.Close], axis = 1, join = "inner", keys=['GOOG','AAPL'])
#x = np.array(avg.AAPL)
#y = np.array(avg.GOOG)
#X = sm.add_constant(x, prepend=True)
#results = sm.OLS(y,X).fit()
#print results.summary()
#print(np.mean(results.resid**2))
In [ ]:
# Serial processing - please use the parallel code below instead
#SSD = dict()
#Test just the first 100 pairs - remove [0:100] for full test
#for a, b in tickerpairs[0:100]:
# stockprice = wrdsData.xs(a, level='ticker').PRC
# normprice = stockprice/stockprice[0]
# stockprice2 = wrdsData.xs(b, level='ticker').PRC
# normprice2 = stockprice2/stockprice2[0]
# SSD[(a,b)] = sum([(normprice[k] - normprice2[k])**2 for k in range(min(len(normprice),len(normprice2)))])
#
#
#smallssd = sorted(SSD.items(), key=lambda x: x[1])[:5] #pick the 5 pairs with the smallest distance (sum of squared differences) from the distance method
#print dict(smallssd).keys()
#print smallssd
In [ ]:
#def cointegration_test(y, x):
# result = stat.OLS(y, x).fit()
# return ts.adfuller(result.resid)
#
#
#CoIntegrate = dict()
#
#Test just a 10-pair sample (pairs 350-359) - remove [350:360] for full test
#for a, b in tickerpairs[350:360]:
# stockdirty = wrdsData.xs(a, level='ticker').PRC
# stockdirty2 = wrdsData.xs(b, level='ticker').PRC
# stockclean = stockdirty[stockdirty.notnull() & stockdirty2.notnull()]
# stockclean2 = stockdirty2[stockdirty.notnull() & stockdirty2.notnull()]
# stockprice = list(stockclean)
# stockprice2 = list(stockclean2)
# try:
# CoIntegrate[(a,b)] = cointegration_test(stockprice,stockprice2)[1] #get the p-value of test for each pair
# except ValueError:
# print("Failed pair: " + a + ", " + b)
# continue
# except TypeError:
# print("Unknown error: " + a + ", " + b)
# continue
#
#
#pairsoptim = [CoIntegrate.keys()[i] for i in range(len(CoIntegrate)) if CoIntegrate.values()[i] < 0.05/len(CoIntegrate)]
#print pairsoptim