In [1]:
%pylab --no-import-all inline
import pandas as pd
import pandas.io.data as web
import numpy as np
import scipy.stats as stats
import scipy.signal as signal

import seaborn as sns
from collections import *
from operator import *
import statsmodels.api as sm
import statsmodels.tsa.stattools as ts
import re
import string
import os
from dateutil import parser

import matplotlib.pyplot as plt

import zipline as zp
from zipline.finance import trading
from zipline.finance.trading import TradingEnvironment
from zipline.utils import tradingcalendar

import datetime
from datetime import datetime
from datetime import timedelta
import pytz

utc=pytz.UTC

# matplotlib.style.available  # lists the installed style sheets
matplotlib.style.use('fivethirtyeight')
sns.set_context("talk", font_scale=1.2)
sns.set_palette("Set1", 10, .80)
matplotlib.style.use('bmh')  # note: overrides the 'fivethirtyeight' style set above
matplotlib.rcParams['lines.linewidth'] = 1.5
matplotlib.rcParams['axes.facecolor'] = '0.99'
matplotlib.rcParams['figure.facecolor'] = '0.97'
figsize(15, 8)

indexTradingCal = pd.DatetimeIndex(tradingcalendar.trading_days)
indexTradingCal = indexTradingCal.normalize()


Populating the interactive namespace from numpy and matplotlib

In [2]:
def fetchAdjusted(ticker, start='1/1/1970'):
    # download daily OHLCV from Yahoo Finance, rename 'Adj Close' so it is
    # reachable via attribute access (df.AdjClose), and tag the columns with the ticker
    df = web.get_data_yahoo(ticker, start=start)
    df = df.rename(columns={'Adj Close': 'AdjClose'})
    df.columns.name = ticker
    return df

SPY = fetchAdjusted('SPY')
AAPL = fetchAdjusted('AAPL')
IBM = fetchAdjusted('IBM')
GLD = fetchAdjusted('GLD')
SLV = fetchAdjusted('SLV')
USO = fetchAdjusted('USO')
TLT = fetchAdjusted('TLT')
GE = fetchAdjusted('GE')
XOM = fetchAdjusted('XOM')
SDS = fetchAdjusted('SDS')
QID = fetchAdjusted('QID')

# managed risk/risk parity mutual funds/ETFs
AQRNX = fetchAdjusted('AQRNX')
RORO = fetchAdjusted('RORO')

# Alt style ETF

# option funds/ETFs
EXD = fetchAdjusted('EXD')
ETJ = fetchAdjusted('ETJ')
EOS = fetchAdjusted('EOS')
ETW = fetchAdjusted('ETW')
ETV = fetchAdjusted('ETV')
ERW = fetchAdjusted('ERW')
ETB = fetchAdjusted('ETB')

In [3]:
testStocksDict = {'SPY':SPY.AdjClose, 'AAPL':AAPL.AdjClose, 'IBM':IBM.AdjClose, 'XOM':XOM.AdjClose, 'GE':GE.AdjClose,
                  'GLD':GLD.AdjClose, 'SLV':SLV.AdjClose, 'USO':USO.AdjClose,
                  'SDS':SDS.AdjClose, 'QID':QID.AdjClose }

In [4]:
pd.DataFrame(testStocksDict).plot()


Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x1044b6fd0>

In [5]:
def normalize(df, withStartingValue=1):
    # rebase a price series (or frame) so it starts at withStartingValue
    return withStartingValue * (df / df.iloc[0])

def cum_returns(df, withStartingValue=None):
    if withStartingValue is None:
        return (1 + df).cumprod() - 1
    else:
        return (1 + df).cumprod() * withStartingValue
    
def detrendTS(theTS):
    return pd.Series(data=signal.detrend(theTS.values),index=theTS.index.values)
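
As a quick sanity check of the helpers above (a minimal sketch on synthetic data): compounding daily returns with cum_returns should agree with normalize applied to the price series itself.

prices = pd.Series([100.0, 101.0, 99.5, 102.3, 103.0])
nav = cum_returns(prices.pct_change().dropna(), withStartingValue=100)
print(np.allclose(nav.values, normalize(prices, withStartingValue=100).iloc[1:].values))  # True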

In [6]:
##### Strategy Performance statistics & timeseries analysis functions

def maxDrawdown(ts):
    # worst peak-to-trough decline of a NAV series, returned as a negative fraction
    MDD = 0
    DD = 0
    peak = -99999
    for value in ts:
        if (value > peak):
            peak = value
        else:
            DD = (peak - value) / peak
        if (DD > MDD):
            MDD = DD
    return -1*MDD

def annualReturn(ts, inputIsNAV=True):
    if ts.size < 2:
        return np.nan
    if inputIsNAV:
        tempReturns = ts.pct_change().dropna()
        return tempReturns.mean() * 252
    else:
        return ts.mean() * 252


def annualVolatility(ts, inputIsNAV=True):
    if ts.size < 2:
        return np.nan
    if inputIsNAV:
        tempReturns = ts.pct_change().dropna()
        return tempReturns.std() * np.sqrt(252)
    else:
        return ts.std() * np.sqrt(252)

def calmerRatio(ts, inputIsNAV=True):
    # Calmar ratio: annualized return divided by the absolute max drawdown
    if inputIsNAV:
        temp = annualReturn(ts=ts, inputIsNAV=inputIsNAV) / abs(maxDrawdown(ts=ts))
    else:
        tempNAV = cum_returns(ts,withStartingValue=100)
        temp = annualReturn(ts=ts, inputIsNAV=inputIsNAV) / abs(maxDrawdown(ts=tempNAV))
        
    if np.isinf(temp):
        return np.nan
    else:
        return temp
    
def sharpeRatio(ts, inputIsNAV=True):
    # annualized return over annualized volatility (risk-free rate assumed zero)
    return annualReturn(ts, inputIsNAV=inputIsNAV) / annualVolatility(ts, inputIsNAV=inputIsNAV)

def stabilityOfTimeseries( ts, logValue=True ):
    # R^2 of a linear fit to the (optionally log-scaled) values: higher means steadier growth
    if ts.size < 2:
        return np.nan
    
    tsLen = ts.size
    X = range(0, tsLen)
    X = sm.add_constant(X)
    if logValue:
        tempValues = np.log10(ts.values)
    else:
        tempValues = ts.values
    model = sm.OLS( tempValues, X ).fit()
    
    return model.rsquared

def betaTimeseries( theTS, benchmarkTS=SPY.AdjClose, inputIsReturns=False):
    tempTS = theTS.copy()
    tempBench = benchmarkTS.copy()
    
    tempTS = tempTS.asfreq(freq='D',normalize=True)
    tempBench = tempBench.asfreq(freq='D',normalize=True)
    
    if not inputIsReturns:
        tempTS = tempTS.pct_change().dropna()
        tempBench = tempBench.pct_change().dropna()
    
    tempTS = tempTS[ np.isfinite(tempTS) ]
    tempBench = tempBench[ np.isfinite(tempBench) ]
    
    # remove intraday timestamps by normalizing since only working with daily data right now
    # tempTS.reindex(tempTS.index.normalize())
    # tempBench.reindex(tempBench.index.normalize())
    
    # tempTS.reindex(indexTradingCal)
    # tempBench.reindex(indexTradingCal)
    
    tempAlign = tempBench.align(tempTS,join='inner')
    alignBench = tempAlign[0]
    alignTS = tempAlign[1]
    # print( alignBench.head() )
    # print( alignTS.head() )
    regX = np.array( alignBench.values )
    regY = np.array( alignTS.values )
    
    regX = np.reshape(regX,len(regX))
    regY = np.reshape(regY,len(regY))
    
    m, b = np.polyfit(regX, regY, 1) 
    
    return m  

def hurst(ts, lagsToTest=20):  
    tau = []
    lagvec = []  
    #  Step through the different lags  
    for lag in range(2,lagsToTest):  
        #  produce price difference with lag  
        pp = np.subtract(ts[lag:],ts[:-lag])  
        #  Write the different lags into a vector  
        lagvec.append(lag)  
        #  store the square root of the std dev of the lagged differences
        tau.append(np.sqrt(np.std(pp)))  
    #  linear fit to double-log graph (gives power)  
    m = np.polyfit(np.log10(lagvec),np.log10(tau),1)  
    # std(diff) scales as lag^H and tau holds its square root, so the slope is H/2
    hurst = m[0]*2  
    # plot lag vs variance  
    #py.plot(lagvec,tau,'o'); show()  
    return hurst 

def halfLife(ts):
    # fit delta_t = beta * price_{t-1} + const, then convert the AR(1) slope
    # into a mean-reversion half-life of -ln(2)/beta (in bars of the input series)
    price = pd.Series(ts)
    lagged_price = price.shift(1).fillna(method="bfill")
    delta = price - lagged_price
    beta = np.polyfit(lagged_price, delta, 1)[0]
    half_life = (-1*np.log(2)/beta)
    
    return half_life

def out_of_sample_vs_in_sample_P(ts, in_sample_length) :
    # Welch's t-test p-value comparing the first in_sample_length observations
    # against the remainder of the series
    total_length = len(ts)
    in_sample_list = list(ts[0:in_sample_length])
    out_of_sample_list = list(ts[in_sample_length:total_length])
    
    tempP = stats.ttest_ind(in_sample_list, out_of_sample_list, equal_var = False)[1]
    
    return tempP


def out_of_sample_vs_in_sample_P_rolling(ts, in_sample_length, rolling_length) :
    test_rolling_ttest = [ (ts.index[i+rolling_length] , 
                            out_of_sample_vs_in_sample_P(ts[i:i+rolling_length], in_sample_length) ) 
                          for i in range(0, len(ts)-rolling_length,rolling_length) ]
    
    tempDF = pd.DataFrame(test_rolling_ttest)
    tempDF.index = tempDF[0]
    tempDF.index.name = 'Date'
    tempDF = tempDF.drop(0, axis=1)
    tempDF.columns = ['pvalue']
    
    return tempDF

##### Strategy Performance statistics & timeseries analysis functions
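
A toy check of the drawdown logic (synthetic values, a minimal sketch): a NAV running 100 -> 120 -> 90 -> 110 has its worst peak-to-trough move from 120 down to 90, i.e. -25%.

toyNAV = pd.Series([100.0, 120.0, 90.0, 110.0])
print(maxDrawdown(toyNAV))   # -0.25, i.e. (120 - 90) / 120 reported as a negative fraction
print(sharpeRatio(toyNAV) == annualReturn(toyNAV) / annualVolatility(toyNAV))   # True by definition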

In [7]:
out_of_sample_vs_in_sample_P_rolling(AAPL.AdjClose.pct_change().dropna(), 100, 200).plot()
print( np.mean(out_of_sample_vs_in_sample_P_rolling(AAPL.AdjClose.pct_change().dropna(), 100, 200)) )


pvalue    0.45323
dtype: float64

In [38]:
stats.ttest_ind([30.02,29.99,30.11,29.97,30.01,29.99], [29.89,29.93,29.72,29.98,30.02,29.98], equal_var = False)


Out[38]:
(1.9590058081081434, 0.090773324285661136)

In [39]:
# compared to the above: assuming both samples are drawn from populations with equal
# variance yields a smaller p-value, i.e. it is somewhat more suggestive of a
# difference between the two sample means
stats.ttest_ind([30.02,29.99,30.11,29.97,30.01,29.99], [29.89,29.93,29.72,29.98,30.02,29.98], equal_var = True)


Out[39]:
(1.9590058081081436, 0.078565773857230708)

In [40]:
print(np.mean([2,3,5,6,4,2,2,2,2]))
print(np.mean([2,2,2,2,2,6,3,4,5]))


3.11111111111
3.11111111111

In [41]:
stats.ttest_ind([2,3,5,6,4,2,2,2,2], [2,2,2,2,2,6,3,4,5], equal_var = True)


Out[41]:
(0.0, 1.0)

In [42]:
halfLife(detrendTS(SPY.Close))


Out[42]:
524.09408331892405
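
For scale, at the 252 trading days per year used for annualization above, that half-life is roughly two years:

print(524.09 / 252)   # ~2.08 years for the detrended series to mean-revert halfway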

In [69]:
def plotHeatmap(df, titleStr='', cmap=pyplot.cm.RdYlGn):
    """
    Render a DataFrame as a labeled heatmap, e.g. a grid of Sharpe ratios.
    """
    fig = pyplot.figure()
    ax = fig.add_subplot(111)
    axim = ax.imshow(df.values, cmap=cmap, interpolation = 'nearest')
    ax.set_xlabel(df.columns.name)
    ax.set_xticks(np.arange(len(df.columns)))
    ax.set_xticklabels(list(df.columns))
    ax.set_ylabel(df.index.name)
    ax.set_yticks(np.arange(len(df.index)))
    ax.set_yticklabels(list(df.index))
    ax.set_title(titleStr)
    pyplot.colorbar(axim)
    
def plotScatter(x1Values, y1Values, 
                moreXvaluesList=None, moreYvaluesList=None, 
                autoLabelValues1=False, autoLabelMoreValues=False,
                autoLabelMoreValuesIndexes = [0],
                plotTitle=None,
                xAxisLabel=None, yAxisLabel=None,
                showLegend=True,
                legendLocation='upper left',
                legendLabel1='series1',
                legendLabelMoreList=None,
                showRegressionLine=True,
                seriesToUseForRegression=1,
                colorOrder = ['blue','orange','red','black','pink','gray','yellow','purple','darkred','darkblue'],
                transparency=0.7):
    
    x = np.array(x1Values)
    y = np.array(y1Values)
    x = np.reshape(x,len(x))
    y = np.reshape(y,len(y))
    
    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    
    pointColor = 1
    
    ax1.scatter(x, y, s=200, color=colorOrder[0], alpha=transparency, marker="o", label=legendLabel1)
    if autoLabelValues1:
        for i in range(x.size):
            ax1.annotate( str(i), xy=(x[i]*1.01, y[i]*1.01), size=20 )
    
    if (moreXvaluesList is not None) and (moreYvaluesList is not None):
        count = 0
        for xyTemp in zip(moreXvaluesList, moreYvaluesList): 
            xTemp = np.array(xyTemp[0])
            yTemp = np.array(xyTemp[1])
            xTemp = np.reshape(xTemp,len(xTemp))
            yTemp = np.reshape(yTemp,len(yTemp))
            if legendLabelMoreList is None:
                tempLegendLabel = 'series' + str(count+2)
            else:
                tempLegendLabel = legendLabelMoreList[count]
            ax1.scatter(xTemp, yTemp, color=colorOrder[pointColor], alpha=transparency, s=200, marker="o", label=tempLegendLabel)
            pointColor += 1
            if autoLabelMoreValues:
                if count in autoLabelMoreValuesIndexes:
                    for i in range(xTemp.size):
                        ax1.annotate( str(i), xy=( xTemp[i]*1.01, yTemp[i]*1.01 ), size=20 )
            count += 1
            
    if showRegressionLine:
        if seriesToUseForRegression == 1:
            regX = x
            regY = y
        else:
            regX = np.array( moreXvaluesList[seriesToUseForRegression-2] )
            regY = np.array( moreYvaluesList[seriesToUseForRegression-2] )
            regX = np.reshape(regX,len(regX))
            regY = np.reshape(regY,len(regY))
        m, b = np.polyfit(regX, regY, 1)
        ax1.plot(regX, m*regX + b, '-', color=colorOrder[seriesToUseForRegression-1], alpha=transparency) 
     
    if plotTitle is not None:
        plt.title(plotTitle)
    if showLegend:
        plt.legend(loc=legendLocation)
    if xAxisLabel is not None:
        plt.xlabel(xAxisLabel)
    if yAxisLabel is not None:
        plt.ylabel(yAxisLabel)
    plt.show()
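
A minimal usage sketch for plotHeatmap (random data, purely illustrative):

demoDF = pd.DataFrame(np.random.randn(5, 4),
                      index=['r%d' % i for i in range(5)],
                      columns=['c%d' % j for j in range(4)])
demoDF.index.name = 'rows'
demoDF.columns.name = 'cols'
plotHeatmap(demoDF, titleStr='random demo grid')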

In [51]:
def calcEqualWeightIndex_fromDict(tsDict, onlyGetDailyReturns=False, indexStartValue=100, startDate=None, endDate=None):
    mergedDF_pct = multiTimeseriesToDF_fromDict(tsDict, asPctChange=True, startDate=startDate, endDate=endDate, fillNAvalue=0)
    # print(mergedDF_pct.head(3))
    equalWeight_pct = mergedDF_pct.sum(axis=1) / len(tsDict.keys())

    if onlyGetDailyReturns:
        return equalWeight_pct
    else:
        return cum_returns(equalWeight_pct, withStartingValue=indexStartValue)
    
def calcEqualWeightIndex(tsList, tsNamesArr=None, onlyGetDailyReturns=False, indexStartValue=100):
    if tsNamesArr is None:
        inputNames = range(len(tsList))
    else:
        inputNames = tsNamesArr
    mergedDF_pct = multiTimeseriesToDF(tsList, inputNames, asPctChange=True, fillNAvalue=0)
    # print(mergedDF_pct.head(3))
    equalWeight_pct = mergedDF_pct.sum(axis=1) / len(tsList)
    if onlyGetDailyReturns:
        return equalWeight_pct
    else:
        return cum_returns(equalWeight_pct, withStartingValue=indexStartValue)
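
Note that averaging the daily percent changes, as both functions above do, models a portfolio rebalanced back to equal weights every day, which is not the same as buy-and-hold of equal dollar amounts. A tiny two-asset illustration (synthetic numbers, assumed for the example):

# two assets over two days: +10% then flat, and flat then +10%
r = pd.DataFrame({'A': [0.10, 0.00], 'B': [0.00, 0.10]})
dailyRebal = cum_returns(r.mean(axis=1), withStartingValue=100)
print(dailyRebal.iloc[-1])   # 110.25 (= 100 * 1.05 * 1.05), vs 110.0 for buy-and-hold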

In [9]:
def dfTS(df, dateColumnLabel='Date'):
    # e.g.: takes a dataframe from a yahoo finance price csv and returns a df with datetime64 index
    colNames = df.columns
    tempDF = df.copy()
    indexDates = map(np.datetime64, df.ix[:,dateColumnLabel].values)   
    # tempDF = pd.DataFrame(data=df.values , index=map(pd.to_datetime, indexDates))
    tempDF = pd.DataFrame(data=df.values , index=indexDates)
    tempDF.columns = colNames
    
    return tempDF.drop(axis=1,labels=dateColumnLabel).sort_index()

def appendSeries( ts1, ts2 ):
    return pd.Series( data=np.concatenate([ts1,ts2]) , index=np.concatenate([ts1.index,ts2.index]) )

def multiTimeseriesToDF_fromDict( tsDictInput, asPctChange=False, startDate=None, endDate=None,
                        dropNA=False, ffillNA=False, fillNAvalue=None ):
    tempDict = {}
    
    for i in tsDictInput.keys():
        if asPctChange:
            # print(i)
            # print(tsDictInput.get(i).head())
            tempDict[ i ] = sliceTS(tsDictInput.get(i).pct_change().dropna(), startDate=startDate, endDate=endDate)
        else:
            tempDict[ i ] = sliceTS(tsDictInput.get(i), startDate=startDate, endDate=endDate)
    tempDF = pd.DataFrame(tempDict)
    if dropNA:
        tempDF = tempDF.dropna()
    elif ffillNA:
        tempDF = tempDF.fillna(method="ffill") 
    elif fillNAvalue is not None:
        tempDF = tempDF.fillna(fillNAvalue)
        
    return tempDF

def multiTimeseriesToDF( tsSeriesList, tsSeriesNamesArr, asPctChange=False, startDate=None, endDate=None,
                        dropNA=False, ffillNA=False, fillNAvalue=None ):
    tempDict = {}
    
    for i in range(0,len(tsSeriesNamesArr)):
        if asPctChange:
            tempDict[ tsSeriesNamesArr[i] ] = sliceTS(tsSeriesList[i].pct_change().dropna(), startDate=startDate, endDate=endDate)
        else:
            tempDict[ tsSeriesNamesArr[i] ] = sliceTS(tsSeriesList[i], startDate=startDate, endDate=endDate)
    tempDF = pd.DataFrame(tempDict)
    if dropNA:
        tempDF = tempDF.dropna()
    elif ffillNA:
        tempDF = tempDF.fillna(method="ffill") 
    elif fillNAvalue is not None:
        tempDF = tempDF.fillna(fillNAvalue)
        
    return tempDF

def sliceTS(theTS, startDate=None, endDate=None):
    # note: both bounds are exclusive (strictly > startDate and < endDate)
    if (startDate is None) and (endDate is None):
        return theTS
    elif startDate is None:
        return theTS[ theTS.index < endDate ]
    elif endDate is None:
        return theTS[ theTS.index > startDate ]
    else:
        return theTS[ (theTS.index > startDate) & (theTS.index < endDate) ]
    
def compareTS( tsDictOrList ):
    # accepts either a dict of series or a plain list; list entries get integer labels
    if isinstance(tsDictOrList, dict):
        return normalize(multiTimeseriesToDF_fromDict( tsDictOrList,dropNA=True))
    else:
        return normalize(multiTimeseriesToDF_fromDict( dict(enumerate(tsDictOrList)),dropNA=True))
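
The slicing helper uses strictly exclusive bounds on both ends; a small sketch (synthetic index, illustrative only) makes that explicit:

demoTS = pd.Series(range(5), index=pd.date_range('2014-01-01', periods=5))
print(sliceTS(demoTS, startDate='2014-01-01', endDate='2014-01-05'))   # keeps only Jan 2-4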

In [26]:
compareTS( {'ERW':ERW.AdjClose, 'SPY':SPY.AdjClose }).plot()


Out[26]:
<matplotlib.axes._subplots.AxesSubplot at 0x10a92d450>

In [11]:
compareTS( [AAPL.AdjClose, SPY.AdjClose] ).plot()


Out[11]:
<matplotlib.axes._subplots.AxesSubplot at 0x10abc3d10>

In [35]:
SPY.AdjClose.plot()
pd.Series(data=signal.detrend(SPY.AdjClose.values), index=SPY.AdjClose.index.values).plot()


Out[35]:
<matplotlib.axes._subplots.AxesSubplot at 0x10e6d6550>

In [36]:
tempADF = ts.adfuller(signal.detrend(SPY.AdjClose.values), regression='ct', autolag=None, maxlag=1, store=True)

In [37]:
tempADFrs = tempADF[3]

In [38]:
tempADFrs.adfstat


Out[38]:
-1.0362228677616785

In [38]:
ts.adfuller(signal.detrend(SPY.AdjClose.values), regression='ct', autolag=None, maxlag=1)


Out[38]:
(-1.0325872460535628,
 0.93956601182871469,
 1,
 5504,
 {'1%': -3.9604157611554074,
  '10%': -3.1275198971396243,
  '5%': -3.4112879729674606})

In [35]:
# essentially the same statistic as the detrended series above: with regression='ct'
# the ADF regression already includes a linear trend term, so detrending first changes little
ts.adfuller(SPY.AdjClose.values, regression='ct', autolag=None, maxlag=1)


Out[35]:
(-1.0325872460552414,
 0.93956601182847188,
 1,
 5504,
 {'1%': -3.9604157611554074,
  '10%': -3.1275198971396243,
  '5%': -3.4112879729674606})

In [39]:
ts.adfuller(signal.detrend(SPY.AdjClose.values), regression='c', autolag=None, maxlag=1)


Out[39]:
(-1.0341961560313702,
 0.74058866410675295,
 1,
 5504,
 {'1%': -3.4315386541431767,
  '10%': -2.5670495985385364,
  '5%': -2.8620652671839455})

In [40]:
ts.adfuller((SPY.AdjClose - AAPL.AdjClose).dropna().values, regression='ct', autolag=None, maxlag=1)


Out[40]:
(-2.0027920879119936,
 0.59991195688310284,
 1,
 5504,
 {'1%': -3.9604157611554074,
  '10%': -3.1275198971396243,
  '5%': -3.4112879729674606})

In [41]:
(SPY.AdjClose - AAPL.AdjClose).plot()


Out[41]:
<matplotlib.axes._subplots.AxesSubplot at 0x10cc0bad0>

In [42]:
tl1 = [SPY.Close[0:10],AAPL.Close]   # truncate SPY to 10 points to show how dropNA keeps only the overlapping dates

In [45]:
multiTimeseriesToDF(tl1,['SPY','AAPL'],asPctChange=True, dropNA=True).head(30)


Out[45]:
AAPL SPY
Date
1993-02-01 0.029412 0.007055
1993-02-02 -0.016327 0.002034
1993-02-03 -0.004149 0.010600
1993-02-04 -0.008333 0.004240
1993-02-05 -0.037815 -0.000667
1993-02-08 -0.013100 0.000000
1993-02-09 0.006726 -0.006893
1993-02-10 -0.019866 0.001343
1993-02-11 -0.011300 0.004919

In [314]:
len(testStocksDict.keys())


Out[314]:
10

In [40]:
normalize(SPY.AdjClose).plot()
normalize(AAPL.AdjClose).plot()
normalize(GE.AdjClose).plot()
calcEqualWeightIndex([SPY.AdjClose,AAPL.AdjClose,GE.AdjClose],indexStartValue=1).plot(color='black')


Out[40]:
<matplotlib.axes._subplots.AxesSubplot at 0x10d8e5d10>

In [84]:
def findBest_bootstrap(tsDict, seedPositionStr, numToSelect=5, detrend=False, applyABStoCorr=True, 
                       useCoint=False, autoLagStyle=None, maxLagDays=1 ):
    if len(tsDict.keys()) < numToSelect:
        numIter = len(tsDict.keys())-1
    else:
        numIter = numToSelect-1
    
    print( seedPositionStr)
    
    currPortDict = {}
    currPortDict[seedPositionStr] = tsDict.get(seedPositionStr).copy()
    
    seedPosition = tsDict.get(seedPositionStr).copy()
    seedPosition.name = seedPositionStr
    currPort = seedPosition
    
    remainingIdArr = np.setdiff1d( tsDict.keys(), currPortDict.keys() )
    
    print(remainingIdArr)

    for j in range(numIter):
        if useCoint:
            if detrend:
                # print("running ADF test with detrending")
                tempADF = [ (i, ts.adfuller( (detrendTS(tsDict.get(i))+detrendTS(currPort)).dropna().values, regression='ct',maxlag=maxLagDays, autolag=autoLagStyle)[1] ) for i in remainingIdArr ]
            else:
                # print("running ADF test without detrending")
                tempADF = [ (i, ts.adfuller( (tsDict.get(i)+currPort).dropna().values, regression='c',maxlag=maxLagDays, autolag=autoLagStyle)[1] ) for i in remainingIdArr ]
        else:
            if detrend:
                if applyABStoCorr:
                    tempADF = [ (i, abs( detrendTS(tsDict.get(i)).pct_change().dropna().corr(detrendTS(currPort).pct_change().dropna()) ) ) for i in remainingIdArr ]
                else:
                    tempADF = [ (i, detrendTS(tsDict.get(i)).pct_change().dropna().corr(detrendTS(currPort).pct_change().dropna()) ) for i in remainingIdArr ]
            else:
                if applyABStoCorr:
                    tempADF = [ (i, abs( tsDict.get(i).pct_change().dropna().corr(currPort.pct_change().dropna()) ) ) for i in remainingIdArr ]
                else:
                    tempADF = [ (i, tsDict.get(i).pct_change().dropna().corr(currPort.pct_change().dropna()) ) for i in remainingIdArr ]
        # print(tempADF)           
        tempADFdf = pd.DataFrame(tempADF)
        tempADFser = pd.Series(data=tempADFdf[1].values,index=tempADFdf[0].values)
        tempADFser.sort()
        print(tempADFser)
        print(tempADFser.index.values[0])
        print(tempADFser.values[0])
        newPositionStr = tempADFser.index[0]
        print( newPositionStr )
        
        newPosition = tsDict.get(newPositionStr).copy()
        newPosition.name = newPositionStr
        currPortDict[ newPositionStr ] = newPosition
        
        currPort = calcEqualWeightIndex( currPortDict.values(), currPortDict.keys(), indexStartValue=100 )
        remainingIdArr = np.setdiff1d( tsDict.keys(), currPortDict.keys() )
    print(currPortDict.keys())    
    return (currPortDict.keys(), currPort)

In [85]:
AAPL.AdjClose.size


Out[85]:
8581

In [86]:
[ i for i in testStocksDict.keys() if testStocksDict.get(i).size > 8000 ]


Out[86]:
['GE', 'IBM', 'AAPL', 'XOM']

In [87]:
coint_bs = findBest_bootstrap(testStocksDict, 'SPY', numToSelect=5, detrend=True, applyABStoCorr=True, useCoint=True)
#normalize(multiTimeseriesToDF( testStocksDict.values(), testStocksDict.keys()),withStartingValue=100).plot()
print("sharpe ratio: ")
print(sharpeRatio(coint_bs[1]))
print("stability of timeseries: ")
print(stabilityOfTimeseries(coint_bs[1]))
print("Beta: ")
print(betaTimeseries(coint_bs[1],SPY.AdjClose))
print( [ (i , sharpeRatio(testStocksDict.get(i)) ) for i in testStocksDict.keys() ] )
print( [ (i , stabilityOfTimeseries(testStocksDict.get(i)) ) for i in testStocksDict.keys() ] )
calcEqualWeightIndex(testStocksDict.values()).plot(color='blue',linewidth=6)
coint_bs[1].plot(color='black', linewidth=6)


SPY
['AAPL' 'GE' 'GLD' 'IBM' 'QID' 'SDS' 'SLV' 'USO' 'XOM']
QID     0.132090
SDS     0.212697
GLD     0.339050
IBM     0.548083
XOM     0.738390
SLV     0.823948
GE      0.870737
USO     0.906663
AAPL    0.996234
dtype: float64
QID
0.132089934694
QID
SDS     0.230367
SLV     0.313450
USO     0.630810
AAPL    0.678314
GLD     0.826418
IBM     0.853751
GE      0.946417
XOM     0.951139
dtype: float64
SDS
0.230367078474
SDS
AAPL    0.344307
SLV     0.435210
USO     0.585294
GLD     0.801361
IBM     0.806899
GE      0.930701
XOM     0.945641
dtype: float64
AAPL
0.344307099597
AAPL
IBM    0.839652
USO    0.862262
XOM    0.876290
GE     0.878195
SLV    0.885640
GLD    0.930268
dtype: float64
IBM
0.839651924674
IBM
['SPY', 'AAPL', 'SDS', 'IBM', 'QID']
sharpe ratio: 
0.474964290771
stability of timeseries: 
0.935411563004
Beta: 
0.205134289618
[('SPY', 0.56458037273005202), ('GE', 0.52171019177377909), ('IBM', 0.41007108374946744), ('SDS', -0.44718619449198288), ('USO', -0.22783937433345189), ('QID', -0.70174036235762782), ('AAPL', 0.58255865302825849), ('XOM', 0.66950594203883584), ('GLD', 0.56547051644294521), ('SLV', 0.2169929035569037)]
[('SPY', 0.73009819335295889), ('GE', 0.90615486211533391), ('IBM', 0.91183722596738381), ('SDS', 0.85278110464487866), ('USO', 0.44648969769617108), ('QID', 0.93430269961174883), ('AAPL', 0.78168595998669799), ('XOM', 0.981530543689213), ('GLD', 0.79629691051491291), ('SLV', 0.38203070622626467)]
Out[87]:
<matplotlib.axes._subplots.AxesSubplot at 0x10af20550>

In [88]:
corr_bs = findBest_bootstrap(testStocksDict, 'SPY', numToSelect=5, detrend=True, applyABStoCorr=True, useCoint=False)
# normalize(multiTimeseriesToDF( testStocksDict.values(), testStocksDict.keys()),withStartingValue=100).plot()
print("sharpe ratio: ")
print(sharpeRatio(corr_bs[1]))
print("stability of timeseries: ")
print(stabilityOfTimeseries(corr_bs[1]))
print("Beta: ")
print(betaTimeseries(corr_bs[1],SPY.AdjClose))

print( [ (i , sharpeRatio(testStocksDict.get(i)) ) for i in testStocksDict.keys() ] )
print( [ (i , stabilityOfTimeseries(testStocksDict.get(i)) ) for i in testStocksDict.keys() ] )
calcEqualWeightIndex(testStocksDict.values()).plot(color='blue',linewidth=6)
coint_bs[1].plot(color='black', linewidth=6)
corr_bs[1].plot(color='gray', linewidth=4)


SPY
['AAPL' 'GE' 'GLD' 'IBM' 'QID' 'SDS' 'SLV' 'USO' 'XOM']
GE      0.000230
IBM     0.000595
AAPL    0.000625
XOM     0.000803
SDS     0.003052
QID     0.003147
GLD     0.003587
USO     0.006164
SLV     0.087858
dtype: float64
GE
0.000229900874222
GE
QID     0.000436
IBM     0.000839
XOM     0.002381
SDS     0.003428
AAPL    0.003880
USO     0.004513
GLD     0.005182
SLV     0.006420
dtype: float64
QID
0.000436258359164
QID
IBM     0.001410
USO     0.002098
SDS     0.002554
AAPL    0.003444
GLD     0.006233
SLV     0.008185
XOM     0.009372
dtype: float64
IBM
0.00140998195352
IBM
GLD     0.000219
SLV     0.000400
AAPL    0.000867
XOM     0.001209
SDS     0.003186
USO     0.004838
dtype: float64
GLD
0.000219479611323
GLD
['SPY', 'GE', 'GLD', 'IBM', 'QID']
sharpe ratio: 
0.548968121351
stability of timeseries: 
0.957219356032
Beta: 
0.416588147281
[('SPY', 0.56458037273005202), ('GE', 0.52171019177377909), ('IBM', 0.41007108374946744), ('SDS', -0.44718619449198288), ('USO', -0.22783937433345189), ('QID', -0.70174036235762782), ('AAPL', 0.58255865302825849), ('XOM', 0.66950594203883584), ('GLD', 0.56547051644294521), ('SLV', 0.2169929035569037)]
[('SPY', 0.73009819335295889), ('GE', 0.90615486211533391), ('IBM', 0.91183722596738381), ('SDS', 0.85278110464487866), ('USO', 0.44648969769617108), ('QID', 0.93430269961174883), ('AAPL', 0.78168595998669799), ('XOM', 0.981530543689213), ('GLD', 0.79629691051491291), ('SLV', 0.38203070622626467)]
Out[88]:
<matplotlib.axes._subplots.AxesSubplot at 0x1102996d0>

In [89]:
GE.tail()


Out[89]:
GE Open High Low Close Volume AdjClose
Date
2014-12-15 25.02 25.03 24.41 24.59 63574000 24.36
2014-12-16 24.54 25.18 24.40 24.49 48388000 24.26
2014-12-17 24.61 24.72 24.06 24.66 77589300 24.43
2014-12-18 25.13 25.15 24.68 25.14 52044700 25.14
2014-12-19 25.12 25.70 25.00 25.62 86434700 25.62

In [90]:
GE.AdjClose.plot()
AAPL.AdjClose.plot(legend=True).legend(loc='upper left')
plt.ylabel('adjusted close')


Out[90]:
<matplotlib.text.Text at 0x110102110>

In [91]:
_=[ sliceTS(testStocksDict.get(i),'2013-1-1','2014-1-1').plot(legend=True,label=i) for i in testStocksDict.keys()]



In [92]:
AAPL.AdjClose[ (AAPL.AdjClose.index > pd.datetime(2015,1,1)) & (AAPL.AdjClose.index < pd.datetime(2016,1,1)) ].tail().size


Out[92]:
0

In [93]:
def filterAndRank(tsInputDict, minDaysOfTrading=100, startDate=None, endDate=None,
                    useCumulativeHistoryForCalcs=True,
                    returnScore0to100=True,
                    score100isBest=True,
                    applyFilters=True,
                    filterAnnualReturn = 0.01,
                    filterSharpeValue = 0.1,
                    filterAnnualVolatilityValue = 0.90,
                    filterStabilityValue = 0.01,
                    filterMaxDrawdownValue = -0.90,
                    filterCalmerValue = 0.1,
                    filterOutLongOnly=False,
                    normalizeRanking=False):
    # 'startDate' and 'endDate' should be of the form: pd.datetime(2015,1,1)
    #
    # this function first keeps only inputs that have been trading for more than 'minDaysOfTrading',
    # then ranks each of them by the following metrics:
    #     - sharpeRatio
    #     - annualVolatility
    #     - annualReturn
    #     - maxDrawdown
    #     - stabilityOfTimeseries
    #     - calmerRatio
    # and finally totals the rankings (for now each metric's contribution is equally weighted)
    
    print(str(len(tsInputDict.keys())) + '  : # of Inputs: ')
    
    tsDict = {}
    
    if useCumulativeHistoryForCalcs:
        sliceStartDate = None
    else:
        sliceStartDate = startDate
    
    # slice the input timeseries if 'startDate' and 'endDate' are passed
    if (sliceStartDate is not None) or (endDate is not None):
        for i in tsInputDict.keys():
            tempTS = tsInputDict.get(i).copy()
            tempTS = sliceTS(tempTS, sliceStartDate, endDate)
            if tempTS.size > minDaysOfTrading:
                tsDict[i] = tempTS
        print(str(len(tsDict.keys())) + '  : # of Inputs with data btwn startDate and endDate')
    else:
        tsDict = tsInputDict.copy()
    
    algosWithMinDays = [ i for i in tsDict.keys() if tsDict.get(i).size > minDaysOfTrading ]
    
    print(str(len(algosWithMinDays)) + '   : with >' + str(minDaysOfTrading) + ' days history: ')
    
    algosToKeep = np.array([],dtype='object')
    
    for i in tsDict.keys():
        tempNAV = tsDict.get(i)   
        # only keep algos that meet the minimum values declared in the function parameter list
        if tempNAV.size > minDaysOfTrading:                        # meets minimum days active
            if applyFilters:
                # keep only inputs that clear every filter threshold
                if (annualReturn(tempNAV) > filterAnnualReturn
                        and sharpeRatio(tempNAV) > filterSharpeValue
                        and annualVolatility(tempNAV) < filterAnnualVolatilityValue
                        and stabilityOfTimeseries(tempNAV) > filterStabilityValue
                        and maxDrawdown(tempNAV) > filterMaxDrawdownValue
                        and calmerRatio(tempNAV) > filterCalmerValue):
                    algosToKeep = np.append(algosToKeep, i)
            else:
                algosToKeep = np.append(algosToKeep, i)
    
    print(str(algosToKeep.size) + '   : Those who pass quant filter criteria and move onto ranking algorithm: ')
    if algosToKeep.size < 1:
        return {'ranks' : None, 'allRankInfo' : None, 'allInputsWithMinDay' : None }
    
#    print '        (* these criteria also simply filtered out those whose account data still needs cleaning)  '
#    print '        Note: ' + str(algosToKeep.size-removeSectorClonesIDs(allAlgoDataDF,algosToKeep,byQuant=byQuant,correlCutoff=0.9).size) + ' have high-correlation to long/sector strategy'

    if filterOutLongOnly:
        # algosToKeep = removeSectorClonesIDs(allAlgoDataDF,algosToKeep,byQuant=byQuant,correlCutoff=0.9)
        print(str(algosToKeep.size) + '   : Kept after filtering out Sector/Long only correlated: ')
    
    # now try selecting the algos for the portfolio by ranking each of the algos that
    # passed the above filters by each of the filter metrics
    allSharpeRatio = [ ( i, sharpeRatio(tsDict.get(i))) for i in algosToKeep ]
    allAnnualVol = [ ( i, annualVolatility(tsDict.get(i))) for i in algosToKeep ]
    allAnnualRet = [ ( i, annualReturn(tsDict.get(i))) for i in algosToKeep ]
    allMaxDrawdown = [ ( i, maxDrawdown(tsDict.get(i))) for i in algosToKeep ]
    allStability = [ ( i, stabilityOfTimeseries(tsDict.get(i))) for i in algosToKeep ]
    allCalmer = [ ( i, calmerRatio(tsDict.get(i))) for i in algosToKeep ]
    allHistoryDays = [ ( i, len(tsDict.get(i))) for i in algosToKeep ]
    
    sharpeRatioDict = dict(allSharpeRatio)
    rankingDF = pd.DataFrame.from_dict(sharpeRatioDict,orient='index')
    rankingDF = pd.merge(rankingDF, pd.DataFrame.from_dict(dict(allAnnualVol),orient='index'),left_index=True,right_index=True)
    rankingDF = pd.merge(rankingDF, pd.DataFrame.from_dict(dict(allAnnualRet),orient='index'),left_index=True,right_index=True)
    rankingDF = pd.merge(rankingDF, pd.DataFrame.from_dict(dict(allMaxDrawdown),orient='index'),left_index=True,right_index=True)
    rankingDF = pd.merge(rankingDF, pd.DataFrame.from_dict(dict(allStability),orient='index'),left_index=True,right_index=True)
    rankingDF = pd.merge(rankingDF, pd.DataFrame.from_dict(dict(allCalmer),orient='index'),left_index=True,right_index=True)
    numOfMetrics = len(rankingDF.columns)
#    print 'numMetrics' + str(numOfMetrics)
    
    rankingDF = pd.merge(rankingDF, pd.DataFrame.from_dict(dict(allHistoryDays),orient='index'),left_index=True,right_index=True)
    rankingDF.columns = ['sharpe','annVol','annRet','maxDD','stability','calmer','histDays']
    
    if normalizeRanking:    
        rankingDF['rankSharpeRatio'] = rankingDF.sharpe.rank(ascending=False, na_option='bottom')
        rankingDF['rankSharpeRatio'] = rankingDF.rankSharpeRatio - rankingDF.rankSharpeRatio.mean()
        rankingDF['rankSharpeRatio'] = rankingDF.rankSharpeRatio / rankingDF.rankSharpeRatio.std()
        
        rankingDF['rankAnnVol'] = rankingDF.annVol.rank(ascending=True, na_option='bottom')
        rankingDF['rankAnnVol'] = rankingDF.rankAnnVol - rankingDF.rankAnnVol.mean()
        rankingDF['rankAnnVol'] = rankingDF.rankAnnVol / rankingDF.rankAnnVol.std()
        
        rankingDF['rankAnnRet'] = rankingDF.annRet.rank(ascending=False, na_option='bottom')
        rankingDF['rankAnnRet'] = rankingDF.rankAnnRet - rankingDF.rankAnnRet.mean()
        rankingDF['rankAnnRet'] = rankingDF.rankAnnRet / rankingDF.rankAnnRet.std()
        
        rankingDF['rankMaxDD'] = rankingDF.maxDD.rank(ascending=False, na_option='bottom')
        rankingDF['rankMaxDD'] = rankingDF.rankMaxDD - rankingDF.rankMaxDD.mean()
        rankingDF['rankMaxDD'] = rankingDF.rankMaxDD / rankingDF.rankMaxDD.std()
        
        rankingDF['rankStability'] = rankingDF.stability.rank(ascending=False, na_option='bottom')
        rankingDF['rankStability'] = rankingDF.rankStability - rankingDF.rankStability.mean()
        rankingDF['rankStability'] = rankingDF.rankStability / rankingDF.rankStability.std()
        
        rankingDF['rankCalmer'] = rankingDF.calmer.rank(ascending=False, na_option='bottom')
        rankingDF['rankCalmer'] = rankingDF.rankCalmer - rankingDF.rankCalmer.mean()
        rankingDF['rankCalmer'] = rankingDF.rankCalmer / rankingDF.rankCalmer.std()
    else:
        rankingDF['rankSharpeRatio'] = rankingDF.sharpe.rank(ascending=False, na_option='bottom')
        rankingDF['rankAnnVol'] = rankingDF.annVol.rank(ascending=True, na_option='bottom')
        rankingDF['rankAnnRet'] = rankingDF.annRet.rank(ascending=False, na_option='bottom')
        rankingDF['rankMaxDD'] = rankingDF.maxDD.rank(ascending=False, na_option='bottom')
        rankingDF['rankStability'] = rankingDF.stability.rank(ascending=False, na_option='bottom')
        rankingDF['rankCalmer'] = rankingDF.calmer.rank(ascending=False, na_option='bottom')
    
    print(rankingDF.shape)
    rankingDF['rankRaw'] = (rankingDF.rankSharpeRatio + rankingDF.rankAnnVol + rankingDF.rankAnnRet + rankingDF.rankMaxDD + rankingDF.rankStability + rankingDF.rankCalmer ) / numOfMetrics
    
    if returnScore0to100:
        rankingDF['rank_0_to_100'] = 100*( rankingDF['rankRaw'] / len(rankingDF['rankRaw']) )
        if score100isBest:
            rankingDF['rank_100_to_0'] = 100*( 1-rankingDF['rankRaw']/len(rankingDF['rankRaw']) )
            rankingDF = rankingDF.sort(columns='rank_100_to_0',ascending=False)
            rankingDF['rankFinal'] = rankingDF['rank_100_to_0']
        else:
            rankingDF = rankingDF.sort(columns='rank_0_to_100',ascending=True)
            rankingDF['rankFinal'] = rankingDF['rank_0_to_100']
    else:
        rankingDF = rankingDF.sort(columns='rankRaw',ascending=True)
        rankingDF['rankFinal'] = rankingDF['rankRaw']
            
    return {'ranks' : rankingDF['rankFinal'], 
            'allRankInfo' : rankingDF, 
            'allInputsWithMinDay' : algosWithMinDays }

In [52]:
def buildPortfolio_walkForward(tsDict, combineMethod='rank', numToSelect=5, minDaysOfTrading=100,
                               startDate=pd.datetime(2013,6,1), endDate=pd.datetime(2014,12,1), portRebalDays=90,
                               useCumulativeHistoryForCalcs=True, filterCriteriaDict=None ):
    # 'combineMethod' :  
    #       'rank' : just equal weight rank all inputs and take Top N to build portfolio
    #       'correl' : take all inputs that pass filters and take the N least correlated. Starting with highest ranked one
    #       'coint' : take all inputs that pass filters and take the N best cointegrated. Starting with highest ranked one
    
    if filterCriteriaDict is None:
        filterValuesDict = {'AnnualReturn' : -0.99,
                             'SharpeValue' : -10.0,
                             'AnnualVolatility' : 1.99,
                             'Stability' : 0.00,
                             'MaxDrawdown' : -0.99,
                             'Normalize' : False,
                             'FilterOutLongOnly' : False}
    else:
        filterValuesDict = filterCriteriaDict
         
    tempStartDate = startDate
    tempEndDate = tempStartDate + timedelta(portRebalDays)
    count = 0
    wfDict = {}
    while tempEndDate < endDate:
        if useCumulativeHistoryForCalcs is None:
            tempStartDate = None
        print('Computing Ranks on date:' + str(tempEndDate))
        tempRanked = filterAndRank(tsDict, minDaysOfTrading=minDaysOfTrading, startDate=tempStartDate, endDate=tempEndDate,
                                   useCumulativeHistoryForCalcs=useCumulativeHistoryForCalcs ,
                                   filterAnnualReturn=filterValuesDict.get('AnnualReturn') ,
                                   filterSharpeValue=filterValuesDict.get('SharpeValue') ,
                                   filterAnnualVolatilityValue=filterValuesDict.get('AnnualVolatility') ,
                                   filterStabilityValue=filterValuesDict.get('Stability') ,
                                   filterMaxDrawdownValue=filterValuesDict.get('MaxDrawdown') ,
                                   normalizeRanking=filterValuesDict.get('Normalize') ,
                                   filterOutLongOnly=filterValuesDict.get('FilterOutLongOnly') )
        
        
        
        selDict = {}
        # selDict['start'] = tempStartDate
        selDict['rankingDate'] = tempEndDate
        if tempRanked.get('ranks') is None:
            pass # selDict['holdingsID'] = []
            # selDict['holdingsTS'] = []
        else:
            if len(tempRanked.get('ranks')) < numToSelect:
                # fewer candidates passed the filters than requested; take them all
                lastSel = len(tempRanked.get('ranks'))
            else:
                lastSel = numToSelect
            print("num to select:" + str(lastSel))

            selDict['holdingsID'] = tempRanked.get('ranks').index.values[0:lastSel]
            tempFutureHoldingsData = {}
            tempHoldingsData = {}
            # now calc the equal weight index of these holdings over the future period
            futureEndDate = tempEndDate + timedelta(portRebalDays)
            for i in selDict['holdingsID']:
                # print(i)
                tempFutureHoldingsData[i] = sliceTS(tsDict.get(i),startDate=tempEndDate, endDate=futureEndDate)
                tempHoldingsData[i] = tsDict.get(i)
            selDict['futurePeriod_holdingsData'] = pd.DataFrame.from_dict(tempFutureHoldingsData)
            selDict['eqPort'] = calcEqualWeightIndex_fromDict(tempHoldingsData, indexStartValue=100,
                                                              startDate=tempEndDate, endDate=futureEndDate)
          #  selDict['eqWeightPort'] = selDict['eqWeightPort'].reindex(index=selDict['eqWeightPort'].index.normalize())
            selDict['eqPort_start'] = selDict['eqPort'].index[0]
            selDict['eqPort_end'] = selDict['eqPort'].index[-1]
            selDict['p_eqSharpe'] = sharpeRatio(selDict['eqPort'] )
            selDict['p_eqAnnRet'] = annualReturn(selDict['eqPort'] )
            selDict['p_eqAnnVol'] = annualVolatility(selDict['eqPort'] )
            selDict['p_eqStab'] = stabilityOfTimeseries(selDict['eqPort'] )
            selDict['p_maxDD'] = maxDrawdown(selDict['eqPort'] )
            selDict['p_TR'] = (selDict['eqPort'].iloc[-1] - 100) / 100
            
        wfDict[count] = selDict
        
        tempStartDate = tempEndDate
        tempEndDate = tempEndDate + timedelta(portRebalDays)
        count = count + 1
        
    if combineMethod == 'rank':
        # build the rebalanced portfolio based on the rankings determined above
        pass  # placeholder: recombination not implemented yet
        
    
    return { 'rebalances' : pd.DataFrame(wfDict).T }

In [98]:
# revised walk-forward builder: same as above but adds the Calmar filter and renames the per-period stat keys
def buildPortfolio_walkForward(tsDict, combineMethod='rank', numToSelect=5, minDaysOfTrading=100,
                               startDate=pd.datetime(2013,6,1), endDate=pd.datetime(2014,12,1), portRebalDays=90,
                               useCumulativeHistoryForCalcs=True, filterCriteriaDict=None ):
    # 'combineMethod' :  
    #       'rank' : just equal weight rank all inputs and take Top N to build portfolio
    #       'correl' : take all inputs that pass filters and take the N least correlated. Starting with highest ranked one
    #       'coint' : take all inputs that pass filters and take the N best cointegrated. Starting with highest ranked one
    
    if filterCriteriaDict is None:
        filterValuesDict = {'AnnualReturn' : -0.99,
                             'SharpeValue' : -10.0,
                             'AnnualVolatility' : 1.99,
                             'Stability' : 0.00,
                             'MaxDrawdown' : -0.99,
                             'Calmer' : 0.1,
                             'Normalize' : False,
                             'FilterOutLongOnly' : False}
    else:
        filterValuesDict = filterCriteriaDict
         
    tempStartDate = startDate
    tempEndDate = tempStartDate + timedelta(portRebalDays)
    count = 0
    wfDict = {}
    while tempEndDate < endDate:
        if useCumulativeHistoryForCalcs is None:
            tempStartDate = None
        print('Computing Ranks on date:' + str(tempEndDate))
        tempRanked = filterAndRank(tsDict, minDaysOfTrading=minDaysOfTrading, 
                                   returnScore0to100=True, score100isBest=True,
                                   startDate=tempStartDate, endDate=tempEndDate,
                                   useCumulativeHistoryForCalcs=useCumulativeHistoryForCalcs ,
                                   filterAnnualReturn=filterValuesDict.get('AnnualReturn') ,
                                   filterSharpeValue=filterValuesDict.get('SharpeValue') ,
                                   filterAnnualVolatilityValue=filterValuesDict.get('AnnualVolatility') ,
                                   filterStabilityValue=filterValuesDict.get('Stability') ,
                                   filterMaxDrawdownValue=filterValuesDict.get('MaxDrawdown') ,
                                   filterCalmerValue=filterValuesDict.get('Calmer') ,
                                   normalizeRanking=filterValuesDict.get('Normalize') ,
                                   filterOutLongOnly=filterValuesDict.get('FilterOutLongOnly') )
        
        selDict = {}
        # selDict['start'] = tempStartDate
        selDict['rankingDate'] = tempEndDate
        if tempRanked.get('ranks') is None:
            pass # selDict['holdingsID'] = []
            # selDict['holdingsTS'] = []
        else:
            if len(tempRanked.get('ranks')) < numToSelect:
                # fewer candidates passed the filters than requested; take them all
                lastSel = len(tempRanked.get('ranks'))
            else:
                lastSel = numToSelect
            print("num to select:" + str(lastSel))

            selDict['holdingsID'] = tempRanked.get('ranks').index.values[0:lastSel]
            tempFutureHoldingsData = {}
            tempHoldingsData = {}
            # now calc the equal weight index of these holdings over the future period
            futureEndDate = tempEndDate + timedelta(portRebalDays)
            for i in selDict['holdingsID']:
                # print(i)
                tempFutureHoldingsData[i] = sliceTS(tsDict.get(i),startDate=tempEndDate, endDate=futureEndDate)
                tempHoldingsData[i] = tsDict.get(i)
            selDict['futurePeriod_holdingsData'] = pd.DataFrame.from_dict(tempFutureHoldingsData)
            selDict['eqPort'] = calcEqualWeightIndex_fromDict(tempHoldingsData, indexStartValue=100,
                                                              startDate=tempEndDate, endDate=futureEndDate)
          #  selDict['eqWeightPort'] = selDict['eqWeightPort'].reindex(index=selDict['eqWeightPort'].index.normalize())
            selDict['p_start'] = selDict['eqPort'].index[0]
            selDict['p_end'] = selDict['eqPort'].index[-1]
            selDict['p_sharpe'] = sharpeRatio(selDict['eqPort'] )
            selDict['p_annRet'] = annualReturn(selDict['eqPort'] )
            selDict['p_annVol'] = annualVolatility(selDict['eqPort'] )
            selDict['p_stab'] = stabilityOfTimeseries(selDict['eqPort'] )
            selDict['p_maxDD'] = maxDrawdown(selDict['eqPort'] )
            selDict['p_calmer'] = calmerRatio(selDict['eqPort'] )
            selDict['p_TR'] = (selDict['eqPort'].iloc[-1] - 100) / 100
            
        wfDict[count] = selDict
        
        tempStartDate = tempEndDate
        tempEndDate = tempEndDate + timedelta(portRebalDays)
        count = count + 1
        
    if combineMethod == 'rank':
        # build the rebalanced portfolio based on the rankings determined above
        pass  # placeholder: recombination not implemented yet
        
    
    return { 'rebalances' : pd.DataFrame(wfDict).T }
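
The walk-forward result comes back as one row per rebalance. A minimal sketch (assuming at least one rebalance produced holdings, and using the 'eqPort' field defined above; illustrative only) for stitching the out-of-sample equal-weight segments into a single curve:

def stitchWalkForward(wfResult):
    # chain each period's out-of-sample equal-weight NAV, re-basing every segment
    # so it continues from where the previous one ended (each segment starts near 100)
    segments = list(wfResult['rebalances']['eqPort'].dropna())
    stitched = segments[0]
    for seg in segments[1:]:
        stitched = appendSeries(stitched, seg * (stitched.iloc[-1] / 100.0))
    return stitched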

In [94]:
[ testStocksDict.get(i).index[0] for i in testStocksDict.keys() ]


Out[94]:
[Timestamp('1993-01-29 00:00:00'),
 Timestamp('1970-01-02 00:00:00'),
 Timestamp('1970-01-02 00:00:00'),
 Timestamp('2006-07-13 00:00:00'),
 Timestamp('2006-04-10 00:00:00'),
 Timestamp('2006-07-13 00:00:00'),
 Timestamp('1980-12-12 00:00:00'),
 Timestamp('1970-01-02 00:00:00'),
 Timestamp('2004-11-18 00:00:00'),
 Timestamp('2006-04-28 00:00:00')]

In [95]:
tFil = filterAndRank(testStocksDict,minDaysOfTrading=100,startDate=pd.datetime(2012,1,1),endDate=pd.datetime(2013,1,1))


10  : # of Inputs: 
10  : # of Inputs with data btwn startDate and endDate
10   : with >100 days history: 
7   : Those who pass quant filter criteria and move onto ranking algorithm: 
(7, 13)

In [96]:
tFil.get('allRankInfo')


Out[96]:
sharpe annVol annRet maxDD stability calmer histDays rankSharpeRatio rankAnnVol rankAnnRet rankMaxDD rankStability rankCalmer rankRaw rank_0_to_100 rank_100_to_0 rankFinal
GLD 0.874050 0.207534 0.181395 -0.294141 0.967558 0.616694 2043 1 2 3 1 2 1 1.666667 23.809524 76.190476 76.190476
XOM 0.675686 0.242846 0.164088 -0.416667 0.983086 0.393811 10851 2 3 4 2 1 2 2.333333 33.333333 66.666667 66.666667
AAPL 0.573788 0.494255 0.283598 -0.817623 0.741028 0.346856 8084 3 7 1 6 5 3 4.166667 59.523810 40.476190 40.476190
SLV 0.490595 0.379632 0.186246 -0.570805 0.726106 0.326286 1681 6 6 2 4 6 4 4.666667 66.666667 33.333333 33.333333
SPY 0.493142 0.196620 0.096961 -0.551855 0.656780 0.175701 5018 5 1 7 3 7 6 4.833333 69.047619 30.952381 30.952381
GE 0.514100 0.293027 0.150645 -0.855273 0.910406 0.176137 10851 4 5 5 7 3 5 4.833333 69.047619 30.952381 30.952381
IBM 0.433804 0.264190 0.114606 -0.693981 0.899596 0.165144 10851 7 4 6 5 4 7 5.500000 78.571429 21.428571 21.428571

In [99]:
twf_port = buildPortfolio_walkForward(testStocksDict,numToSelect=3,minDaysOfTrading=100,startDate=pd.datetime(2003,1,1))


Computing Ranks on date:2003-04-01 00:00:00
10  : # of Inputs: 
5  : # of Inputs with data btwn startDate and endDate
5   : with >100 days history: 
5   : Those who pass quant filter criteria and move onto ranking algorithm: 
(5, 13)
num to select:3
Computing Ranks on date:2003-06-30 00:00:00
10  : # of Inputs: 
5  : # of Inputs with data btwn startDate and endDate
5   : with >100 days history: 
5   : Those who pass quant filter criteria and move onto ranking algorithm: 
(5, 13)
num to select:3
Computing Ranks on date:2003-09-28 00:00:00
10  : # of Inputs: 
5  : # of Inputs with data btwn startDate and endDate
5   : with >100 days history: 
5   : Those who pass quant filter criteria and move onto ranking algorithm: 
(5, 13)
num to select:3
Computing Ranks on date:2003-12-27 00:00:00
10  : # of Inputs: 
5  : # of Inputs with data btwn startDate and endDate
5   : with >100 days history: 
5   : Those who pass quant filter criteria and move onto ranking algorithm: 
(5, 13)
num to select:3
Computing Ranks on date:2004-03-26 00:00:00
10  : # of Inputs: 
5  : # of Inputs with data btwn startDate and endDate
5   : with >100 days history: 
5   : Those who pass quant filter criteria and move onto ranking algorithm: 
(5, 13)
num to select:3
Computing Ranks on date:2004-06-24 00:00:00
10  : # of Inputs: 
5  : # of Inputs with data btwn startDate and endDate
5   : with >100 days history: 
5   : Those who pass quant filter criteria and move onto ranking algorithm: 
(5, 13)
num to select:3
Computing Ranks on date:2004-09-22 00:00:00
10  : # of Inputs: 
5  : # of Inputs with data btwn startDate and endDate
5   : with >100 days history: 
5   : Those who pass quant filter criteria and move onto ranking algorithm: 
(5, 13)
num to select:3
Computing Ranks on date:2004-12-21 00:00:00
10  : # of Inputs: 
5  : # of Inputs with data btwn startDate and endDate
5   : with >100 days history: 
5   : Those who pass quant filter criteria and move onto ranking algorithm: 
(5, 13)
num to select:3
Computing Ranks on date:2005-03-21 00:00:00
10  : # of Inputs: 
5  : # of Inputs with data btwn startDate and endDate
5   : with >100 days history: 
5   : Those who pass quant filter criteria and move onto ranking algorithm: 
(5, 13)
num to select:3
Computing Ranks on date:2005-06-19 00:00:00
10  : # of Inputs: 
6  : # of Inputs with data btwn startDate and endDate
6   : with >100 days history: 
5   : Those who pass quant filter criteria and move onto ranking algorithm: 
(5, 13)
num to select:3
Computing Ranks on date:2005-09-17 00:00:00
10  : # of Inputs: 
6  : # of Inputs with data btwn startDate and endDate
6   : with >100 days history: 
6   : Those who pass quant filter criteria and move onto ranking algorithm: 
(6, 13)
num to select:3
Computing Ranks on date:2005-12-16 00:00:00
10  : # of Inputs: 
6  : # of Inputs with data btwn startDate and endDate
6   : with >100 days history: 
6   : Those who pass quant filter criteria and move onto ranking algorithm: 
(6, 13)
num to select:3
Computing Ranks on date:2006-03-16 00:00:00
10  : # of Inputs: 
6  : # of Inputs with data btwn startDate and endDate
6   : with >100 days history: 
6   : Those who pass quant filter criteria and move onto ranking algorithm: 
(6, 13)
num to select:3
Computing Ranks on date:2006-06-14 00:00:00
10  : # of Inputs: 
6  : # of Inputs with data btwn startDate and endDate
6   : with >100 days history: 
6   : Those who pass quant filter criteria and move onto ranking algorithm: 
(6, 13)
num to select:3
Computing Ranks on date:2006-09-12 00:00:00
10  : # of Inputs: 
7  : # of Inputs with data btwn startDate and endDate
7   : with >100 days history: 
6   : Those who pass quant filter criteria and move onto ranking algorithm: 
(6, 13)
num to select:3
Computing Ranks on date:2006-12-11 00:00:00
10  : # of Inputs: 
10  : # of Inputs with data btwn startDate and endDate
10   : with >100 days history: 
7   : Those who pass quant filter criteria and move onto ranking algorithm: 
(7, 13)
num to select:3
Computing Ranks on date:2007-03-11 00:00:00
10  : # of Inputs: 
10  : # of Inputs with data btwn startDate and endDate
10   : with >100 days history: 
6   : Those who pass quant filter criteria and move onto ranking algorithm: 
(6, 13)
num to select:3
[... the same summary block repeats for every quarterly rebalance through 2014-10-30. All 10 inputs always have data in the window and >100 days of history; the count passing the quant filter is 6 through 2007-09-07, 8 from 2007-12-06 through 2009-08-27 (9 on 2009-02-28), and 7 from 2009-11-25 onward; 3 names are selected each time ...]
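
The log above is the trace of the quarterly selection step. As a hedged reconstruction (hypothetical names and a placeholder ranking; the actual code is defined earlier in the notebook), the filter it describes looks roughly like the sketch below. The `(N, 13)` line is presumably the shape of the N-survivors-by-13-metrics frame handed to the ranker, but that is only a guess.

# Hypothetical reconstruction of the filtering the log reports -- not the
# notebook's actual implementation.
def select_holdings(inputs, startDate, endDate, num_to_select=3, min_history=100):
    # inputs: dict mapping ticker -> price DataFrame (an assumed signature)
    print("%-4d: # of Inputs" % len(inputs))
    in_window = {k: df for k, df in inputs.items()
                 if df.index.min() <= startDate and df.index.max() >= endDate}
    print("%-4d: # of Inputs with data btwn startDate and endDate" % len(in_window))
    enough_history = {k: df for k, df in in_window.items()
                      if len(df.loc[:endDate]) > min_history}
    print("%-4d: with >%d days history" % (len(enough_history), min_history))
    # ...the quant filter criteria and ranking algorithm would run here; the
    # survivors get scored and the top `num_to_select` names are kept...
    ranked = sorted(enough_history)   # placeholder ordering, not the real ranks
    return ranked[:num_to_select]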

In [100]:
twf_port.get('rebalances').head()


Out[100]:
(wide frame reflowed for readability; the eqPort and futurePeriod_holdingsData
columns hold nested price Series/DataFrames and appear truncated in the display)

   holdingsID      p_TR         p_annRet     p_annVol   p_calmer   p_end       p_maxDD      p_sharpe    p_stab        p_start     rankingDate
0  [XOM, GE, SPY]  0.08988193   0.2884709    0.1518538  5.167194   2003-06-27  -0.05582738  1.899662    0.7882328     2003-04-02  2003-04-01
1  [XOM, GE, SPY]  0.03658874   0.1377545    0.1441283  3.443555   2003-09-26  -0.04000359  0.9557775   0.7138561     2003-07-01  2003-06-30
2  [XOM, GE, SPY]  0.0767147    0.2627639    0.1186764  5.180571   2003-12-26  -0.05072104  2.214121    0.1874206     2003-09-29  2003-09-28
3  [XOM, SPY, GE]  0.002661502  -0.03029395  0.1411313  -0.408635  2004-03-25  -0.0741345   -0.2146509  0.0008532885  2003-12-29  2003-12-27
4  [XOM, GE, SPY]  0.09026726   0.3270339    0.1212793  7.75904    2004-06-23  -0.04214876  2.696535    0.3165257     2004-03-29  2004-03-26
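
The p_* columns summarize each three-month holding period, and the ratios are internally consistent with the table: for row 0, 0.2884709 / 0.1518538 = 1.899662 = p_sharpe and 0.2884709 / 0.05582738 = 5.167194 = p_calmer (evidently a Calmar-style ratio; the 'calmer' spelling is the source's column name). A hedged sketch of how they could be computed from a period's equity curve follows -- my reconstruction, with the annualization convention behind p_annRet itself a guess:

import numpy as np

def period_stats(eq):
    # eq: daily equity curve (pd.Series) for one holding period.
    # p_stab ('stability') is omitted here -- perhaps an R^2-type measure of
    # the equity curve's trend, but that is only a guess.
    rets = eq.pct_change().dropna()
    tr = eq.iloc[-1] / eq.iloc[0] - 1            # total return over the period
    annVol = rets.std() * np.sqrt(252)           # p_annVol
    maxDD = (eq / eq.cummax() - 1.0).min()       # p_maxDD: worst peak-to-trough
    annRet = (1.0 + rets.mean()) ** 252 - 1.0    # one common convention; the
                                                 # notebook's may differ
    return {'p_TR': tr, 'p_annRet': annRet, 'p_annVol': annVol,
            'p_maxDD': maxDD,
            'p_sharpe': annRet / annVol,         # zero risk-free rate assumed
            'p_calmer': annRet / abs(maxDD)}     # Calmar-style ratio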

In [101]:
twf_port.get('rebalances').iloc[0]['futurePeriod_holdingsData'].get('SPY').head(3)


Out[101]:
Date
2003-04-02    69.78
2003-04-03    69.45
2003-04-04    69.86
Name: SPY, dtype: float64

In [113]:
# plot each holding's normalized price, then overlay the portfolio curve
normalize(twf_port.get('rebalances').iloc[0]['futurePeriod_holdingsData'],100).plot()
tempts = twf_port.get('rebalances').iloc[0]['eqPort']
normalize(tempts,100).plot(color="black",linewidth=5)


Out[113]:
<matplotlib.axes._subplots.AxesSubplot at 0x11383bf90>
[figure: the three holdings' prices normalized to 100 at the period start, with the equal-weight portfolio (tempts) overlaid as the thick black line]
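
normalize is defined earlier in the notebook and not shown in this section; a minimal sketch consistent with its use here (rescaling a series to start at a given base) would be:

def normalize(prices, base=100.0):
    # Assumed behavior, inferred from the plots: rescale a Series (or each
    # column of a DataFrame) so that its first value equals `base`.
    return prices / prices.iloc[0] * base
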
In [111]:
tempts.head()


Out[111]:
Date
2003-04-02    102.032005
2003-04-03    101.861954
2003-04-04    102.630666
2003-04-07    102.272101
2003-04-08    102.243499
dtype: float64

In [64]:
tempts.tail()


Out[64]:
Date
2003-06-23    111.165313
2003-06-24    111.237722
2003-06-25    109.798714
2003-06-26    110.426285
2003-06-27    108.988193
dtype: float64
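
A quick consistency check against the rebalances table: tempts ends at 108.988193, and

tempts.iloc[-1] / 100 - 1   # = 0.08988193, exactly row 0's p_TR

which suggests the equity curve is based at 100 on the ranking date (2003-04-01) rather than at its first displayed value (102.032005 on 2003-04-02).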

In [66]:
twf_port.get('rebalances').iloc[0]['eqPort_end']


Out[66]:
Timestamp('2003-06-27 00:00:00')

In [67]:
twf_port.get('rebalances').iloc[0]['eqPort_start']


Out[67]:
Timestamp('2003-04-02 00:00:00')

In [68]:
numpy.__version__


Out[68]:
'1.9.0'

In [65]:
from sklearn import datasets
iris = datasets.load_iris()
from sklearn.naive_bayes import GaussianNB

In [66]:
gnb = GaussianNB()
# fit on all 150 samples, then predict back on the same (training) data
y_pred = gnb.fit(iris.data, iris.target).predict(iris.data)
print("Number of mislabeled points out of a total %d points : %d" % (iris.data.shape[0],(iris.target != y_pred).sum()))


Number of mislabeled points out of a total 150 points : 6

In [67]:
y_pred


Out[67]:
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [68]:
gnb


Out[68]:
GaussianNB()

In [69]:
iris.target


Out[69]:
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
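
Rather than eyeballing the two arrays, sklearn.metrics can summarize the comparison directly; note this is in-sample accuracy, since the model is scored on the same 150 rows it was fit on:

from sklearn.metrics import accuracy_score
accuracy_score(iris.target, y_pred)   # 0.96 == (150 - 6) / 150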

In [70]:
_=plt.hist(AAPL.AdjClose.pct_change().dropna(),bins=100)

[figure: histogram of AAPL daily adjusted-close percent changes, 100 bins]
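
Daily returns like these are typically fat-tailed relative to a normal distribution; a quick quantification with scipy.stats might be:

import scipy.stats as stats
rets = AAPL.AdjClose.pct_change().dropna()
print("skew: %.3f, excess kurtosis: %.3f" %
      (stats.skew(rets), stats.kurtosis(rets)))   # kurtosis() reports Fisher (excess) kurtosis
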
In [ ]: