In [1]:
import pandas as pd
import numpy as np
from scipy.optimize import minimize
from matplotlib.pyplot import plot, scatter, show
from pandas.io.data import DataReader
from datetime import datetime


/Users/davekensinger/anaconda/lib/python3.5/site-packages/pandas/io/data.py:35: FutureWarning: 
The pandas.io.data module is moved to a separate package (pandas-datareader) and will be removed from pandas in a future version.
After installing the pandas-datareader package (https://github.com/pydata/pandas-datareader), you can change the import ``from pandas.io import data, wb`` to ``from pandas_datareader import data, wb``.
  FutureWarning)

In [2]:
#---------------------------------------------------------------
# Class CLA (algorithm for solving efficient portfolios)
#---------------------------------------------------------------
class CLA:
    def __init__(self,mean,covar,lB,uB):
        # Initialize the class
        self.mean=mean
        self.covar=covar
        self.lB=lB
        self.uB=uB
        self.w=[] # solution
        self.l=[] # lambdas
        self.g=[] # gammas
        self.f=[] # free weights
#---------------------------------------------------------------
    def solve(self):
        # Compute the turning points,free sets and weights
        f,w=self.initAlgo()
        self.w.append(np.copy(w)) # store solution
        self.l.append(None)
        self.g.append(None)
        self.f.append(f[:])
        while True:
            #1) case a): Bound one free weight
            l_in=None
            if len(f)>1:
                covarF,covarFB,meanF,wB=self.getMatrices(f)
                covarF_inv=np.linalg.inv(covarF)
                j=0
                for i in f:
                    l,bi=self.computeLambda(covarF_inv,covarFB,meanF,wB,j,[self.lB[i],self.uB[i]])
                    if l>l_in:l_in,i_in,bi_in=l,i,bi
                    j+=1
            #2) case b): Free one bounded weight
            l_out=None
            if len(f)<self.mean.shape[0]:
                b=self.getB(f)
                for i in b:
                    covarF,covarFB,meanF,wB=self.getMatrices(f+[i])
                    covarF_inv=np.linalg.inv(covarF)
                    l,bi=self.computeLambda(covarF_inv,covarFB,meanF,wB,meanF.shape[0]-1, \
                        self.w[-1][i])
                    if (self.l[-1]==None or l<self.l[-1]) and l>l_out:l_out,i_out=l,i                
            if (l_in==None or l_in<0) and (l_out==None or l_out<0):
                #3) compute minimum variance solution
                self.l.append(0)
                covarF,covarFB,meanF,wB=self.getMatrices(f)
                covarF_inv=np.linalg.inv(covarF)
                meanF=np.zeros(meanF.shape)
            else:
                #4) decide lambda
                if l_in>l_out:
                    self.l.append(l_in)
                    f.remove(i_in)
                    w[i_in]=bi_in # set value at the correct boundary
                else:
                    self.l.append(l_out)
                    f.append(i_out)
                covarF,covarFB,meanF,wB=self.getMatrices(f)
                covarF_inv=np.linalg.inv(covarF)
            #5) compute solution vector
            wF,g=self.computeW(covarF_inv,covarFB,meanF,wB)
            for i in range(len(f)):w[f[i]]=wF[i]
            self.w.append(np.copy(w)) # store solution
            self.g.append(g)
            self.f.append(f[:])
            if self.l[-1]==0:break
        #6) Purge turning points
        self.purgeNumErr(10e-10)
        self.purgeExcess()
#---------------------------------------------------------------    
    def initAlgo(self):
        # Initialize the algo
        #1) Form structured array
        a=np.zeros((self.mean.shape[0]),dtype=[('id',int),('mu',float)])
        b=[self.mean[i] for i in range(self.mean.shape[0])] # dump array into list
        a[:]=zip(range(self.mean.shape[0]),b) # fill structured array
        #2) Sort structured array
        b=np.sort(a,order='mu')
        #3) First free weight
        i,w=b.shape[0],np.copy(self.lB)
        while sum(w)<1:
            i-=1
            w[b[i][0]]=self.uB[b[i][0]]
        w[b[i][0]]+=1-sum(w)
        return [b[i][0]],w
#---------------------------------------------------------------    
    def computeBi(self,c,bi):
        if c>0:
            bi=bi[1][0]
        if c<0:
            bi=bi[0][0]
        return bi
#---------------------------------------------------------------
    def computeW(self,covarF_inv,covarFB,meanF,wB):
        #1) compute gamma
        onesF=np.ones(meanF.shape)
        g1=np.dot(np.dot(onesF.T,covarF_inv),meanF)
        g2=np.dot(np.dot(onesF.T,covarF_inv),onesF)
        if wB==None:
            g,w1=float(-self.l[-1]*g1/g2+1/g2),0
        else:
            onesB=np.ones(wB.shape)
            g3=np.dot(onesB.T,wB)
            g4=np.dot(covarF_inv,covarFB)
            w1=np.dot(g4,wB)
            g4=np.dot(onesF.T,w1)
            g=float(-self.l[-1]*g1/g2+(1-g3+g4)/g2)
        #2) compute weights
        w2=np.dot(covarF_inv,onesF)
        w3=np.dot(covarF_inv,meanF)
        return -w1+g*w2+self.l[-1]*w3,g
#---------------------------------------------------------------
    def computeLambda(self,covarF_inv,covarFB,meanF,wB,i,bi):
        #1) C
        onesF=np.ones(meanF.shape)
        c1=np.dot(np.dot(onesF.T,covarF_inv),onesF)
        c2=np.dot(covarF_inv,meanF)
        c3=np.dot(np.dot(onesF.T,covarF_inv),meanF)
        c4=np.dot(covarF_inv,onesF)
        c=-c1*c2[i]+c3*c4[i]
        if c==0:return None,None
        #2) bi
        if type(bi)==list:bi=self.computeBi(c,bi)
        #3) Lambda
        if wB==None:
            # All free assets
            return float((c4[i]-c1*bi)/c),bi
        else:
            onesB=np.ones(wB.shape)
            l1=np.dot(onesB.T,wB)
            l2=np.dot(covarF_inv,covarFB)
            l3=np.dot(l2,wB)
            l2=np.dot(onesF.T,l3)
            return float(((1-l1+l2)*c4[i]-c1*(bi+l3[i]))/c),bi
#---------------------------------------------------------------
    def getMatrices(self,f):
        # Slice covarF,covarFB,covarB,meanF,meanB,wF,wB
        covarF=self.reduceMatrix(self.covar,f,f)
        meanF=self.reduceMatrix(self.mean,f,[0])
        b=self.getB(f)
        covarFB=self.reduceMatrix(self.covar,f,b)
        wB=self.reduceMatrix(self.w[-1],b,[0])
        return covarF,covarFB,meanF,wB
#---------------------------------------------------------------
    def getB(self,f):
        return self.diffLists(range(self.mean.shape[0]),f)
#---------------------------------------------------------------
    def diffLists(self,list1,list2):
        return list(set(list1)-set(list2))
#---------------------------------------------------------------
    def reduceMatrix(self,matrix,listX,listY):
        # Reduce a matrix to the provided list of rows and columns
        if len(listX)==0 or len(listY)==0:return
        matrix_=matrix[:,listY[0]:listY[0]+1]
        for i in listY[1:]:
            a=matrix[:,i:i+1]
            matrix_=np.append(matrix_,a,1)
        matrix__=matrix_[listX[0]:listX[0]+1,:]
        for i in listX[1:]:
            a=matrix_[i:i+1,:]
            matrix__=np.append(matrix__,a,0)
        return matrix__
#---------------------------------------------------------------    
    def purgeNumErr(self,tol):
        # Purge violations of inequality constraints (associated with ill-conditioned covar matrix)
        i=0
        while True:
            flag=False
            if i==len(self.w):break
            if abs(sum(self.w[i])-1)>tol:
                flag=True
            else:
                for j in range(self.w[i].shape[0]):
                    if self.w[i][j]-self.lB[j]<-tol or self.w[i][j]-self.uB[j]>tol:
                        flag=True;break
            if flag==True:
                del self.w[i]
                del self.l[i]
                del self.g[i]
                del self.f[i]
            else:
                i+=1
        return
#---------------------------------------------------------------    
    def purgeExcess(self):
        # Remove violations of the convex hull
        i,repeat=0,False
        while True:
            if repeat==False:i+=1
            if i==len(self.w)-1:break
            w=self.w[i]
            mu=np.dot(w.T,self.mean)[0,0]
            j,repeat=i+1,False
            while True:
                if j==len(self.w):break
                w=self.w[j]
                mu_=np.dot(w.T,self.mean)[0,0]
                if mu<mu_:
                    del self.w[i]
                    del self.l[i]
                    del self.g[i]
                    del self.f[i]
                    repeat=True
                    break
                else:
                    j+=1
        return
#---------------------------------------------------------------
    def getMinVar(self):
        # Get the minimum variance solution
        var=[]
        for w in self.w:
            a=np.dot(np.dot(w.T,self.covar),w)
            var.append(a)
        return min(var)**.5,self.w[var.index(min(var))]
#---------------------------------------------------------------
    def getMaxSR(self):
        # Get the max Sharpe ratio portfolio
        #1) Compute the local max SR portfolio between any two neighbor turning points
        w_sr,sr=[],[]
        for i in range(len(self.w)-1):
            w0=np.copy(self.w[i])
            w1=np.copy(self.w[i+1])
            kargs={'minimum':False,'args':(w0,w1)}
            a,b=self.goldenSection(self.evalSR,0,1,**kargs)
            w_sr.append(a*w0+(1-a)*w1)
            sr.append(b)
        return max(sr),w_sr[sr.index(max(sr))]
#---------------------------------------------------------------
    def evalSR(self,a,w0,w1):
        # Evaluate SR of the portfolio within the convex combination
        w=a*w0+(1-a)*w1
        b=np.dot(w.T,self.mean)[0,0]
        c=np.dot(np.dot(w.T,self.covar),w)[0,0]**.5
        return b/c
#---------------------------------------------------------------
    def goldenSection(self,obj,a,b,**kargs):
        # Golden section method. Maximum if kargs['minimum']==False is passed 
        from math import log,ceil
        tol,sign,args=1.0e-9,1,None
        if 'minimum' in kargs and kargs['minimum']==False:sign=-1
        if 'args' in kargs:args=kargs['args']
        numIter=int(ceil(-2.078087*log(tol/abs(b-a))))
        r=0.618033989
        c=1.0-r
        # Initialize
        x1=r*a+c*b;x2=c*a+r*b
        f1=sign*obj(x1,*args);f2=sign*obj(x2,*args)
        # Loop
        for i in range(numIter):
            if f1>f2:
                a=x1
                x1=x2;f1=f2
                x2=c*a+r*b;f2=sign*obj(x2,*args)
            else:
                b=x2
                x2=x1;f2=f1
                x1=r*a+c*b;f1=sign*obj(x1,*args)
        if f1<f2:return x1,sign*f1
        else:return x2,sign*f2
#---------------------------------------------------------------
    def efFrontier(self,points):
        # Get the efficient frontier
        mu,sigma,weights=[],[],[]
        a=np.linspace(0,1,points/len(self.w))[:-1] # remove the 1, to avoid duplications
        b=range(len(self.w)-1)
        for i in b:
            w0,w1=self.w[i],self.w[i+1]
            if i==b[-1]:a=np.linspace(0,1,points/len(self.w)) # include the 1 in the last iteration
            for j in a:
                w=w1*j+(1-j)*w0
                weights.append(np.copy(w))
                mu.append(np.dot(w.T,self.mean)[0,0])
                sigma.append(np.dot(np.dot(w.T,self.covar),w)[0,0]**.5)
        return mu,sigma,weights
#---------------------------------------------------------------
# end Class CLA
#---------------------------------------------------------------

In [3]:
#---------------------------------------------------------------
def plot2D(x,y,xLabel='',yLabel='',title='',pathChart=None):
    import matplotlib.pyplot as mpl
    fig=mpl.figure()
    ax=fig.add_subplot(1,1,1) #one row, one column, first plot
    ax.plot(x,y,color='blue')
    ax.set_xlabel(xLabel)
    ax.set_ylabel(yLabel,rotation=90)
    mpl.xticks(rotation='vertical')
    mpl.title(title)
    if pathChart==None:
        mpl.show()
    else:
        mpl.savefig(pathChart)
    mpl.clf() # reset pylab
    return

In [4]:
#---------------------------------------------------------------
# Main Section
#---------------------------------------------------------------
start = datetime(2011,8,11)
end = datetime.today()

# stock list
L = np.array(['AAPL', 'SPY', 'DDD', 'SBUX'])

#set up DataFrames
daily_price  = pd.DataFrame(index=pd.bdate_range(start, end)) # business days
daily_return = pd.DataFrame(index=pd.bdate_range(start, end))

In [5]:
# get daily equity "Adj Close" from start to end
# would like to build a database of SP500 stocks instead

daily_price = DataReader(L, 'yahoo', start, end)['Adj Close']
daily_return = np.log(1+daily_price.pct_change())  # for a continuous return number
# cumulative_return = daily_return.cumsum()        useful for a normalized return chart

In [6]:
# create expected return vector, stdev vector and covariance, correlation matrices

R = daily_return.mean() # expected return vector
S = daily_return.std()  # expected standard deviation vector
C = daily_return.cov()  # covariance matrix
Corr =  daily_return.corr() # and a correlation matrix for info

In [7]:
R


Out[7]:
AAPL    0.000614
DDD     0.000159
SBUX    0.000927
SPY     0.000566
dtype: float64

In [8]:
#2) Load data, set seed
headers=L
headers


Out[8]:
array(['AAPL', 'SPY', 'DDD', 'SBUX'], 
      dtype='<U4')

In [9]:
mean=np.array(R)
type(mean)
mean.shape[0]


Out[9]:
4

In [10]:
lB=np.array([0.]*len(L))
lB


Out[10]:
array([ 0.,  0.,  0.,  0.])

In [11]:
uB=np.array([1.]*len(L))
uB


Out[11]:
array([ 1.,  1.,  1.,  1.])

In [12]:
covar=np.array(C)
covar


Out[12]:
array([[  2.83023908e-04,   1.54988750e-04,   8.51512971e-05,
          7.88953313e-05],
       [  1.54988750e-04,   1.36426469e-03,   1.15119500e-04,
          1.46760138e-04],
       [  8.51512971e-05,   1.15119500e-04,   2.26223156e-04,
          8.45306242e-05],
       [  7.88953313e-05,   1.46760138e-04,   8.45306242e-05,
          8.48804481e-05]])

In [13]:
#3) Invoke object
cla=CLA(mean,covar,lB,uB)
cla.solve()
print(cla.w) # print all turning points


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-13-86c9b1845d69> in <module>()
      1 #3) Invoke object
      2 cla=CLA(mean,covar,lB,uB)
----> 3 cla.solve()
      4 print(cla.w) # print all turning points

<ipython-input-2-45dac6181cbd> in solve(self)
     16     def solve(self):
     17         # Compute the turning points,free sets and weights
---> 18         f,w=self.initAlgo()
     19         self.w.append(np.copy(w)) # store solution
     20         self.l.append(None)

<ipython-input-2-45dac6181cbd> in initAlgo(self)
     74         a=np.zeros((self.mean.shape[0]),dtype=[('id',int),('mu',float)])
     75         b=[self.mean[i] for i in range(self.mean.shape[0])] # dump array into list
---> 76         a[:]=zip(range(self.mean.shape[0]),b) # fill structured array
     77         #2) Sort structured array
     78         b=np.sort(a,order='mu')

TypeError: a bytes-like object is required, not 'zip'

In [15]:
#4) Plot frontier
mu,sigma,weights=cla.efFrontier(100)
plot2D(sigma,mu,'Risk','Expected Excess Return','CLA-derived Efficient Frontier')


---------------------------------------------------------------------------
ZeroDivisionError                         Traceback (most recent call last)
<ipython-input-15-753363e43f72> in <module>()
      1 #4) Plot frontier
----> 2 mu,sigma,weights=cla.efFrontier(100)
      3 plot2D(sigma,mu,'Risk','Expected Excess Return','CLA-derived Efficient Frontier')

<ipython-input-3-45dac6181cbd> in efFrontier(self, points)
    263         # Get the efficient frontier
    264         mu,sigma,weights=[],[],[]
--> 265         a=np.linspace(0,1,points/len(self.w))[:-1] # remove the 1, to avoid duplications
    266         b=range(len(self.w)-1)
    267         for i in b:

ZeroDivisionError: division by zero

In [16]:
#5) Get Maximum Sharpe ratio portfolio
sr,w_sr=cla.getMaxSR()
print(np.dot(np.dot(w_sr.T,cla.covar),w_sr)[0,0]**.5,sr)
print(w_sr)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-16-f30279bb0718> in <module>()
      1 #5) Get Maximum Sharpe ratio portfolio
----> 2 sr,w_sr=cla.getMaxSR()
      3 print(np.dot(np.dot(w_sr.T,cla.covar),w_sr)[0,0]**.5,sr)
      4 print(w_sr)

<ipython-input-3-45dac6181cbd> in getMaxSR(self)
    226             w_sr.append(a*w0+(1-a)*w1)
    227             sr.append(b)
--> 228         return max(sr),w_sr[sr.index(max(sr))]
    229 #---------------------------------------------------------------
    230     def evalSR(self,a,w0,w1):

ValueError: max() arg is an empty sequence

In [17]:
#6) Get Minimum Variance portfolio
mv,w_mv=cla.getMinVar()
print(mv)
print(w_mv)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-17-e11e3fc1e732> in <module>()
      1 #6) Get Minimum Variance portfolio
----> 2 mv,w_mv=cla.getMinVar()
      3 print(mv)
      4 print(w_mv)

<ipython-input-3-45dac6181cbd> in getMinVar(self)
    213             a=np.dot(np.dot(w.T,self.covar),w)
    214             var.append(a)
--> 215         return min(var)**.5,self.w[var.index(min(var))]
    216 #---------------------------------------------------------------
    217     def getMaxSR(self):

ValueError: min() arg is an empty sequence

In [ ]: