In [1]:
import numpy
import math
import matplotlib.pyplot as plt
import random

In [2]:
gaussianParams=[[3,1],[0,0.5],[0.1,3]]
a=-10
b=10
numPoints=2000

In [31]:
batchAlpha=5
batchMaxIters=10000

stohAlpha=0.1
stohMaxIters=100000

miniBatchAlpha=5
miniBatchMaxIters=40000

In [4]:
def gaussian(X, x0, sigma):
    res=((1/(math.sqrt(2*math.pi)*sigma))*numpy.exp(-((X-x0)**2)/(2*(sigma)**2)))
    return res

In [5]:
def gaussianComb(X, params):
    res=numpy.zeros(X.size)
    for par in params:
            x0=par[0]
            sigma=par[1]
            res+=gaussian(X,x0,sigma)
    return res

In [6]:
X=numpy.linspace(a,b,numPoints)
y=gaussianComb(X,gaussianParams)
plt.plot(X,y)
plt.show()



In [8]:
def calcCostFunction(X, y, params):
    times=len(params)
    m=X.size
    resX=gaussianComb(X,params)
    costVector=(resX-y)**2
    J=(1/(2*m))*numpy.sum(costVector)
    return J

In [9]:
def dJ_dsigma(X, x0, sigma):
    part1=(((X-x0)**2)/(sigma**3)) * gaussian(X, x0, sigma)
    part2=(1/sigma) * gaussian(X, x0, sigma)
    res=part1-part2
    return res

In [10]:
def dJ_dx0(X, x0, sigma):
    res=((X-x0)/(sigma**2))*gaussian(X, x0, sigma)
    return res

In [11]:
def calculateGradient(X, y, params):
    m=X.size
    tempParams=params.tolist()
    k=0
    l=len(tempParams)
    grad=numpy.zeros([l , 2])
    mainPart=gaussianComb(X, params)-y
    for par in tempParams:
        x0=par[0]
        sigma=par[1]
        der1=(1/m) * numpy.sum(mainPart * dJ_dx0(X, x0, sigma))
        der2=(1/m) * numpy.sum(mainPart * dJ_dsigma(X, x0, sigma))
        grad[k,0]=der1
        grad[k,1]=der2
        k+=1
    return grad

In [12]:
def gradientDescent(X, y, inits, alpha, maxIters):
    currentParams=numpy.array(inits)
    J=[]
    l=len(inits)
    for i in range(maxIters):
        J.append(calcCostFunction(X, y, currentParams))
        currentParams=currentParams - (alpha * calculateGradient(X, y, currentParams))
    return (currentParams, J)

In [13]:
def newGradientDescent(X, y, inits, initAlpha, maxIters):
    currentParams=numpy.array(inits)
    J=[]
    alpha=initAlpha
    alphaList=[]
    l=len(inits)
    prevJ=calcCostFunction(X, y, currentParams)
    prevParams=currentParams
    for i in range(maxIters):
        currentParams=currentParams - (alpha * calculateGradient(X, y, currentParams))
        curJ=calcCostFunction(X, y, currentParams)
        while (curJ>prevJ):
            currentParams=prevParams
            alpha/=2
            currentParams=currentParams - (alpha * calculateGradient(X, y, currentParams))
            curJ=calcCostFunction(X, y, currentParams)
        prevParams=currentParams
        prevJ=curJ
        J.append(curJ)
        alphaList.append(alpha)
    return (currentParams, J, alphaList)

In [14]:
def stohasticGradientDescent(X, y, inits, alpha, maxIters):
    currentParams=numpy.array(inits)
    J=[]
    l=len(inits)
    m=X.size
    for i in range(maxIters):
        J.append(calcCostFunction(X, y, currentParams))
        randNum=random.randint(0,m-1)
        x=numpy.array([X[randNum]])
        randY=numpy.array([y[randNum]])
        currentParams=currentParams - (alpha * calculateGradient(x, randY, currentParams))
    return (currentParams, J)

In [15]:
def newStohasticGradientDescent(X, y, inits, initAlpha, maxIters):
    currentParams=numpy.array(inits)
    J=[]
    alpha=initAlpha
    alphaList=[]
    l=len(inits)
    m=X.size
    meanInterval=100
    prevMeanJ=calcCostFunction(X, y, currentParams)
    sumJ=0
    prevParams=currentParams
    for i in range(maxIters):
        randNum=random.randint(0,m-1)
        x=numpy.array([X[randNum]])
        randY=numpy.array([y[randNum]])
        currentParams=currentParams - (alpha * calculateGradient(x, randY, currentParams))
        curJ=calcCostFunction(X, y, currentParams)
        J.append(curJ)
        sumJ+=curJ
        if ((i%meanInterval)==0):
            curMeanJ=curJ/meanInterval
            if (curMeanJ>prevMeanJ):
                alpha/=2
            prevMeanJ=curMeanJ
            sumJ=0
        alphaList.append(alpha)
    return (currentParams, J, alphaList)

In [16]:
def miniBatchGradientDescent(X, y, inits, alpha, maxIters):
    currentParams=numpy.array(inits)
    J=[]
    m=X.size
    l=len(inits)
    for i in range(maxIters):
        J.append(calcCostFunction(X, y, currentParams))
        leftBorder=random.randint(0,m-2)
        rightBorder=random.randint(leftBorder+1,m-1)
        randX=X[leftBorder:rightBorder]
        randY=y[leftBorder:rightBorder]
        currentParams=currentParams - (alpha * calculateGradient(randX, randY, currentParams))
    return (currentParams, J)

In [102]:
def newMiniBatchGradientDescent(X, y, inits, initAlpha, maxIters, amountOfRandomNumbers):
    currentParams=numpy.array(inits)
    J=[]
    m=X.size
    alpha=initAlpha
    alphaList=[]
    #amountOfRandNumbers=500
    l=len(inits)
    prevJ=calcCostFunction(X, y, currentParams)
    prevParams=currentParams
    for i in range(maxIters):
        randNumbers=[]
        for j in range(amountOfRandomNumbers):
            randNumbers.append(random.randint(0,m-1))
        randX=numpy.array([X[r] for r in randNumbers])
        randY=numpy.array([y[r] for r in randNumbers])
        currentParams=currentParams - (alpha * calculateGradient(randX, randY, currentParams))
        curJ=calcCostFunction(X, y, currentParams)
        while (curJ>prevJ):
            currentParams=prevParams
            alpha/=2
            currentParams=currentParams - (alpha * calculateGradient(randX, randY, currentParams))
            curJ=calcCostFunction(X, y, currentParams)
        prevParams=currentParams
        prevJ=curJ
        J.append(curJ)
        alphaList.append(alpha)
    return (currentParams, J, alphaList)

In [18]:
def drawGaussians(X,params,col,graphType):
    y=gaussianComb(X,params)
    if graphType==None:
        plt.plot(X,y,color=col)
    else:
        plt.plot(X,y,graphType,color=col)

In [19]:
def draw(X, y, col, graphType):
    if graphType==None:
        plt.plot(X,y,color=col)
    else:
        plt.plot(X,y,graphType,color=col)

In [93]:
res=gradientDescent(X, y, [[-5,1],[3,2],[5,1]],5, batchMaxIters)
J=res[1]
#alphaList=res[2]
aprY=gaussianComb(X, res[0])
fig1=plt.figure(figsize=(7,7))
plt.title("Original and approximated function")
draw(X,aprY,'blue',None)
drawGaussians(X,gaussianParams,'red','x')
plt.show()
fig2=plt.figure(figsize=(7,7))
plt.title('Cost function J')
plt.plot(range(1,len(J)+1),J)
plt.show()
#fig3=plt.figure(figsize=(7,7))
#plt.title('Learning rate')
#plt.plot(alphaList,color='green')
#plt.show()
err=y-aprY
fig4=plt.figure(figsize=(7,7))
plt.title('Error')
draw(X,err,'magenta',None)
plt.show()
print("Coefs:")
print(res[0])
print("Mean error: "+str(numpy.mean(err)))
i=0
for j in J:
    if (j<=0.000001):
        print('J=0 on iteration '+str(i))
        break
    i+=1


Coefs:
[[  2.98132836e+00   1.02597825e+00]
 [ -1.94538584e-04   5.00043311e-01]
 [  3.01723481e+00   9.74752581e-01]]
Mean error: 8.54433902103e-14
J=0 on iteration 5383

In [101]:
res=newGradientDescent(X, y, [[-15,1],[3,2],[5,1]],100, batchMaxIters*3)
J=res[1]
alphaList=res[2]
aprY=gaussianComb(X, res[0])
fig1=plt.figure(figsize=(7,7))
plt.title("Original and approximated function")
draw(X,aprY,'blue',None)
drawGaussians(X,gaussianParams,'red','x')
plt.show()
fig2=plt.figure(figsize=(7,7))
plt.title('Cost function J')
plt.plot(range(1,len(J)+1),J)
plt.show()
fig3=plt.figure(figsize=(7,7))
plt.title('Learning rate')
plt.plot(alphaList,color='green')
plt.show()
err=y-aprY
fig4=plt.figure(figsize=(7,7))
plt.title('Error')
draw(X,err,'magenta',None)
plt.show()
print("Coefs:")
print(res[0])
print("Mean error: "+str(numpy.mean(err)))
i=0
for j in J:
    if (j<=0.000001):
        print('J=0 on iteration '+str(i))
        break
    i+=1


Coefs:
[[ -1.49999998e+01   1.00000096e+00]
 [ -7.79028374e-03   4.91759223e-01]
 [  2.82242750e+00   1.00905531e+00]]
Mean error: 0.0499320730531

In [68]:
res=newStohasticGradientDescent(X, y, [[-2,1],[1,1],[2,1]],1, stohMaxIters*2)
J=res[1]
alphaList=res[2]
aprY=gaussianComb(X, res[0])
fig1=plt.figure(figsize=(7,7))
plt.title("Original and approximated function")
draw(X,aprY,'blue',None)
drawGaussians(X,gaussianParams,'red','x')
plt.show()
fig2=plt.figure(figsize=(7,7))
plt.title('Cost function J')
plt.plot(range(1,len(J)+1),J)
plt.show()
fig3=plt.figure(figsize=(7,7))
plt.title('Learning rate')
plt.plot(alphaList,color='green')
plt.show()
err=y-aprY
fig4=plt.figure(figsize=(7,7))
plt.title('Error')
draw(X,err,'magenta',None)
plt.show()
print("Coefs:")
print(res[0])
print("Mean error: "+str(numpy.mean(err)))
i=0
for j in J:
    if (j<=0.000001):
        print('J=0 on iteration '+str(i))
        break
    i+=1


Coefs:
[[-1.88574841  0.54733383]
 [ 1.97339073  0.30658723]
 [ 3.45944345  3.74351628]]
Mean error: 0.0123857212037

In [87]:
res=newMiniBatchGradientDescent(X, y, [[-2,1],[1,1],[2,1]],10, 30000, 500)
J=res[1]
alphaList=res[2]
aprY=gaussianComb(X, res[0])
fig1=plt.figure(figsize=(7,7))
plt.title("Original and approximated function")
draw(X,aprY,'blue',None)
drawGaussians(X,gaussianParams,'red','x')
plt.show()
fig2=plt.figure(figsize=(7,7))
plt.title('Cost function J')
plt.plot(range(1,len(J)+1),J)
plt.show()
fig3=plt.figure(figsize=(7,7))
plt.title('Learning rate')
plt.plot(alphaList,color='green')
plt.show()
err=y-aprY
fig4=plt.figure(figsize=(7,7))
plt.title('Error')
draw(X,err,'magenta',None)
plt.show()
print("Coefs:")
print(res[0])
print("Mean error: "+str(numpy.mean(err)))
i=0
for j in J:
    if (j<=0.0001):
        print('J=0 on iteration '+str(i))
        break
    i+=1


Coefs:
[[ -2.00000000e+00   5.00000000e-01]
 [  6.41559255e-13   1.00000000e+00]
 [  2.00000000e+00   3.00000000e-01]]
Mean error: 2.01722712153e-18
J=0 on iteration 2049