In [3]:
import numpy
import math
import matplotlib.pyplot as plt
import random

In [4]:
# Target mixture: each entry is one Gaussian component given as [x0, sigma].
gaussianParams=[[-2,0.5],[2,0.3],[0,1]]
# Sampling window [a, b] and number of grid points for the x-axis.
a=-7
b=7
numPoints=500

In [5]:
# Hyper-parameters: learning rate and iteration budget for each optimizer.
batchAlpha=5
batchMaxIters=5000

stohAlpha=0.1
stohMaxIters=100000

miniBatchAlpha=5
miniBatchMaxIters=10000

In [6]:
def gaussian(X, x0, sigma):
    """Gaussian density with mean x0 and std sigma, evaluated elementwise at X."""
    norm = 1 / (math.sqrt(2 * math.pi) * sigma)
    exponent = -((X - x0) ** 2) / (2 * sigma ** 2)
    return norm * numpy.exp(exponent)

In [7]:
def gaussianComb(X, params):
    """Sum of Gaussian densities; params is an iterable of (x0, sigma) pairs."""
    total = numpy.zeros(X.size)
    for x0, sigma in params:
        total += gaussian(X, x0, sigma)
    return total

In [39]:
# Build the x-grid and the target curve (sum of the configured Gaussians),
# then plot the function the optimizers below will try to recover.
X=numpy.linspace(a,b,numPoints)
y=gaussianComb(X,gaussianParams)
plt.plot(X,y)
plt.show()



In [8]:
def calcCostFunction(X, y, params):
    """Mean-squared-error cost between the mixture f(X; params) and the target y.

    J = (1/(2m)) * sum((f(X) - y)^2), where m is the number of samples.
    Returns a scalar float.
    """
    # Removed unused local `times = len(params)` from the original.
    m = X.size
    resX = gaussianComb(X, params)
    costVector = (resX - y) ** 2
    J = (1 / (2 * m)) * numpy.sum(costVector)
    return J

In [9]:
def dJ_dsigma(X, x0, sigma):
    """Partial derivative of the Gaussian density with respect to sigma."""
    g = gaussian(X, x0, sigma)
    grow = (((X - x0) ** 2) / (sigma ** 3)) * g
    shrink = (1 / sigma) * g
    return grow - shrink

In [10]:
def dJ_dx0(X, x0, sigma):
    """Partial derivative of the Gaussian density with respect to its mean x0."""
    scale = (X - x0) / (sigma ** 2)
    return scale * gaussian(X, x0, sigma)

In [11]:
def calculateGradient(X, y, params):
    """Gradient of the cost with respect to every (x0, sigma) pair.

    Expects params to be a numpy array (it calls .tolist()).  Returns an
    (n_components, 2) array: column 0 is dJ/dx0, column 1 is dJ/dsigma.
    """
    m = X.size
    residual = gaussianComb(X, params) - y
    pairs = params.tolist()
    grad = numpy.zeros([len(pairs), 2])
    for k, (x0, sigma) in enumerate(pairs):
        grad[k, 0] = (1 / m) * numpy.sum(residual * dJ_dx0(X, x0, sigma))
        grad[k, 1] = (1 / m) * numpy.sum(residual * dJ_dsigma(X, x0, sigma))
    return grad

In [12]:
def gradientDescent(X, y, inits, alpha, maxIters):
    """Plain batch gradient descent with a fixed learning rate.

    Parameters:
        X, y     -- sample grid and target values (numpy arrays).
        inits    -- initial [x0, sigma] pairs (list of lists).
        alpha    -- constant learning rate.
        maxIters -- number of full-batch update steps.

    Returns (finalParams, J) where J is the per-iteration cost history.
    """
    # Removed unused local `l = len(inits)` from the original.
    currentParams = numpy.array(inits)
    J = []
    for i in range(maxIters):
        J.append(calcCostFunction(X, y, currentParams))
        currentParams = currentParams - (alpha * calculateGradient(X, y, currentParams))
    return (currentParams, J)

In [13]:
def newGradientDescent(X, y, inits, initAlpha, maxIters):
    """Batch gradient descent with backtracking on the learning rate.

    Whenever a step increases the cost, the step is undone and alpha is
    halved before retrying, so alpha only ever decreases.

    Returns (finalParams, J, alphaList) -- cost and learning-rate history.
    """
    # Removed unused local `l = len(inits)` from the original.
    currentParams = numpy.array(inits)
    J = []
    alpha = initAlpha
    alphaList = []
    prevJ = calcCostFunction(X, y, currentParams)
    prevParams = currentParams
    for i in range(maxIters):
        currentParams = currentParams - (alpha * calculateGradient(X, y, currentParams))
        curJ = calcCostFunction(X, y, currentParams)
        # Backtracking: undo the step and halve alpha until the cost no
        # longer increases (the step shrinks toward zero as alpha -> 0).
        while (curJ > prevJ):
            currentParams = prevParams
            alpha /= 2
            currentParams = currentParams - (alpha * calculateGradient(X, y, currentParams))
            curJ = calcCostFunction(X, y, currentParams)
        prevParams = currentParams
        prevJ = curJ
        J.append(curJ)
        alphaList.append(alpha)
    return (currentParams, J, alphaList)

In [14]:
def stohasticGradientDescent(X, y, inits, alpha, maxIters):
    """Stochastic gradient descent: each step uses one randomly chosen sample.

    Uses the module-level `random` generator; seed it for reproducible runs.
    Returns (finalParams, J) where J is the full-dataset cost history.
    """
    # Removed unused local `l = len(inits)` from the original.
    currentParams = numpy.array(inits)
    J = []
    m = X.size
    for i in range(maxIters):
        J.append(calcCostFunction(X, y, currentParams))
        randNum = random.randint(0, m - 1)
        # Wrap the single sample in 1-element arrays so calculateGradient
        # can treat it like a (tiny) batch.
        x = numpy.array([X[randNum]])
        randY = numpy.array([y[randNum]])
        currentParams = currentParams - (alpha * calculateGradient(x, randY, currentParams))
    return (currentParams, J)

In [36]:
def newStohasticGradientDescent(X, y, inits, initAlpha, maxIters):
    """Stochastic gradient descent with a halving learning-rate schedule.

    Every `meanInterval` iterations the mean cost over the just-finished
    window is compared with the previous window's mean; if it increased,
    alpha is halved.

    Returns (finalParams, J, alphaList) -- cost and learning-rate history.
    """
    # Removed unused locals `l` and `prevParams` from the original.
    currentParams = numpy.array(inits)
    J = []
    alpha = initAlpha
    alphaList = []
    m = X.size
    meanInterval = 300
    prevMeanJ = calcCostFunction(X, y, currentParams)
    sumJ = 0
    for i in range(maxIters):
        randNum = random.randint(0, m - 1)
        x = numpy.array([X[randNum]])
        randY = numpy.array([y[randNum]])
        currentParams = currentParams - (alpha * calculateGradient(x, randY, currentParams))
        curJ = calcCostFunction(X, y, currentParams)
        J.append(curJ)
        sumJ += curJ
        # Evaluate once the window holds exactly meanInterval samples.
        if (((i + 1) % meanInterval) == 0):
            # BUG FIX: the original used curJ/meanInterval, which ignored
            # the accumulated sumJ entirely (dead accumulator) and compared
            # a single cost divided by 300 against the window mean.
            curMeanJ = sumJ / meanInterval
            if (curMeanJ > prevMeanJ):
                alpha /= 2
            prevMeanJ = curMeanJ
            sumJ = 0
        alphaList.append(alpha)
    return (currentParams, J, alphaList)

In [55]:
def miniBatchGradientDescent(X, y, inits, alpha, maxIters):
    """Mini-batch gradient descent over a random *contiguous* slice.

    Each iteration draws random borders and uses X[leftBorder:rightBorder]
    as the batch, so the batch size itself is random (1..m-1 samples).

    Returns (finalParams, J) with the full-dataset cost history.
    """
    # Removed unused local `l = len(inits)` from the original.
    currentParams = numpy.array(inits)
    J = []
    m = X.size
    for i in range(maxIters):
        J.append(calcCostFunction(X, y, currentParams))
        leftBorder = random.randint(0, m - 2)
        rightBorder = random.randint(leftBorder + 1, m - 1)
        randX = X[leftBorder:rightBorder]
        randY = y[leftBorder:rightBorder]
        currentParams = currentParams - (alpha * calculateGradient(randX, randY, currentParams))
    return (currentParams, J)

In [54]:
def newMiniBatchGradientDescent(X, y, inits, initAlpha, maxIters, amountOfRandomNumbers=500):
    """Mini-batch gradient descent (sampling with replacement) with
    backtracking on the learning rate.

    amountOfRandomNumbers -- mini-batch size.  Defaults to 500 (the value
    from the original commented-out constant) so existing five-argument
    calls keep working.

    Returns (finalParams, J, alphaList) -- cost and learning-rate history.
    """
    # Removed unused local `l = len(inits)` from the original.
    currentParams = numpy.array(inits)
    J = []
    m = X.size
    alpha = initAlpha
    alphaList = []
    prevJ = calcCostFunction(X, y, currentParams)
    prevParams = currentParams
    for i in range(maxIters):
        # BUG FIX: the original body referenced `amountOfRandNumbers`,
        # which does not match the parameter name and raised NameError.
        randNumbers = [random.randint(0, m - 1) for j in range(amountOfRandomNumbers)]
        randX = numpy.array([X[r] for r in randNumbers])
        randY = numpy.array([y[r] for r in randNumbers])
        currentParams = currentParams - (alpha * calculateGradient(randX, randY, currentParams))
        curJ = calcCostFunction(X, y, currentParams)
        # Backtracking: undo the step and halve alpha while the same batch
        # made the full-dataset cost worse.
        while (curJ > prevJ):
            currentParams = prevParams
            alpha /= 2
            currentParams = currentParams - (alpha * calculateGradient(randX, randY, currentParams))
            curJ = calcCostFunction(X, y, currentParams)
        prevParams = currentParams
        prevJ = curJ
        J.append(curJ)
        alphaList.append(alpha)
    return (currentParams, J, alphaList)

In [18]:
def drawGaussians(X, params, col, graphType):
    """Plot the Gaussian mixture defined by `params` in color `col`.

    `graphType` is an optional matplotlib format string (e.g. 'x');
    pass None for a plain line.
    """
    curve = gaussianComb(X, params)
    plotArgs = [X, curve] if graphType is None else [X, curve, graphType]
    plt.plot(*plotArgs, color=col)

In [19]:
def draw(X, y, col, graphType):
    """Plot y against X in color `col`.

    `graphType` is an optional matplotlib format string; pass None for a
    plain line.
    """
    plotArgs = [X, y] if graphType is None else [X, y, graphType]
    plt.plot(*plotArgs, color=col)

In [53]:
# Fit three Gaussians with adaptive batch gradient descent.
# NOTE(review): the learning rate here is the literal 500, not the
# batchAlpha constant from the config cell -- confirm this is intentional.
res=newGradientDescent(X, y, [[-5,1],[1,1],[5,1]],500, batchMaxIters)
J=res[1]
alphaList=res[2]
aprY=gaussianComb(X, res[0])
# Target curve (red 'x') vs. the fitted approximation (blue line).
fig1=plt.figure(figsize=(7,7))
plt.title("Original and approximated function")
draw(X,aprY,'blue',None)
drawGaussians(X,gaussianParams,'red','x')
plt.show()
# Cost history per iteration.
fig2=plt.figure(figsize=(7,7))
plt.title('Cost function J')
plt.plot(range(1,len(J)+1),J)
plt.show()
# Learning-rate schedule produced by the backtracking search.
fig3=plt.figure(figsize=(7,7))
plt.title('Learning rate')
plt.plot(alphaList,color='green')
plt.show()
# Pointwise residual between target and fit.
err=y-aprY
fig4=plt.figure(figsize=(7,7))
plt.title('Error')
draw(X,err,'magenta',None)
plt.show()
print("Coefs:")
print(res[0])
print("Mean error: "+str(numpy.mean(err)))


Coefs:
[[ -2.00000000e+00   5.00000000e-01]
 [ -4.57966998e-15   1.00000000e+00]
 [  2.00000000e+00   3.00000000e-01]]
Mean error: -2.00508459965e-18

In [23]:
# Fixed-rate mini-batch GD with only two components and far-off inits
# ([3,3],[4,4]); per the printed result below it settles on parameters
# that do not match gaussianParams.
res=miniBatchGradientDescent(X, y, [[3,3],[4,4]],miniBatchAlpha,miniBatchMaxIters)
J=res[1]
# Cost history per iteration.
plt.plot(range(1,len(J)+1),J)
plt.show()
print(res[0])


[[ 1.47160716  1.13088317]
 [ 1.52774185  1.12727246]]

In [42]:
# Fit with adaptive stochastic gradient descent.
# NOTE(review): uses literal alpha 1 and stohMaxIters*2 instead of the
# config-cell constants -- confirm intentional.
res=newStohasticGradientDescent(X, y, [[-2,1],[1,1],[2,1]],1, stohMaxIters*2)
J=res[1]
alphaList=res[2]
aprY=gaussianComb(X, res[0])
# Target curve (red 'x') vs. the fitted approximation (blue line).
fig1=plt.figure(figsize=(7,7))
plt.title("Original and approximated function")
draw(X,aprY,'blue',None)
drawGaussians(X,gaussianParams,'red','x')
plt.show()
# Cost history per iteration.
fig2=plt.figure(figsize=(7,7))
plt.title('Cost function J')
plt.plot(range(1,len(J)+1),J)
plt.show()
# Learning-rate schedule (halved when a window's mean cost increases).
fig3=plt.figure(figsize=(7,7))
plt.title('Learning rate')
plt.plot(alphaList,color='green')
plt.show()
# Pointwise residual between target and fit.
err=y-aprY
fig4=plt.figure(figsize=(7,7))
plt.title('Error')
draw(X,err,'magenta',None)
plt.show()
print("Coefs:")
print(res[0])
print("Mean error: "+str(numpy.mean(err)))


Coefs:
[[ -1.99999914e+00   4.99999967e-01]
 [  6.98097548e-06   1.00000005e+00]
 [  2.00000021e+00   2.99999962e-01]]
Mean error: 3.83536518517e-20

In [56]:
# Fit with adaptive mini-batch gradient descent.
# NOTE(review): newMiniBatchGradientDescent is defined with a sixth
# parameter `amountOfRandomNumbers` that has no default, so this
# five-argument call would raise TypeError on a fresh Restart-and-Run-All
# -- confirm, or give that parameter a default value.
res=newMiniBatchGradientDescent(X, y, [[-2,1],[1,1],[2,1]],10, miniBatchMaxIters)
J=res[1]
alphaList=res[2]
aprY=gaussianComb(X, res[0])
# Target curve (red 'x') vs. the fitted approximation (blue line).
fig1=plt.figure(figsize=(7,7))
plt.title("Original and approximated function")
draw(X,aprY,'blue',None)
drawGaussians(X,gaussianParams,'red','x')
plt.show()
# Cost history per iteration.
fig2=plt.figure(figsize=(7,7))
plt.title('Cost function J')
plt.plot(range(1,len(J)+1),J)
plt.show()
# Learning-rate schedule produced by the backtracking search.
fig3=plt.figure(figsize=(7,7))
plt.title('Learning rate')
plt.plot(alphaList,color='green')
plt.show()
# Pointwise residual between target and fit.
err=y-aprY
fig4=plt.figure(figsize=(7,7))
plt.title('Error')
draw(X,err,'magenta',None)
plt.show()
print("Coefs:")
print(res[0])
print("Mean error: "+str(numpy.mean(err)))


Coefs:
[[ -1.99999970e+00   5.00000010e-01]
 [  2.36796918e-06   1.00000001e+00]
 [  2.00000007e+00   2.99999993e-01]]
Mean error: -1.62920621005e-17