In [3]:
import numpy
import math
import matplotlib.pyplot as plt
import random
In [4]:
gaussianParams=[[-2,0.5],[2,0.3],[0,1]]
a=-7
b=7
numPoints=500
In [5]:
batchAlpha=5
batchMaxIters=5000
stohAlpha=0.1
stohMaxIters=100000
miniBatchAlpha=5
miniBatchMaxIters=10000
In [6]:
def gaussian(X, x0, sigma):
    res=((1/(math.sqrt(2*math.pi)*sigma))*numpy.exp(-((X-x0)**2)/(2*(sigma)**2)))
    return res
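gaussian evaluates a normalized Gaussian density with mean x0 and standard deviation sigma at every point of X. As an illustrative check (not one of the original cells), the density should integrate to roughly 1 over a wide interval:

Xcheck=numpy.linspace(-7,7,500)
print(numpy.trapz(gaussian(Xcheck,0,1),Xcheck))  # ~1.0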
In [7]:
def gaussianComb(X, params):
    res=numpy.zeros(X.size)
    for par in params:
        x0=par[0]
        sigma=par[1]
        res+=gaussian(X,x0,sigma)
    return res
In [39]:
X=numpy.linspace(a,b,numPoints)
y=gaussianComb(X,gaussianParams)
plt.plot(X,y)
plt.show()
In [8]:
def calcCostFunction(X, y, params):
    # half mean squared error between the Gaussian mixture and the target
    m=X.size
    resX=gaussianComb(X,params)
    costVector=(resX-y)**2
    J=(1/(2*m))*numpy.sum(costVector)
    return J
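J is half the mean squared error between the mixture and the target. Written as a one-liner sketch (params standing for any parameter list such as gaussianParams), the same quantity is:

Jcheck=numpy.mean((gaussianComb(X,params)-y)**2)/2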
In [9]:
def dJ_dsigma(X, x0, sigma):
    # partial derivative of the Gaussian itself with respect to sigma
    part1=(((X-x0)**2)/(sigma**3)) * gaussian(X, x0, sigma)
    part2=(1/sigma) * gaussian(X, x0, sigma)
    res=part1-part2
    return res
In [10]:
def dJ_dx0(X, x0, sigma):
    # partial derivative of the Gaussian itself with respect to x0
    res=((X-x0)/(sigma**2))*gaussian(X, x0, sigma)
    return res
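Despite the dJ_ prefixes, both helpers differentiate the Gaussian g itself: dg/dx0 = g*(x-x0)/sigma**2 and dg/dsigma = g*((x-x0)**2/sigma**3 - 1/sigma). calculateGradient below multiplies these by the residual and averages over the sample, which is exactly the chain rule applied to J.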
In [11]:
def calculateGradient(X, y, params):
    m=X.size
    tempParams=params.tolist()
    k=0
    l=len(tempParams)
    grad=numpy.zeros([l, 2])
    mainPart=gaussianComb(X, params)-y  # residual, shared by every partial
    for par in tempParams:
        x0=par[0]
        sigma=par[1]
        der1=(1/m) * numpy.sum(mainPart * dJ_dx0(X, x0, sigma))
        der2=(1/m) * numpy.sum(mainPart * dJ_dsigma(X, x0, sigma))
        grad[k,0]=der1
        grad[k,1]=der2
        k+=1
    return grad
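The analytic gradient can be checked against central finite differences of calcCostFunction. numericGradient below is a hypothetical helper written for that comparison, not part of the original notebook:

def numericGradient(X, y, params, eps=1e-6):
    # perturb each parameter in turn and difference the cost
    params=numpy.array(params,dtype=float)
    grad=numpy.zeros_like(params)
    for k in range(params.shape[0]):
        for j in range(params.shape[1]):
            plus=params.copy()
            minus=params.copy()
            plus[k,j]+=eps
            minus[k,j]-=eps
            grad[k,j]=(calcCostFunction(X,y,plus)-calcCostFunction(X,y,minus))/(2*eps)
    return grad

testParams=numpy.array([[-1.0,0.8],[1.0,1.2]])
print(numpy.max(numpy.abs(calculateGradient(X,y,testParams)-numericGradient(X,y,testParams))))  # close to 0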
In [12]:
def gradientDescent(X, y, inits, alpha, maxIters):
    currentParams=numpy.array(inits)
    J=[]
    for i in range(maxIters):
        J.append(calcCostFunction(X, y, currentParams))
        currentParams=currentParams - (alpha * calculateGradient(X, y, currentParams))
    return (currentParams, J)
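gradientDescent is the plain full-batch rule: each iteration evaluates the gradient on all m points and steps with a fixed alpha. The variants below reuse the same update but adapt alpha during the run.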
In [13]:
def newGradientDescent(X, y, inits, initAlpha, maxIters):
    currentParams=numpy.array(inits)
    J=[]
    alpha=initAlpha
    alphaList=[]
    prevJ=calcCostFunction(X, y, currentParams)
    prevParams=currentParams
    for i in range(maxIters):
        currentParams=currentParams - (alpha * calculateGradient(X, y, currentParams))
        curJ=calcCostFunction(X, y, currentParams)
        # if the step increased the cost, undo it and retry with half the rate
        while (curJ>prevJ):
            currentParams=prevParams
            alpha/=2
            currentParams=currentParams - (alpha * calculateGradient(X, y, currentParams))
            curJ=calcCostFunction(X, y, currentParams)
        prevParams=currentParams
        prevJ=curJ
        J.append(curJ)
        alphaList.append(alpha)
    return (currentParams, J, alphaList)
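The inner while loop is a crude backtracking scheme: whenever a step increases J, it is undone and alpha is halved until the cost stops growing. That makes a deliberately large initAlpha safe (the run below starts at 500) at the price of extra cost evaluations; note that alpha is only ever shrunk, never grown back.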
In [14]:
def stohasticGradientDescent(X, y, inits, alpha, maxIters):
    currentParams=numpy.array(inits)
    J=[]
    m=X.size
    for i in range(maxIters):
        J.append(calcCostFunction(X, y, currentParams))
        # gradient step on a single uniformly drawn point
        randNum=random.randint(0,m-1)
        x=numpy.array([X[randNum]])
        randY=numpy.array([y[randNum]])
        currentParams=currentParams - (alpha * calculateGradient(x, randY, currentParams))
    return (currentParams, J)
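Here each parameter update uses the gradient at a single uniformly drawn point rather than all m points (the per-iteration cost is still dominated by logging J on the full set), which is why stohMaxIters is set 20 times higher than batchMaxIters.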
In [36]:
def newStohasticGradientDescent(X, y, inits, initAlpha, maxIters):
    currentParams=numpy.array(inits)
    J=[]
    alpha=initAlpha
    alphaList=[]
    m=X.size
    meanInterval=300
    prevMeanJ=calcCostFunction(X, y, currentParams)
    sumJ=0
    for i in range(maxIters):
        randNum=random.randint(0,m-1)
        x=numpy.array([X[randNum]])
        randY=numpy.array([y[randNum]])
        currentParams=currentParams - (alpha * calculateGradient(x, randY, currentParams))
        curJ=calcCostFunction(X, y, currentParams)
        J.append(curJ)
        sumJ+=curJ
        if (((i+1)%meanInterval)==0):
            # compare the mean cost over the last full window, not a single sample
            curMeanJ=sumJ/meanInterval
            if (curMeanJ>prevMeanJ):
                alpha/=2
            prevMeanJ=curMeanJ
            sumJ=0
        alphaList.append(alpha)
    return (currentParams, J, alphaList)
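The cost trace is noisy under single-point updates, so this version averages it over windows of meanInterval=300 updates and halves alpha only when the windowed mean stops decreasing, rather than reacting to every uptick the way newGradientDescent does.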
In [55]:
def miniBatchGradientDescent(X, y, inits, alpha, maxIters):
    currentParams=numpy.array(inits)
    J=[]
    m=X.size
    for i in range(maxIters):
        J.append(calcCostFunction(X, y, currentParams))
        # batch is a random contiguous slice of the grid, so its size varies
        leftBorder=random.randint(0,m-2)
        rightBorder=random.randint(leftBorder+1,m-1)
        randX=X[leftBorder:rightBorder]
        randY=y[leftBorder:rightBorder]
        currentParams=currentParams - (alpha * calculateGradient(randX, randY, currentParams))
    return (currentParams, J)
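The batch here is a random contiguous slice of the grid, so its size varies anywhere from 1 to m-1 and the points in a batch are neighbours rather than a spread-out sample. The variant below draws a fixed number of indices with replacement instead, which is the more common formulation.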
In [54]:
def newMiniBatchGradientDescent(X, y, inits, initAlpha, maxIters, amountOfRandomNumbers):
    currentParams=numpy.array(inits)
    J=[]
    m=X.size
    alpha=initAlpha
    alphaList=[]
    prevJ=calcCostFunction(X, y, currentParams)
    prevParams=currentParams
    for i in range(maxIters):
        # draw a fixed-size batch by sampling indices with replacement
        randNumbers=[]
        for j in range(amountOfRandomNumbers):
            randNumbers.append(random.randint(0,m-1))
        randX=numpy.array([X[r] for r in randNumbers])
        randY=numpy.array([y[r] for r in randNumbers])
        currentParams=currentParams - (alpha * calculateGradient(randX, randY, currentParams))
        curJ=calcCostFunction(X, y, currentParams)
        # if the step increased the full cost, undo it and retry with half the rate
        while (curJ>prevJ):
            currentParams=prevParams
            alpha/=2
            currentParams=currentParams - (alpha * calculateGradient(randX, randY, currentParams))
            curJ=calcCostFunction(X, y, currentParams)
        prevParams=currentParams
        prevJ=curJ
        J.append(curJ)
        alphaList.append(alpha)
    return (currentParams, J, alphaList)
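Because curJ is still evaluated on the full data set, the halving rule keeps the logged cost non-increasing just as in newGradientDescent, while each gradient step only touches amountOfRandomNumbers points; the trade-off is that a batch whose gradient points uphill on the full cost can drive alpha very small.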
In [18]:
def drawGaussians(X,params,col,graphType):
    y=gaussianComb(X,params)
    if graphType is None:
        plt.plot(X,y,color=col)
    else:
        plt.plot(X,y,graphType,color=col)
In [19]:
def draw(X, y, col, graphType):
    if graphType is None:
        plt.plot(X,y,color=col)
    else:
        plt.plot(X,y,graphType,color=col)
In [53]:
res=newGradientDescent(X, y, [[-5,1],[1,1],[5,1]],500, batchMaxIters)
J=res[1]
alphaList=res[2]
aprY=gaussianComb(X, res[0])
fig1=plt.figure(figsize=(7,7))
plt.title("Original and approximated function")
draw(X,aprY,'blue',None)
drawGaussians(X,gaussianParams,'red','x')
plt.show()
fig2=plt.figure(figsize=(7,7))
plt.title('Cost function J')
plt.plot(range(1,len(J)+1),J)
plt.show()
fig3=plt.figure(figsize=(7,7))
plt.title('Learning rate')
plt.plot(alphaList,color='green')
plt.show()
err=y-aprY
fig4=plt.figure(figsize=(7,7))
plt.title('Error')
draw(X,err,'magenta',None)
plt.show()
print("Coefs:")
print(res[0])
print("Mean error: "+str(numpy.mean(err)))
In [23]:
res=miniBatchGradientDescent(X, y, [[3,3],[4,4]],miniBatchAlpha,miniBatchMaxIters)
J=res[1]
plt.plot(range(1,len(J)+1),J)
plt.show()
print(res[0])
In [42]:
res=newStohasticGradientDescent(X, y, [[-2,1],[1,1],[2,1]],1, stohMaxIters*2)
J=res[1]
alphaList=res[2]
aprY=gaussianComb(X, res[0])
fig1=plt.figure(figsize=(7,7))
plt.title("Original and approximated function")
draw(X,aprY,'blue',None)
drawGaussians(X,gaussianParams,'red','x')
plt.show()
fig2=plt.figure(figsize=(7,7))
plt.title('Cost function J')
plt.plot(range(1,len(J)+1),J)
plt.show()
fig3=plt.figure(figsize=(7,7))
plt.title('Learning rate')
plt.plot(alphaList,color='green')
plt.show()
err=y-aprY
fig4=plt.figure(figsize=(7,7))
plt.title('Error')
draw(X,err,'magenta',None)
plt.show()
print("Coefs:")
print(res[0])
print("Mean error: "+str(numpy.mean(err)))
In [56]:
res=newMiniBatchGradientDescent(X, y, [[-2,1],[1,1],[2,1]],10, miniBatchMaxIters, 500)  # 500 points per mini-batch
J=res[1]
alphaList=res[2]
aprY=gaussianComb(X, res[0])
fig1=plt.figure(figsize=(7,7))
plt.title("Original and approximated function")
draw(X,aprY,'blue',None)
drawGaussians(X,gaussianParams,'red','x')
plt.show()
fig2=plt.figure(figsize=(7,7))
plt.title('Cost function J')
plt.plot(range(1,len(J)+1),J)
plt.show()
fig3=plt.figure(figsize=(7,7))
plt.title('Learning rate')
plt.plot(alphaList,color='green')
plt.show()
err=y-aprY
fig4=plt.figure(figsize=(7,7))
plt.title('Error')
draw(X,err,'magenta',None)
plt.show()
print("Coefs:")
print(res[0])
print("Mean error: "+str(numpy.mean(err)))