In [1]:
import numpy
import math
import matplotlib.pyplot as plt
import random
In [2]:
gaussianParams=[[3,1],[0,0.5],[0.1,3]]  # target mixture: one [x0, sigma] pair per component
a=-10                                   # left end of the sampling interval
b=10                                    # right end of the sampling interval
numPoints=2000                          # number of sample points on [a, b]
In [31]:
batchAlpha=5            # learning rate for batch gradient descent
batchMaxIters=10000     # iteration cap for batch gradient descent
stohAlpha=0.1           # learning rate for stochastic gradient descent
stohMaxIters=100000     # iteration cap for stochastic gradient descent
miniBatchAlpha=5        # learning rate for mini-batch gradient descent
miniBatchMaxIters=40000 # iteration cap for mini-batch gradient descent
In [4]:
def gaussian(X, x0, sigma):
    # Gaussian density with mean x0 and standard deviation sigma, evaluated over X
    res=(1/(math.sqrt(2*math.pi)*sigma))*numpy.exp(-((X-x0)**2)/(2*(sigma)**2))
    return res
In [5]:
def gaussianComb(X, params):
    # Sum of Gaussian components; params is a sequence of [x0, sigma] pairs
    res=numpy.zeros(X.size)
    for par in params:
        x0=par[0]
        sigma=par[1]
        res+=gaussian(X,x0,sigma)
    return res
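As a quick sanity check (a sketch added here, not one of the original runs), each component returned by gaussian should integrate to roughly 1 over a wide enough interval; the check grid below is an arbitrary choice:
In [ ]:
# each [x0, sigma] pair should give unit area under its curve
Xcheck=numpy.linspace(-50,50,20001)
for x0,sigma in gaussianParams:
    print(x0, sigma, numpy.trapz(gaussian(Xcheck,x0,sigma), Xcheck))  # expect ~1.0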
In [6]:
X=numpy.linspace(a,b,numPoints)
y=gaussianComb(X,gaussianParams)   # target curve the optimizers below will fit
plt.plot(X,y)
plt.show()
In [8]:
def calcCostFunction(X, y, params):
    # Mean squared error cost: J = (1/2m) * sum((h(X) - y)^2)
    m=X.size
    resX=gaussianComb(X,params)
    costVector=(resX-y)**2
    J=(1/(2*m))*numpy.sum(costVector)
    return J
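A small illustrative check (an added sketch, not an original cell): at the generating parameters the cost should be essentially zero, and it should grow once the parameters are perturbed:
In [ ]:
# J at the true mixture vs. a shifted copy
print(calcCostFunction(X, y, gaussianParams))             # expect ~0
shifted=[[x0+0.5, sigma] for x0,sigma in gaussianParams]  # arbitrary perturbation
print(calcCostFunction(X, y, shifted))                    # expect clearly > 0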
In [9]:
def dJ_dsigma(X, x0, sigma):
    # Partial derivative of the Gaussian with respect to sigma:
    # dg/dsigma = g * ((X-x0)^2/sigma^3 - 1/sigma)
    part1=(((X-x0)**2)/(sigma**3)) * gaussian(X, x0, sigma)
    part2=(1/sigma) * gaussian(X, x0, sigma)
    res=part1-part2
    return res
In [10]:
def dJ_dx0(X, x0, sigma):
    # Partial derivative of the Gaussian with respect to x0:
    # dg/dx0 = g * (X-x0)/sigma^2
    res=((X-x0)/(sigma**2))*gaussian(X, x0, sigma)
    return res
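The two analytic derivatives are easy to validate numerically. The central-difference check below is an added sketch (eps and the test point are arbitrary); note that despite their names, dJ_dx0 and dJ_dsigma are the partials of the Gaussian itself, and the residual factor is applied later in calculateGradient:
In [ ]:
eps=1e-6
x0,sigma=1.0,2.0   # arbitrary test point
numX0=(gaussian(X,x0+eps,sigma)-gaussian(X,x0-eps,sigma))/(2*eps)
numSigma=(gaussian(X,x0,sigma+eps)-gaussian(X,x0,sigma-eps))/(2*eps)
print(numpy.max(numpy.abs(numX0-dJ_dx0(X,x0,sigma))))        # should be close to 0
print(numpy.max(numpy.abs(numSigma-dJ_dsigma(X,x0,sigma))))  # should be close to 0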
In [11]:
def calculateGradient(X, y, params):
    # Gradient of J with respect to every [x0, sigma] pair:
    # column 0 holds dJ/dx0, column 1 holds dJ/dsigma.
    m=X.size
    tempParams=params.tolist()
    k=0
    l=len(tempParams)
    grad=numpy.zeros([l, 2])
    mainPart=gaussianComb(X, params)-y   # residual h(X) - y
    for par in tempParams:
        x0=par[0]
        sigma=par[1]
        der1=(1/m) * numpy.sum(mainPart * dJ_dx0(X, x0, sigma))
        der2=(1/m) * numpy.sum(mainPart * dJ_dsigma(X, x0, sigma))
        grad[k,0]=der1
        grad[k,1]=der2
        k+=1
    return grad
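The assembled gradient can be verified the same way against calcCostFunction; this finite-difference loop is an added illustrative check with an arbitrary test point:
In [ ]:
eps=1e-6
testParams=numpy.array([[1.0,2.0],[-1.0,0.5]])
analytic=calculateGradient(X, y, testParams)
numeric=numpy.zeros_like(analytic)
for r in range(testParams.shape[0]):
    for c in range(2):
        plus=testParams.copy();  plus[r,c]+=eps
        minus=testParams.copy(); minus[r,c]-=eps
        numeric[r,c]=(calcCostFunction(X,y,plus)-calcCostFunction(X,y,minus))/(2*eps)
print(numpy.max(numpy.abs(analytic-numeric)))   # should be close to 0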
In [12]:
def gradientDescent(X, y, inits, alpha, maxIters):
    # Plain batch gradient descent with a fixed learning rate
    currentParams=numpy.array(inits)
    J=[]
    for i in range(maxIters):
        J.append(calcCostFunction(X, y, currentParams))
        currentParams=currentParams - (alpha * calculateGradient(X, y, currentParams))
    return (currentParams, J)
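A minimal usage sketch (starting point, learning rate and iteration count here are arbitrary): with a small fixed alpha the recorded cost should fall steadily:
In [ ]:
demoParams, demoJ = gradientDescent(X, y, [[2.5,1.2],[0.5,0.6],[0.0,2.5]], 1, 200)
print(demoJ[0], demoJ[-1])   # the final cost should be below the initial one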
In [13]:
def newGradientDescent(X, y, inits, initAlpha, maxIters):
    # Batch gradient descent with a simple backtracking rule:
    # whenever a step increases the cost, undo it, halve alpha and retry.
    currentParams=numpy.array(inits)
    J=[]
    alpha=initAlpha
    alphaList=[]
    prevJ=calcCostFunction(X, y, currentParams)
    prevParams=currentParams
    for i in range(maxIters):
        currentParams=currentParams - (alpha * calculateGradient(X, y, currentParams))
        curJ=calcCostFunction(X, y, currentParams)
        while (curJ>prevJ):
            # step overshot: roll back, shrink the rate and try again
            currentParams=prevParams
            alpha/=2
            currentParams=currentParams - (alpha * calculateGradient(X, y, currentParams))
            curJ=calcCostFunction(X, y, currentParams)
        prevParams=currentParams
        prevJ=curJ
        J.append(curJ)
        alphaList.append(alpha)
    return (currentParams, J, alphaList)
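Because every accepted step must not increase the cost, the J history from newGradientDescent is monotone non-increasing; a quick added check on an arbitrary run:
In [ ]:
_, monoJ, _ = newGradientDescent(X, y, [[2.5,1.2],[0.5,0.6],[0.0,2.5]], 10, 200)
print(all(monoJ[i+1]<=monoJ[i] for i in range(len(monoJ)-1)))   # expect True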
In [14]:
def stohasticGradientDescent(X, y, inits, alpha, maxIters):
    # Stochastic gradient descent: each step uses one randomly chosen sample
    currentParams=numpy.array(inits)
    J=[]
    m=X.size
    for i in range(maxIters):
        J.append(calcCostFunction(X, y, currentParams))
        randNum=random.randint(0,m-1)
        x=numpy.array([X[randNum]])
        randY=numpy.array([y[randNum]])
        currentParams=currentParams - (alpha * calculateGradient(x, randY, currentParams))
    return (currentParams, J)
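stohasticGradientDescent is not exercised further below, so here is a short added demo (initial guess and iteration count are arbitrary); single-sample steps make the cost curve noisy, which the averaging rule in the next cell tries to tame:
In [ ]:
sgdParams, sgdJ = stohasticGradientDescent(X, y, [[-2,1],[1,1],[2,1]], stohAlpha, 5000)
print(sgdJ[0], sgdJ[-1])   # expect a downward but noisy trend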
In [15]:
def newStohasticGradientDescent(X, y, inits, initAlpha, maxIters):
    # Stochastic gradient descent with a decaying learning rate:
    # every meanInterval steps, compare the mean cost of the last window
    # with that of the previous window and halve alpha if it went up.
    currentParams=numpy.array(inits)
    J=[]
    alpha=initAlpha
    alphaList=[]
    m=X.size
    meanInterval=100
    prevMeanJ=calcCostFunction(X, y, currentParams)
    sumJ=0
    for i in range(maxIters):
        randNum=random.randint(0,m-1)
        x=numpy.array([X[randNum]])
        randY=numpy.array([y[randNum]])
        currentParams=currentParams - (alpha * calculateGradient(x, randY, currentParams))
        curJ=calcCostFunction(X, y, currentParams)
        J.append(curJ)
        sumJ+=curJ
        if (((i+1)%meanInterval)==0):
            curMeanJ=sumJ/meanInterval   # mean cost over the last window
            if (curMeanJ>prevMeanJ):
                alpha/=2
            prevMeanJ=curMeanJ
            sumJ=0
        alphaList.append(alpha)
    return (currentParams, J, alphaList)
In [16]:
def miniBatchGradientDescent(X, y, inits, alpha, maxIters):
    # Mini-batch gradient descent: each step uses a random contiguous
    # slice of the data (the batch size varies from step to step)
    currentParams=numpy.array(inits)
    J=[]
    m=X.size
    for i in range(maxIters):
        J.append(calcCostFunction(X, y, currentParams))
        leftBorder=random.randint(0,m-2)
        rightBorder=random.randint(leftBorder+1,m-1)
        randX=X[leftBorder:rightBorder]
        randY=y[leftBorder:rightBorder]
        currentParams=currentParams - (alpha * calculateGradient(randX, randY, currentParams))
    return (currentParams, J)
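Like the plain stochastic version, miniBatchGradientDescent is never run below; a short added demo with arbitrary settings:
In [ ]:
mbParams, mbJ = miniBatchGradientDescent(X, y, [[-2,1],[1,1],[2,1]], miniBatchAlpha, 2000)
print(mbJ[0], mbJ[-1])   # cost should generally drop, with less noise than pure SGD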
In [102]:
def newMiniBatchGradientDescent(X, y, inits, initAlpha, maxIters, amountOfRandomNumbers):
    # Mini-batch gradient descent with backtracking: each batch is sampled
    # with replacement, and a step that raises the cost is undone while
    # alpha is halved until the cost decreases again.
    currentParams=numpy.array(inits)
    J=[]
    m=X.size
    alpha=initAlpha
    alphaList=[]
    prevJ=calcCostFunction(X, y, currentParams)
    prevParams=currentParams
    for i in range(maxIters):
        randNumbers=[]
        for j in range(amountOfRandomNumbers):
            randNumbers.append(random.randint(0,m-1))
        randX=numpy.array([X[r] for r in randNumbers])
        randY=numpy.array([y[r] for r in randNumbers])
        currentParams=currentParams - (alpha * calculateGradient(randX, randY, currentParams))
        curJ=calcCostFunction(X, y, currentParams)
        while (curJ>prevJ):
            # step overshot: roll back, shrink the rate and try again
            currentParams=prevParams
            alpha/=2
            currentParams=currentParams - (alpha * calculateGradient(randX, randY, currentParams))
            curJ=calcCostFunction(X, y, currentParams)
        prevParams=currentParams
        prevJ=curJ
        J.append(curJ)
        alphaList.append(alpha)
    return (currentParams, J, alphaList)
In [18]:
def drawGaussians(X,params,col,graphType):
    y=gaussianComb(X,params)
    if graphType is None:
        plt.plot(X,y,color=col)
    else:
        plt.plot(X,y,graphType,color=col)
In [19]:
def draw(X, y, col, graphType):
    if graphType is None:
        plt.plot(X,y,color=col)
    else:
        plt.plot(X,y,graphType,color=col)
In [93]:
res=gradientDescent(X, y, [[-5,1],[3,2],[5,1]], batchAlpha, batchMaxIters)
J=res[1]
aprY=gaussianComb(X, res[0])
fig1=plt.figure(figsize=(7,7))
plt.title("Original and approximated function")
draw(X,aprY,'blue',None)
drawGaussians(X,gaussianParams,'red','x')
plt.show()
fig2=plt.figure(figsize=(7,7))
plt.title('Cost function J')
plt.plot(range(1,len(J)+1),J)
plt.show()
err=y-aprY
fig4=plt.figure(figsize=(7,7))
plt.title('Error')
draw(X,err,'magenta',None)
plt.show()
print("Coefs:")
print(res[0])
print("Mean error: "+str(numpy.mean(err)))
i=0
for j in J:
    if (j<=0.000001):
        print('J reached ~0 (<=1e-06) on iteration '+str(i))
        break
    i+=1
In [101]:
res=newGradientDescent(X, y, [[-15,1],[3,2],[5,1]],100, batchMaxIters*3)
J=res[1]
alphaList=res[2]
aprY=gaussianComb(X, res[0])
fig1=plt.figure(figsize=(7,7))
plt.title("Original and approximated function")
draw(X,aprY,'blue',None)
drawGaussians(X,gaussianParams,'red','x')
plt.show()
fig2=plt.figure(figsize=(7,7))
plt.title('Cost function J')
plt.plot(range(1,len(J)+1),J)
plt.show()
fig3=plt.figure(figsize=(7,7))
plt.title('Learning rate')
plt.plot(alphaList,color='green')
plt.show()
err=y-aprY
fig4=plt.figure(figsize=(7,7))
plt.title('Error')
draw(X,err,'magenta',None)
plt.show()
print("Coefs:")
print(res[0])
print("Mean error: "+str(numpy.mean(err)))
i=0
for j in J:
    if (j<=0.000001):
        print('J reached ~0 (<=1e-06) on iteration '+str(i))
        break
    i+=1
In [68]:
res=newStohasticGradientDescent(X, y, [[-2,1],[1,1],[2,1]],1, stohMaxIters*2)
J=res[1]
alphaList=res[2]
aprY=gaussianComb(X, res[0])
fig1=plt.figure(figsize=(7,7))
plt.title("Original and approximated function")
draw(X,aprY,'blue',None)
drawGaussians(X,gaussianParams,'red','x')
plt.show()
fig2=plt.figure(figsize=(7,7))
plt.title('Cost function J')
plt.plot(range(1,len(J)+1),J)
plt.show()
fig3=plt.figure(figsize=(7,7))
plt.title('Learning rate')
plt.plot(alphaList,color='green')
plt.show()
err=y-aprY
fig4=plt.figure(figsize=(7,7))
plt.title('Error')
draw(X,err,'magenta',None)
plt.show()
print("Coefs:")
print(res[0])
print("Mean error: "+str(numpy.mean(err)))
i=0
for j in J:
    if (j<=0.000001):
        print('J reached ~0 (<=1e-06) on iteration '+str(i))
        break
    i+=1
In [87]:
res=newMiniBatchGradientDescent(X, y, [[-2,1],[1,1],[2,1]],10, 30000, 500)
J=res[1]
alphaList=res[2]
aprY=gaussianComb(X, res[0])
fig1=plt.figure(figsize=(7,7))
plt.title("Original and approximated function")
draw(X,aprY,'blue',None)
drawGaussians(X,gaussianParams,'red','x')
plt.show()
fig2=plt.figure(figsize=(7,7))
plt.title('Cost function J')
plt.plot(range(1,len(J)+1),J)
plt.show()
fig3=plt.figure(figsize=(7,7))
plt.title('Learning rate')
plt.plot(alphaList,color='green')
plt.show()
err=y-aprY
fig4=plt.figure(figsize=(7,7))
plt.title('Error')
draw(X,err,'magenta',None)
plt.show()
print("Coefs:")
print(res[0])
print("Mean error: "+str(numpy.mean(err)))
i=0
for j in J:
    if (j<=0.0001):
        print('J reached ~0 (<=1e-04) on iteration '+str(i))
        break
    i+=1