The general form of an unconstrained optimization problem is \begin{align} \text{minimize} & \quad E(w) \end{align}
A generic iterative algorithm updates the current solution as \begin{align} w_\tau & = w_{\tau-1} + g_\tau \end{align}
Here, $g_\tau$ is a search direction. The goal is that, over the course of the iterations, a function value lower than the one at the starting point (ideally the lowest) is found.
Searching in the direction of the negative gradient: \begin{align} g_\tau & = - \eta \nabla E(w_{\tau-1}) \end{align}
When $\nabla E(w) = 0$, the algorithm can no longer change the solution point. Such points are called fixed points (stationary points of $E$).
Sometimes the algorithm even converges to the true minimizer $w^* = \arg\min_{w} E(w)$. Under which conditions, and at what rate, this convergence takes place are important conceptual questions.
Here, $\eta$ is called the step size or the learning rate. Choosing this parameter well requires a fair amount of trial and error in practice. If it is chosen poorly, the algorithm either diverges and blows up, or converges very slowly. There is a substantial body of theoretical work on this question.
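To see why, consider the one-dimensional quadratic $E(w) = \frac{\lambda}{2} w^2$ with $\lambda > 0$ (an illustrative example, not part of the original notes). The gradient descent iteration becomes \begin{align} w_\tau & = w_{\tau-1} - \eta \lambda w_{\tau-1} = (1 - \eta\lambda)^{\tau} w_0 \end{align}
which converges to the minimizer $w^* = 0$ if and only if $|1 - \eta\lambda| < 1$, i.e. $0 < \eta < 2/\lambda$; beyond this threshold the iterates grow in magnitude and the algorithm blows up.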
Since only the gradient needs to be computed, it is a very easy algorithm to use. It is very common in artificial intelligence because it can be applied even to very large models.
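As a minimal sketch of the method (illustrative code with made-up data; the quadratic error $E(w) = \|y - Aw\|^2$ and the names `A`, `y`, `eta` are assumptions for this example, not part of the notes):

import numpy as np
# A small made-up least-squares problem
A = np.array([[1., 0.], [1., 1.], [1., 2.]])
y = np.array([1., 2., 2.5])
eta = 0.1            # step size (learning rate)
w = np.zeros(2)      # starting point
for tau in range(1000):
    g = -2.*A.T.dot(y - A.dot(w))   # gradient of E(w) = ||y - Aw||^2
    w = w - eta*g                   # step in the negative gradient direction
print(w)   # approaches the least-squares solution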
In [2]:
%matplotlib inline
import numpy as np
import matplotlib as mpl
import matplotlib.pylab as plt
from ipywidgets import interact, interactive, fixed
import ipywidgets as widgets
from IPython.display import clear_output, display, HTML
from matplotlib import rc
mpl.rc('font',**{'size': 20, 'family':'sans-serif','sans-serif':['Helvetica']})
mpl.rc('text', usetex=True)
import time
In [3]:
y = np.array([7.04, 7.95, 7.58, 7.81, 8.33, 7.96, 8.24, 8.26, 7.84, 6.82, 5.68])
x = np.array(np.linspace(-1,1,11))
N = len(x)
# Design matrix
#A = np.vstack((np.ones(N), x, x**2, x**3)).T
degree = 9
A = np.hstack([np.power(x.reshape(N,1),i) for i in range(degree+1)])
# Learning rate
eta = 0.001
# initial parameters
w = np.array(np.random.randn(degree+1))
W = []
Err = []
for epoch in range(50000):
    # Error
    err = y - A.dot(w)
    # Mean squared error
    E = np.sum(err**2)/N
    # Gradient
    dE = -2.*A.T.dot(err)/N
    if epoch%100 == 0:
        #print(epoch,':',E)
        #print(w)
        W.append(w)
        Err.append(E)
    # Perform one descent step
    w = w - eta*dE
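A quick sanity check (a sketch; it assumes the cell above has been run so that `A`, `y`, and `w` are in scope): the gradient descent estimate can be compared against NumPy's direct least-squares solver.

# Compare gradient descent with the direct least-squares solution
w_direct = np.linalg.lstsq(A, y, rcond=None)[0]
print(np.max(np.abs(w - w_direct)))   # small only if gradient descent has converged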
In [4]:
fig = plt.figure(figsize=(5,5))
left = -1.5
right = 1.5
xx = np.linspace(left,right,50)
AA = np.hstack([np.power(xx.reshape(len(xx),1),i) for i in range(degree+1)])
# Find best
A_orth, R = np.linalg.qr(A)
w_orth, res, rank, s = np.linalg.lstsq(A_orth, y, rcond=None)
w_star = np.linalg.solve(R, w_orth)
yy = AA.dot(w_star)
#ax.set_xlim((2,15))
#dots = plt.Line2D(x,y, linestyle='', markerfacecolor='b',marker='o', alpha=0.5, markersize=5)
#ax.add_line(dots)
plt.plot(x,y, linestyle='', markerfacecolor='b',marker='o', alpha=0.5, markersize=5)
plt.plot(xx, yy, linestyle=':', color='k', alpha=0.3)
ln = plt.Line2D(xdata=[], ydata=[], linestyle='-',linewidth=2)
ax = fig.gca()
ax.add_line(ln)
plt.close(fig)
ax.set_xlim((left,right))
ax.set_ylim((5,9))
def plot_gd(iteration=0):
    w = W[iteration]
    f = AA.dot(w)
    #print(w)
    ln.set_ydata(f)
    ln.set_xdata(xx)
    ax.set_title('$E = '+str(Err[iteration])+'$')
    display(fig)
res = interact(plot_gd, iteration=(0,len(W)-1))
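A brief note on the `Find best` step above: with the QR factorization $A = QR$, where $Q$ has orthonormal columns, the least-squares problem \begin{align} \min_w \| y - A w \|^2 = \min_w \| y - QR w \|^2 \end{align}
is solved by $R w^* = Q^\top y$, which is exactly what the `lstsq` call on `A_orth` followed by `np.linalg.solve(R, w_orth)` computes. This avoids forming the badly conditioned normal equations $A^\top A w = A^\top y$ for the degree-9 Vandermonde matrix.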
In [5]:
%matplotlib inline
import scipy as sc
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pylab as plt
df_arac = pd.read_csv(u'data/arac.csv',sep=';')
#df_arac[['Year','Car']]
BaseYear = 1995
x = np.matrix(df_arac.Year[0:]).T-BaseYear
y = np.matrix(df_arac.Car[0:]).T/1000000.
plt.plot(x+BaseYear, y, 'o-')
plt.xlabel('Year')
plt.ylabel('Cars (Millions)')
plt.show()
In [6]:
from itertools import product
def Error_Surface(y, A, left=0, right=1, bottom=0, top=1, step=0.1):
    W0 = np.arange(left, right, step)
    W1 = np.arange(bottom, top, step)
    ErrSurf = np.zeros((len(W1),len(W0)))
    for i,j in product(range(len(W1)), range(len(W0))):
        e = y - A*np.matrix([W0[j], W1[i]]).T
        ErrSurf[i,j] = e.T*e/2
    return ErrSurf
BaseYear = 1995
x = np.matrix(df_arac.Year[0:]).T-BaseYear
y = np.matrix(df_arac.Car[0:]).T/1000000.
# Set up the Vandermonde (design) matrix
N = len(x)
A = np.hstack((np.ones((N,1)), x))
left = -5
right = 15
bottom = -4
top = 6
step = 0.05
ErrSurf = Error_Surface(y, A, left=left, right=right, top=top, bottom=bottom, step=step)
plt.figure(figsize=(10,10))
#plt.imshow(ErrSurf, interpolation='nearest',
# vmin=0, vmax=10000,origin='lower',
# extent=(left,right,bottom,top), cmap='jet')
plt.contour(ErrSurf,
            vmin=0, vmax=10000, origin='lower', levels=np.linspace(100,5000,10),
            extent=(left,right,bottom,top), cmap='jet')
plt.xlabel('$w_0$')
plt.ylabel('$w_1$')
plt.title('Error Surface')
#plt.colorbar(orientation='horizontal')
plt.show()
In [8]:
# An implementation of Gradient Descent for solving a linear system
# Setup the Design matrix
N = len(x)
A = np.hstack((np.ones((N,1)), x))
# Starting point
w = np.matrix('[15; -6]')
# Number of iterations
EPOCH = 5000
# Learning rate: The following is the largest possible fixed rate for this problem
#eta = 0.00016
eta = 0.00015
Error = np.zeros((EPOCH))
W = np.zeros((2,EPOCH))
for tau in range(EPOCH):
    # Calculate the error
    e = y - A*w
    # Store the intermediate results
    W[0,tau] = w[0]
    W[1,tau] = w[1]
    Error[tau] = (e.T*e)/2
    # Compute the gradient descent step
    g = -A.T*e
    w = w - eta*g
    #print(w.T)
w_star = w
plt.figure(figsize=(8,8))
plt.imshow(ErrSurf, interpolation='nearest',
           vmin=0, vmax=1000, origin='lower',
           extent=(left,right,bottom,top), cmap='Blues_r')
plt.xlabel('$w_0$')
plt.ylabel('$w_1$')
ln = plt.Line2D(W[0,:300:1], W[1,:300:1], marker='o',markerfacecolor='w')
plt.gca().add_line(ln)
ln = plt.Line2D(w_star[0], w_star[1], marker='x',markerfacecolor='w')
plt.gca().add_line(ln)
plt.show()
plt.figure(figsize=(8,3))
plt.semilogy(Error)
plt.xlabel('Iteration tau')
plt.ylabel('Error')
plt.show()
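The learning rate comment above can be checked numerically. For this quadratic error the update is $w \leftarrow (I - \eta A^\top A)\,w + \eta A^\top y$, so a fixed step size is stable only when $\eta < 2/\lambda_{\max}(A^\top A)$. A short check (a sketch, assuming `A` from the cell above is still in scope):

# Largest stable fixed step size: 2 / lambda_max(A^T A)
lam_max = np.linalg.eigvalsh(A.T*A).max()
print(2./lam_max)   # should be close to the 0.00016 quoted above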
In [9]:
%matplotlib inline
import matplotlib.pylab as plt
from notes_utilities import pnorm_ball_line
import time
from IPython import display
import numpy as np
#y = np.array([7.46, 6.77, 12.74, 7.11, 7.81, 8.84, 6.08, 5.39, 8.15, 6.42, 5.73])
y = np.array([8.04, 6.95, 7.58, 8.81, 8.33, 9.96, 7.24, 4.26, 10.84, 4.82, 5.68])
#y = np.array([9.14, 8.14, 8.74, 8.77, 9.26, 8.10, 6.13, 3.10, 9.13, 7.26, 4.74])
x = np.array([10., 8., 13., 9., 11., 14., 6., 4., 12., 7., 5.])
N = len(x)
# Design matrix
A = np.vstack((np.ones(N), x)).T
w_best, E, rank, s = np.linalg.lstsq(A, y, rcond=None)
err = y-A.dot(w_best)
E_min = np.sum(err**2)/N
def inspect_momentum(alpha=0.005, beta=0.97):
    ln = pnorm_ball_line(mu=w_best, A=np.linalg.cholesky(np.linalg.inv(A.T.dot(A))), linewidth=1)
    ln2 = pnorm_ball_line(mu=w_best, A=4*np.linalg.cholesky(np.linalg.inv(A.T.dot(A))), linewidth=1)
    # initial parameters
    w0 = np.array([2., 1.])
    w = w0.copy()
    p = np.zeros(2)
    EPOCHS = 100
    W = np.zeros((2,EPOCHS))
    for epoch in range(EPOCHS):
        # Error
        err = y - A.dot(w)
        W[:,epoch] = w
        # Mean squared error
        E = np.sum(err**2)/N
        # Gradient
        dE = -2.*A.T.dot(err)/N
        # Accumulate the gradient with momentum factor beta
        p = dE + beta*p
        # Perform one descent step
        w = w - alpha*p
    plt.plot(W[0,:], W[1,:], '.-b')
    plt.plot(w_best[0], w_best[1], 'ro')
    plt.plot(w0[0], w0[1], 'ko')
    plt.xlim((1.8,4.3))
    plt.ylim((0,1.2))
    plt.title('$\\alpha = $'+str(alpha)+' $\\beta = $'+str(beta))
    plt.gca().add_line(ln)
    plt.gca().add_line(ln2)
    plt.show()
inspect_momentum(alpha=0.0014088, beta=0)
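For reference, the update implemented in `inspect_momentum` above is gradient descent with momentum (the heavy-ball method): the gradient is accumulated into a velocity term $p$ with decay factor $\beta$, \begin{align} p_\tau & = \nabla E(w_{\tau-1}) + \beta p_{\tau-1} \\ w_\tau & = w_{\tau-1} - \alpha p_\tau \end{align}
With $\beta = 0$ this reduces to plain gradient descent, as in the call above; values of $\beta$ close to $1$ smooth the trajectory and can speed up progress along the narrow valley of the error surface.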
In [10]:
%matplotlib inline
from __future__ import print_function
from ipywidgets import interact, interactive, fixed
import ipywidgets as widgets
import matplotlib.pylab as plt
from IPython.display import clear_output, display, HTML
interact(inspect_momentum, alpha=(0, 0.02, 0.001), beta=(0, 0.99, 0.001))