University of Zagreb
Faculty of Electrical Engineering and Computing
http://www.fer.unizg.hr/predmet/su
Academic year 2015/2016
(c) 2015 Jan Šnajder
Version: 0.3 (2015-12-11)
In [2]:
import scipy as sp
import scipy.stats as stats
import matplotlib.pyplot as plt
import pandas as pd
%pylab inline
Synopsis:
First we focus on the linear model and linearly separable problems (hard margin)
- Mathematics: Lagrange multipliers
Then we extend the linear model so it can handle linearly non-separable problems (soft margin)
Finally, we extend it to a nonlinear model (the kernel trick)
- Mathematics: Mercer kernels
$\mathrm{argmin}_{\mathbf{w},w_0}\frac{1}{2}\|\mathbf{w}\|^2$
subject to $\quad y^{(i)}(\mathbf{w}^\intercal\mathbf{x}^{(i)}+w_0) \geq 1, \quad i=1,\dots,N$
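Under these canonical constraints the two margin hyperplanes satisfy $\mathbf{w}^\intercal\mathbf{x}+w_0=\pm 1$, so the margin width is
$$\frac{2}{\|\mathbf{w}\|},$$
and minimizing $\frac{1}{2}\|\mathbf{w}\|^2$ is exactly maximizing the margin (this is the quantity printed as `margin` in the experiments below).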
Maximize the expression \begin{align} \sum_{i=1}^N\alpha_i - \frac{1}{2}\sum_{i=1}^N \sum_{j=1}^N \alpha_i \alpha_j y^{(i)} y^{(j)} (\mathbf{x}^{(i)})^\intercal \mathbf{x}^{(j)} \end{align} subject to: \begin{align*} \alpha_i &\geq 0,\quad i=1,\dots,N \\ \sum_{i=1}^N\alpha_i y^{(i)} &= 0 \end{align*}
In [2]:
seven_X = sp.array([[2,1], [2,3], [1,2], [3,2], [5,2], [5,4], [6,3]])
seven_y = sp.array([1, 1, 1, 1, -1, -1, -1])
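As a cross-check (not part of the original notebook), the hard-margin dual above can be solved numerically, e.g. with scipy.optimize; the nonzero $\alpha_i$ should pick out the same support vectors that SVC reports below. A minimal sketch, assuming the dataset defined above:

from scipy.optimize import minimize

X, y = seven_X, seven_y
N = len(y)
# Gram matrix weighted by the labels: Q_ij = y^(i) y^(j) (x^(i))^T x^(j)
Yx = y[:, None] * X
Q = sp.dot(Yx, Yx.T)

def neg_dual(alpha):
    # negative of the dual objective, since 'minimize' minimizes
    return -(alpha.sum() - 0.5 * sp.dot(alpha, sp.dot(Q, alpha)))

cons = ({'type': 'eq', 'fun': lambda a: sp.dot(a, y)},)  # sum_i alpha_i y^(i) = 0
bnds = [(0, None)] * N                                   # alpha_i >= 0
res = minimize(neg_dual, sp.zeros(N), bounds=bnds, constraints=cons)
print(res.x.round(3))  # nonzero alphas correspond to the support vectors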
In [6]:
plot_problem(seven_X, seven_y)
In [3]:
from sklearn.svm import SVC
svc = SVC(kernel='linear')
svc.fit(seven_X, seven_y)
Out[3]:
In [8]:
plot_problem(seven_X, seven_y, svc.predict)
In [9]:
svc.predict(sp.array([[1, 1]]))
Out[9]:
In [10]:
svc.predict(sp.array([[6, 1]]))
Out[10]:
In [11]:
svc.predict(sp.array([[3.5, 2]]))
Out[11]:
In [12]:
svc.decision_function(seven_X)
Out[12]:
In [13]:
svc.support_
Out[13]:
In [14]:
svc.dual_coef_
Out[14]:
NB: The coefficients in dual_coef_
do not correspond to the values $\alpha_i$ but to the values $-\alpha_i y^{(i)}$
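To recover the $\alpha_i$ themselves, we can undo that sign convention (a small sketch, assuming the convention stated above):

# alpha_i = -dual_coef_ * y^(i), per the sign convention noted above
alphas = -svc.dual_coef_[0] * seven_y[svc.support_]
print(alphas)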
In [15]:
svc.support_vectors_
Out[15]:
In [38]:
def w(X, y, dc, sv):
    # w = sum_i alpha_i y^(i) x^(i), using dc[i] = -alpha_i y^(i)
    s = 0
    for alpha, i in zip(dc, sv):
        s += -alpha * X[i]
    return s

def w0(X, y, dc, sv):
    # w0 averaged over the support vectors: y^(i) - sum_j alpha_j y^(j) (x^(j))^T x^(i)
    s = 0
    for i in sv:
        r = 0
        for alpha, j in zip(dc, sv):
            r += -alpha * sp.dot(X[i], X[j])
        s += y[i] - r
    return s / float(len(sv))
In [39]:
w(seven_X, seven_y, svc.dual_coef_[0], svc.support_)
Out[39]:
In [21]:
svc.coef_
Out[21]:
In [40]:
w0(seven_X, seven_y, svc.dual_coef_[0], svc.support_)
Out[40]:
In [41]:
svc.intercept_
Out[41]:
In [44]:
def h_primal(x, w, w0):
    # Primal decision function: h(x) = w^T x + w0
    return sp.dot(w, x) + w0

def h_dual(x, X, y, dc, sv):
    # Dual decision function: h(x) = sum_i alpha_i y^(i) x^T x^(i) + w0
    xs = [-alpha * sp.dot(x, X[i]) for alpha, i in zip(dc, sv)]
    return sum(xs) + w0(X, y, dc, sv)
In [45]:
svc.decision_function(seven_X)
Out[45]:
In [46]:
map(lambda x: h_primal(x, svc.coef_, svc.intercept_), seven_X)
Out[46]:
In [47]:
map(lambda x: h_dual(x, seven_X, seven_y, svc.dual_coef_[0], svc.support_), seven_X)
Out[47]:
$$ \mathrm{argmin}_{\mathbf{w},w_0} \Big\{\frac{1}{2}\|\mathbf{w}\|^2+ C\sum_{i=1}^N\xi_i \Big\} $$Subject to the constraints: $$ \begin{align*} y^{(i)}\big(\mathbf{w}^\intercal\mathbf{x}^{(i)}+w_0\big) &\geq 1 - \xi_i, \quad& i=1,\dots,N\\ \xi_i&\geq 0, & i=1,\dots,N\\ \end{align*} $$
Maximize the expression \begin{align} \sum_{i=1}^N\alpha_i - \frac{1}{2}\sum_{i=1}^N \sum_{j=1}^N \alpha_i \alpha_j y^{(i)} y^{(j)} (\mathbf{x}^{(i)})^\intercal \mathbf{x}^{(j)} \end{align} subject to: \begin{align*} 0 \leq \alpha_i &\leq C,\quad i=1,\dots,N \\ \sum_{i=1}^N\alpha_i y^{(i)} &= 0 \end{align*}
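Eliminating the slack variables $\xi_i$ gives an equivalent unconstrained view of the same primal as regularized hinge loss (the per-example term is what `hinge_loss` averages in the experiments below):
$$\mathrm{argmin}_{\mathbf{w},w_0}\Big\{\frac{1}{2}\|\mathbf{w}\|^2+ C\sum_{i=1}^N\max\big(0,\,1-y^{(i)}(\mathbf{w}^\intercal\mathbf{x}^{(i)}+w_0)\big)\Big\}$$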
In [4]:
X1, y1 = sp.append(seven_X, [[3,3]], axis=0), sp.append(seven_y, -1)
X2, y2 = sp.append(seven_X, [[2,2]], axis=0), sp.append(seven_y, -1)
In [50]:
def predict(model, x):
    # Decision value clipped for plotting: 0.5 inside the margin, otherwise clipped to [-1, 1]
    # h = sp.dot(model.coef_, x) + model.intercept_
    h = model.decision_function(x)
    if -1 <= h <= 1:
        return 0.5
    return max(-1, min(1, h))
In [51]:
from sklearn.metrics import hinge_loss

for C in [1e-2, 1, 1e2]:
    svc = SVC(C=C, kernel='linear')
    svc.fit(X1, y1)
    plot_problem(X1, y1, lambda x: predict(svc, x)); show()
    print svc.support_vectors_
    print "margin = ", 2 / sp.linalg.norm(svc.coef_)
    print "loss = ", hinge_loss(y1, svc.decision_function(X1))
In [54]:
for C in [1e-2, 1, 1e2]:
    svc = SVC(C=C, kernel='linear')
    svc.fit(X2, y2)
    plot_problem(X2, y2, lambda x: predict(svc, x)); show()
    print svc.support_vectors_
    print "margin = ", 2 / sp.linalg.norm(svc.coef_)
    print "loss = ", hinge_loss(y2, svc.decision_function(X2))
TODO (see the lecture notes)
TODO (see the lecture notes)
TODO (see the lecture notes)
TODO (see the lecture notes)
TODO
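The nonlinear extension from the synopsis (the kernel trick and Mercer kernels) is left to the lecture notes; purely as an illustrative sketch, the same scikit-learn API accepts a kernel argument, e.g. an RBF kernel on the non-separable variant of the dataset (the gamma value here is an arbitrary choice):

# Illustrative sketch: a nonlinear decision surface via the RBF kernel
svc_rbf = SVC(C=1, kernel='rbf', gamma=0.5)
svc_rbf.fit(X2, y2)
plot_problem(X2, y2, svc_rbf.predict)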
In [4]:
def plot_problem(X, y, h=None, surfaces=True):
    '''
    Plots a two-dimensional labeled dataset (X, y) and, if a function h(x) is given,
    the decision boundaries (surfaces=False) or decision surfaces (surfaces=True).
    '''
    assert X.shape[1] == 2, "Dataset is not two-dimensional"
    if h is not None:
        # Create a mesh to plot in
        r = 0.02  # mesh resolution
        x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
        y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
        xx, yy = np.meshgrid(np.arange(x_min, x_max, r),
                             np.arange(y_min, y_max, r))
        XX = np.c_[xx.ravel(), yy.ravel()]
        try:
            Z_test = h(XX)
            if shape(Z_test) == ():
                # h returns a scalar when applied to a matrix; map explicitly
                Z = sp.array(map(h, XX))
            else:
                Z = Z_test
        except ValueError:
            # h cannot be applied to a matrix; map explicitly
            Z = sp.array(map(h, XX))
        # Put the result into a color plot
        Z = Z.reshape(xx.shape)
        if surfaces:
            plt.contourf(xx, yy, Z, cmap=plt.cm.Pastel1)
        else:
            plt.contour(xx, yy, Z)
    # Plot the dataset
    scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired, marker='o', s=50);