In [1]:
%matplotlib inline
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.datasets import make_blobs
from sklearn.svm import LinearSVC
In [2]:
def huberizedHingeLoss(x, h):
    # Smoothed (huberized) hinge loss: zero above 1+h, quadratic on [1-h, 1+h], linear below 1-h.
    if x > 1 + h:
        return 0
    elif abs(1 - x) <= h:
        return ((1 + h - x)**2) / (4 * h)
    else:
        return 1 - x

def hingeLoss(x):
    # Standard hinge loss max(0, 1 - x).
    return max(0, 1 - x)

def misclassLoss(x):
    # 0-1 misclassification loss on the margin x = y * w.x.
    return 1 if x <= 0 else 0
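For reference, the piecewise form implemented by huberizedHingeLoss above, written for a margin $x$ and smoothing width $h$:

$$
\ell_h(x) =
\begin{cases}
0 & x > 1 + h,\\
\dfrac{(1 + h - x)^2}{4h} & |1 - x| \le h,\\
1 - x & x < 1 - h.
\end{cases}
$$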
In [18]:
def compute_obj(x, y, w, C=1.0, h=0.5):
    # Regularized objective: ||w||^2 plus the averaged huberized hinge loss on the margins y_i * w.x_i.
    loss = np.vectorize(huberizedHingeLoss, excluded=['h'])
    return np.dot(w, w) + (C / float(x.shape[0])) * sum(loss(y * np.dot(x, w), h=h))
    #return (C/float(x.shape[0]))*sum(loss(y*np.dot(x,w), h))
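compute_obj evaluates the regularized objective (with $n$ = x.shape[0]):

$$
J(w) = w^\top w + \frac{C}{n} \sum_{i=1}^{n} \ell_h\!\left(y_i\, w^\top x_i\right).
$$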
In [19]:
def compute_grad(x, y, w, C=1.0, h=0.5):
    # Gradient of compute_obj: 2w from the ||w||^2 term plus the averaged per-example loss gradients.
    p = y * np.dot(x, w)  # margins y_i * w.x_i
    gradW = np.zeros(w.shape[0], dtype=float)
    def gradHuberHinge(i, j):
        # d/dw_j of the huberized hinge loss at example i.
        if p[i] > 1 + h:
            return 0
        elif abs(1 - p[i]) <= h:
            return ((1 + h - p[i]) / (2 * h)) * (-y[i] * x[i][j])
        else:
            return -y[i] * x[i][j]
    for j in range(w.shape[0]):
        sum_over_i = 0.0
        for i in range(x.shape[0]):
            sum_over_i += gradHuberHinge(i, j)
        gradW[j] = 2 * w[j] + (C / float(x.shape[0])) * sum_over_i
        #gradW[j] = (C/float(x.shape[0]))*sum_over_i
    return gradW
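As a quick sanity check (not in the original notebook), compute_grad can be compared against a central finite-difference approximation of compute_obj on a small random problem; the array sizes, seed, and tolerance below are arbitrary.

In [ ]:
# Finite-difference check: numerical gradient of compute_obj vs. compute_grad.
rng = np.random.RandomState(0)
x_chk = rng.randn(6, 3)
y_chk = np.where(rng.randn(6) > 0, 1.0, -1.0)
w_chk = rng.randn(3)
eps = 1e-6
num_grad = np.zeros_like(w_chk)
for j in range(w_chk.shape[0]):
    e = np.zeros_like(w_chk)
    e[j] = eps
    num_grad[j] = (compute_obj(x_chk, y_chk, w_chk + e) -
                   compute_obj(x_chk, y_chk, w_chk - e)) / (2 * eps)
print(np.allclose(num_grad, compute_grad(x_chk, y_chk, w_chk), atol=1e-5))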
In [5]:
def add_bias_column(x):
    # Append a constant 1 column so the last weight acts as the bias/intercept.
    return np.append(x, np.ones(x.shape[0]).reshape(x.shape[0], 1), axis=1)
In [6]:
def my_gradient_descent(x, y, F, dF, eta=0.01, maxiter=1000):
    # Fixed-step gradient descent on objective F with gradient dF, stepping along the normalized gradient.
    w = np.zeros(x.shape[1])
    for i in range(maxiter):
        grad = dF(x, y, w)
        print(F(x, y, w), eta)
        w = w - eta * (grad / np.linalg.norm(grad))
    return w
In [7]:
def backoff_gradient_descent(x, y, F, dF, eta=0.01, maxiter=1000):
    # Gradient descent with a backtracking line search: start each iteration at eta = 1
    # and halve the step until the objective decreases (the eta argument is ignored).
    w = np.zeros(x.shape[1])
    for i in range(maxiter):
        eta = 1
        val = F(x, y, w)
        grad = dF(x, y, w)
        while F(x, y, w - eta * grad) > val:
            eta = 0.5 * eta
        print(val, eta)
        w = w - eta * grad
    return w
In [8]:
def dataset_fixed_cov(n, dim):
    '''Generate two Gaussian clouds (n points each) sharing the same covariance matrix.'''
    C = np.array([[0., -0.23], [0.83, .23]])
    X = np.r_[np.dot(np.random.randn(n, dim), C),
              np.dot(np.random.randn(n, dim), C) + np.array([1, 1])]
    y = np.hstack((-np.ones(n), np.ones(n)))
    return X, y
In [9]:
x_train,y_train = dataset_fixed_cov(250,2)
plt.plot(x_train[:250,0],x_train[:250,1], 'o', color='red')
plt.plot(x_train[250:,0],x_train[250:,1], 'o', color='blue')
Out[9]: [scatter plot of the training data: red = class -1, blue = class +1]
In [10]:
x_test,y_test = dataset_fixed_cov(250,2)
plt.plot(x_test[:250,0],x_test[:250,1], 'o', color='red')
plt.plot(x_test[250:,0],x_test[250:,1], 'o', color='blue')
Out[10]: [scatter plot of the test data: red = class -1, blue = class +1]
In [11]:
class my_svm(object):
    # Thin wrapper: trains a linear SVM on the huberized hinge objective with the supplied
    # gradient-descent routine, and predicts with the sign of w.x (last weight = bias).
    def __init__(self, gd):
        self.learnt_w = None
        self.gd = gd

    def fit(self, x_train, y_train, eta=0.01, max_iter=1000):
        x_copy = add_bias_column(x_train)
        self.learnt_w = self.gd(x_copy, y_train, compute_obj, compute_grad, eta, max_iter)

    def predict(self, x_test):
        x_copy = add_bias_column(x_test)
        y = np.dot(x_copy, self.learnt_w)
        y[y < 0] = -1
        y[y > 0] = 1
        return y

    def score(self, x_test, y_test):
        y_predict = self.predict(x_test)
        return np.mean(y_predict == y_test)
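score() is defined above but never called later, so here is a small added usage example; svm_chk and the 100-iteration budget are arbitrary choices, not part of the original run.

In [ ]:
# Usage example for my_svm.score(): train on x_train and report train/test accuracy.
svm_chk = my_svm(backoff_gradient_descent)
svm_chk.fit(x_train, y_train, 0.01, 100)
print(svm_chk.score(x_train, y_train), svm_chk.score(x_test, y_test))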
In [20]:
svm = my_svm(backoff_gradient_descent)
svm.fit(x_train, y_train, 0.01, 100)
line = svm.learnt_w
xx = np.linspace(-3, 3)
yy = ((-line[0] / line[1]) * xx) + (-line[2] / line[1])  # decision boundary: y = (-a/b)*x + (-c/b)
plt.plot(xx, yy)
print(line)
svm = LinearSVC()
svm.fit(x_train, y_train)
xx = np.linspace(-3, 3)
yy = ((-svm.coef_[0][0] / svm.coef_[0][1]) * xx) + (-svm.intercept_[0] / svm.coef_[0][1])  # y = (-a/b)*x + (-c/b)
print(svm.coef_, svm.intercept_)
plt.plot(xx, yy)
plt.plot(x_test[:250, 0], x_test[:250, 1], 'o', color='red')
plt.plot(x_test[250:, 0], x_test[250:, 1], 'o', color='blue')
Out[20]: [plot: decision boundaries of the hand-rolled SVM and LinearSVC over the test data]
In [13]:
# Redefine the objective and gradient WITHOUT the ||w||^2 regularizer, keeping only the averaged loss term.
def compute_obj(x, y, w, C=1.0, h=0.5):
    loss = np.vectorize(huberizedHingeLoss, excluded=['h'])
    #return np.dot(w, w) + (C/float(x.shape[0]))*sum(loss(y*np.dot(x,w), h))
    return (C / float(x.shape[0])) * sum(loss(y * np.dot(x, w), h=h))

def compute_grad(x, y, w, C=1.0, h=0.5):
    p = y * np.dot(x, w)
    gradW = np.zeros(w.shape[0], dtype=float)
    def gradHuberHinge(i, j):
        if p[i] > 1 + h:
            return 0
        elif abs(1 - p[i]) <= h:
            return ((1 + h - p[i]) / (2 * h)) * (-y[i] * x[i][j])
        else:
            return -y[i] * x[i][j]
    for j in range(w.shape[0]):
        sum_over_i = 0.0
        for i in range(x.shape[0]):
            sum_over_i += gradHuberHinge(i, j)
        #gradW[j] = 2*w[j] + (C/float(x.shape[0]))*sum_over_i
        gradW[j] = (C / float(x.shape[0])) * sum_over_i
    return gradW
In [16]:
svm = my_svm(backoff_gradient_descent)
svm.fit(x_train, y_train, 0.01)
line = svm.learnt_w
print(line)
xx = np.linspace(-3, 3)
yy = ((-line[0] / line[1]) * xx) + (-line[2] / line[1])  # decision boundary: y = (-a/b)*x + (-c/b)
plt.plot(xx, yy)
svm = LinearSVC()
svm.fit(x_train, y_train)
xx = np.linspace(-3, 3)
yy = ((-svm.coef_[0][0] / svm.coef_[0][1]) * xx) + (-svm.intercept_[0] / svm.coef_[0][1])  # y = (-a/b)*x + (-c/b)
print(svm.coef_, svm.intercept_)
plt.plot(xx, yy)
plt.plot(x_test[:250, 0], x_test[:250, 1], 'o', color='red')
plt.plot(x_test[250:, 0], x_test[250:, 1], 'o', color='blue')
Out[16]: [plot: decision boundaries (unregularized objective) of the hand-rolled SVM and LinearSVC over the test data]