In [ ]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.io
import sklearn.svm
In [ ]:
%matplotlib inline
In [ ]:
# Load the first example dataset; it provides a feature matrix X and labels y.
data1 = scipy.io.loadmat('ex6data1.mat')
X, y = data1['X'], data1['y'][:, 0]
In [ ]:
def plot_data(X, y, ax=None):
    """Scatter-plot a 2-D binary classification dataset.

    Positive examples (y == 1) are drawn as blue '+' markers and
    negative examples (y == 0) as small red dots.

    Parameters
    ----------
    X : ndarray, shape (m, 2)
        Feature matrix; columns 0 and 1 are plotted.
    y : ndarray, shape (m,)
        Labels in {0, 1}.
    ax : matplotlib Axes, optional
        Axes to draw on; a new 7x5 figure is created when omitted.
    """
    # 'is None' (not '== None'): identity test is the idiomatic, safe check.
    if ax is None:
        fig, ax = plt.subplots(figsize=(7, 5))
    pos = y == 1
    neg = y == 0
    ax.scatter(X[pos, 0], X[pos, 1], marker='+', color='b')
    ax.scatter(X[neg, 0], X[neg, 1], marker='o', color='r', s=5)
plot_data(X, y)
In [ ]:
# Fit a linear-kernel SVM with C = 1, then report training-set accuracy.
svm = sklearn.svm.SVC(kernel='linear', C=1)
svm.fit(X, y)
(svm.predict(X) == y).mean()
In [ ]:
svm.coef_
In [ ]:
fig, ax = plt.subplots(figsize=(7,5))
def draw_contour(X, model, ax=None):
    """Draw a model's 0/1 decision boundary over the range of X.

    Evaluates model.predict on a 200x200 grid spanning X's range and
    contours the predictions at the 0.5 level (the class boundary).

    Parameters
    ----------
    X : ndarray, shape (m, 2)
        Data used only to determine the plotting range.
    model : fitted classifier exposing .predict.
    ax : matplotlib Axes, optional
        Axes to draw on. Defaults to the current axes (plt.gca()),
        which matches the previous reliance on the module-level `ax`.
    """
    if ax is None:
        ax = plt.gca()
    x1 = np.linspace(X[:, 0].min(), X[:, 0].max(), 200)
    x2 = np.linspace(X[:, 1].min(), X[:, 1].max(), 200)
    xx1, xx2 = np.meshgrid(x1, x2)
    grid = np.column_stack((xx1.ravel(), xx2.ravel()))
    yy = model.predict(grid).reshape(xx1.shape)
    ax.contour(x1, x2, yy, levels=[0.5])
plot_data(X, y, ax)
draw_contour(X, svm)
You will now implement the Gaussian kernel to use with the SVM by completing the code in gaussianKernel. This notebook does not use it directly (it uses sklearn's built-in 'rbf' kernel instead). Note that a custom sklearn kernel must return the full matrix of kernel values between two sets of samples; feel free to implement gaussianKernel in that vectorized form and later pass it via svm.SVC(kernel=gaussianKernel).
In [ ]:
def gaussianKernel(x1, x2, sigma):
    """Return the Gaussian (RBF) kernel similarity between vectors x1 and x2.

        K(x1, x2) = exp(-||x1 - x2||^2 / (2 * sigma^2))

    Parameters
    ----------
    x1, x2 : array_like
        Vectors of the same length.
    sigma : float
        Kernel bandwidth; larger sigma means a slower-decaying similarity.

    Returns
    -------
    float
        Similarity in (0, 1]; equals 1 when x1 == x2.
    """
    diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
    # Squared Euclidean distance via a dot product of the difference.
    return float(np.exp(-diff.dot(diff) / (2.0 * sigma ** 2)))
The Gaussian kernel between x1 = [1, 2, 1] and x2 = [0, 4, -1] with sigma = 2 should be about 0.324652.
In [ ]:
gaussianKernel(x1=np.array([1, 2, 1]), x2=np.array([0, 4, -1]), sigma=2)
In [ ]:
# Load the second example dataset and report its shapes.
data2 = scipy.io.loadmat('ex6data2.mat')
X, y = data2['X'], data2['y'][:, 0]
print(X.shape, y.shape)
In [ ]:
plot_data(X, y)
In [ ]:
# Fit an RBF-kernel SVM (C = 1, gamma = 100) and report training accuracy.
model = sklearn.svm.SVC(kernel='rbf', C=1, gamma=100)
model.fit(X, y)
(model.predict(X) == y).mean()
In [ ]:
# Plot the data together with the learned decision boundary.
fig, ax = plt.subplots()
plot_data(X, y, ax)
draw_contour(X, model)
In [ ]:
# Load the third dataset, which also provides a held-out validation split.
data3 = scipy.io.loadmat('ex6data3.mat')
X, y = data3['X'], data3['y'][:, 0]
Xval, yval = data3['Xval'], data3['yval'][:, 0]
print(X.shape, y.shape, Xval.shape, yval.shape)
In [ ]:
# Training split.
plot_data(X, y)
In [ ]:
# Held-out validation split.
plot_data(Xval, yval)
This is a different dataset that you can use to experiment with. Try different values of C and sigma here, train a classifier on your training data, measure the cross validation error and find the values for C and sigma that minimize the cross validation error.
In [ ]:
import itertools
# Candidate values for the penalty C and the RBF width gamma.
possible_C = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100, 300, 1000]
possible_gamma = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100, 300, 1000]
cv_errors = np.zeros((len(possible_C), len(possible_gamma)))
# Grid search: train each (C, gamma) pair on (X, y), record its error on the
# held-out (Xval, yval) split, then keep the pair minimizing that error.
# (The previous placeholder C = 7, gamma = 7 was not even in the candidate
# lists and cv_errors was never filled in.)
for (i, C_cand), (j, gamma_cand) in itertools.product(
        enumerate(possible_C), enumerate(possible_gamma)):
    clf = sklearn.svm.SVC(C=C_cand, gamma=gamma_cand, kernel='rbf')
    clf.fit(X, y)
    cv_errors[i, j] = np.mean(clf.predict(Xval) != yval)
best_i, best_j = np.unravel_index(np.argmin(cv_errors), cv_errors.shape)
C = possible_C[best_i]
gamma = possible_gamma[best_j]
In [ ]:
# Retrain with the selected hyperparameters and show the resulting boundary.
model = sklearn.svm.SVC(C=C, gamma=gamma, kernel='rbf')
model.fit(X, y)
fig, ax = plt.subplots()
plot_data(X, y, ax)
draw_contour(X, model)