One of the most successful learning methods historically (though now being superseded by newer techniques!):
Advantages of SVM
Disadvantages of SVM
"The goal of a support vector machine is to find the optimal separating hyperplane which maximizes the margin of the training data."
So let's understand the concept of Margin
Given a two dimensional data space and linearly separable set of points, which line should I choose?
We should choose a classification line that maximizes the distance from the nearest point to the line. Such a line is likely to give us the best possible generalization. This concept of choosing the line with the largest such distance is called Fat Margins.
In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (6, 6)
In [3]:
from ipywidgets import interact
In [4]:
np.random.seed(1234)
In [5]:
def plot_points(p):
X = np.r_[np.random.randn(p, 2) - [2, 2], np.random.randn(p, 2) + [2, 2]]
y = [0] * p + [1] * p
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.RdBu, s=40)
plt.xlim(-6,6)
plt.ylim(-6,6)
plt.show()
In [6]:
interact(plot_points, p=(20,40,2))
Out[6]:
In [11]:
from sklearn import svm
In [14]:
def plot_margins(p):
X = np.r_[np.random.randn(p, 2) - [2, 2], np.random.randn(p, 2) + [2, 2]]
y = [0] * p + [1] * p
clf = svm.SVC(kernel="linear")
clf.fit(X,y)
clf.predict(X)
# get the separating hyperplane
w = clf.coef_[0]
a = -w[0] / w[1]
x_line = np.linspace(-6, 6)
y_line = a * x_line - (clf.intercept_[0]) / w[1]
# plot the parallels to the separating hyperplane that pass through the support vectors
b = clf.support_vectors_[0]
y_down = a * x_line + (b[1] - a * b[0])
b = clf.support_vectors_[-1]
y_up = a * x_line + (b[1] - a * b[0])
# plot the line, the points, and the nearest vectors to the plane
plt.plot(x_line, y_line, 'k-')
plt.plot(x_line, y_down, 'k--')
plt.plot(x_line, y_up, 'k--')
plt.fill_between(x_line, y_down, y_up, color='yellow', alpha='0.5')
# plot the boundaries
x_min, x_max = -6, 6
y_min, y_max = -6, 6
step = 0.1
xx, yy = np.meshgrid(np.arange(x_min, x_max, step), np.arange(y_min, y_max, step))
xxyy = np.c_[xx.ravel(), yy.ravel()]
Z = clf.predict(xxyy)
Z = Z.reshape(xx.shape)
cs = plt.contourf(xx, yy, Z, cmap=plt.cm.viridis, alpha = 0.5)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.RdBu, s=40)
plt.xlim(-6,6)
plt.ylim(-6,6)
plt.show()
In [15]:
interact(plot_margins, p=(2,40))
Out[15]:
In [ ]: