During Training:
In [1]:
import sklearn
import mglearn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
from sklearn.svm import SVC
In [3]:
# Fit an RBF-kernel SVM on mglearn's small hand-crafted dataset.
X, y = mglearn.tools.make_handcrafted_dataset()
svm = SVC(kernel='rbf', C=10, gamma=0.1)
svm.fit(X, y)

# Draw the learned decision boundary, then the training points on top of it.
mglearn.plots.plot_2d_separator(svm, X, eps=.5)
mglearn.discrete_scatter(X[:, 0], X[:, 1], y)

# Overlay the support vectors with larger, thicker markers.
sv = svm.support_vectors_
# The sign of each dual coefficient tells which class its support vector belongs to.
sv_labels = svm.dual_coef_.ravel() > 0
mglearn.discrete_scatter(sv[:, 0], sv[:, 1], sv_labels, s=15, markeredgewidth=3)

plt.xlabel("Feature 0")
plt.ylabel("Feature 1")
Out[3]:
dual_coef
)
In [5]:
# 3x3 grid of SVM decision boundaries: each row of axes gets one log_C value,
# each column one log_gamma value.
# NOTE(review): the values are passed as log_C / log_gamma, so they are
# presumably exponents rather than raw C / gamma -- confirm against
# mglearn.plots.plot_svm.
fig, axes = plt.subplots(3, 3, figsize=(15, 10))
for ax, C in zip(axes, [-1, 0, 3]):
    for a, gamma in zip(ax, range(-1, 2)):
        mglearn.plots.plot_svm(log_C=C, log_gamma=gamma, ax=a)

# One shared legend, anchored above the first panel. Labels are matched to the
# plotted series by position, so the list must contain exactly one entry per
# series: two data classes followed by their two support-vector classes.
# (The previous list carried an extra "Class 2" entry for a class that does not
# exist in this two-class dataset, which shifted the support-vector labels.)
axes[0, 0].legend(["Class 0", "Class 1", "SV Class 0", "SV Class 1"],
                  ncol=4, loc=(0.9, 1.2))
Out[5]:
In [6]:
from sklearn.datasets import load_breast_cancer
# Load scikit-learn's bundled breast cancer dataset and list the fields
# available on the returned object.
cancer = load_breast_cancer()
print(cancer.keys())
In [10]:
from sklearn.model_selection import train_test_split

# Split into training and test sets. Stratifying on the target keeps the class
# proportions comparable across both splits; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    stratify=cancer.target,
    random_state=0,
)
In [11]:
# Baseline: an SVC with default hyperparameters, trained on the raw
# (unscaled) training features.
svc = SVC()
svc.fit(X=X_train, y=y_train)
Out[11]:
In [12]:
# Report the accuracy of the SVC trained on unscaled features, on both splits.
print("Accuracy on Training set: {:.3f}".format(svc.score(X_train, y_train)))
print("Accuracy on Test set: {:.3f}".format(svc.score(X_test, y_test)))
In [14]:
# Plot the smallest and largest value of every feature in the training data,
# to show how far the feature scales differ from one another.
plt.plot(X_train.min(axis=0), 'o', label="min")
plt.plot(X_train.max(axis=0), '^', label="max")

plt.xlabel("Feature Index")
plt.ylabel("Feature Magnitude (log)")
# "lower right" is the named form of legend location code 4.
plt.legend(loc="lower right")
# Log scale on y, because the plotted magnitudes are labelled as log values.
plt.yscale("log")
In [17]:
# Per-feature minimum, taken over the training rows only.
min_on_training = X_train.min(axis=0)
# Per-feature spread (max - min), also from the training rows only.
range_on_training = X_train.max(axis=0) - min_on_training
# Shift by the minimum and divide by the spread, so that every training
# feature ends up with min=0 and max=1.
X_train_scaled = (X_train - min_on_training) / range_on_training

# Sanity check: print the per-feature extremes of the scaled training data.
print("Minimum for each feature\n{}".format(X_train_scaled.min(axis=0)))
print("Maximum for each feature\n {}".format(X_train_scaled.max(axis=0)))
In [16]:
# Apply the exact transformation used for the training set to the test set:
# the minimum and range come from the training data, never from the test data.
X_test_scaled = X_test - min_on_training
X_test_scaled = X_test_scaled / range_on_training
In [23]:
# Refit a default SVC, this time on the min-max scaled features, and report
# its accuracy on both (scaled) splits.
svc = SVC()
svc.fit(X_train_scaled, y_train)
print("Accuracy on Training set: {:.3f}".format(svc.score(X_train_scaled, y_train)))
print("Accuracy on Test set: {:.3f}".format(svc.score(X_test_scaled, y_test)))
In [25]:
# Same scaled features, but with C raised to 1000 (a larger C means weaker
# regularization), then report accuracy on both (scaled) splits.
svc = SVC(C=1000)
svc.fit(X_train_scaled, y_train)
print("Accuracy on Training set: {:.3f}".format(svc.score(X_train_scaled, y_train)))
print("Accuracy on Test set: {:.3f}".format(svc.score(X_test_scaled, y_test)))