In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris

try:
    # Current location of the splitter.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Fallback for old scikit-learn releases that only ship the legacy module.
    from sklearn.cross_validation import train_test_split

iris = load_iris()
X = iris.data
y = iris.target

# Dataset for decision function visualization: keep only the first two
# feature columns and drop every sample whose label is 0, then shift the
# remaining labels down by one so they start at 0.
keep = y > 0
X_2d = X[keep, :2]
# Boolean indexing returns a copy, so iris.target itself is left untouched.
y_2d = y[keep] - 1

# Fixed seed so that Restart & Run All reproduces the same split (and
# therefore the same figures and scores) every time.
X_train, X_test, y_train, y_test = train_test_split(X_2d, y_2d, random_state=0)
In [2]:
%matplotlib inline
plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, s=100)
Out[2]:
In [3]:
def show_decision_function(clf, ax, X=None, y=None):
    """Draw a fitted two-class classifier's decision surface on ``ax``.

    The classifier is evaluated on a 200 x 200 grid covering
    x in [4.5, 8] and y in [1.5, 4.0]. The resulting values are rendered
    with ``ax.pcolormesh`` using the "jet" colormap, axis ticks are removed,
    and the sample points are scattered on top.

    Parameters
    ----------
    clf : fitted estimator
        Must provide ``decision_function`` or, failing that,
        ``predict_proba``; either is called with an ``(n_points, 2)`` array.
    ax : matplotlib Axes (or compatible object)
        Target axes; modified in place.
    X, y : array-like, optional
        Points (two columns) and their labels to overlay. When omitted, the
        notebook-level ``X_train`` / ``y_train`` are used, which is what the
        function always did before these parameters existed.

    Returns
    -------
    None
    """
    if X is None:
        X = X_train
    if y is None:
        y = y_train

    x_lo, x_hi = 4.5, 8
    y_lo, y_hi = 1.5, 4.0
    xx, yy = np.meshgrid(np.linspace(x_lo, x_hi, 200),
                         np.linspace(y_lo, y_hi, 200))
    grid = np.c_[xx.ravel(), yy.ravel()]

    # Look the method up instead of wrapping the call in try/except, so an
    # AttributeError raised *inside* decision_function is not silently
    # swallowed and replaced by the probability fallback.
    decision = getattr(clf, "decision_function", None)
    if decision is not None:
        Z = decision(grid)
    else:
        # Use the second class's probability: for binary scikit-learn
        # estimators a positive decision_function value favours the second
        # class, so column 1 keeps the colour direction consistent between
        # the two branches (column 0 would invert it).
        Z = clf.predict_proba(grid)[:, 1]
    Z = Z.reshape(xx.shape)

    ax.pcolormesh(xx, yy, Z, cmap="jet")
    ax.set_xlim(x_lo, x_hi)
    ax.set_ylim(y_lo, y_hi)
    ax.set_xticks(())
    ax.set_yticks(())
    ax.scatter(X[:, 0], X[:, 1], c=y, s=100)
In [4]:
from sklearn.svm import SVC
# Sweep over several values of C with gamma held at 10: fit one SVC per
# value, record its score on both splits, and draw its decision surface in
# its own panel of a 2 x 3 grid.
Cs = [0.01, 0.1, 1, 10, 100, 1000]
training_scores, test_scores = [], []
fig, axes = plt.subplots(2, 3, figsize=(20, 10))

for ax, C in zip(axes.ravel(), Cs):
    clf = SVC(C=C, gamma=10)
    clf.fit(X_train, y_train)
    train_score = clf.score(X_train, y_train)
    test_score = clf.score(X_test, y_test)
    training_scores.append(train_score)
    test_scores.append(test_score)
    show_decision_function(clf, ax)

# Saved after the loop so the file contains all six panels.
plt.savefig("iris_overfitting_decision.png", bbox_inches="tight")
In [5]:
# Training vs. test score for each value of C fitted in the previous cell.
plt.figure(figsize=(20, 10))
plt.plot(training_scores, label="training scores")
plt.plot(test_scores, label="test scores")
plt.legend(loc="best")
# One tick per fitted model, labelled with its C. The tick count is derived
# from Cs so it cannot drift out of sync if the list of values changes.
plt.xticks(range(len(Cs)), Cs)
plt.xlabel("C")
# clf.score on a scikit-learn classifier reports mean accuracy.
plt.ylabel("accuracy")
plt.savefig("iris_overfitting_curve.png", bbox_inches="tight")