In [ ]:
%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np
In [ ]:
from sklearn.datasets import load_iris
In [ ]:
# Load the iris dataset once, then bind its data and target arrays to X and y.
iris = load_iris()
X, y = iris.data, iris.target
In [ ]:
from sklearn.cross_validation import cross_val_score
from sklearn.svm import LinearSVC
In [ ]:
# Cross-validate a default-configured LinearSVC on (X, y) with cv=5.
# No `scoring` is passed, so the library's default scoring applies.
cross_val_score(LinearSVC(), X, y, cv=5)
In [ ]:
# Cross-validate a LinearSVC on (X, y) with cv=5, selecting the metric by
# name through scoring="f1_macro".
cross_val_score(LinearSVC(), X, y, cv=5, scoring="f1_macro")
Let's switch to a binary task for a moment.
In [ ]:
# Element-wise parity of the labels: even labels become 0, odd labels become 1.
# This is the two-valued target used by the binary examples.
y % 2
In [ ]:
# Cross-validate a LinearSVC against the parity labels (y % 2).
# Neither `cv` nor `scoring` is given, so the library defaults are used for both.
cross_val_score(LinearSVC(), X, y % 2)
In [ ]:
# Cross-validate a LinearSVC against the parity labels (y % 2), scored with
# scoring="average_precision"; `cv` is left at the library default.
cross_val_score(LinearSVC(), X, y % 2, scoring="average_precision")
In [ ]:
# Cross-validate a LinearSVC against the parity labels (y % 2), scored with
# scoring="roc_auc"; `cv` is left at the library default.
cross_val_score(LinearSVC(), X, y % 2, scoring="roc_auc")
In [ ]:
from sklearn.metrics.scorer import SCORERS
# Print the keys of the SCORERS mapping.
# NOTE(review): presumably these are the strings accepted by `scoring=` -- confirm.
print(SCORERS.keys())
There are other ways to do cross-validation.
In [ ]:
from sklearn.cross_validation import ShuffleSplit
# Build a ShuffleSplit over all len(X) samples (10 iterations, test_size=.4)
# and hand it to cross_val_score as the `cv` strategy.
shuffle_split = ShuffleSplit(len(X), n_iter=10, test_size=.4)
cross_val_score(LinearSVC(), X, y, cv=shuffle_split)
In [ ]:
from sklearn.cross_validation import StratifiedKFold, KFold, ShuffleSplit
def plot_cv(cv, n_samples):
    """Draw an image showing which samples each split of ``cv`` puts in its test set.

    One boolean row of length ``n_samples`` is built per split, with True at
    the positions given by that split's test indices. The rows are stacked and
    shown with ``plt.imshow`` in a new 10x4 figure whose subplot margins are
    removed.

    Parameters
    ----------
    cv : iterable
        Yields ``(train, test)`` pairs; only ``test`` is used, as an index
        into a length-``n_samples`` array.
    n_samples : int
        Length of each mask row.
    """
    fold_masks = []
    for _train_idx, test_idx in cv:
        held_out = np.zeros(n_samples, dtype=bool)
        held_out[test_idx] = True
        fold_masks.append(held_out)

    # One figure for all splits: rows are splits, columns are sample positions.
    plt.figure(figsize=(10, 4))
    plt.subplots_adjust(left=0, bottom=0, right=1, top=1)
    plt.imshow(fold_masks, interpolation='none')
In [ ]:
# Plot the test-set masks of a StratifiedKFold built from the labels y with n_folds=5.
plot_cv(StratifiedKFold(y, n_folds=5), len(y))
In [ ]:
# Plot the test-set masks of a KFold over len(iris.target) samples with n_folds=5.
plot_cv(KFold(len(iris.target), n_folds=5), len(iris.target))
In [ ]:
# Plot the test-set masks of a ShuffleSplit over len(iris.target) samples,
# using n_iter=20 and test_size=.2.
plot_cv(ShuffleSplit(len(iris.target), n_iter=20, test_size=.2),
len(iris.target))
In [ ]:
# %load solutions/cross_validation_iris.py