In [ ]:
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
In [ ]:
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
# Load the iris dataset and pull out the feature matrix and target labels.
iris = load_iris()
X = iris.data
y = iris.target

# Number of rows in X, reused by the plotting cells further down.
n_samples = len(X)

# Quick sanity check of what was loaded.
print(X.shape)
print(y.shape)
print(y)
In [ ]:
# ``sklearn.cross_validation`` was deprecated in scikit-learn 0.18 and removed
# in 0.20; ``train_test_split`` now lives in ``sklearn.model_selection``.
from sklearn.model_selection import train_test_split

# Hold out part of the data for evaluation. The fixed ``random_state`` makes
# the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
print(X_train.shape)
print(X_test.shape)
In [ ]:
# Build a k-nearest-neighbours classifier with default settings and fit it on
# the training split (``fit`` returns the fitted estimator itself).
classifier = KNeighborsClassifier().fit(X_train, y_train)

# Score the fitted model on the held-out split; as the last expression in the
# cell, the value is displayed as the cell output.
classifier.score(X_test, y_test)
In [ ]:
# ``sklearn.cross_validation`` was deprecated in scikit-learn 0.18 and removed
# in 0.20; ``cross_val_score`` now lives in ``sklearn.model_selection``.
from sklearn.model_selection import cross_val_score

# Cross-validate on the full dataset using the library's default splitting.
# NOTE(review): the default number of folds depends on the installed
# scikit-learn version -- pass ``cv=`` explicitly if a fixed count matters.
scores = cross_val_score(classifier, X, y)
print(scores)
print(np.mean(scores))
In [ ]:
# Same evaluation as above, but explicitly requesting ``cv=5``; the resulting
# per-split scores are displayed as the cell output.
cross_val_score(estimator=classifier, X=X, y=y, cv=5)
In [ ]:
from sklearn.cross_validation import KFold, StratifiedKFold, ShuffleSplit, LeavePLabelOut
In [ ]:
cv = StratifiedKFold(iris.target, n_folds=5)
for train, test in cv:
print(test)
In [ ]:
def plot_cv(cv, n_samples):
masks = []
for train, test in cv:
mask = np.zeros(n_samples, dtype=bool)
mask[test] = 1
masks.append(mask)
plt.matshow(masks)
In [ ]:
cv = StratifiedKFold(y, n_folds=5)
plot_cv(cv, n_samples)
In [ ]:
cv = KFold(n_samples, n_folds=5)
plot_cv(cv, n_samples)
In [ ]:
cv = KFold(n_samples, n_folds=5, shuffle=True)
plot_cv(cv, n_samples)
In [ ]:
cv = KFold(n_samples, n_folds=10)
plot_cv(cv, n_samples)
In [ ]:
cv = ShuffleSplit(n_samples, n_iter=5, test_size=.2)
plot_cv(cv, n_samples)
In [ ]:
cv = ShuffleSplit(n_samples, n_iter=20, test_size=.2)
plot_cv(cv, n_samples)
In [ ]:
cv = ShuffleSplit(n_samples, n_iter=5, test_size=.2)
cross_val_score(classifier, X, y, cv=cv)
In [ ]: