In [ ]:
%matplotlib nbagg
import matplotlib.pyplot as plt
import numpy as np

Grid Searches

Grid search with built-in cross-validation


In [ ]:
# `sklearn.grid_search` was deprecated in scikit-learn 0.18 and removed in
# 0.20; GridSearchCV now lives in `sklearn.model_selection`. The fallback
# keeps the notebook importable on very old installations.
try:
    from sklearn.model_selection import GridSearchCV
except ImportError:  # scikit-learn < 0.18
    from sklearn.grid_search import GridSearchCV
from sklearn.svm import SVC

In [ ]:
from sklearn.datasets import load_digits
# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed in
# 0.20; `train_test_split` now lives in `sklearn.model_selection`. The fallback
# keeps the notebook importable on very old installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # scikit-learn < 0.18
    from sklearn.cross_validation import train_test_split

# Load the digits dataset and hold out a test set (train_test_split's default
# split size); random_state=0 makes the split reproducible.
digits = load_digits()
X_train, X_test, y_train, y_test = train_test_split(digits.data,
                                                    digits.target, random_state=0)

Define parameter grid:


In [ ]:
import numpy as np

# Candidate values are powers of ten:
#   C     -> 10^-3 ... 10^2  (6 values)
#   gamma -> 10^-5 ... 10^-1 (5 values)
param_grid = dict(
    C=np.power(10., np.arange(-3, 3)),
    gamma=np.power(10., np.arange(-5, 0)),
)

# Show plain decimals rather than scientific notation when printing arrays.
np.set_printoptions(suppress=True)
print(param_grid)

In [ ]:
# Wrap an SVC in a grid search over `param_grid`; verbose=3 prints progress
# for the individual fits.
grid_search = GridSearchCV(estimator=SVC(), param_grid=param_grid, verbose=3)

A GridSearchCV object behaves just like a normal classifier.


In [ ]:
# Run the cross-validated search over every parameter combination.
grid_search.fit(X=X_train, y=y_train)

In [ ]:
# Predict labels for the held-out test samples with the fitted search object.
grid_search.predict(X=X_test)

In [ ]:
# Score the fitted search object on the held-out test set.
grid_search.score(X=X_test, y=y_test)

In [ ]:
# Parameter combination selected by the grid search.
grid_search.best_params_

In [ ]:
# Extract the mean cross-validation score of every parameter combination.
# scikit-learn >= 0.18 exposes the results as `cv_results_`; the legacy
# `grid_scores_` attribute was removed in 0.20 and is kept only as a fallback
# for a search object created by the old `sklearn.grid_search` module.
if hasattr(grid_search, 'cv_results_'):
    scores = grid_search.cv_results_['mean_test_score']
else:
    scores = [x.mean_validation_score for x in grid_search.grid_scores_]

# Lay the flat score list out as one row per C and one column per gamma.
# The shape is derived from the grid itself so it stays correct when
# `param_grid` is edited.
C_values = param_grid['C']
gamma_values = param_grid['gamma']
scores = np.array(scores).reshape(len(C_values), len(gamma_values))

# Heat map of the scores, with the actual parameter values as tick labels.
plt.matshow(scores)
plt.xlabel('gamma')
plt.ylabel('C')
plt.colorbar()
plt.xticks(np.arange(len(gamma_values)), gamma_values)
plt.yticks(np.arange(len(C_values)), C_values);

Nested Cross-validation in scikit-learn:


In [ ]:

Exercises

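Use GridSearchCV to adjust n_neighbors of KNeighborsClassifier. Visualize the cross-validation scores stored in grid_search.cv_results_ (grid_search.grid_scores_ in scikit-learn versions before 0.18).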


In [ ]:
from sklearn.neighbors import KNeighborsClassifier

In [ ]:
# %load solutions/grid_search_k_neighbors.py