Trees and Forests


In [ ]:
%matplotlib nbagg
import numpy as np
import matplotlib.pyplot as plt

Decision Tree Classification


In [ ]:
# Pull in the decision-tree demo from the `plots` helper module and run it.
# NOTE(review): `plots` is presumably a module shipped alongside this notebook,
# and the call presumably renders an interactive figure — confirm in plots.py.
from plots import plot_tree_interactive
plot_tree_interactive()

Random Forests


In [ ]:
# Pull in the random-forest demo from the `plots` helper module and run it.
# NOTE(review): `plots` is presumably a module shipped alongside this notebook,
# and the call presumably renders an interactive figure — confirm in plots.py.
from plots import plot_forest_interactive
plot_forest_interactive()

Selecting the Optimal Estimator via Cross-Validation


In [ ]:
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier

# `sklearn.grid_search` and `sklearn.cross_validation` were deprecated in
# scikit-learn 0.18 and removed in 0.20; their contents now live in
# `sklearn.model_selection`. Fall back to the old locations only so the cell
# still runs on very old installations.
try:
    from sklearn.model_selection import GridSearchCV, train_test_split
except ImportError:  # scikit-learn < 0.18
    from sklearn.grid_search import GridSearchCV
    from sklearn.cross_validation import train_test_split

# Load the handwritten-digits dataset as a feature matrix and a label vector.
digits = load_digits()
X, y = digits.data, digits.target

# Hold out a test split; the fixed seed keeps the split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Seed the forest as well: without it every Restart & Run All grows different
# trees, so the scores reported below would not be reproducible.
rf = RandomForestClassifier(n_estimators=200, n_jobs=-1, random_state=42)

# Hyper-parameter grid: every combination of these values is cross-validated.
parameters = {'max_features': ['sqrt', 'log2'],
              'max_depth': [5, 7, 9]}

# Search the grid using the training split only; X_test stays untouched.
clf_grid = GridSearchCV(rf, parameters)
clf_grid.fit(X_train, y_train)

In [ ]:
# Score the fitted grid search on the same split it was trained on. Compare
# this with the held-out test score to gauge how much the model overfits.
clf_grid.score(X_train, y_train)

In [ ]:
# Score on the held-out test split, which was not passed to `fit` above.
clf_grid.score(X_test, y_test)

Exercises

  • Plot the validation curve for the maximum depth of a decision tree on the digits dataset.
  • Plot the validation curve for max_features of a random forest on the digits dataset.

In [ ]:
# %load solutions/forests.py