In [1]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
iris = load_iris()
#from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed in scikit-learn 0.20
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
from scipy import ndimage
from matplotlib.colors import ListedColormap, colorConverter
# mglearn helpers (adapted below): see https://github.com/amueller/introduction_to_ml_with_python/blob/master/mglearn/
In [2]:
X = iris.data[:, 2:]  # keep only the two petal features (length, width)
y = iris.target       # the target variable: species label (0, 1 or 2)
# random_state : int or RandomState
# pseudo-random number generator state used for the shuffled sampling
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=42, train_size=.25, test_size=.75)
forest = RandomForestClassifier(n_estimators=5, random_state=2)
forest.fit(X_train, y_train)
Out[2]:
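A quick sanity check, not in the original notebook: stratify=y should keep the three species roughly equally represented in both splits despite the unusual 25%/75% train/test ratio, and the fitted forest should hold five decision trees, each grown on a bootstrap sample of X_train.
In [ ]:
# sanity-check sketch (added; uses only documented scikit-learn attributes)
print(np.bincount(y_train), np.bincount(y_test))   # class counts per split
for i, tree in enumerate(forest.estimators_):
    print("tree {}: {} nodes, depth {}".format(
        i + 1, tree.tree_.node_count, tree.tree_.max_depth))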
In [3]:
# warning - adapted (copy-paste) from https://github.com/amueller/introduction_to_ml_with_python/
# (no license found)
cm2 = ListedColormap(['#0000aa', '#ff2020'])  # mglearn's two-colour map, written for binary problems

def plot_tree_partition(X, y, tree, ax=None):
    if ax is None:
        ax = plt.gca()
    eps = X.std() / 2.
    x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps
    y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps
    xx = np.linspace(x_min, x_max, 1000)
    yy = np.linspace(y_min, y_max, 1000)
    X1, X2 = np.meshgrid(xx, yy)
    X_grid = np.c_[X1.ravel(), X2.ravel()]
    # predicted class for every point of a dense grid over the feature space
    Z = tree.predict(X_grid)
    Z = Z.reshape(X1.shape)
    # tree.apply returns the index of the leaf each grid point lands in;
    # a Laplace filter is non-zero exactly where that index changes,
    # i.e. on the borders between the tree's rectangular partitions
    faces = tree.apply(X_grid)
    faces = faces.reshape(X1.shape)
    border = ndimage.laplace(faces) != 0
    # note: cm2 and levels=[0, .5, 1] come from mglearn's binary examples,
    # so the region of the third iris class is left unfilled
    ax.contourf(X1, X2, Z, alpha=.4, cmap=cm2, levels=[0, .5, 1])
    ax.scatter(X1[border], X2[border], marker='.', s=.2)
    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)
    ax.set_xticks(())
    ax.set_yticks(())
    return ax
def plot_2d_separator(classifier, X, fill=False, ax=None, eps=None, alpha=1,
                      cm=cm2, linewidth=None, threshold=None, linestyle="solid"):
    # binary only: both branches below assume a two-class problem
    if eps is None:
        eps = X.std() / 2.
    if ax is None:
        ax = plt.gca()
    x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps
    y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps
    xx = np.linspace(x_min, x_max, 100)
    yy = np.linspace(y_min, y_max, 100)
    X1, X2 = np.meshgrid(xx, yy)
    X_grid = np.c_[X1.ravel(), X2.ravel()]
    try:
        decision_values = classifier.decision_function(X_grid)
        levels = [0] if threshold is None else [threshold]
        fill_levels = [decision_values.min()] + levels + [decision_values.max()]
    except AttributeError:
        # no decision_function: fall back to the predicted probability of class 1
        decision_values = classifier.predict_proba(X_grid)[:, 1]
        levels = [.5] if threshold is None else [threshold]
        fill_levels = [0] + levels + [1]
    if fill:
        ax.contourf(X1, X2, decision_values.reshape(X1.shape),
                    levels=fill_levels, alpha=alpha, cmap=cm)
    else:
        ax.contour(X1, X2, decision_values.reshape(X1.shape), levels=levels,
                   colors="black", alpha=alpha, linewidths=linewidth,
                   linestyles=linestyle, zorder=5)
    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)
    ax.set_xticks(())
    ax.set_yticks(())
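plot_2d_separator is written for two-class problems: when a classifier lacks decision_function it falls back to predict_proba[:, 1], the probability of class 1 alone, so on the three iris species the forest panel below shows only one slice of the decision surface. A hedged alternative, sketched here and not part of mglearn, is to colour the grid by the hard predictions instead, which works for any number of classes.
In [ ]:
# sketch of a multiclass region plot (added; plot_multiclass_regions is my name, not mglearn's)
def plot_multiclass_regions(classifier, X, ax=None, eps=None, alpha=.4):
    if ax is None:
        ax = plt.gca()
    if eps is None:
        eps = X.std() / 2.
    x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps
    y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps
    X1, X2 = np.meshgrid(np.linspace(x_min, x_max, 200),
                         np.linspace(y_min, y_max, 200))
    # colour each grid point by its hard class prediction (0, 1 or 2)
    Z = classifier.predict(np.c_[X1.ravel(), X2.ravel()]).reshape(X1.shape)
    ax.contourf(X1, X2, Z, levels=[-.5, .5, 1.5, 2.5], alpha=alpha,
                cmap=ListedColormap(['r', 'g', 'b']))
    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)
    return ax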
In [7]:
fig, axes = plt.subplots(2, 3, figsize=(20, 10))
# first five panels: the individual trees of the forest; last panel: the forest itself
for i, (ax, tree) in enumerate(zip(axes.ravel(), forest.estimators_)):
    this_tree_accuracy = tree.score(X_test, y_test)
    ax.set_title("Tree {}. Accuracy: {:.3f}".format(i + 1, this_tree_accuracy))
    plot_tree_partition(X_train, y_train, tree, ax=ax)
    ax.scatter(X_train[:, 0], X_train[:, 1], c=np.array(['r', 'g', 'b'])[y_train], s=60)

plot_2d_separator(forest, X_train, fill=True, ax=axes[-1, -1], alpha=.4)
forest_accuracy = forest.score(X_test, y_test)
axes[-1, -1].set_title("Random forest. Accuracy: {:.3f}".format(forest_accuracy))
# scatter on the last axes explicitly rather than relying on plt.scatter's current axes
axes[-1, -1].scatter(X_train[:, 0], X_train[:, 1], c=np.array(['r', 'g', 'b'])[y_train], s=60)
print('Adapted from the random forest code in "Introduction to Machine Learning with Python" >>>')
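A follow-up sketch, not in the original cell: the fitted forest also exposes impurity-based feature importances averaged over its five trees, which shows how much each petal measurement contributes to the splits.
In [ ]:
# added sketch: averaged feature importances of the two petal columns
for name, importance in zip(iris.feature_names[2:], forest.feature_importances_):
    print("{}: {:.3f}".format(name, importance))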