NOTE: This module's code was partly taken from Andreas Mueller's Advanced scikit-learn O'Reilly course.
It is used only to explore the scikit-learn random forest object in a systematic manner.
I've added more code to it to understand how to generate tree plots for random forests.
In [21]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
In [8]:
%%bash
# Print the current working directory, then list its contents.
# NOTE(review): presumably a sanity check that the `figures` package imported
# in the next cell is reachable from here — confirm.
pwd
ls
In [12]:
# Import the `plot_interactive_tree` helper module from `figures` and invoke
# its `plot_tree_interactive()` entry point.
# NOTE(review): `figures` is not shown in this notebook; presumably the call
# draws an interactive view of a single decision tree — confirm.
from figures import plot_interactive_tree
plot_interactive_tree.plot_tree_interactive()
In [15]:
# Import the `plot_interactive_forest` helper module from `figures` and invoke
# its `plot_forest_interactive()` entry point.
# NOTE(review): `figures` is not shown in this notebook; presumably the call
# draws an interactive view of a random forest — confirm.
from figures import plot_interactive_forest
plot_interactive_forest.plot_forest_interactive()
In [36]:
from sklearn import tree
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier
# `sklearn.grid_search` and `sklearn.cross_validation` were deprecated in
# scikit-learn 0.18 and removed in 0.20; `sklearn.model_selection` provides
# the same GridSearchCV and train_test_split.
from sklearn.model_selection import GridSearchCV, train_test_split

# Load the digits dataset and separate the feature matrix from the labels.
digits = load_digits()
X, y = digits.data, digits.target

# Fixed random_state so the train/test partition is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Base estimator for the search: 200 trees, n_jobs=-1 to use all available cores.
rf = RandomForestClassifier(n_estimators=200, n_jobs=-1)

# Grid of candidate settings: how many features each split may consider and
# how deep each tree may grow (2 x 3 = 6 combinations).
parameters = {
    'max_features': ['sqrt', 'log2'],
    'max_depth': [5, 7, 9],
}

# Cross-validated exhaustive search over the grid, fitted on the training split.
clf_grid = GridSearchCV(rf, parameters)
clf_grid.fit(X_train, y_train)
Out[36]:
In [17]:
# Score the fitted grid search on the training split (value is shown as the cell output).
clf_grid.score(X_train, y_train)
Out[17]:
In [18]:
# Score the same fitted grid search on the held-out test split for comparison
# with the training score above.
clf_grid.score(X_test, y_test)
Out[18]:
In [19]:
# Show which combination from the `max_features` / `max_depth` grid the search selected.
clf_grid.best_params_
Out[19]:
In [20]:
# Show the estimator the grid search kept for the selected parameter combination.
clf_grid.best_estimator_
Out[20]:
In [80]:
# Rebind `rf` to a much smaller forest (5 trees instead of the 200 used for the
# grid search) and fit it on the training split; the following cells inspect
# its individual trees through `rf.estimators_`.
rf = RandomForestClassifier(n_estimators=5, n_jobs=-1)
rf.fit(X_train, y_train)
Out[80]:
In [81]:
# Score the 5-tree forest on the held-out test split.
rf.score(X_test, y_test)
Out[81]:
In [82]:
# Gather the depth of every fitted tree in the forest, then print them as one list.
tree_depths = [fitted.tree_.max_depth for fitted in rf.estimators_]
print(tree_depths)
In [86]:
# Print the depth of the first tree only. `rf.estimators_` is a list, so the
# first tree is indexed directly instead of enumerating the whole forest and
# ignoring every position except 0.
print(rf.estimators_[0].tree_.max_depth)
In [87]:
# Export only the first tree of the forest; it is indexed directly rather than
# found by looping over every estimator.
# NOTE(review): no `out_file` is passed, so the outcome depends on the installed
# scikit-learn release (older releases write a "tree.dot" file, newer ones
# return the DOT source as a string) — confirm against the version in use.
first_tree_dot = tree.export_graphviz(rf.estimators_[0])
In [88]:
from sklearn import tree
# Write the Graphviz description of the first tree in the forest to disk.
tree_index = 0
first_tree = rf.estimators_[tree_index]

# export_graphviz emits DOT *text*, not an image, so the file carries a .dot
# extension; it can be turned into a picture later with Graphviz, e.g.
# `dot -Tpng tree_0.dot -o tree_0.png`.
dot_path = 'tree_{}.dot'.format(tree_index)
with open(dot_path, 'w') as dot_file:
    # The return value is not kept: when a file handle is supplied the DOT
    # source is written to that handle.
    tree.export_graphviz(first_tree, out_file=dot_file)
In [ ]:
import io
from scipy import misc
from sklearn import tree
import pydot
def show_tree(decisionTree, file_path):
    """Render a fitted decision tree to a PNG file and display the image.

    The tree is exported as Graphviz DOT text into an in-memory buffer,
    converted to a PNG at ``file_path`` through pydot, and the written image
    is then drawn with matplotlib.

    Parameters
    ----------
    decisionTree
        Fitted tree estimator, passed straight to ``tree.export_graphviz``.
    file_path : str
        Path the PNG is written to and then read back from.
    """
    dot_buffer = io.StringIO()
    tree.export_graphviz(decisionTree, out_file=dot_buffer)

    # graph_from_dot_data yields a sequence of graphs; the single-element
    # unpacking asserts that exactly one graph was parsed.
    (graph,) = pydot.graph_from_dot_data(dot_buffer.getvalue())
    graph.write_png(file_path)

    # scipy.misc.imread has been removed from SciPy (1.2+); matplotlib, which
    # the notebook already imports as `plt`, reads PNG files itself.
    image = plt.imread(file_path)
    plt.imshow(image)
In [ ]:
from sklearn import tree
# Render only the first tree of the forest. Every call to show_tree writes to
# the same 'test.png', so running it for each estimator would merely leave the
# last tree on disk and on screen.
show_tree(rf.estimators_[0], 'test.png')