In [9]:
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.datasets import load_breast_cancer
import numpy as np
from functools import reduce
# Import our custom utilities and force a reload so that edits made to the
# modules on disk are picked up without restarting the kernel.
# NOTE: `imp` has been deprecated since Python 3.4 and was removed in
# Python 3.12; `importlib.reload` is the supported equivalent.
from importlib import reload
from utils import irf_jupyter_utils
from utils import irf_utils
reload(irf_jupyter_utils)
reload(irf_utils)
Out[9]:
In [10]:
# Seed NumPy's global RNG first so the run is repeatable
# (presumably the irf_utils helpers below draw from it -- confirm in irf_utils).
np.random.seed(12)

# Pick the feature paths, then build the tree from them.
random_paths = irf_utils.select_random_path()
tree = irf_utils.build_tree(
    feature_paths=random_paths,
    max_depth=3,
    noisy_split=False,
    num_splits=5,
)
In [11]:
# Show the value held by the root node (read through its private `_val` attribute).
root_value = tree._val
print("Root:\n", root_value)
#print("Some child:\n", tree.children[0].children[1]._val)
In [12]:
# If noisy split is False, this should pass.
# Expected size: 1 root + 5 nodes + 5**2 nodes, which lines up with the
# num_splits=5 / max_depth=3 arguments passed to build_tree (presumably one
# level per depth step with 5 children per node -- confirm in irf_utils).
expected_num_nodes = 1 + 5 + 5**2
# `assert` is a statement, not a function: keep the condition unparenthesised
# so that adding a message can never turn it into an always-true tuple.
assert len(tree) == expected_num_nodes, (
    "expected {} nodes in the tree, found {}".format(expected_num_nodes, len(tree)))
#assert(len(tree) == 6)
In [13]:
# Materialise the depth-first traversal into a list so the notebook displays it.
[*tree.traverse_depth_first()]
Out[13]:
In [49]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
# Load the breast-cancer dataset and split it, sending 90% of the rows to the
# training set; the split is seeded so it is repeatable.
raw_data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    raw_data.data,
    raw_data.target,
    train_size=0.9,
    random_state=2017,
)

# Fit a small, seeded random forest on the training split.
rf = RandomForestClassifier(n_estimators=10, random_state=2018)
rf.fit(X_train, y_train)

# Take the first fitted tree of the ensemble and extract its data with the
# project helper, starting from node 0.
estimator0 = rf.estimators_[0]
estimator0_out = irf_utils.get_tree_data(
    X_train=X_train,
    dtree=estimator0,
    root_node_id=0,
)
print(estimator0_out['all_leaf_nodes'])
In [50]:
# Import our custom utilities and force a reload so that edits made to the
# modules on disk are picked up without restarting the kernel.
# NOTE: `imp` has been deprecated since Python 3.4 and was removed in
# Python 3.12; `importlib.reload` is the supported equivalent.
from importlib import reload
from utils import irf_jupyter_utils
from utils import irf_utils
reload(irf_jupyter_utils)
reload(irf_utils)
Out[50]:
In [51]:
# Filter the extracted tree data with bin_class_type=1
# (presumably keeps only the leaves that predict class 1 -- confirm in irf_utils).
estimator0_out_fltr = irf_utils.filter_leaves_classifier(
    dtree_data=estimator0_out,
    bin_class_type=1,
)
In [52]:
# Display the filtered tree data as this cell's output.
estimator0_out_fltr
Out[52]:
In [53]:
# Summarise the per-leaf class entries of the unfiltered tree data: how many
# entries there are, and their sum (the count of 1-valued entries if the
# values are 0/1 -- presumably the case for this binary problem).
leaf_classes = estimator0_out['all_leaf_node_classes']
print("Total Number of classes", len(leaf_classes), sep=":\n")
print("Total Number of 1-value classes", sum(leaf_classes), sep=":\n")
In [55]:
# Count the entries that survived filtering; presumably this should match the
# 1-value total computed from the unfiltered data -- verify.
filtered_leaf_depths = estimator0_out_fltr['f_leaf_nodes_depths']
print("Total Number of 1-value classes", len(filtered_leaf_depths), sep=":\n")
In [ ]: