Key Requirements for the iRF scikit-learn implementation

  • This notebook documents the main requirements for the iRF implementation

Typical Setup

Import the required dependencies

  • In particular irf_utils and irf_jupyter_utils

In [86]:
%matplotlib inline
import matplotlib.pyplot as plt

from sklearn.datasets import load_breast_cancer
import numpy as np
from functools import reduce

# Import our custom utilities
from imp import reload
from utils import irf_jupyter_utils
from utils import irf_utils
reload(irf_jupyter_utils)
reload(irf_utils)


Out[86]:
<module 'utils.irf_utils' from '/Users/shamindras/PERSONAL/LEARNING/REPOS/scikit-learn-sandbox/jupyter/utils/irf_utils.py'>

Step 1: Fit the Initial Random Forest

  • Just fit every feature with equal weights, as in the usual random forest code, e.g. RandomForestClassifier in scikit-learn

In [87]:
# NOTE(review): this rebinds the imported `load_breast_cancer` function to the
# object it returns, so re-running this cell calls that object instead of the
# loader (presumably a TypeError — confirm). The result is not referenced again
# in the visible cells; consider a distinct name such as `breast_cancer_data`.
load_breast_cancer = load_breast_cancer()

In [88]:
# Unpack the five values returned by the project helper: train/test features,
# train/test outcomes, and the random forest object `rf` used by later cells.
# n_estimators=10 is forwarded to the helper.
# NOTE(review): presumably `rf` is already fitted on the training split — confirm in irf_jupyter_utils.
X_train, X_test, y_train, y_test, rf = irf_jupyter_utils.generate_rf_example(n_estimators=10)

Check out the data


In [89]:
# Number of leading rows to preview. The labels below are derived from this
# value so they cannot drift from the slice that is actually printed
# (previously the labels said "first 5 rows" while only 2 rows were shown).
n_preview_rows = 2

# Shapes of the train/test splits
print("Training feature dimensions", X_train.shape, sep = ":\n")
print("\n")
print("Training outcome dimensions", y_train.shape, sep = ":\n")
print("\n")
print("Test feature dimensions", X_test.shape, sep = ":\n")
print("\n")
print("Test outcome dimensions", y_test.shape, sep = ":\n")
print("\n")

# Preview of the leading training rows
print("first %d rows of the training set features" % n_preview_rows,
      X_train[:n_preview_rows], sep = ":\n")
print("\n")
print("first %d rows of the training set outcomes" % n_preview_rows,
      y_train[:n_preview_rows], sep = ":\n")


Training feature dimensions:
(512, 30)


Training outcome dimensions:
(512,)


Test feature dimensions:
(57, 30)


Test outcome dimensions:
(57,)


first 5 rows of the training set features:
[[  1.98900000e+01   2.02600000e+01   1.30500000e+02   1.21400000e+03
    1.03700000e-01   1.31000000e-01   1.41100000e-01   9.43100000e-02
    1.80200000e-01   6.18800000e-02   5.07900000e-01   8.73700000e-01
    3.65400000e+00   5.97000000e+01   5.08900000e-03   2.30300000e-02
    3.05200000e-02   1.17800000e-02   1.05700000e-02   3.39100000e-03
    2.37300000e+01   2.52300000e+01   1.60500000e+02   1.64600000e+03
    1.41700000e-01   3.30900000e-01   4.18500000e-01   1.61300000e-01
    2.54900000e-01   9.13600000e-02]
 [  2.01800000e+01   1.95400000e+01   1.33800000e+02   1.25000000e+03
    1.13300000e-01   1.48900000e-01   2.13300000e-01   1.25900000e-01
    1.72400000e-01   6.05300000e-02   4.33100000e-01   1.00100000e+00
    3.00800000e+00   5.24900000e+01   9.08700000e-03   2.71500000e-02
    5.54600000e-02   1.91000000e-02   2.45100000e-02   4.00500000e-03
    2.20300000e+01   2.50700000e+01   1.46000000e+02   1.47900000e+03
    1.66500000e-01   2.94200000e-01   5.30800000e-01   2.17300000e-01
    3.03200000e-01   8.07500000e-02]]


first 5 rows of the training set outcomes:
[0 0]

Step 2: Get all Random Forest and Decision Tree Data

  • Extract the data for the random forest and for all of its decision trees into a single dictionary
  • This is as required for RIT purposes

In [90]:
# Collect the forest-level and per-tree data for `rf` into one dictionary.
# Later cells read keys such as 'feature_importances', 'rf_obj' and 'dtree0'
# from it.
all_rf_tree_data = irf_utils.get_rf_tree_data(rf=rf,
                                              X_train=X_train, y_train=y_train, 
                                              X_test=X_test, y_test=y_test)
# Uncomment to display the whole dictionary (large output)
#all_rf_tree_data

Step 3: Get the RIT data and produce RITs


In [91]:
# Seed NumPy's global RNG before creating the path sampler so the run is repeatable
np.random.seed(12)
# Create the sampler of leaf-node feature paths that build_tree consumes below.
# NOTE(review): bin_class_type=1 presumably restricts sampling to leaves
# predicting class 1 — confirm in irf_utils.generate_rit_samples.
gen_random_leaf_paths = irf_utils.generate_rit_samples(all_rf_tree_data=all_rf_tree_data, 
                                                       bin_class_type=1)

In [92]:
#for i in range(7):
#    print(next(gen_random_leaf_paths))

In [93]:
# Build a Random Intersection Tree from the sampled feature paths:
# depth limit 3, two children per node, no randomised (noisy) splitting.
# NOTE(review): presumably this draws paths from `gen_random_leaf_paths`, so
# the generator should be recreated before building another tree — confirm.
rit0 = irf_utils.build_tree(feature_paths=gen_random_leaf_paths, 
                            max_depth=3, 
                            noisy_split=False, 
                            num_splits=2)

In [94]:
# Walk the RIT depth-first. Each yielded item is indexed as (label, node);
# print the label followed by the node's intersected feature set (`_val`).
for node in rit0.traverse_depth_first():
    print(node[0], node[1]._val)


0 [ 0 10 20 24 27]
1 [24]
2 []
3 []
4 []

In [95]:
# Print only the leaf nodes' intersected feature sets, numbered from 0
# in the order leaf_nodes() yields them.
for idx, node in enumerate(rit0.leaf_nodes()):
    print(idx, node[1]._val)


0 []
1 []
2 []

In [96]:
# The tree object itself is the root node; show its feature set.
print("Root:\n", rit0._val)
# Example of reaching a nested child (left disabled)
#print("Some child:\n", tree.children[0].children[1]._val)


Root:
 [ 0 10 20 24 27]

In [97]:
# Size of the RIT; the saved output (5) matches the number of nodes printed by
# the depth-first traversal above.
len(rit0)


Out[97]:
5

In [98]:
# Sanity-check the size of rit0 against the largest tree its build parameters
# allow. With num_splits=2 and max_depth=3 a full tree has
# 1 + 2 + 2**2 = 2**3 - 1 = 7 nodes (the previous bound, 1 + 5 + 5**2,
# corresponded to five splits per node and could never fail here).
# The bound assumes noisy_split=False, as used when rit0 was built.
# Equality is not asserted: the tree can be smaller than the full tree
# (rit0 has 5 nodes in the saved output above).
rit0_num_splits, rit0_max_depth = 2, 3
rit0_max_nodes = sum(rit0_num_splits ** depth for depth in range(rit0_max_depth))
assert len(rit0) <= rit0_max_nodes

In [99]:
M = 10

# Fix the global NumPy seed, then create the generator of sampled leaf paths
np.random.seed(12)
gen_random_leaf_paths = irf_utils.generate_rit_samples(
    all_rf_tree_data=all_rf_tree_data,
    bin_class_type=1,
)

# Grow a single RIT from those paths
rit = irf_utils.build_tree(
    feature_paths=gen_random_leaf_paths,
    max_depth=3,
    noisy_split=False,
    num_splits=2,
)

# Feature sets of every node, in depth-first order
rit_intersected_values = [entry[1]._val
                          for entry in rit.traverse_depth_first()]

# Feature sets of the leaves only, i.e. the final intersections,
# and the union of all of them
rit_leaf_node_values = [entry[1]._val for entry in rit.leaf_nodes()]
rit_leaf_node_union_value = reduce(np.union1d, rit_leaf_node_values)

# Bundle everything into one dictionary and show the per-node values
rit_outputs = dict(
    rit=rit,
    rit_intersected_values=rit_intersected_values,
    rit_leaf_node_values=rit_leaf_node_values,
    rit_leaf_node_union_value=rit_leaf_node_union_value,
)
rit_outputs['rit_intersected_values']


Out[99]:
[array([ 0, 10, 20, 24, 27]),
 array([24]),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64)]

In [100]:
def get_rit_tree_data(all_rf_tree_data,
                      bin_class_type=1,
                      random_state=12,
                      #M=10, # number of trees (RIT) to build
                      feature_paths=None,
                      max_depth=3,
                      noisy_split=False,
                      num_splits=2):
    """
    A wrapper for the Random Intersection Trees (RIT) algorithm

    Seeds NumPy's global random number generator, creates the leaf-path
    generator with `irf_utils.generate_rit_samples`, builds a single RIT
    with `irf_utils.build_tree` and collects the node values of that tree.

    Parameters
    ----------
    all_rf_tree_data : dict
        Random forest / decision tree data, forwarded unchanged to
        `irf_utils.generate_rit_samples`.
    bin_class_type : int, optional (default=1)
        Forwarded unchanged to `irf_utils.generate_rit_samples`.
    random_state : int or None, optional (default=12)
        Value passed to `np.random.seed` before the paths are generated.
        It was previously ignored in favour of a hard-coded seed of 12;
        the default keeps that behaviour.
    feature_paths : ignored
        Kept only so existing keyword calls keep working. The paths are
        always regenerated from `all_rf_tree_data`. The default is now
        `None` instead of a notebook-global generator, so defining the
        function no longer depends on earlier cells having been run.
    max_depth, noisy_split, num_splits
        Forwarded unchanged to `irf_utils.build_tree`.

    Returns
    -------
    rit_outputs : dict
        "rit": the tree returned by `irf_utils.build_tree`;
        "rit_intersected_values": `_val` of every node, depth-first order;
        "rit_leaf_node_values": `_val` of every leaf node;
        "rit_leaf_node_union_value": `np.union1d` folded over the leaf values.
    """
    # Set the random seed for reproducibility (honours the caller's value)
    np.random.seed(random_state)

    # Create the weighted randomly sampled paths as a generator
    gen_random_leaf_paths = irf_utils.generate_rit_samples(
        all_rf_tree_data=all_rf_tree_data,
        bin_class_type=bin_class_type)

    # Create the RIT object
    rit = irf_utils.build_tree(feature_paths=gen_random_leaf_paths,
                               max_depth=max_depth,
                               noisy_split=noisy_split,
                               num_splits=num_splits)

    # Get the intersected node values
    # CHECK remove this for the final value
    rit_intersected_values = [node[1]._val
                              for node in rit.traverse_depth_first()]
    # Leaf node values i.e. final intersected features
    rit_leaf_node_values = [node[1]._val for node in rit.leaf_nodes()]
    rit_leaf_node_union_value = reduce(np.union1d, rit_leaf_node_values)
    rit_outputs = {"rit": rit,
                   "rit_intersected_values": rit_intersected_values,
                   "rit_leaf_node_values": rit_leaf_node_values,
                   "rit_leaf_node_union_value": rit_leaf_node_union_value}
    return rit_outputs

In [101]:
# Call the notebook-local wrapper defined above with the same settings as the
# step-by-step cell, so its result can be compared with that cell's output.
a = get_rit_tree_data(all_rf_tree_data=all_rf_tree_data,
                      bin_class_type=1,
                      random_state=12, 
                      #M=10, # number of trees (RIT) to build
                      max_depth=3, 
                      noisy_split=False, 
                      num_splits=2)

In [102]:
# Per-node feature sets returned by the wrapper, in depth-first order
a['rit_intersected_values']


Out[102]:
[array([ 0, 10, 20, 24, 27]),
 array([24]),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64)]

In [103]:
# Same settings, but using the irf_utils version of the wrapper, which also
# takes M; the result is indexed per tree below (key 'rit0').
# NOTE(review): M is presumably the number of RITs to build — confirm in irf_utils.
b = irf_utils.get_rit_tree_data(all_rf_tree_data=all_rf_tree_data,
                      bin_class_type=1,
                      random_state=12, 
                      M=1,
                      max_depth=3, 
                      noisy_split=False, 
                      num_splits=2)

In [106]:
# Per-node feature sets of the entry stored under 'rit0'
b['rit0']['rit_intersected_values']


Out[106]:
[array([ 0, 10, 20, 24, 27]),
 array([24]),
 array([], dtype=int64),
 array([], dtype=int64),
 array([], dtype=int64)]

In [107]:
# Repeat the irf_utils call with M=10; the next cell inspects the entry
# stored under 'rit1'.
c = irf_utils.get_rit_tree_data(all_rf_tree_data=all_rf_tree_data,
                      bin_class_type=1,
                      random_state=12, 
                      M=10,
                      max_depth=3, 
                      noisy_split=False, 
                      num_splits=2)

In [108]:
# Full output dictionary stored under 'rit1'
c['rit1']


Out[108]:
{'rit': <utils.irf_utils.RITTree at 0x117377630>,
 'rit_intersected_values': [array([ 6,  9, 10, 20, 22, 27, 29]),
  array([ 6,  9, 10, 20, 27]),
  array([6]),
  array([ 6,  9, 10, 20, 27]),
  array([10, 20, 27]),
  array([], dtype=int64),
  array([20])],
 'rit_leaf_node_union_value': array([ 6,  9, 10, 20, 27]),
 'rit_leaf_node_values': [array([6]),
  array([ 6,  9, 10, 20, 27]),
  array([], dtype=int64),
  array([20])]}

Perform Manual CHECKS on the irf_utils

  • These should be converted to unit tests and checked with nosetests -v test_irf_utils.py

Step 4: Plot some Data

List Ranked Feature Importances


In [16]:
# List every feature from most to least important, with its importance score
print("Feature ranking:")

# Ranked feature indices and the raw importances they index into
feature_importances_rank_idx = all_rf_tree_data['feature_importances_rank_idx']
feature_importances = all_rf_tree_data['feature_importances']

for f in range(X_train.shape[1]):
    ranked_feature = feature_importances_rank_idx[f]
    ranking_line = "%d. feature %d (%f)" % (f + 1,
                                            ranked_feature,
                                            feature_importances[ranked_feature])
    print(ranking_line)


Feature ranking:
1. feature 20 (0.294270)
2. feature 23 (0.246163)
3. feature 22 (0.095275)
4. feature 7 (0.088542)
5. feature 6 (0.068028)
6. feature 26 (0.034007)
7. feature 27 (0.024631)
8. feature 24 (0.020245)
9. feature 0 (0.015324)
10. feature 28 (0.015231)
11. feature 13 (0.013823)
12. feature 3 (0.009706)
13. feature 1 (0.008211)
14. feature 4 (0.007102)
15. feature 29 (0.006801)
16. feature 25 (0.006115)
17. feature 9 (0.005294)
18. feature 2 (0.005136)
19. feature 10 (0.004665)
20. feature 17 (0.004292)
21. feature 19 (0.003996)
22. feature 12 (0.003848)
23. feature 21 (0.003743)
24. feature 15 (0.003576)
25. feature 5 (0.002950)
26. feature 14 (0.002440)
27. feature 18 (0.001854)
28. feature 16 (0.001778)
29. feature 8 (0.001585)
30. feature 11 (0.001370)

Plot Ranked Feature Importances


In [17]:
# Plot the feature importances of the forest, ordered by rank, with the
# 'feature_importances_std' values drawn as error bars.
# Uses `feature_importances` and `feature_importances_rank_idx` from the
# ranking cell above.
feature_importances_std = all_rf_tree_data['feature_importances_std']
n_features = X_train.shape[1]
bar_positions = np.arange(n_features)

# Explicit figure/axes interface instead of the implicit pyplot state machine
fig, ax = plt.subplots()
ax.bar(bar_positions,
       feature_importances[feature_importances_rank_idx],
       color="r",
       yerr=feature_importances_std[feature_importances_rank_idx],
       align="center")

# Title and axis labels so the figure can be read on its own
ax.set_title("Feature importances")
ax.set_xlabel("Feature index (ordered by importance rank)")
ax.set_ylabel("Feature importance")

# Label each bar with the index of the feature it represents
ax.set_xticks(bar_positions)
ax.set_xticklabels(feature_importances_rank_idx)
ax.set_xlim([-1, n_features])
plt.show()


Manually construct a binary RIT

Filter the list of unique feature paths to those whose leaf node class == 1

  • Get the unique feature paths where the leaf node predicted class is just 1
  • We are just going to get it from the first decision tree for this test case

In [18]:
# Per-leaf unique feature paths and predicted classes of the first tree
uniq_feature_paths = all_rf_tree_data['dtree0']['all_uniq_leaf_paths_features']
leaf_node_classes  = all_rf_tree_data['dtree0']['all_leaf_node_classes']

# Keep only the paths whose leaf predicts class 1
ones_only = [
    feature_path
    for feature_path, leaf_class in zip(uniq_feature_paths, leaf_node_classes)
    if leaf_class == 1
]
ones_only


Out[18]:
[array([ 1,  5,  6, 13, 23, 26]),
 array([ 1,  5,  6,  9, 13, 23, 26]),
 array([ 1,  6, 22, 23, 26]),
 array([ 1,  6, 13, 22, 23, 26]),
 array([ 1,  3, 23, 26, 27]),
 array([ 1,  3, 17, 19, 23, 26, 27]),
 array([ 1,  3, 19, 23, 26, 27]),
 array([ 3, 18, 23, 26]),
 array([ 3,  8, 23, 26]),
 array([ 3,  8, 22, 23, 26]),
 array([ 3,  4,  8, 22, 23, 26]),
 array([22, 23, 26])]

Manually extract the last seven values

  • Just pick the last seven cases; we are going to construct the RIT from them manually
  • We are going to build a binary RIT of depth 3, i.e. at most 2**3 - 1 = 7 intersecting nodes

In [19]:
# Take the last seven class-1 paths: one per node of a full binary tree of
# depth 3 (2**3 - 1 = 7 nodes).
ones_only_seven = ones_only[-7:]
ones_only_seven


Out[19]:
[array([ 1,  3, 17, 19, 23, 26, 27]),
 array([ 1,  3, 19, 23, 26, 27]),
 array([ 3, 18, 23, 26]),
 array([ 3,  8, 23, 26]),
 array([ 3,  8, 22, 23, 26]),
 array([ 3,  4,  8, 22, 23, 26]),
 array([22, 23, 26])]

Manually create the class-1 leaf node paths as a generator

Manually create the binary RIT


In [20]:
# Hand-built binary RIT, intended as a reference for unit tests.
# Layout (each child = parent intersected with the next unused path):
#   node0 (root)
#   |- node1 -> node2, node3
#   `- node4 -> node5, node6
node0 = ones_only_seven[0]

# Children of the root
node1, node4 = (np.intersect1d(node0, ones_only_seven[pos]) for pos in (1, 4))

# Children of node1
node2, node3 = (np.intersect1d(node1, ones_only_seven[pos]) for pos in (2, 3))

# Children of node4
node5, node6 = (np.intersect1d(node4, ones_only_seven[pos]) for pos in (5, 6))

intersected_nodes_seven = [node0, node1, node2, node3, node4, node5, node6]

# Show every node with its label
for idx, node in enumerate(intersected_nodes_seven):
    print("node" + str(idx), node)


node0 [ 1  3 17 19 23 26 27]
node1 [ 1  3 19 23 26 27]
node2 [ 3 23 26]
node3 [ 3 23 26]
node4 [ 3 23 26]
node5 [ 3 23 26]
node6 [23 26]

In [21]:
# Union of the four leaf nodes of the hand-built tree (node2, node3, node5, node6)
rit_output = reduce(np.union1d, (node2, node3, node5, node6))
rit_output


Out[21]:
array([ 3, 23, 26])

Create a binary RIT without the randomized split option


In [22]:
# Wrap the seven paths in a generator, yielding them in list order, so they
# can be passed to build_tree as `feature_paths`.
ones_only_seven_gen = (n for n in ones_only_seven)
# Debug aid (left disabled: iterating here would exhaust the generator)
#for i in range(len(ones_only_seven)):
#    print(next(ones_only_seven_gen))

In [23]:
# Build the RIT from the fixed sequence of seven paths with the same settings
# as the hand-built tree (binary, depth 3, no noisy split) for comparison.
rit_man0 = irf_utils.build_tree(
    feature_paths=ones_only_seven_gen,
    max_depth=3,
    noisy_split=False,
    num_splits=2)

In [24]:
# Root feature set; expected to equal node0 of the hand-built tree
print("Root:\n", rit_man0._val)


Root:
 [ 1  3 17 19 23 26 27]

In [25]:
# Print every node of the built tree under the label of the hand-built node it
# should match: children[0]/children[1] of the root are node1/node4, and their
# children are node2, node3 and node5, node6 respectively.
print("Root:\n", rit_man0._val)
print("node1:\n", rit_man0.children[0]._val)
print("node4:\n", rit_man0.children[1]._val)
print("node2:\n", rit_man0.children[0].children[0]._val)
print("node3:\n", rit_man0.children[0].children[1]._val)
print("node5:\n", rit_man0.children[1].children[0]._val)
print("node6:\n", rit_man0.children[1].children[1]._val)


Root:
 [ 1  3 17 19 23 26 27]
node1:
 [ 1  3 19 23 26 27]
node4:
 [ 3 23 26]
node2:
 [ 3 23 26]
node3:
 [ 3 23 26]
node5:
 [ 3 23 26]
node6:
 [23 26]

In [26]:
# Depth-first listing of all node feature sets of the built tree
for node in rit_man0.traverse_depth_first():
    print(node[1]._val)


[ 1  3 17 19 23 26 27]
[ 1  3 19 23 26 27]
[ 3 23 26]
[ 3 23 26]
[ 3 23 26]
[ 3 23 26]
[23 26]

In [27]:
# Feature sets of the leaf nodes only
for node in rit_man0.leaf_nodes():
    print(node[1]._val)


[ 3 23 26]
[ 3 23 26]
[ 3 23 26]
[23 26]

Check Output of Decision Tree Extracted Data

Decision Tree 0 (First) - Get output

Check the output against the decision tree graph


In [28]:
# Draw the first decision tree of the forest (estimators_[0]) so it can be
# compared with the data extracted under 'dtree0'.
irf_jupyter_utils.draw_tree(decision_tree = all_rf_tree_data['rf_obj'].estimators_[0])


Compare to our dict of extracted data from the tree


In [29]:
# Pretty-print everything that was extracted for the first decision tree
irf_jupyter_utils.pretty_print_dict(inp_dict = all_rf_tree_data['dtree0'])


{   'all_leaf_node_classes': [   1,
                                 0,
                                 1,
                                 0,
                                 1,
                                 0,
                                 1,
                                 1,
                                 0,
                                 1,
                                 1,
                                 0,
                                 0,
                                 1,
                                 0,
                                 1,
                                 1,
                                 1,
                                 0,
                                 1,
                                 0,
                                 0],
    'all_leaf_node_paths': [   array([0, 1, 2, 3, 4, 5, 6]),
                               array([0, 1, 2, 3, 4, 5, 7, 8]),
                               array([0, 1, 2, 3, 4, 5, 7, 9]),
                               array([ 0,  1,  2,  3,  4, 10]),
                               array([ 0,  1,  2,  3, 11, 12]),
                               array([ 0,  1,  2,  3, 11, 13, 14]),
                               array([ 0,  1,  2,  3, 11, 13, 15]),
                               array([ 0,  1,  2, 16, 17, 18, 19]),
                               array([ 0,  1,  2, 16, 17, 18, 20, 21, 22]),
                               array([ 0,  1,  2, 16, 17, 18, 20, 21, 23]),
                               array([ 0,  1,  2, 16, 17, 18, 20, 24]),
                               array([ 0,  1,  2, 16, 17, 25]),
                               array([ 0,  1,  2, 16, 26]),
                               array([ 0,  1, 27, 28, 29]),
                               array([ 0,  1, 27, 28, 30]),
                               array([ 0,  1, 27, 31, 32]),
                               array([ 0,  1, 27, 31, 33, 34]),
                               array([ 0,  1, 27, 31, 33, 35, 36]),
                               array([ 0,  1, 27, 31, 33, 35, 37]),
                               array([ 0, 38, 39, 40]),
                               array([ 0, 38, 39, 41]),
                               array([ 0, 38, 42])],
    'all_leaf_node_samples': [   157,
                                 1,
                                 6,
                                 1,
                                 6,
                                 3,
                                 1,
                                 16,
                                 1,
                                 1,
                                 5,
                                 2,
                                 3,
                                 5,
                                 1,
                                 2,
                                 1,
                                 1,
                                 11,
                                 4,
                                 2,
                                 99],
    'all_leaf_node_samples_percent': [   47.72036474164134,
                                         0.303951367781155,
                                         1.8237082066869301,
                                         0.303951367781155,
                                         1.8237082066869301,
                                         0.91185410334346506,
                                         0.303951367781155,
                                         4.86322188449848,
                                         0.303951367781155,
                                         0.303951367781155,
                                         1.5197568389057752,
                                         0.60790273556231,
                                         0.91185410334346506,
                                         1.5197568389057752,
                                         0.303951367781155,
                                         0.60790273556231,
                                         0.303951367781155,
                                         0.303951367781155,
                                         3.3434650455927053,
                                         1.21580547112462,
                                         0.60790273556231,
                                         30.091185410334347],
    'all_leaf_node_values': [   array([[  0, 239]]),
                                array([[1, 0]]),
                                array([[0, 8]]),
                                array([[2, 0]]),
                                array([[0, 8]]),
                                array([[7, 0]]),
                                array([[0, 2]]),
                                array([[ 0, 27]]),
                                array([[3, 0]]),
                                array([[0, 1]]),
                                array([[ 0, 10]]),
                                array([[2, 0]]),
                                array([[7, 0]]),
                                array([[0, 7]]),
                                array([[1, 0]]),
                                array([[0, 2]]),
                                array([[0, 2]]),
                                array([[0, 1]]),
                                array([[19,  0]]),
                                array([[0, 6]]),
                                array([[2, 0]]),
                                array([[155,   0]])],
    'all_leaf_nodes': [   6,
                          8,
                          9,
                          10,
                          12,
                          14,
                          15,
                          19,
                          22,
                          23,
                          24,
                          25,
                          26,
                          29,
                          30,
                          32,
                          34,
                          36,
                          37,
                          40,
                          41,
                          42],
    'all_leaf_paths_features': [   array([23, 26,  1,  6, 13,  5]),
                                   array([23, 26,  1,  6, 13,  5,  9]),
                                   array([23, 26,  1,  6, 13,  5,  9]),
                                   array([23, 26,  1,  6, 13]),
                                   array([23, 26,  1,  6, 22]),
                                   array([23, 26,  1,  6, 22, 13]),
                                   array([23, 26,  1,  6, 22, 13]),
                                   array([23, 26,  1,  3, 27, 27]),
                                   array([23, 26,  1,  3, 27, 27, 19, 17]),
                                   array([23, 26,  1,  3, 27, 27, 19, 17]),
                                   array([23, 26,  1,  3, 27, 27, 19]),
                                   array([23, 26,  1,  3, 27]),
                                   array([23, 26,  1,  3]),
                                   array([23, 26,  3, 18]),
                                   array([23, 26,  3, 18]),
                                   array([23, 26,  3,  8]),
                                   array([23, 26,  3,  8, 22]),
                                   array([23, 26,  3,  8, 22,  4]),
                                   array([23, 26,  3,  8, 22,  4]),
                                   array([23, 26, 22]),
                                   array([23, 26, 22]),
                                   array([23, 26])],
    'all_scaled_leaf_node_values': [   array([[ 0.        ,  0.46679688]]),
                                       array([[ 0.00195312,  0.        ]]),
                                       array([[ 0.      ,  0.015625]]),
                                       array([[ 0.00390625,  0.        ]]),
                                       array([[ 0.      ,  0.015625]]),
                                       array([[ 0.01367188,  0.        ]]),
                                       array([[ 0.        ,  0.00390625]]),
                                       array([[ 0.        ,  0.05273438]]),
                                       array([[ 0.00585938,  0.        ]]),
                                       array([[ 0.        ,  0.00195312]]),
                                       array([[ 0.        ,  0.01953125]]),
                                       array([[ 0.00390625,  0.        ]]),
                                       array([[ 0.01367188,  0.        ]]),
                                       array([[ 0.        ,  0.01367188]]),
                                       array([[ 0.00195312,  0.        ]]),
                                       array([[ 0.        ,  0.00390625]]),
                                       array([[ 0.        ,  0.00390625]]),
                                       array([[ 0.        ,  0.00195312]]),
                                       array([[ 0.03710938,  0.        ]]),
                                       array([[ 0.        ,  0.01171875]]),
                                       array([[ 0.00390625,  0.        ]]),
                                       array([[ 0.30273438,  0.        ]])],
    'all_uniq_leaf_paths_features': [   array([ 1,  5,  6, 13, 23, 26]),
                                        array([ 1,  5,  6,  9, 13, 23, 26]),
                                        array([ 1,  5,  6,  9, 13, 23, 26]),
                                        array([ 1,  6, 13, 23, 26]),
                                        array([ 1,  6, 22, 23, 26]),
                                        array([ 1,  6, 13, 22, 23, 26]),
                                        array([ 1,  6, 13, 22, 23, 26]),
                                        array([ 1,  3, 23, 26, 27]),
                                        array([ 1,  3, 17, 19, 23, 26, 27]),
                                        array([ 1,  3, 17, 19, 23, 26, 27]),
                                        array([ 1,  3, 19, 23, 26, 27]),
                                        array([ 1,  3, 23, 26, 27]),
                                        array([ 1,  3, 23, 26]),
                                        array([ 3, 18, 23, 26]),
                                        array([ 3, 18, 23, 26]),
                                        array([ 3,  8, 23, 26]),
                                        array([ 3,  8, 22, 23, 26]),
                                        array([ 3,  4,  8, 22, 23, 26]),
                                        array([ 3,  4,  8, 22, 23, 26]),
                                        array([22, 23, 26]),
                                        array([22, 23, 26]),
                                        array([23, 26])],
    'leaf_nodes_depths': [   6,
                             7,
                             7,
                             5,
                             5,
                             6,
                             6,
                             6,
                             8,
                             8,
                             7,
                             5,
                             4,
                             4,
                             4,
                             4,
                             5,
                             6,
                             6,
                             3,
                             3,
                             2],
    'max_node_depth': 8,
    'n_nodes': 43,
    'node_features_idx': array([23, 26,  1,  6, 13,  5, 28,  9, 28, 28, 28, 22, 28, 13, 28, 28,  3,
       27, 27, 28, 19, 17, 28, 28, 28, 28, 28,  3, 18, 28, 28,  8, 28, 22,
       28,  4, 28, 28, 26, 22, 28, 28, 28]),
    'num_features_used': 16,
    'tot_leaf_node_values': [   239,
                                1,
                                8,
                                2,
                                8,
                                7,
                                2,
                                27,
                                3,
                                1,
                                10,
                                2,
                                7,
                                7,
                                1,
                                2,
                                2,
                                1,
                                19,
                                6,
                                2,
                                155],
    'validation_metrics': {   'accuracy_score': 0.91228070175438591,
                              'confusion_matrix': array([[10,  4],
       [ 1, 42]]),
                              'f1_score': 0.9438202247191011,
                              'hamming_loss': 0.08771929824561403,
                              'log_loss': 3.0297733397241098,
                              'precision_score': 0.91304347826086951,
                              'recall_score': 0.97674418604651159,
                              'zero_one_loss': 0.087719298245614086}}

In [30]:
# Count the number of samples passing through the leaf nodes.
# The saved result (512) equals the number of training rows shown earlier.
sum(all_rf_tree_data['dtree0']['tot_leaf_node_values'])


Out[30]:
512

Check output against the diagram


In [31]:
# Feature indices along each root-to-leaf path of the first tree, in path
# order (duplicates kept), for checking against the tree diagram.
irf_jupyter_utils.pretty_print_dict(inp_dict = all_rf_tree_data['dtree0']['all_leaf_paths_features'])


[   array([23, 26,  1,  6, 13,  5]),
    array([23, 26,  1,  6, 13,  5,  9]),
    array([23, 26,  1,  6, 13,  5,  9]),
    array([23, 26,  1,  6, 13]),
    array([23, 26,  1,  6, 22]),
    array([23, 26,  1,  6, 22, 13]),
    array([23, 26,  1,  6, 22, 13]),
    array([23, 26,  1,  3, 27, 27]),
    array([23, 26,  1,  3, 27, 27, 19, 17]),
    array([23, 26,  1,  3, 27, 27, 19, 17]),
    array([23, 26,  1,  3, 27, 27, 19]),
    array([23, 26,  1,  3, 27]),
    array([23, 26,  1,  3]),
    array([23, 26,  3, 18]),
    array([23, 26,  3, 18]),
    array([23, 26,  3,  8]),
    array([23, 26,  3,  8, 22]),
    array([23, 26,  3,  8, 22,  4]),
    array([23, 26,  3,  8, 22,  4]),
    array([23, 26, 22]),
    array([23, 26, 22]),
    array([23, 26])]