In [1]:
from sklearn import datasets
import pandas as pd

%matplotlib inline

Build a decision tree model on the breast cancer dataset


In [2]:
# Fit a shallow decision tree on the breast cancer dataset.
# Imports first, then data prep, then the fit (reads top-down on re-run).
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

ds = datasets.load_breast_cancer()

# Keep only the first NC features so the tree (and its explanation) stays small.
NC = 12
X = ds.data[:, 0:NC]
y = ds.target

# Fixed random_state makes the 50/50 split reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=1960
)

# min_samples_leaf=30 caps leaf granularity to limit overfitting on this small dataset.
clf = DecisionTreeClassifier(min_samples_leaf=30, random_state=1960)
clf.fit(X_train, y_train)


Out[2]:
DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=30, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=1960,
            splitter='best')

Decision Tree Plot


In [3]:
# Visualize the fitted tree's decision rules with graphviz.
import graphviz
from sklearn import tree

dot_source = tree.export_graphviz(
    clf,
    out_file=None,
    feature_names=ds.feature_names[0:NC],  # only the NC features used for training
    class_names=ds.target_names,
    filled=True,
    rounded=True,
    special_characters=True,
)
graph = graphviz.Source(dot_source)
graph  # last expression renders the tree inline


Out[3]:
Tree 0 mean concave points ≤ 0.051 gini = 0.462 samples = 284 value = [103, 181] class = benign 1 mean area ≤ 571.85 gini = 0.076 samples = 177 value = [7, 170] class = benign 0->1 True 6 mean perimeter ≤ 98.32 gini = 0.184 samples = 107 value = [96, 11] class = malignant 0->6 False 2 radius error ≤ 0.354 gini = 0.014 samples = 141 value = [1, 140] class = benign 1->2 5 gini = 0.278 samples = 36 value = [6, 30] class = benign 1->5 3 gini = 0.0 samples = 111 value = [0, 111] class = benign 2->3 4 gini = 0.064 samples = 30 value = [1, 29] class = benign 2->4 7 gini = 0.464 samples = 30 value = [19, 11] class = malignant 6->7 8 gini = 0.0 samples = 77 value = [77, 0] class = malignant 6->8

Explaining individual scores with sklearn_explain


In [4]:
# Explain the model score = ln(p(1) / (1 - p(1)))  (log-odds of the positive class).
import sklearn_explain.explainer as expl

lExplainer = expl.cModelScoreExplainer(clf)
# Feature names must line up with the NC columns the tree was trained on.
lExplainer.mSettings.mFeatureNames = ds.feature_names[0:NC]
# Order-1 explanation: individual feature effects only, no interactions.
lExplainer.mSettings.mExplanationOrder = 1

lExplainer.fit(X_train)
df_rc = lExplainer.explain(X_test)

# End with the expression so the column index gets a rich inline display.
df_rc.columns


USING_LOG_ODDS_AS_SCORE
SCORE_QUANTILES {0: -inf, 1: -11.512915464920228, 2: 1.6094379124341005, 3: 3.367295829986474, 4: 11.51291546492478}
CONST_SCORE_DETECTIOM -11.512915464920228 11.51291546492478 False
NON_USED_FEATURES ['mean radius', 'mean texture', 'mean smoothness', 'mean compactness', 'mean concavity', 'mean symmetry', 'mean fractal dimension', 'texture error']
USED_FEATURES ['mean perimeter', 'mean area', 'mean concave points', 'radius error']
FEATURE_QUANTILES mean perimeter {0: -inf, 1: 71.884, 2: 80.624, 3: 90.566, 4: 111.44}
FEATURE_QUANTILES mean area {0: -inf, 1: 387.1, 2: 477.3, 3: 602.0, 4: 922.44}
FEATURE_QUANTILES mean concave points {0: -inf, 1: 0.017368, 2: 0.027374, 3: 0.047373999999999965, 4: 0.08419800000000001}
FEATURE_QUANTILES radius error {0: -inf, 1: 0.21352, 2: 0.26870000000000005, 3: 0.36444, 4: 0.55352}
Index(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'Score', 'BinnedScore',
       'mean perimeter_bin', 'mean area_bin', 'mean concave points_bin',
       'radius error_bin', 'mean perimeter_encoded', 'mean area_encoded',
       'mean concave points_encoded', 'radius error_encoded',
       'mean perimeter_Effect', 'mean area_Effect',
       'mean concave points_Effect', 'radius error_Effect', 'reason_1',
       'reason_2', 'reason_3', 'reason_4', 'reason_1_idx', 'detailed_reason_1',
       'reason_2_idx', 'detailed_reason_2', 'reason_3_idx',
       'detailed_reason_3', 'reason_4_idx', 'detailed_reason_4'],
      dtype='object')

In [5]:
# Explain a single observation: explain() expects a 2-D array,
# so reshape the first test row to shape (1, n_features).
single_row = X_test[0].reshape(1, -1)
df_rc_2 = lExplainer.explain(single_row)

In [6]:
# Raw feature values of the explained row, for reference next to its reasons.
X_test[0].reshape(1, -1)


Out[6]:
array([[1.176e+01, 2.160e+01, 7.472e+01, 4.279e+02, 8.637e-02, 4.966e-02,
        1.657e-02, 1.115e-02, 1.495e-01, 5.888e-02, 4.062e-01, 1.210e+00]])

In [7]:
# Full explanation frame for the single row (scores, bins, effects, reasons).
df_rc_2


Out[7]:
mean radius mean texture mean perimeter mean area mean smoothness mean compactness mean concavity mean concave points mean symmetry mean fractal dimension ... reason_3 reason_4 reason_1_idx detailed_reason_1 reason_2_idx detailed_reason_2 reason_3_idx detailed_reason_3 reason_4_idx detailed_reason_4
0 11.76 21.6 1 1 0.08637 0.04966 0.01657 0 0.1495 0.05888 ... mean area mean perimeter 2 [(-inf < 'mean concave points' <= 0.017368)] 3 [(0.36444 < 'radius error' <= 0.55352)] 1 [(387.1 < 'mean area' <= 477.3)] 0 [(71.884 < 'mean perimeter' <= 80.624)]

1 rows × 38 columns


In [8]:
# Show only the detailed_reason_* columns: the interval-based explanations.
detailed_cols = [c for c in df_rc_2.columns if c.startswith('detailed')]
df_rc_2[detailed_cols]


Out[8]:
detailed_reason_1 detailed_reason_2 detailed_reason_3 detailed_reason_4
0 [(-inf < 'mean concave points' <= 0.017368)] [(0.36444 < 'radius error' <= 0.55352)] [(387.1 < 'mean area' <= 477.3)] [(71.884 < 'mean perimeter' <= 80.624)]

In [9]:
# Rank features by their (signed) contribution to this row's score.
effect_cols = [col for col in df_rc_2.columns if col.endswith('_Effect')]
effects_by_feature = df_rc_2[effect_cols].transpose()
# Column 0 is the single explained row; sort its effects largest-first.
sorted_effects = effects_by_feature.sort_values(
    by=effects_by_feature.columns[0], ascending=False
)
print(sorted_effects.head())
sorted_effects.plot.barh(figsize=(12, 8));  # ';' suppresses the Axes repr


                                       0
mean concave points_Effect  6.380884e-04
radius error_Effect         1.576499e-05
mean perimeter_Effect      -1.110223e-15
mean area_Effect           -1.110223e-15
Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fee4694c2e8>

In [ ]: