In [1]:
from sklearn import datasets
import pandas as pd
import numpy as np
%matplotlib inline
In [2]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Load the breast-cancer dataset and keep only its first NC feature columns.
NC = 12
ds = datasets.load_breast_cancer()
X, y = ds.data[:, :NC], ds.target

# Hold out half of the rows; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=1960
)

# Decision tree with at least 30 samples per leaf, fitted on the training half.
clf = DecisionTreeClassifier(min_samples_leaf=30, random_state=1960)
clf.fit(X_train, y_train)
Out[2]:
In [3]:
import graphviz
from sklearn import tree

# Export the fitted tree as DOT text (out_file=None returns the string rather
# than writing a file), labelled with the selected feature and class names.
dot_data = tree.export_graphviz(
    clf,
    out_file=None,
    feature_names=ds.feature_names[:NC],
    class_names=ds.target_names,
    filled=True,
    rounded=True,
    special_characters=True,
)

# Wrap the DOT text so the notebook renders it as the cell output.
graph = graphviz.Source(dot_data)
graph
Out[3]:
In [4]:
import sklearn_explain.explainer as expl

# Explain the score = ln(p(1) / (1 - p(1)))
lExplainer = expl.cModelScoreExplainer(clf)

# Configure the explainer before fitting: explanation order 2 and the names of
# the NC feature columns the tree was trained on.
lExplainer.mSettings.mExplanationOrder = 2
lExplainer.mSettings.mFeatureNames = ds.feature_names[:NC]
lExplainer.fit(X_train)

# Explanations for every row of the held-out set.
df_rc = lExplainer.explain(X_test)
# print(df_rc.columns)
In [5]:
# Explain only the first test row; reshape(1, -1) turns the 1-D row into a
# single-row 2-D array before it is passed to explain().
df_rc_2 = lExplainer.explain(X_test[0].reshape(1, -1))
In [6]:
# Display the single-row input (row 0 of X_test, reshaped to 2-D) that was
# passed to explain() in the previous cell.
X_test[0].reshape(1, -1)
Out[6]:
In [7]:
# Display the full explanation result computed for the first test row.
df_rc_2
Out[7]:
In [8]:
# Show only the columns of df_rc_2 whose name begins with 'detailed'.
df_rc_2[[
    name
    for name in df_rc_2.columns
    if name.startswith('detailed')
]]
Out[8]:
In [9]:
# Inspect the feature quantiles stored on the implementation object of the
# first explainer (lExplainer, fitted without any custom quantiles).
lExplainer.mImplementation.mFeatureQuantiles
Out[9]:
In [10]:
# Custom quantile table for four features. Each feature maps key 0 to -inf and
# key 1 to a single finite cut value; the table is later assigned to
# mSettings.mCustomFeatureQuantiles on the explainers.
lFeature_Quantiles = {
    feature: {0: -np.inf, 1: cut}
    for feature, cut in (
        ('mean area', 571.85),
        # NOTE(review): 0.51 looks high for this feature -- confirm it is not
        # meant to be 0.051.
        ('mean concave points', 0.51),
        ('mean perimeter', 98.32),
        ('radius error', 0.354),
    )
}
In [11]:
# Display the custom feature-quantile table defined in the previous cell.
lFeature_Quantiles
Out[11]:
In [12]:
# Second explainer on the same tree, this time supplying the custom feature
# quantiles through the settings object before fitting.
lExplainer2 = expl.cModelScoreExplainer(clf)
lExplainer2.mSettings.mExplanationOrder = 2
lExplainer2.mSettings.mFeatureNames = ds.feature_names[:NC]
lExplainer2.mSettings.mCustomFeatureQuantiles = lFeature_Quantiles
lExplainer2.fit(X_train)

# Explanations for the whole held-out set.
df_rc2 = lExplainer2.explain(X_test)
# print(df_rc2.columns)

# Re-explain the first test row (this rebinds df_rc_2) and show only the
# columns whose name begins with 'detailed'.
df_rc_2 = lExplainer2.explain(X_test[0].reshape(1, -1))
df_rc_2[[
    name
    for name in df_rc_2.columns
    if name.startswith('detailed')
]]
Out[12]:
In [13]:
# Inspect the score quantiles stored on the implementation object of the
# first explainer (lExplainer, fitted without any custom quantiles).
lExplainer.mImplementation.mScoreQuantiles
Out[13]:
In [14]:
# Custom score-quantile table: key 0 maps to -inf and key 1 maps to 0.0. It is
# later assigned to mSettings.mCustomScoreQuantiles.
lScore_Quantiles = {0: -np.inf, 1: 0.0}
In [15]:
# Third explainer on the same tree, supplying both the custom feature quantiles
# and the custom score quantiles through the settings object before fitting.
lExplainer3 = expl.cModelScoreExplainer(clf)
lExplainer3.mSettings.mFeatureNames = ds.feature_names[0:NC]
lExplainer3.mSettings.mCustomFeatureQuantiles = lFeature_Quantiles
lExplainer3.mSettings.mCustomScoreQuantiles = lScore_Quantiles
lExplainer3.mSettings.mExplanationOrder = 2
lExplainer3.fit(X_train)

# Explain the whole held-out set with lExplainer3. This call previously used
# lExplainer2, so df_rc3 did not reflect the custom score quantiles.
df_rc3 = lExplainer3.explain(X_test)
# print(df_rc3.columns)

# Explain the first test row only and show the columns whose name begins with
# 'detailed'.
df_rc_3 = lExplainer3.explain(X_test[0].reshape(1, -1))
df_rc_3[[col for col in df_rc_3.columns if col.startswith('detailed')]]
Out[15]:
In [ ]: