This Python notebook compares the built-in explainability features of XGBoost with the one provided by LIME. The outline of this notebook is as follows
In [1]:
    
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
from sklearn.preprocessing import LabelBinarizer, StandardScaler, LabelEncoder
import xgboost as xgb
import lime.lime_tabular
from graphviz import Digraph
    
In [2]:
    
np.random.seed(1)
%matplotlib inline
    
In [3]:
    
def _parse_node_patched_with_leaf_index(graph, text):
    """parse dumped node"""
    match = xgb.plotting._NODEPAT.match(text)
    if match is not None:
        node = match.group(1)
        graph.node(node, label=match.group(2), shape='circle')
        return node
    match = xgb.plotting._LEAFPAT.match(text)
    if match is not None:
        node = match.group(1)
        graph.node(node, label=match.group(2) +
                   '\nleaf index: ' + node, shape='box')
        return node
    raise ValueError('Unable to parse node: {0}'.format(text))
old_parse_node = xgb.plotting._parse_node
xgb.plotting._parse_node = _parse_node_patched_with_leaf_index
    
The Shapley value is a solution concept in cooperative game theory.
SHAP values are defined as the Shapley values of a conditional expectation function and are introduced in the publication A unified approach to interpreting model predictions.
fever and cough.
In [4]:
    
columns_ex = ['fever', 'cough', 'bias']
def plot_sickness_graph(label, contributions, cough_yes_leaves=[0, 100]):
    def format_contributions(contribs):
        zipped = zip(columns_ex, contribs[0])
        def combine_contrib(tup): return ": ".join(str(i) for i in tup)
        return '\n'.join(map(combine_contrib, zipped))
    dot = Digraph()
    dot.attr('graph', label=label)
    dot.attr('graph', labelloc='t')
    dot.attr('graph', rank='LR')
    dot.attr('node', shape='box')
    dot.node('R', 'Cough')
    dot.node('F1', 'Fever')
    dot.node('F2', 'Fever')
    dot.node('L1', '0', shape='plaintext')
    dot.node('L2', '0', shape='plaintext')
    dot.node('L3', str(cough_yes_leaves[0]), shape='plaintext')
    dot.node('L4', str(cough_yes_leaves[1]), shape='plaintext')
    dot.edge('R', 'F1', label='No')
    dot.edge('R', 'F2', label='Yes')
    dot.edge('F1', 'L1', label='No')
    dot.edge('F1', 'L2', label='Yes')
    dot.edge('F2', 'L3', label='No')
    dot.edge('F2', 'L4', label='Yes')
    dot.node('Feature contributions\n' +
             format_contributions(contributions), shape='plaintext')
    return dot
    
The following two instances of the sickness score example illustrate the contribution values calculated before XGBoost was extended with Tree SHAP, ie. before the pull request #2438 was merged and before commit 78c4188cec31425f708d238160ea3afb67a7250a.
In [5]:
    
contributions_and = [[39.33431244, 35.66350174, 24.99929047]]
plot_sickness_graph(
    'Sickness score example #1 (simple AND)', contributions_and)
    
    Out[5]:
In [6]:
    
contributions = np.array([[49.9985466, 29.99912071, 29.99913406]])
plot_sickness_graph(
    'Sickness score example #2 (with increased impact of "cough")', contributions, [10, 110])
    
    Out[6]:
In [7]:
    
contribution_shift_pre_shap = pd.DataFrame(
    contributions - contributions_and, columns=columns_ex)
contribution_shift_pre_shap
    
    Out[7]:
The next two instances correspond to the previous ones but the contributions are calculated with Tree SHAP
cough increases in the second instance
In [8]:
    
data = np.array([[0, 0, 0],
                 [1, 0, 0],
                 [0, 1, 0],
                 [1, 1, 100]]).repeat([100, 100, 100, 100], axis=0)
sickness = pd.DataFrame(data, columns=['fever', 'cough', 'sickness'])
bst_test = xgb.XGBRegressor(objective='reg:linear')
bst_test.fit(sickness.iloc[:, :-1].values, sickness.iloc[:, -1])
dmatrix = xgb.DMatrix(np.array([[1, 1]]))
contributions_and_new = bst_test.get_booster().predict(dmatrix, pred_contribs=True)
plot_sickness_graph('Sickness score example #1 (simple AND)',
                    contributions_and_new)
    
    Out[8]:
In [9]:
    
data = np.array([[0, 0, 0],
                 [1, 0, 0],
                 [0, 1, 10],
                 [1, 1, 110]]).repeat([100, 100, 100, 100], axis=0)
sickness = pd.DataFrame(data, columns=['fever', 'cough', 'sickness'])
bst_test.fit(sickness.iloc[:, :-1].values, sickness.iloc[:, -1])
dmatrix = xgb.DMatrix(np.array([[1, 1]]))
contributions_new = bst_test.get_booster().predict(dmatrix, pred_contribs=True)
plot_sickness_graph(
    'Sickness score example #2 (with increased impact of "cough")', contributions_new, [10, 110])
    
    Out[9]:
In [10]:
    
contribution_shift_post_shap = pd.DataFrame(
    contributions_new - contributions_and_new, columns=columns_ex)
contribution_shift_post_shap
    
    Out[10]:
The dataset being used is the Adult Census Data Set, which has a significant amount of instances and attributes and contains continuous as well as categorical attributes.
In [11]:
    
column_names = ["Age", "Workclass", "fnlwgt", "Education", "Education-Num", "Marital Status", "Occupation",
                "Relationship", "Race", "Sex", "Capital Gain", "Capital Loss", "Hours per week", "Country", "Label"]
df = pd.read_csv('data/adult.data', names=column_names, index_col=False)
    
In [12]:
    
df
    
    Out[12]:
In [13]:
    
from sklearn_pandas import DataFrameMapper, cross_val_score
label_binarizer = LabelBinarizer()
mapper = DataFrameMapper([
    (['Age'], StandardScaler()),
    ('Workclass', LabelEncoder()),
    (['fnlwgt'], StandardScaler()),
    ('Education', LabelEncoder()),
    (['Education-Num'], StandardScaler()),
    ('Marital Status', LabelEncoder()),
    ('Occupation', LabelEncoder()),
    ('Relationship', LabelEncoder()),
    ('Race', LabelEncoder()),
    ('Sex', LabelEncoder()),
    (['Capital Gain'], StandardScaler()),
    (['Capital Loss'], StandardScaler()),
    (['Hours per week'], StandardScaler()),
    ('Country', LabelEncoder()),
    ('Label', label_binarizer)
],
    df_out=True
)
fitted_df = mapper.fit_transform(df)
    
    
In [14]:
    
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    fitted_df.iloc[:, :-1], fitted_df.iloc[:, -1], test_size=0.20)
bst = xgb.XGBClassifier(n_estimators=300, max_depth=5)
bst.fit(X_train.values, y_train.values)
    
    Out[14]:
In [15]:
    
from sklearn.metrics import accuracy_score
accuracy_score(y_test.values, bst.predict(X_test.values))
    
    Out[15]:
In [16]:
    
i = 1412
X_train.iloc[i, :]
    
    Out[16]:
In [17]:
    
booster = bst.get_booster()
inst = np.array([X_train.iloc[i, :].values])
prediction = bst.predict(inst)
print('predicted class (transformed to 0/1): ' + str(bst.predict(inst)[0]))
print('actual class (transformed to 0/1): ' + str(y_train[i]))
print('precicted class: {} of {}'.format(
    str(label_binarizer.classes_[int(prediction)]), label_binarizer.classes_))
    
    
In [18]:
    
dtrain = xgb.DMatrix(np.array([X_train.iloc[i, :].values]))
print('Predicted probability: ' + str(booster.predict(dtrain)))
print('Predicted raw value: {}\n'.format(
    booster.predict(dtrain, output_margin=True)))
contribs = booster.predict(dtrain, pred_contribs=True)
contributions2 = zip(column_names[:-1] + ['Bias'], contribs[0])
print('Contributions (sorted by absolute value):\n' + '\n'.join(str(x)
                                                                for x in sorted(contributions2, key=lambda x: abs(x[1]), reverse=True)))
print("Sum of contributions {}".format(
    booster.predict(dtrain, pred_contribs=True).sum()))
    
    
In [19]:
    
import shap
# load JS visualization code to notebook
shap.initjs()
    
    
In [20]:
    
dmatrix = xgb.DMatrix(X_train.values)
shap_values = bst.get_booster().predict(dmatrix, pred_contribs=True)
# visualize the first prediction's explaination
shap.visualize(
    shap_values[i, :], feature_names=df.columns[:-1], data=X_train.iloc[i, :].values)
    
    Out[20]:
In [21]:
    
categorical_features_idxs = [1, 3, 5, 6, 7, 8, 9, 13]
categorical_names = {}
for feature in categorical_features_idxs:
    le = sklearn.preprocessing.LabelEncoder()
    le.fit(df.iloc[:, feature])
    categorical_names[feature] = le.classes_
explainer = lime.lime_tabular.LimeTabularExplainer(X_train.values,  # fitted_df.iloc[:,:-1].values.astype(float),
                                                   feature_names=['Age', 'Workclass', 'fnlwgt', 'Education', 'Education-Num',
                                                                  'Marital Status', 'Occupation', 'Relationship', 'Race', 'Sex',
                                                                  'Capital gain', 'Capital Loss', 'Hours per week', 'Country'],
                                                   categorical_features=categorical_features_idxs,
                                                   categorical_names=categorical_names,
                                                   class_names=label_binarizer.classes_,
                                                   kernel_width=3, verbose=False)
    
In [22]:
    
# Custom predict function not necessary, as all date has been transformed already
# predict_fn = lambda x: bst.predict_proba(mapper.fit_transform(x)).astype(float)
exp = explainer.explain_instance(
    X_train.iloc[i, :].values, bst.predict_proba, num_features=4)
exp.show_in_notebook(show_all=False)
    
    
In [23]:
    
exp.show_in_notebook(show_all=False)
shap.visualize(
    shap_values[i, :], feature_names=df.columns[:-1], data=X_train.iloc[i, :].values)
    
    
    Out[23]:
In [24]:
    
legend_text = """Instance:  {0}
{1}
Predicted leaf: {2}
""".format(i, X_train.iloc[i, :], booster.predict(dtrain, pred_leaf=True)[0][0])
legend_test = '''
Instance:  + str(i)
'''
    
In [25]:
    
%config InlineBackend.figure_format = 'retina'
plt.rc('figure', figsize=(50, 50))
fig = plt.figure()
ax = fig.add_subplot(111)
xgb.plot_tree(bst, fmap='data/adult.fmap', num_trees=0, rankdir='LR', ax=ax)
ax.text(0.0, 0.7, legend_text, bbox={
        'facecolor': 'gray', 'alpha': 0.5, 'pad': 5}, fontsize=30, transform=ax.transAxes)
plt.show()
    
    
In [26]:
    
plt.rc('figure', figsize=(15, 15))
scores = booster.get_score(fmap='data/adult.fmap', importance_type='gain')
xgb.plot_importance(scores, xlabel='Gain')
    
    Out[26]: