Note: this example is adapted from the front-page XGBoost example published with the shap package: https://github.com/slundberg/shap/blob/master/notebooks/tree_explainer/Front%20page%20example%20(XGBoost).ipynb
In [1]:
import h2o
import shap
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from h2o import H2OFrame
# initialize H2O
h2o.init()
# load JS visualization code to notebook
shap.initjs()
In [2]:
# train a GBM model in H2O
X, y = shap.datasets.boston()
boston_housing = H2OFrame(X).cbind(H2OFrame(y, column_names=["medv"]))
model = H2OGradientBoostingEstimator(learn_rate=0.01, ntrees=100)
model.train(training_frame=boston_housing, y="medv")
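Before explaining predictions, it can help to confirm the model trained sensibly. A minimal sketch using H2O's built-in metrics (the exact numbers will vary from run to run):
# optional sanity check: report the training RMSE of the fitted model
perf = model.model_performance(boston_housing)
print(perf.rmse())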
In [3]:
# calculate SHAP values using the predict_contributions function
contributions = model.predict_contributions(boston_housing)
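The returned H2OFrame has one contribution column per input feature plus a final BiasTerm column (the model's expected value). A quick way to confirm the layout:
# one contribution column per feature, plus BiasTerm as the last column
print(contributions.col_names)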
In [4]:
# convert the H2O Frame for use with shap's visualization functions
contributions_matrix = contributions.as_data_frame().to_numpy()
# the first 13 columns are the per-feature SHAP values
shap_values = contributions_matrix[:, 0:13]
# the expected value (bias term) is the last column; it is constant
# across rows, so min() simply extracts that single value
expected_value = contributions_matrix[:, 13].min()
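As a sanity check on SHAP's local-accuracy property, the per-row contributions plus the bias term should reconstruct the model's predictions. A sketch of that check:
import numpy as np
# local accuracy: SHAP values summed per row, plus the bias term,
# should match the model's predictions
predictions = model.predict(boston_housing).as_data_frame().to_numpy().flatten()
assert np.allclose(shap_values.sum(axis=1) + expected_value, predictions)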
In [5]:
# visualize the first prediction's explanation
shap.force_plot(expected_value, shap_values[0,:], X.iloc[0,:])
Out[5]: (interactive force plot explaining the first prediction)
In [6]:
# visualize the training set predictions
shap.force_plot(expected_value, shap_values, X)
Out[6]: (interactive force plot over all training-set predictions)
In [7]:
# create a SHAP dependence plot to show the effect of a single feature across the whole dataset
shap.dependence_plot("RM", shap_values, X)
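By default shap picks the coloring feature automatically; it can also be set explicitly via the interaction_index parameter (LSTAT is chosen here purely for illustration):
# color the RM dependence plot by a specific feature instead of the
# automatically selected one
shap.dependence_plot("RM", shap_values, X, interaction_index="LSTAT")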
In [8]:
# summarize the effects of all the features
shap.summary_plot(shap_values, X)
In [9]:
shap.summary_plot(shap_values, X, plot_type="bar")