Note: this example is adapted from an example published in the shap package: https://github.com/slundberg/shap/blob/master/notebooks/tree_explainer/Front%20page%20example%20(XGBoost).ipynb
In [4]:
import h2o
import shap
from h2o.estimators.random_forest import H2ORandomForestEstimator
from h2o import H2OFrame
# initialize H2O
h2o.init()
# load JS visualization code to notebook
shap.initjs()
In [6]:
# train a distributed random forest model in H2O
X, y = shap.datasets.boston()
boston_housing = H2OFrame(X).cbind(H2OFrame(y, column_names=["medv"]))
model = H2ORandomForestEstimator(ntrees=100)
model.train(training_frame=boston_housing, y="medv")
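When x is omitted, H2O uses every column other than the response as a predictor. A minimal sketch of the equivalent call with the predictor columns passed explicitly:
# equivalent training call with the predictors listed explicitly
predictors = [c for c in boston_housing.columns if c != "medv"]
model.train(x=predictors, y="medv", training_frame=boston_housing)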
In [7]:
# calculate SHAP values using the predict_contributions function
contributions = model.predict_contributions(boston_housing)
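The returned frame contains one contribution column per feature plus a BiasTerm column, and for a regression model the values in each row sum to the model's prediction for that row. A minimal sanity check:
import numpy as np
# per-feature contributions plus the bias term should reproduce the prediction for each row
preds = model.predict(boston_housing).as_data_frame().values.ravel()
contrib_sums = contributions.as_data_frame().values.sum(axis=1)
print(np.allclose(preds, contrib_sums))  # expected to print True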
In [8]:
# convert the H2O frame to a NumPy array for use with shap's visualization functions
contributions_matrix = contributions.as_data_frame().values
# SHAP values are returned for every feature (the first 13 columns)
shap_values = contributions_matrix[:, 0:13]
# the expected value (bias term) is the last returned column and is the same for every row
expected_value = contributions_matrix[:, 13].min()
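Taking the minimum of the last column works because the bias term is identical for every row (it is the model's expected prediction). A quick check:
import numpy as np
# the BiasTerm column holds one repeated value, so min(), max(), or the first entry all give the expected value
print(np.unique(contributions_matrix[:, 13]))  # expected: a single value equal to expected_value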
In [9]:
# visualize the first prediction's explanation
shap.force_plot(expected_value, shap_values[0,:], X.iloc[0,:])
Out[9]:
In [10]:
# visualize the training set predictions
shap.force_plot(expected_value, shap_values, X)
Out[10]:
In [11]:
# create a SHAP dependence plot to show the effect of a single feature across the whole dataset
shap.dependence_plot("RM", shap_values, X)
In [12]:
# summarize the effects of all the features
shap.summary_plot(shap_values, X)
In [13]:
shap.summary_plot(shap_values, X, plot_type="bar")
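The bar summary plot ranks features by the mean absolute SHAP value. The same global ranking can be reproduced directly from the SHAP value matrix; a minimal sketch:
import numpy as np
# compute mean |SHAP value| per feature and print features from most to least important
mean_abs_shap = np.abs(shap_values).mean(axis=0)
for name, value in sorted(zip(X.columns, mean_abs_shap), key=lambda t: -t[1]):
    print(f"{name}: {value:.3f}")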