In [69]:
# This tutorial is losely based on https://xiaoxiaowang87.github.io/monotonicity_constraint/
import h2o
from h2o.estimators import H2OXGBoostEstimator
from h2o.estimators import H2OGradientBoostingEstimator
import numpy as np
import pandas as pd
import matplotlib
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.datasets.california_housing import fetch_california_housing
In [48]:
# Download (or load from the local scikit-learn cache) the California housing
# regression dataset: ~20k census block groups, 8 numeric features, median
# house value as the target.
cal_housing = fetch_california_housing()
In [49]:
# Start a local H2O cluster (or attach to one already running on the default port).
h2o.init()
In [50]:
# Upload the feature matrix to the H2O cluster as an H2OFrame, then attach the
# response as a new "target" column on the same frame.
data = h2o.H2OFrame(cal_housing.data, column_names=cal_housing.feature_names)
data["target"] = h2o.H2OFrame(cal_housing.target)
In [51]:
# 60/40 train/test split; the fixed seed makes the split reproducible across runs.
train, test = data.split_frame([0.6], seed=123)
In [52]:
# Per-column summary statistics (type, min/max, mean, missing count, ...) for the training frame.
train.summary()
In [53]:
# Same per-column summary for the held-out test frame, to eyeball that the split looks balanced.
test.summary()
In [54]:
# Restrict the models to three predictors and declare the expected direction of
# each feature's effect on the target:
#   +1 -> prediction must be non-decreasing in the feature,
#   -1 -> prediction must be non-increasing.
feature_names = ["MedInc", "AveOccup", "HouseAge"]
monotone_constraints = dict(zip(feature_names, (1, -1, 1)))
In [55]:
# XGBoost model constrained to be monotone increasing in MedInc and HouseAge
# and monotone decreasing in AveOccup; all other hyperparameters are H2O defaults.
xgb_mono = H2OXGBoostEstimator(monotone_constraints=monotone_constraints)
xgb_mono.train(x=feature_names, y="target", training_frame=train, validation_frame=test)
In [56]:
# XGBoost performance metrics on the training data.
xgb_mono.model_performance()
Out[56]:
In [57]:
# XGBoost performance metrics on the validation frame (the test split).
xgb_mono.model_performance(valid=True)
Out[57]:
In [58]:
# Same monotonicity constraints applied to H2O's native GBM, for comparison
# against the XGBoost model above.
gbm_mono = H2OGradientBoostingEstimator(monotone_constraints=monotone_constraints)
gbm_mono.train(x=feature_names, y="target", training_frame=train, validation_frame=test)
In [59]:
# GBM performance metrics on the training data.
gbm_mono.model_performance()
Out[59]:
In [60]:
# GBM performance metrics on the validation frame (the test split).
gbm_mono.model_performance(valid=True)
Out[60]:
In [61]:
# Variable importance plot for the constrained XGBoost model.
xgb_mono.varimp_plot()
In [62]:
# Variable importance plot for the constrained GBM model.
gbm_mono.varimp_plot()
In [63]:
# Side-by-side RMSE comparison of the two models on train and test data.
# pandas.DataFrame.from_items was deprecated in 0.23 and removed in 1.0;
# from_dict with orient="index" builds the identical table (one row per model).
pd.DataFrame.from_dict(
    {'H2O XGBoost': [xgb_mono.rmse(), xgb_mono.rmse(valid=True)],
     'H2O GBM': [gbm_mono.rmse(), gbm_mono.rmse(valid=True)]},
    orient="index", columns=['Train RMSE', 'Test RMSE'])
Out[63]:
In [64]:
# Partial dependence plots for the XGBoost model over the constrained features;
# each curve should be monotone in the declared direction.
xgb_mono.partial_plot(data=train, cols=feature_names, nbins=100)
Out[64]:
In [65]:
# Partial dependence plots for the GBM model; compare shape and monotonicity
# against the XGBoost curves above.
gbm_mono.partial_plot(data=train, cols=feature_names, nbins=100)
Out[65]:
In [ ]: