In [ ]:
import sys
import h2o

In [ ]:
# Attach this notebook to an H2O cluster using h2o.init()'s default settings
# (no address or port is passed here).
# NOTE(review): the original note says this connects to a pre-existing
# cluster; whether init() also launches a local one when none is reachable
# depends on the installed h2o version -- confirm.
h2o.init()

In [ ]:
# Look up where prostate.csv lives via h2o.locate, then import that path
# into the frame used by the rest of the notebook.
prostate_csv = h2o.locate("smalldata/logreg/prostate.csv")
df = h2o.import_frame(path=prostate_csv)

In [ ]:
# Inspect the frame exactly as imported, before any column is dropped or recoded.
df.describe()

In [ ]:
# Remove ID from training frame so that it cannot be picked up by the
# `train[1:]` predictor selection used when the model is built.
# The result is bound to the new name `train`; `df` is not reassigned
# (presumably drop() returns a new frame rather than modifying `df` -- confirm).
train = df.drop("ID")

In [ ]:
# For VOL & GLEASON, a zero really means "missing".
# Each column is pulled out of `train`, and the entries equal to 0 are
# overwritten with None through a boolean mask.
# NOTE(review): this relies on the extracted column still being backed by
# `train`, so that the assignment shows up in the frame itself -- confirm with
# the train.describe() output further down.
vol = train['VOL']
vol[vol == 0] = None
gle = train['GLEASON']
gle[gle == 0] = None

In [ ]:
# Convert CAPSULE to a factor column, replacing the original column in
# `train`. CAPSULE is the response passed to the GBM further down, which is
# fit with loss="bernoulli".
train['CAPSULE'] = train['CAPSULE'].asfactor()

In [ ]:
# See that the data is ready: inspect `train` after ID has been dropped, the
# zeros in VOL/GLEASON recoded, and CAPSULE converted to a factor.
train.describe()

In [ ]:
# Run GBM
# Tuning knobs are collected in one place so they are easy to spot and change.
gbm_settings = {
    "loss": "bernoulli",
    "ntrees": 50,
    "learn_rate": 0.1,
}

# CAPSULE is the response; train[1:] supplies the predictors (presumably every
# column from index 1 onward -- confirm against the describe() output).
# The validation arguments are built from the same `train` frame, so the
# validation metrics describe the training data rather than held-out data.
# Each argument is sliced from `train` separately, exactly as before, instead
# of sharing one object between the training and validation inputs.
my_gbm = h2o.gbm(
    y=train["CAPSULE"],
    validation_y=train["CAPSULE"],
    x=train[1:],
    validation_x=train[1:],
    **gbm_settings
)

In [ ]:
# Score the fitted model against `train` (the same frame it was built from),
# keep the returned metrics object, and display it.
my_gbm_metrics = my_gbm.model_performance(train)
my_gbm_metrics.show()