In [ ]:
library(xgboost)
library(DiagrammeR)
In [ ]:
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
train <- agaricus.train
test <- agaricus.test
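The mushroom data ships with the package as a list holding a sparse feature matrix and a 0/1 label vector; a quick sanity check (base R only, nothing assumed beyond the loaded data):
In [ ]:
# The features are a sparse dgCMatrix; the labels are a numeric 0/1 vector
class(train$data)
dim(train$data)
table(train$label)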
In [ ]:
str(train)
In [ ]:
model <- xgboost(data = train$data, label = train$label,
                 nrounds = 2, objective = "binary:logistic")
In [ ]:
preds <- predict(model, test$data)
In [ ]:
str(preds)
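With the binary:logistic objective, preds are probabilities; a minimal sketch of turning them into class labels and a test error rate (the 0.5 cutoff is the conventional choice, not something returned by the model):
In [ ]:
# Threshold the predicted probabilities at 0.5 to get hard class labels
pred_labels <- as.numeric(preds > 0.5)
# Misclassification error on the test set
mean(pred_labels != test$label)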
In [ ]:
cv.res <- xgb.cv(data = train$data, label = train$label, nfold = 5,
                 nrounds = 2, objective = "binary:logistic")
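xgb.cv records per-round train and test metrics averaged over the folds; in recent versions of the R package these live in an evaluation_log data.table (the field name has varied across releases):
In [ ]:
# Per-round mean and standard deviation of the metric across the 5 folds
print(cv.res$evaluation_log)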
In [ ]:
loglossobj <- function(preds, dtrain) {
  # dtrain is the internal format of the training data
  # We extract the labels from the training data
  labels <- getinfo(dtrain, "label")
  # preds are raw margin scores, so map them to probabilities first
  preds <- 1/(1 + exp(-preds))
  # First- and second-order gradients of the log loss (grad and hess)
  grad <- preds - labels
  hess <- preds * (1 - preds)
  # Return the result as a list
  return(list(grad = grad, hess = hess))
}
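A custom objective is often paired with a custom evaluation metric. A minimal sketch of one (the name evalerror is ours; note that preds arrive here as raw margins, so the decision cutoff is 0 rather than 0.5):
In [ ]:
# Custom evaluation: misclassification error computed on margin scores
evalerror <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  # A margin of 0 corresponds to a predicted probability of 0.5
  err <- mean((preds > 0) != labels)
  return(list(metric = "custom_error", value = err))
}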
In [ ]:
model <- xgboost(data = train$data, label = train$label,
                 nrounds = 2, objective = loglossobj, eval_metric = "error")
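To plug the custom metric in alongside the custom objective, it can be passed as feval, which xgboost() forwards to xgb.train (hedged: this argument plumbing has shifted between package versions):
In [ ]:
model <- xgboost(data = train$data, label = train$label, nrounds = 2,
                 objective = loglossobj, feval = evalerror)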
In [ ]:
bst <- xgb.cv(data = train$data, label = train$label, nfold = 5,
              nrounds = 20, objective = "binary:logistic",
              early_stopping_rounds = 3, maximize = FALSE)
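When early stopping triggers, the cross-validation result records the best round; in recent releases this is exposed as best_iteration (older versions surfaced it differently):
In [ ]:
# Round at which the test metric stopped improving
bst$best_iteration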
In [ ]:
dtrain <- xgb.DMatrix(train$data, label = train$label)
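A DMatrix can be saved to disk in xgboost's binary format and reloaded later, which avoids re-parsing the raw data (the file name here is arbitrary):
In [ ]:
# Save the DMatrix in binary format and load it back
xgb.DMatrix.save(dtrain, "dtrain.buffer")
dtrain2 <- xgb.DMatrix("dtrain.buffer")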
In [ ]:
model <- xgboost(data = dtrain, nrounds = 2, objective = "binary:logistic")
In [ ]:
# outputmargin = TRUE returns untransformed margin scores (log-odds)
# rather than probabilities
pred_train <- predict(model, dtrain, outputmargin = TRUE)
In [ ]:
str(pred_train)
In [ ]:
# Use these margins as the starting score, so the next model
# continues boosting from where this one left off
setinfo(dtrain, "base_margin", pred_train)
In [ ]:
model <- xgboost(data = dtrain, nrounds = 2, objective = "binary:logistic")
In [ ]:
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
               eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
xgb.plot.tree(feature_names = agaricus.train$data@Dimnames[[2]], model = bst)
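Beyond plotting individual trees, the fitted model can be summarized by feature importance; a short sketch using xgb.importance and xgb.plot.importance from the same package:
In [ ]:
# Gain-based importance of each feature across the boosted trees
imp <- xgb.importance(feature_names = colnames(train$data), model = bst)
head(imp)
xgb.plot.importance(imp)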