Notes on this lab:
- rf.boston <- randomForest(medv ~ ., data = Boston, subset = train) fits a random forest; mtry is the number of predictors considered at each split.
- boost.boston <- gbm(medv ~ ., data = Boston[train,], distribution = "gaussian", n.trees = 10000, shrinkage = 0.01, interaction.depth = 4) fits a boosted model; interaction.depth is the number of splits in each tree, and distribution = "gaussian" means squared-error loss.
- summary() on a gbm fit prints the variable-importance graph.
- plot(boost.boston, i = "lstat") draws the partial dependence of medv on lstat.
- In predmat <- predict(boost.boston, newdata = Boston[-train,], n.trees = n.trees), n.trees can be a sequence, so predmat has one column of predictions per tree count.
- In apply((predmat - medv)^2, 2, mean), predmat is a matrix but medv is a vector; R recycles medv, so it lines up with the rows and is reused for each column.
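A quick toy example of that recycling rule (made-up numbers, not the Boston data):
In [ ]:
m <- matrix(1:6, nrow = 3)   # 3 "observations" x 2 "models", like predmat
v <- c(10, 20, 30)           # one true value per observation, like medv
m - v                        # v lines up with the rows and is reused for each column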
In [ ]:
require(gbm)         # boosting
require(MASS)        # the Boston housing data
require(tidyverse)   # ggplot2, used for plotting below
In [ ]:
set.seed(101)
train <- sample(1:nrow(Boston), 300)   # needed here; re-created with the same seed in the random forest section below
boost.boston <- gbm(medv ~ ., data = Boston[train,], distribution = "gaussian", n.trees = 10000,
                    shrinkage = 0.01, interaction.depth = 4)
summary(boost.boston)
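As a small aside, summary.gbm can also return the relative-influence table without drawing the bar chart; the plotit argument below is from the gbm package's summary method, if memory serves:
In [ ]:
head(summary(boost.boston, plotit = FALSE))   # importance table only, no graph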
In [ ]:
plot(boost.boston, i = "lstat")
In [ ]:
plot(boost.boston, i = "rm")
In [ ]:
n.trees <- seq(100, 10000, 100)   # evaluate the boosted model every 100 trees
predmat <- predict(boost.boston, newdata = Boston[-train,], n.trees = n.trees)
dim(predmat)   # one row per test observation, one column per value of n.trees
In [ ]:
perr <- with(Boston[-train,], apply((predmat - medv)^2, 2, mean))   # test MSE for each n.trees
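A quick follow-up to read off where the test error bottoms out, using the perr and n.trees computed above:
In [ ]:
n.trees[which.min(perr)]   # number of trees with the lowest test MSE
min(perr)                  # the test MSE at that point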
In [ ]:
ggplot() +
  geom_point(mapping = aes(x = n.trees, y = perr), color = "blue")
In [ ]:
require(randomForest)
require(MASS)
In [ ]:
set.seed(101)
In [ ]:
dim(Boston)
In [ ]:
train <- sample(1:nrow(Boston), 300)   # 300 of the 506 rows for training
In [ ]:
length(train)
In [ ]:
rf.boston <- randomForest(medv ~ ., data = Boston, subset = train)   # default mtry for regression is p/3
str(rf.boston)
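As an optional check, the plot method for randomForest objects shows how the out-of-bag MSE settles down as trees are added:
In [ ]:
plot(rf.boston)   # OOB MSE as a function of the number of trees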
In [ ]:
oob.err <- double(13)    # out-of-bag MSE for each value of mtry
test.err <- double(13)   # test-set MSE for each value of mtry
for (mtry in 1:13) {     # Boston has 13 predictors
    fit <- randomForest(medv ~ ., data = Boston, subset = train, mtry = mtry, ntree = 400)
    oob.err[mtry] <- fit$mse[400]   # OOB error after all 400 trees
    pred <- predict(fit, Boston[-train,])
    test.err[mtry] <- with(Boston[-train,], mean((pred - medv)^2))
    cat(mtry, "")   # progress indicator
}
In [ ]:
matplot(1:mtry, cbind(test.err, oob.err), pch = 19, col = c("red", "blue"), type = "b",
        xlab = "mtry", ylab = "Mean squared error")
legend("topright", legend = c("Test", "OOB"), pch = 19, col = c("red", "blue"))