Summary
load package tree
- Fit: `model <- tree(formula = ..., data = ...)`
- Inspect: `summary(model)`, `plot(model)`, `text(model, pretty = 0)`, `model`
- Predict: `tree.pred <- predict(tree.carseats, CarseatsNew[-train,], type = "class")`
- Prune: `prune.carseats <- prune.misclass(tree.carseats, best = 13)`
- CV: `cv.carserts <- cv.tree(tree.carseats, FUN = prune.misclass)`
Question
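With the default settings of `tree()`, what is the stopping criterion? Some terminal nodes contain only 5 observations, while others contain 26. (See `?tree.control`: by default a node is split only if it has at least `minsize = 10` observations and its deviance is at least `mindev = 0.01` times the root deviance, and each child must keep at least `mincut = 5` observations.)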
In [ ]:
require(tree)
require(ISLR)
In [ ]:
require(tidyverse)
In [ ]:
# Per-column summary of the Carseats data.
summary(Carseats)
In [ ]:
# Histogram of Sales using bins of width 2.
ggplot(Carseats, aes(x = Sales)) +
  geom_histogram(binwidth = 2)
In [ ]:
# Binary response: "Yes" when Sales exceeds 8, otherwise "No".
# ifelse() returns a character vector; it is converted to a factor here
# because tree() needs a factor response to fit a classification tree
# (since R 4.0, data frames no longer convert strings to factors).
High <- factor(ifelse(Carseats$Sales <= 8, "No", "Yes"), levels = c("No", "Yes"))
In [ ]:
# Carseats with the High response appended as an extra column.
CarseatsNew <- data.frame(Carseats, High = High)
In [ ]:
# Fit a tree for High on every column except Sales
# (High is derived from Sales, so Sales is excluded from the predictors).
tree.carseats <- tree(High ~ . - Sales, data = CarseatsNew)
In [ ]:
# Summary of the fitted tree.
summary(tree.carseats)
In [ ]:
# Draw the tree skeleton, then add the split labels.
plot(tree.carseats)
# pretty = 0 asks text() for unabbreviated factor level names (see ?text.tree).
text(tree.carseats, pretty = 0)
In [ ]:
# Print the fitted tree object.
tree.carseats
In [ ]:
# Fix the RNG seed so the train/test split is the same on every run.
set.seed(1011)
# Draw 250 row indices for training; the remaining rows are held out.
# seq_len() is used instead of 1:n, and the row count comes from CarseatsNew,
# the data frame that is actually subset below.
train <- sample(seq_len(nrow(CarseatsNew)), 250)
# head(train)
# Refit the tree using only the training rows (overwrites tree.carseats).
tree.carseats <- tree(High ~ . - Sales, CarseatsNew, subset = train)
In [ ]:
# Draw the current tree.carseats, then add the split labels.
plot(tree.carseats)
# pretty = 0 asks text() for unabbreviated factor level names (see ?text.tree).
text(tree.carseats, pretty = 0)
In [ ]:
# Predicted class labels for the rows that were not sampled into `train`.
tree.pred <- predict(
  tree.carseats,
  newdata = CarseatsNew[-train, ],
  type = "class"
)
tree.pred
In [ ]:
# Cross-tabulate predictions (rows) against the held-out High values (columns).
table(tree.pred, High = CarseatsNew$High[-train])
In [ ]:
# cv.tree() assigns observations to folds at random, so seed the RNG first;
# otherwise the cross-validation result changes on every run.
set.seed(1011)
# Cross-validate the cost-complexity pruning sequence, using
# prune.misclass as the pruning function.
cv.carserts <- cv.tree(tree.carseats, FUN = prune.misclass)
In [ ]:
# Print the cross-validation result.
cv.carserts
In [ ]:
# Plot the cross-validation result.
plot(cv.carserts)
In [ ]:
# Prune the tree back to the subtree of size 13.
# NOTE(review): 13 is presumably read off the cross-validation plot; confirm
# it still matches the CV output if the split or seed changes.
prune.carseats <- prune.misclass(tree.carseats, best = 13)
plot(prune.carseats)
# pretty = 0 keeps factor level names unabbreviated, matching the other
# tree plots in this file.
text(prune.carseats, pretty = 0)
In [ ]:
# Predict the held-out rows with the pruned tree (overwrites tree.pred),
# then cross-tabulate predictions (rows) against the High values (columns).
tree.pred <- predict(
  prune.carseats,
  newdata = CarseatsNew[-train, ],
  type = "class"
)
table(tree.pred, High = CarseatsNew$High[-train])