## CV with 1SE rule for the Auto data
set.seed(78)
library(ISLR)

# K-fold cross-validation of the polynomial regressions
#   mpg ~ poly(horsepower, d)
# on the Auto data, for every degree d in `degree`.
#
# Results (both aligned with `degree`, position by position):
#   cv_degree[j]  mean of the per-fold test MSEs for degree[j]
#   se_degree[j]  standard error of that mean, sd(fold MSEs) / sqrt(n_folds)
n_train <- nrow(Auto)
n_folds <- 10
degree <- 1:10

# Size the result vectors from `degree` so changing the grid of degrees
# cannot leave them too short, too long, or mis-indexed.
cv_degree <- numeric(length(degree))
se_degree <- numeric(length(degree))

# Draw the fold assignment ONCE, before the loop over degrees, so that every
# degree is evaluated on exactly the same train/test partitions. Re-drawing
# the folds per degree would add partition-to-partition noise to the
# comparison between degrees.
# rep(..., length.out = n_train) gives fold sizes that differ by at most one;
# sample() then shuffles which observation lands in which fold.
folds_i <- sample(rep(seq_len(n_folds), length.out = n_train))

for (j in seq_along(degree)) {
  d <- degree[j]
  cv_values <- numeric(n_folds)

  for (k in seq_len(n_folds)) {
    test_i <- which(folds_i == k)
    train <- Auto[-test_i, ]
    test <- Auto[test_i, ]

    # Gaussian glm == ordinary least squares. The object is called `fit`
    # (not `glm.fit`) to avoid shadowing stats::glm.fit.
    fit <- glm(mpg ~ poly(horsepower, d), data = train)
    preds <- predict(fit, newdata = test)

    # Test mean squared error on the held-out fold.
    cv_values[k] <- mean((preds - test$mpg)^2)
  }

  # Index by position j (not by the degree value d) so the results stay
  # aligned with `degree` whatever values it holds.
  cv_degree[j] <- mean(cv_values)
  se_degree[j] <- sd(cv_values) / sqrt(n_folds)
}
# Plot the cross-validation error against polynomial degree, with +/- one
# standard error bars, and mark the models picked by the two usual rules:
#   * red filled point  : degree with the minimum CV error
#   * blue open ring    : one-standard-error rule, i.e. the lowest degree whose
#                         CV error is no more than one SE above the minimum
# The dashed red lines sit at (minimum CV error) +/- (its standard error).
plot(
  degree,
  cv_degree,
  type = "b",
  pch = 19,
  cex = 2,
  lwd = 2,
  # Make room for the full extent of every error bar.
  ylim = range(c(cv_degree - se_degree, cv_degree + se_degree)),
  xlab = "Degree of Polynomial",
  ylab = "CV Error",
  main = "Auto data"
)

# Error bars: angle = 90 with code = 3 draws flat caps at both ends.
arrows(
  degree,
  cv_degree - se_degree,
  degree,
  cv_degree + se_degree,
  length = 0.05,
  angle = 90,
  code = 3
)

# Position (within `degree`) of the minimum CV error.
i <- which.min(cv_degree)

# Plot at degree[i], not at the index i: the two coincide only when
# `degree` happens to be 1, 2, 3, ...
points(
  degree[i],
  cv_degree[i],
  pch = 19,
  cex = 3,
  col = "red"
)

# Horizontal reference lines one SE above and below the minimum.
abline(
  h = cv_degree[i] + c(1, -1) * se_degree[i],
  col = "red",
  lty = 2,
  lwd = 2
)

# One-standard-error rule: among the degrees whose CV error does not exceed
# the upper dashed line, choose the simplest (lowest-degree) model.
within_1se <- which(cv_degree <= cv_degree[i] + se_degree[i])
i_1se <- within_1se[which.min(degree[within_1se])]

# Slightly larger open ring so it stays visible even when it coincides with
# the red minimum marker.
points(
  degree[i_1se],
  cv_degree[i_1se],
  pch = 1,
  cex = 4,
  lwd = 2,
  col = "blue"
)