In [2]:
library(MASS)
# Generate the data
set.seed(558562316)
N <- 20  # sample size
x1 <- runif(n = N)
x2 <- runif(n = N)
x3 <- runif(n = N)
x3c <- 10*x1 + x3  # new predictor, strongly correlated with x1
ep <- rnorm(n = N)  # random error
y <- x1 + x2 + ep  # true mean is x1 + x2; x3 and x3c carry no signal
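In [ ]:
# Quick check (our addition, not in the original notebook): quantify the
# collinearity that motivates the ridge comparison below.
cor(x1, x3)   # independent x3: correlation with x1 should be near 0
cor(x1, x3c)  # constructed x3c: correlation with x1 should be near 1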
In [4]:
# OLS fit of 3-variable model using independent x3
ols <- lm(y ~ x1 + x2 + x3)
summary(ols)
In [5]:
# OLS fit of 3-variable model using correlated x3.
olsc <- lm(y ~ x1 + x2 + x3c)
summary(olsc)
In [6]:
# Ridge regression using independent variables
ridge <- lm.ridge(y ~ x1 + x2 + x3, lambda = seq(0, 0.1, 0.001))
select(ridge)  # summary() has no useful method for "ridgelm"; select() reports the HKB, L-W, and GCV choices of lambda
plot(ridge)
In [7]:
# Ridge regression using correlated variables
ridgec <- lm.ridge(y ~ x1 + x2 + x3c, lambda = seq(0, 0.1, 0.001))
plot(ridgec)
select(ridgec)
In [8]:
# The selected lambda sits at the endpoint of the grid. Extend the grid and refit.
ridgec <- lm.ridge(y ~ x1 + x2 + x3c, lambda = seq(0, 1, 0.1))
plot(ridgec)
select(ridgec)
In [9]:
# Still at the endpoint. Extend the grid again, with a wider range and a finer step.
ridgec <- lm.ridge(y ~ x1 + x2 + x3c, lambda = seq(0, 10, 0.01))
plot(ridgec)
select(ridgec)
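In [ ]:
# The GCV-minimizing lambda can also be pulled out programmatically
# (a sketch, our addition; lm.ridge() stores the per-lambda GCV values in $GCV):
ridgec$lambda[which.min(ridgec$GCV)]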
In [14]:
# Final model uses lambda = 4, the GCV choice found above.
ridge.final <- lm.ridge(y ~ x1 + x2 + x3c, lambda = 4)
ridge.final  # printing a "ridgelm" fit shows its coefficients; there is no useful summary() method
# Create test data and compute predicted values for OLS and ridge.
# There's no predict() method for "ridgelm" objects
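In [ ]:
# A small stand-in for the missing predict() method (our helper, not part of
# MASS): coef() on a "ridgelm" fit returns the intercept followed by the
# slopes on the original scale, so a prediction is just the usual linear
# combination. Assumes the columns of newx are the model's predictors, in
# formula order.
predict_ridgelm <- function(fit, newx) {
  b <- coef(fit)
  drop(b[1] + as.matrix(newx) %*% b[-1])
}
# e.g.: predict_ridgelm(ridge.final, test[, c("x1", "x2", "x3c")])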
In [24]:
ridge.final$coef  # coefficients for the centered/scaled predictors; coef(ridge.final) gives the original scale
In [25]:
test <- expand.grid(x1 = seq(0.05, 0.95, 0.1), x2 = seq(0.05, 0.95, 0.1), x3 = seq(0.05, 0.95, 0.1))
mu <- test$x1 + test$x2  # true mean of y on the test grid
test$x3c <- 10*test$x1 + test$x3
pred.ols <- predict(ols, newdata = test)    # y ~ x1 + x2 + x3
pred.olsc <- predict(olsc, newdata = test)  # y ~ x1 + x2 + x3c
pred.ridge <- coef(ridge.final)[1] + coef(ridge.final)[2]*test$x1 +
  coef(ridge.final)[3]*test$x2 + coef(ridge.final)[4]*test$x3c
In [26]:
MSPE.ols <- mean((pred.ols - mu)^2)
MSPE.olsc <- mean((pred.olsc - mu)^2)
MSPE.ridge <- mean((pred.ridge - mu)^2)
MSPE.ols
MSPE.olsc
MSPE.ridge
In [30]:
library(glmnet)
In [31]:
swiss <- datasets::swiss
x <- model.matrix(Fertility ~ ., swiss)[, -1]  # design matrix without the intercept column
y <- swiss$Fertility
lambda <- 10^seq(10, -2, length = 100)  # wide grid, from 10^10 down to 10^-2
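In [ ]:
# Note: the lambda grid above is never handed to glmnet() later in this
# notebook. A sketch of how it would be used for the Fertility model
# (ridge.swiss is our name, not from the original):
ridge.swiss <- glmnet(x, y, alpha = 0, lambda = lambda)
dim(coef(ridge.swiss))  # (p + 1) coefficients by one column per lambda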
In [43]:
set.seed(489)
In [44]:
train <- sample(1:nrow(x), nrow(x)/2)  # indices of a random training half
In [49]:
test <- (-train)  # negative indices select the held-out rows (note: this overwrites the earlier test grid)
In [50]:
ytest <- y[test]  # responses for the held-out rows
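In [ ]:
# The train/test split above is not used again below; a sketch of the
# evaluation it presumably sets up (our code, under that assumption):
cv.train <- cv.glmnet(x[train, ], y[train], alpha = 0)
pred.test <- predict(cv.train, s = "lambda.min", newx = x[test, ])
mean((pred.test - ytest)^2)  # held-out mean squared error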
In [61]:
nrow(x)    # sanity check: the number of rows of x ...
length(y)  # ... matches the length of y
In [95]:
data(swiss)
class(swiss)
In [73]:
x <- model.matrix(Infant.Mortality ~ ., swiss)[, -1]  # this just extracts the design matrix (minus the intercept column)
In [74]:
y <- swiss$Infant.Mortality  # define the response we are trying to predict
In [76]:
head(x,5)
head(swiss,5)
In [80]:
ridge.mod <- glmnet(x, y, alpha = 0, nlambda = 100, lambda.min.ratio = 0.0001)  # alpha = 0 fits ridge regression
In [81]:
ridge.mod$lambda[100]  # the smallest lambda on the path
In [83]:
coef(ridge.mod)[, 100]  # estimated coefficients at the smallest (100th) lambda
# Index 100 is the fit closest to OLS; index 1 is closest to the intercept-only model (y ~ 1).
# So we need to search for the optimal lambda in between to pin down the coefficients.
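In [ ]:
# Quick check of the comment above (our addition): at the largest lambda
# the slopes are shrunk toward zero and the intercept approaches mean(y).
coef(ridge.mod)[, 1]
mean(y)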
In [93]:
set.seed(1)
cv.out <- cv.glmnet(x, y, alpha = 0, nlambda = 100, lambda.min.ratio = 0.0001)
plot(cv.out)  # cross-validated error against log(lambda)
In [94]:
best.lambda <- cv.out$lambda.min  # lambda with the smallest cross-validated error
In [99]:
predict(ridge.mod, s = best.lambda, type = "coefficients")[1:6, ]  # coefficients at the chosen lambda; use these to form fitted values and measure the error
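In [ ]:
# For reference (our addition): compare against the unpenalized OLS fit;
# the ridge coefficients at best.lambda should look like shrunken versions.
coef(lm(Infant.Mortality ~ ., data = swiss))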
In [ ]:
# Next question: what happens if we also vary which variables enter the model, to mimic forward selection?