In [2]:
    
library(MASS)
# Generating the data
set.seed(558562316)
N <- 20      # Sample size
x1 <- runif(n=N)
x2 <- runif(n=N)
x3 <- runif(n=N)
x3c <- 10*x1 + x3 # New variable
ep <- rnorm(n=N)
y <- x1 + x2 + ep
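
Since x3c = 10*x1 + x3, it is nearly collinear with x1. A quick check of the collinearity this sets up (a sketch, not part of the original run):

In [ ]:

# Sketch: x3c should be almost perfectly correlated with x1
cor(x1, x3c)
round(cor(cbind(x1, x2, x3, x3c)), 2)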
    
In [4]:
    
# OLS fit of 3-variable model using independent x3
ols <- lm(y~ x1 + x2 + x3)
summary(ols)
    
    
In [5]:
    
# OLS fit of 3-variable model using correlated x3.
olsc <- lm(y~ x1 + x2 + x3c)
summary(olsc)
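
One way to quantify the collinearity is through variance inflation factors. A sketch, assuming the car package, which the original notebook does not load:

In [ ]:

# Sketch (assumes the car package): VIFs for the correlated fit
library(car)
vif(olsc)   # x1 and x3c should show very large VIFs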
    
    
In [6]:
    
# Ridge regression using independent variables
ridge <- lm.ridge (y ~ x1+x2+x3, lambda = seq(0, .1, .001))
# summary() has no "ridgelm" method and only lists the object's components;
# select() reports the HKB, L-W, and GCV choices of lambda instead
select(ridge)
plot(ridge)
    
    
    
In [7]:
    
# Ridge regression using correlated variables
ridgec <- lm.ridge (y ~ x1+x2+x3c, lambda = seq(0, .1, .001))
plot(ridgec)
select(ridgec)
    
    
    
In [8]:
    
# Selection of constant is at endpoint.  Extend endpoint and try again
ridgec <- lm.ridge (y ~ x1+x2+x3c, lambda = seq(0, 1, .1))
plot(ridgec)
select(ridgec)
    
    
    
In [9]:
    
# Selection of constant is at endpoint.  Extend endpoint and try again
ridgec <- lm.ridge (y ~ x1+x2+x3c, lambda = seq(0, 10, .01))
plot(ridgec)
select(ridgec)
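
The GCV-minimizing lambda can also be pulled directly from the components that lm.ridge stores on the fitted object, instead of reading it off the select() printout. A sketch:

In [ ]:

# Sketch: extract the GCV-minimizing lambda from the ridgelm object
ridgec$lambda[which.min(ridgec$GCV)]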
    
    
    
In [14]:
    
# Final model uses lambda=4 (the GCV choice from the previous step).
ridge.final <- lm.ridge(y ~ x1 + x2 + x3c, lambda = 4)
ridge.final  # printing the fit shows the original-scale coefficients
# Create test data and compute predicted values for OLS and ridge.
#  There's no predict() method for "ridgelm" objects
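
A minimal helper that mimics predict() for a "ridgelm" fit by applying the original-scale coefficients from coef(). The function name and its vars argument are hypothetical, not part of MASS:

In [ ]:

# Hypothetical helper (not part of MASS): predict() for "ridgelm" fits
predict_ridgelm <- function(fit, newdata, vars) {
  cf <- coef(fit)                               # intercept first, then slopes
  X  <- as.matrix(newdata[, vars, drop = FALSE])
  drop(cf[1] + X %*% cf[-1])
}
# e.g. predict_ridgelm(ridge.final, test, c("x1", "x2", "x3c"))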
    
    
In [24]:
    
ridge.final$coef  # coefficients on the centered/scaled predictors; coef(ridge.final) maps back to the original scale
    
    
In [25]:
    
test <- expand.grid(x1 = seq(.05,.95,.1), x2 = seq(.05,.95,.1), x3=seq(.05,.95,.1))
mu <- test$x1 + test$x2
test$x3c <- 10*test$x1 + test$x3
pred.ols <- predict(ols,newdata=test)   # y ~ X1 + X2 + X3
pred.olsc <- predict(olsc,newdata=test) # y ~ X1 + X2 + X3c
# Manual ridge prediction (no predict() method): intercept + slopes; test[,4] is x3c
pred.ridge <- coef(ridge.final)[1] + coef(ridge.final)[2]*test[,1] +
  coef(ridge.final)[3]*test[,2] + coef(ridge.final)[4]*test[,4]
    
In [26]:
    
# MSPE of each fit against the true mean mu (the noiseless surface)
MSPE.ols <- mean((pred.ols - mu)^2)
MSPE.olsc <- mean((pred.olsc - mu)^2)
MSPE.ridge <- mean((pred.ridge - mu)^2)
MSPE.ols
MSPE.olsc
MSPE.ridge
    
    
    
    
In [30]:
    
library(glmnet)
    
    
In [31]:
    
swiss <- datasets::swiss
x <- model.matrix(Fertility~., swiss)[,-1]
y <- swiss$Fertility
lambda <- 10^seq(10, -2, length = 100)
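
The lambda grid above is defined but never used below. A sketch of fitting the ridge path on it explicitly:

In [ ]:

# Sketch: fit the ridge path on the grid above (decreasing from 10^10 to 10^-2)
ridge.grid <- glmnet(x, y, alpha = 0, lambda = lambda)
dim(coef(ridge.grid))   # one column of coefficients per lambda value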
    
In [43]:
    
set.seed(489)
    
In [44]:
    
train = sample(1:nrow(x), nrow(x)/2)
    
In [49]:
    
test = (-train)
    
In [50]:
    
ytest<-y[test]
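
The train/test split is created but not used afterwards. A sketch of how it could drive an out-of-sample evaluation of the Fertility ridge fit (nfolds reduced because the training set is small):

In [ ]:

# Sketch: out-of-sample evaluation using the split above
cv.tr <- cv.glmnet(x[train, ], y[train], alpha = 0, nfolds = 5)
pred  <- predict(cv.tr, s = "lambda.min", newx = x[test, ])
mean((pred - ytest)^2)   # test-set MSE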
    
In [61]:
    
nrow(x)
length(y)
    
    
    
In [95]:
    
data(swiss)
class(swiss)
    
    
In [73]:
    
x <- model.matrix(Infant.Mortality~., swiss)[,-1] # Extract the design matrix, dropping the intercept column
    
In [74]:
    
y <- swiss$Infant.Mortality # Define the response we want to predict
    
In [76]:
    
head(x,5)
head(swiss,5)
    
    
    
In [80]:
    
ridge.mod<-glmnet(x, y, alpha=0, nlambda=100, lambda.min.ratio=0.0001)
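
To see how the coefficients shrink along the lambda path, a sketch using glmnet's built-in plot method:

In [ ]:

# Sketch: coefficient paths as a function of log(lambda)
plot(ridge.mod, xvar = "lambda", label = TRUE)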
    
In [81]:
    
ridge.mod$lambda[100]
    
    
In [83]:
    
coef(ridge.mod)[,100] # Estimated coefficients at the 100th (smallest) lambda
# Index 100 is the closest to OLS; index 1 is the closest to an intercept-only fit (y ~ 1)
# So we need to search for the optimal lambda before reading off coefficients
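
A sketch confirming the comment above: the coefficients at the smallest lambda should sit close to the OLS fit:

In [ ]:

# Sketch: compare the smallest-lambda ridge coefficients against OLS
coef(lm(Infant.Mortality ~ ., data = swiss))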
    
    
In [93]:
    
set.seed(1)
cv.out<-cv.glmnet(x,y,alpha=0,nlambda=100, lambda.min.ratio=0.0001)
plot(cv.out)
    
    
In [94]:
    
best.lambda<-cv.out$lambda.min
    
In [99]:
    
predict(ridge.mod, s=best.lambda, type="coefficients")[1:6, ] # With these coefficients we can compute fitted values and the error
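
Following the comment above, a sketch that computes fitted values and the in-sample error at the CV-chosen lambda:

In [ ]:

# Sketch: fitted values and in-sample MSE at the CV-chosen lambda
fitted.best <- predict(ridge.mod, s = best.lambda, newx = x)
mean((fitted.best - y)^2)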
    
    
In [ ]:
    
# Now, what happens if we also vary which variables enter the model, to mimic forward selection? (see the sketch below)
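
One way to explore that question, sketched with step(); this is an illustration, not what the original notebook ran:

In [ ]:

# Sketch: forward selection with step(), varying which variables enter
null.mod <- lm(Infant.Mortality ~ 1, data = swiss)
full.mod <- lm(Infant.Mortality ~ ., data = swiss)
step(null.mod, scope = formula(full.mod), direction = "forward")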