In [2]:
library(MASS)

# Generating the data

set.seed(558562316)
N <- 20      # Sample size

x1 <- runif(n=N)
x2 <- runif(n=N)
x3 <- runif(n=N)
x3c <- 10*x1 + x3 # New variable
ep <- rnorm(n=N)
y <- x1 + x2 + ep

In [4]:
# OLS fit of 3-variable model using independent x3
ols <- lm(y~ x1 + x2 + x3)
summary(ols)


Call:
lm(formula = y ~ x1 + x2 + x3)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.19698 -0.28592  0.04026  0.24016  1.20322 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept)  -0.4293     0.4916  -0.873   0.3954   
x1            1.7851     0.4812   3.710   0.0019 **
x2            0.7119     0.4622   1.540   0.1430   
x3            0.2839     0.5122   0.554   0.5870   
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.6306 on 16 degrees of freedom
Multiple R-squared:  0.4831,	Adjusted R-squared:  0.3862 
F-statistic: 4.984 on 3 and 16 DF,  p-value: 0.0125

In [5]:
# OLS fit of 3-variable model using correlated x3.
olsc <- lm(y~ x1 + x2 + x3c)
summary(olsc)


Call:
lm(formula = y ~ x1 + x2 + x3c)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.19698 -0.28592  0.04026  0.24016  1.20322 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  -0.4293     0.4916  -0.873    0.395
x1           -1.0540     5.1966  -0.203    0.842
x2            0.7119     0.4622   1.540    0.143
x3c           0.2839     0.5122   0.554    0.587

Residual standard error: 0.6306 on 16 degrees of freedom
Multiple R-squared:  0.4831,	Adjusted R-squared:  0.3862 
F-statistic: 4.984 on 3 and 16 DF,  p-value: 0.0125

In [6]:
# Ridge regression using independent variables
ridge <- lm.ridge (y ~ x1+x2+x3, lambda = seq(0, .1, .001))
summary(ridge)
plot(ridge)


       Length Class  Mode   
coef   303    -none- numeric
scales   3    -none- numeric
Inter    1    -none- numeric
lambda 101    -none- numeric
ym       1    -none- numeric
xm       3    -none- numeric
GCV    101    -none- numeric
kHKB     1    -none- numeric
kLW      1    -none- numeric

In [7]:
# Ridge regression using correlated variables
ridgec <- lm.ridge (y ~ x1+x2+x3c, lambda = seq(0, .1, .001))
plot(ridgec)
select(ridgec)


modified HKB estimator is 0.4209458 
modified L-W estimator is 1.337588 
smallest value of GCV  at 0.1 

In [8]:
# Selection of constant is at endpoint.  Extend endpoint and try again
ridgec <- lm.ridge (y ~ x1+x2+x3c, lambda = seq(0, 1, .1))
plot(ridgec)
select(ridgec)


modified HKB estimator is 0.4209458 
modified L-W estimator is 1.337588 
smallest value of GCV  at 1 

In [9]:
# Selection of constant is at endpoint.  Extend endpoint and try again
ridgec <- lm.ridge (y ~ x1+x2+x3c, lambda = seq(0, 10, .01))
plot(ridgec)
select(ridgec)


modified HKB estimator is 0.4209458 
modified L-W estimator is 1.337588 
smallest value of GCV  at 4.01 

In [14]:
# Final model uses lambda=4.
ridge.final <- lm.ridge (y ~ x1+x2+x3c)
#ridge.final
summary(ridge.final)
# Create test data and compute predicted values for OLS and ridge.
#  There's no predict() method for "ridgelm" objects


       Length Class  Mode   
coef   3      -none- numeric
scales 3      -none- numeric
Inter  1      -none- numeric
lambda 1      -none- numeric
ym     1      -none- numeric
xm     3      -none- numeric
GCV    1      -none- numeric
kHKB   1      -none- numeric
kLW    1      -none- numeric

In [24]:
ridge.final$coef


x1
-0.325516760469218
x2
0.228841271603357
x3c
0.886779779957445

In [25]:
test <- expand.grid(x1 = seq(.05,.95,.1), x2 = seq(.05,.95,.1), x3=seq(.05,.95,.1))
mu <- test$x1 + test$x2
test$x3c <- 10*test$x1 + test$x3
pred.ols <- predict(ols,newdata=test)   # y ~ X1 + X2 + X3
pred.olsc <- predict(olsc,newdata=test) # y ~ X1 + X2 + X3c
pred.ridge <- coef(ridge.final)[1] + coef(ridge.final)[2]*test[,1] + 
  coef(ridge.final)[3]*test[,2] + coef(ridge.final)[4]*test[,4]

In [26]:
MSPE.ols <- sum((pred.ols - mu)^2)/length(mu)
MSPE.olsc <- sum((pred.olsc - mu)^2)/length(mu)
MSPE.ridge <- sum((pred.ridge - mu)^2)/length(mu)

MSPE.ols
MSPE.olsc
MSPE.ridge


0.0658596445755188
0.0658596445755186
0.0658596445755189

Ejemplo R- Bloggers


In [30]:
library(glmnet)


Loading required package: Matrix
Loading required package: foreach
Loaded glmnet 2.0-13


In [31]:
swiss <- datasets::swiss
x <- model.matrix(Fertility~., swiss)[,-1]
y <- swiss$Fertility
lambda <- 10^seq(10, -2, length = 100)

In [43]:
set.seed(489)

In [44]:
train = sample(1:nrow(x), nrow(x)/2)

In [49]:
test = (-train)

In [50]:
ytest<-y[test]

In [61]:
nrow(x)
length(y)


47
47

Ridge Regression


In [62]:
library(glmnet)

In [95]:
data(swiss)
class(swiss)


'data.frame'

In [73]:
x<-model.matrix(Infant.Mortality~., swiss)[,-1] #Esto lo unico que hace es sacar la matriz

In [74]:
y<-swiss$Infant.Mortality #Define cual es la y a donde quiero llegar

In [76]:
head(x,5)
head(swiss,5)


FertilityAgricultureExaminationEducationCatholic
Courtelary80.2 17.0 15 12 9.96
Delemont83.1 45.1 6 9 84.84
Franches-Mnt92.5 39.7 5 5 93.40
Moutier85.8 36.5 12 7 33.77
Neuveville76.9 43.5 17 15 5.16
FertilityAgricultureExaminationEducationCatholicInfant.Mortality
Courtelary80.2 17.0 15 12 9.9622.2
Delemont83.1 45.1 6 9 84.8422.2
Franches-Mnt92.5 39.7 5 5 93.4020.2
Moutier85.8 36.5 12 7 33.7720.3
Neuveville76.9 43.5 17 15 5.1620.6

In [80]:
ridge.mod<-glmnet(x, y, alpha=0, nlambda=100, lambda.min.ratio=0.0001)

In [81]:
ridge.mod$lambda[100]


0.120032451750052

In [83]:
coef(ridge.mod)[,100] #Estos son los coeficientes estimados para una semana completa

#El 100 es el mas parecido a un OLS y el 1 es el mas parecido a un pt~1

#Entonces debo buscar el optimo para identificar los coeficientes


(Intercept)
10.2666511748565
Fertility
0.133406125719913
Agriculture
-0.0151770573609689
Examination
0.0323564267293338
Education
0.0428544238456504
Catholic
0.00202449129043223

In [93]:
set.seed(1)
cv.out<-cv.glmnet(x,y,alpha=0,nlambda=100, lambda.min.ratio=0.0001)
plot(cv.out)



In [94]:
best.lambda<-cv.out$lambda.min

In [99]:
predict(ridge.mod, s=best.lambda, type="coefficients")[1:6, ] # Con estos coeficientes calculo la semana y calculo el error


(Intercept)
15.0596418628362
Fertility
0.0749997978147319
Agriculture
-0.0152724573860916
Examination
0.00844114165951956
Education
0.00672837148162444
Catholic
0.00444470645608069

In [ ]:
#Ahora que pasa si tambien voy variando los modelos que entran??? para simular un selection forward