#####################################################################
### ProblemSet 04: Lasso and Ridge Regression
#####################################################################
### Preface
# clear workspace, console, and graphics device
rm(list=ls())
cat("\014")
if (!is.null(dev.list())) dev.off() # close the graphics device only if one is open
#library(plm) # Panel Methods - regression models
library(mvtnorm) # random draws from a multivariate normal distr.
library(MASS) # to fit LDA and QDA analysis
library(glmnet) # Lasso and Ridge regression
# Note: glmnet expects the predictors as a numeric matrix, not a data frame
#####################################################################
### Exercise 1
# data config
N = 300
p = 50
mu = 0
## DGP
# Variance-Covariance Matrix, beta vector
set.seed(123)
temp=runif(p, min = 1, max = 2)
covmat=diag(temp)
beta=runif(p, min = 0.1, max = 0.5)
# sampling data and generate y's
# X = replicate(p , rmvnorm(n = N, mu = mu, Sigma = covmat))
X = rmvnorm(N, mean = rep(mu, p), sigma = covmat)
eps = rnorm(N, 0, 1^0.5)
Y = X %*% beta + eps
#Y = sin(X %*% beta) + eps
rm(temp, eps)
## a)
# Ridge
# decreasing grid of lambda values from 0.5 down to 0
grid <- seq(0.5, 0, by = -.001)
#grid <- 10^seq(p, -2, by = -.1)
ridge <- glmnet(X, Y, alpha = 0, lambda = grid)
summary(ridge)
# Lasso
# Note: alpha = 1 gives the pure lasso penalty, alpha = 0 pure ridge;
# values in between blend the two penalties (elastic net).
lasso <- glmnet(X, Y, alpha=1, lambda = grid)
summary(lasso)
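# Illustration only (not part of the original exercise): an alpha between 0 and 1
# fits the elastic-net blend of the lasso and ridge penalties on the same grid.
enet <- glmnet(X, Y, alpha = 0.5, lambda = grid)
summary(enet)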
## b)
# generating test data
set.seed(321)
X_test = rmvnorm(N, mean = rep(mu, p), sigma = covmat)
eps = rnorm(N, 0, 1^0.5)
Y_test = X_test %*% beta + eps
#Y_test = sin(X_test %*% beta) + eps
rm(eps)
rfit <- predict(ridge, newx=X_test)
lfit <- predict(lasso, newx=X_test)
# vectorized alternative to the loop below:
# lerr <- apply(lfit, 2, function(fhat) mean((Y_test - fhat)^2))
# rerr <- apply(rfit, 2, function(fhat) mean((Y_test - fhat)^2))
lerr <- c(rep(NaN, ncol(lfit)))
rerr <- c(rep(NaN, ncol(rfit)))
for (i in 1:ncol(lfit)){
lerr[i] <- mean((Y_test-lfit[,i])^2)
rerr[i] <- mean((Y_test-rfit[,i])^2)
}
par(mfrow=c(3,2))
plot(lasso, main="Lasso")
plot(ridge, main="Ridge")
plot(lasso$lambda, main="Lasso Lambda")
plot(ridge$lambda, main="Ridge Lambda")
plot(lerr, main="Lasso MSE")
plot(rerr, main="Ridge MSE")
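# Optional sanity check (not part of the original script): report the lambda on the
# grid that attains the smallest test MSE for each method.
paste("Lasso lambda with smallest test MSE:", lasso$lambda[which.min(lerr)])
paste("Ridge lambda with smallest test MSE:", ridge$lambda[which.min(rerr)])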
## c)
# pick the optimal lambda by cross-validation and predict with s = lambda.min
cvridge <- cv.glmnet(X, Y, alpha = 0, lambda = grid)
cvlasso <- cv.glmnet(X, Y, alpha=1, lambda = grid)
opt_rlambda <- cvridge$lambda.min
opt_llambda <- cvlasso$lambda.min
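# Optional alternative (an assumption, not asked for here): lambda.1se is the more
# conservative CV choice, the largest lambda within one standard error of the minimum.
cvridge$lambda.1se
cvlasso$lambda.1se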
cvrfit <- predict(cvridge, s = opt_rlambda, newx = X_test)
cvlfit <- predict(cvlasso, s = opt_llambda, newx = X_test)
cvlerr <- mean((Y_test-cvlfit)^2)
cvrerr <- mean((Y_test-cvrfit)^2)
paste("CV - MSE Ridge Regression is:", format(cvrerr, digit=4))
paste("CV - MSE Lasso Regression is:", format(cvlerr, digit=4))
paste("Difference between Ridge and Lasso is:", format((cvrerr-cvlerr), digit=4))
# Note: if the cross-validated lambda for the lasso is (close to) zero, the penalty
# vanishes and the lasso fit essentially reduces to OLS
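# Minimal sketch for that comparison (an assumption, not part of the problem set):
# with lambda = 0 the penalty drops out, so the fit should be close to plain OLS.
ols <- lm(drop(Y) ~ X)                    # drop() turns the one-column matrix into a vector
olsfit <- cbind(1, X_test) %*% coef(ols)  # add the intercept column for prediction
olserr <- mean((Y_test - olsfit)^2)
paste("Test MSE of plain OLS is:", format(olserr, digits = 4))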
#####################################################################
### Exercise 2 - Simulation Study
## a)
rm(list=ls())
cat("\014")
MCN=100
cvlerr <- c(rep(NaN, MCN))
cvrerr <- c(rep(NaN, MCN))
set.seed(123)
# MC loop over the number of regressors p = 2,...,MCN (glmnet needs at least 2 predictors)
for (i in 2:MCN){
# data config
N = 300
p = i
mu = 0
## DGP
# Variance-Covariance Matrix, beta vector
temp=runif(p, min = 1, max = 2)
covmat=diag(temp)
beta=runif(p, min = 0.1, max = 0.5)
# sampling data and generate y's
# X = replicate(p , rmvnorm(n = N, mu = mu, Sigma = covmat))
X = rmvnorm(N, mean = rep(mu, p), sigma = covmat)
eps = rnorm(N, 0, 1^0.5)
Y = X %*% beta + eps
rm(temp, eps)
cvridge <- cv.glmnet(X, Y, alpha = 0)
cvlasso <- cv.glmnet(X, Y, alpha=1)
X_test = rmvnorm(N, mean = rep(mu, p), sigma = covmat)
eps = rnorm(N, 0, 1^0.5)
Y_test = X_test %*% beta + eps
rm(eps)
opt_rlambda <- cvridge$lambda.min
opt_llambda <- cvlasso$lambda.min
cvrfit <- predict(cvridge, s = opt_rlambda, newx = X_test)
cvlfit <- predict(cvlasso, s = opt_llambda, newx = X_test)
cvlerr[i] <- mean((Y_test-cvlfit)^2)
cvrerr[i] <- mean((Y_test-cvrfit)^2)
}
par(mfrow=c(1,2))
plot(cvlerr, type="l", main="Lasso Error dep. number of Regressors")
plot(cvrerr, type="l", main="Ridge Error dep. number of Regressors")
### Note: as the number of regressors in the true model increases, we lose degrees of freedom.
# Hence, the estimates become less precise and the test MSE grows!
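# Optional overlay (an assumption, just to ease the visual comparison of the two curves):
par(mfrow=c(1,1))
plot(cvrerr, type="l", col="red", xlab="number of regressors p", ylab="test MSE",
     main="Ridge (red) vs. Lasso (blue)")
lines(cvlerr, col="blue")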
## b) sparsity of beta - the sparser the beta vector, the more of its entries are exactly zero
rm(list=ls())
cat("\014")
MCN=100
cvlerr <- c(rep(NaN, MCN))
cvrerr <- c(rep(NaN, MCN))
set.seed(123)
# MC loop over the sparsity level of beta (each coefficient is nonzero with probability 1/i)
for (i in 2:MCN){
# data config
N = 300
p = 50
mu = 0
## DGP
# Variance-Covariance Matrix, beta vector
temp=runif(p, min = 1, max = 2)
covmat=diag(temp)
beta=runif(p, min = 0.1, max = 0.5)
# keep each coefficient with probability 1/i, so beta gets sparser as i increases
prob=1/i
binvec=rbinom(p,1,prob)
beta=beta*binvec
# sampling data and generate y's
# X = replicate(p , rmvnorm(n = N, mu = mu, Sigma = covmat))
X = rmvnorm(N, mean = rep(mu, p), sigma = covmat)
eps = rnorm(N, 0, 1^0.5)
Y = X %*% beta + eps
rm(temp, eps)
cvridge <- cv.glmnet(X, Y, alpha = 0)
cvlasso <- cv.glmnet(X, Y, alpha=1)
X_test = rmvnorm(N, mean = rep(mu, p), sigma = covmat)
eps = rnorm(N, 0, 1^0.5)
Y_test = X_test %*% beta + eps
rm(eps)
opt_rlambda <- cvridge$lambda.min
opt_llambda <- cvlasso$lambda.min
cvrfit <- predict(cvridge, s = opt_rlambda, newx = X_test)
cvlfit <- predict(cvlasso, s = opt_llambda, newx = X_test)
cvlerr[i] <- mean((Y_test-cvlfit)^2)
cvrerr[i] <- mean((Y_test-cvrfit)^2)
}
par(mfrow=c(1,2))
plot(cvlerr, type="l", main="Lasso Error dep. sparsity of Regressors")
plot(cvrerr, type="l", main="Ridge Error dep. sparsity of Regressors")
### Note: as the sparsity of beta increases, the true model contains more coefficients that are truly zero.
# Hence the lasso performs better, since it can set estimates exactly to zero, while ridge cannot!
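# Optional check (an assumption, not part of the problem set): in the last MC iteration
# the lasso sets many coefficients exactly to zero, whereas ridge keeps all of them nonzero.
sum(coef(cvlasso, s = "lambda.min") == 0)
sum(coef(cvridge, s = "lambda.min") == 0)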