1. Importer les données

library(kernlab)
data(spam)
set.seed(5678)
perm <- sample(4601,3000)
app <- spam[perm,]
valid <- spam[-perm,]

2. Construire les modèles lasso et ridge

library(glmnet)
lasso <- glmnet(as.matrix(app[,1:57]),app[,58],family="binomial")
ridge <- glmnet(as.matrix(app[,1:57]),app[,58],family="binomial",alpha=0)
par(mfrow=c(1,2))
plot(lasso)
plot(ridge)

3. Sélectionner le paramètre lambda

set.seed(1234)
Llasso <- cv.glmnet(as.matrix(app[,1:57]),app[,58],family="binomial")
Llasso$lambda.min
[1] 0.000430318
Llasso$lambda.1se
[1] 0.003331796
plot(Llasso)

set.seed(1234)
Lridge <- cv.glmnet(as.matrix(app[,1:57]),app[,58],family="binomial",alpha=0)
set.seed(1234)
Lridge1 <- cv.glmnet(as.matrix(app[,1:57]),app[,58],family="binomial",alpha=0,lambda=exp(seq(-10,-2,length=100)))
par(mfrow=c(1,2))
plot(Lridge)
plot(Lridge1)

4. Faire de la prévision

prev.class.lasso <- predict(Llasso,newx=as.matrix(valid[,1:57]),type="class")
prev.class.ridge <- predict(Lridge1,newx=as.matrix(valid[,1:57]),type="class")
prev.class <- data.frame(Lasso=as.character(prev.class.lasso),Ridge=as.character(prev.class.ridge),obs=valid$type)
head(prev.class)
  Lasso Ridge  obs
1  spam  spam spam
2  spam  spam spam
3  spam  spam spam
4  spam  spam spam
5  spam  spam spam
6  spam  spam spam
prev.lasso <- predict(Llasso,newx=as.matrix(valid[,1:57]),type="response")
prev.ridge <- predict(Lridge1,newx=as.matrix(valid[,1:57]),type="response")
prev.prob <- data.frame(Lasso=as.numeric(prev.lasso),Ridge=as.numeric(prev.ridge),obs=valid$type)
head(prev.prob)
      Lasso     Ridge  obs
1 0.9504035 0.9510156 spam
2 0.5909894 0.6392713 spam
3 0.9960009 0.9974515 spam
4 0.9020829 0.8843358 spam
5 0.9936782 0.9859536 spam
6 0.8985610 0.8686000 spam

5. Estimer les performances d’une régression sous contraintes

library(tidyverse)
prev.class %>% summarise_all(funs(err=mean(obs!=.))) %>% select(-obs_err) %>% round(3)
  poly_err radial_err
1    0.082      0.077
library(plotROC)
df.roc <- prev.prob %>% gather(key=Methode,value=score,Lasso,Ridge)
ggplot(df.roc)+aes(d=obs,m=score,color=Methode)+geom_roc()+theme_classic()

library(pROC)
df.roc %>% group_by(Methode) %>% summarize(AUC=pROC::auc(obs,score))
# A tibble: 2 x 2
  Methode   AUC
  <chr>   <dbl>
1 Lasso   0.962
2 Ridge   0.960
ggplot(df.roc)+aes(d=obs,m=score,color=Methode)+geom_roc()+theme_classic()

library(pROC)
df.roc %>% group_by(Methode) %>% summarize(AUC=pROC::auc(obs,score))
# A tibble: 2 x 2
  Methode   AUC
  <chr>   <dbl>
1 Lasso   0.962
2 Ridge   0.960
LS0tDQp0aXRsZTogIlLDqWdyZXNzaW9uIHNvdXMgY29udHJhaW50ZXMiDQphdXRob3I6ICJIdXNzb24gZXQgYWwuIg0KZGF0ZTogIjYgc2VwdGVtYnJlIDIwMTgiDQpvdXRwdXQ6DQogIGh0bWxfbm90ZWJvb2s6DQogICAgdG9jOiB5ZXMNCiAgICB0b2NfZGVwdGg6IDMNCiAgICB0b2NfZmxvYXQ6IHllcw0KICBodG1sX2RvY3VtZW50Og0KICAgIHRvYzogeWVzDQogICAgdG9jX2RlcHRoOiAnMycNCiAgICB0b2NfZmxvYXQ6IHllcw0KLS0tDQoNCiMgMS4gSW1wb3J0ZXIgbGVzIGRvbm7DqWVzDQoNCmBgYHtyLG1lc3NhZ2U9RkFMU0Usd2FybmluZz1GQUxTRX0NCmxpYnJhcnkoa2VybmxhYikNCmRhdGEoc3BhbSkNCmBgYA0KDQpgYGB7cn0NCnNldC5zZWVkKDU2NzgpDQpwZXJtIDwtIHNhbXBsZSg0NjAxLDMwMDApDQphcHAgPC0gc3BhbVtwZXJtLF0NCnZhbGlkIDwtIHNwYW1bLXBlcm0sXQ0KYGBgDQoNCiMgMi4gQ29uc3RydWlyZSBsZXMgbW9kw6hsZXMgbGFzc28gZXQgcmlkZ2UNCg0KYGBge3IsbWVzc2FnZT1GQUxTRSx3YXJuaW5nPUZBTFNFfQ0KbGlicmFyeShnbG1uZXQpDQpsYXNzbyA8LSBnbG1uZXQoYXMubWF0cml4KGFwcFssMTo1N10pLGFwcFssNThdLGZhbWlseT0iYmlub21pYWwiKQ0KYGBgDQoNCmBgYHtyfQ0KcmlkZ2UgPC0gZ2xtbmV0KGFzLm1hdHJpeChhcHBbLDE6NTddKSxhcHBbLDU4XSxmYW1pbHk9ImJpbm9taWFsIixhbHBoYT0wKQ0KYGBgDQoNCmBgYHtyfQ0KcGFyKG1mcm93PWMoMSwyKSkNCnBsb3QobGFzc28pDQpwbG90KHJpZGdlKQ0KYGBgDQoNCg0KIyAzLiBTw6lsZWN0aW9ubmVyIGxlIHBhcmFtw6h0cmUgbGFtYmRhDQoNCmBgYHtyfQ0Kc2V0LnNlZWQoMTIzNCkNCkxsYXNzbyA8LSBjdi5nbG1uZXQoYXMubWF0cml4KGFwcFssMTo1N10pLGFwcFssNThdLGZhbWlseT0iYmlub21pYWwiKQ0KTGxhc3NvJGxhbWJkYS5taW4NCkxsYXNzbyRsYW1iZGEuMXNlDQpgYGANCg0KYGBge3J9DQpwbG90KExsYXNzbykNCmBgYA0KDQoNCmBgYHtyfQ0Kc2V0LnNlZWQoMTIzNCkNCkxyaWRnZSA8LSBjdi5nbG1uZXQoYXMubWF0cml4KGFwcFssMTo1N10pLGFwcFssNThdLGZhbWlseT0iYmlub21pYWwiLGFscGhhPTApDQpzZXQuc2VlZCgxMjM0KQ0KTHJpZGdlMSA8LSBjdi5nbG1uZXQoYXMubWF0cml4KGFwcFssMTo1N10pLGFwcFssNThdLGZhbWlseT0iYmlub21pYWwiLGFscGhhPTAsbGFtYmRhPWV4cChzZXEoLTEwLC0yLGxlbmd0aD0xMDApKSkNCnBhcihtZnJvdz1jKDEsMikpDQpwbG90KExyaWRnZSkNCnBsb3QoTHJpZGdlMSkNCmBgYA0KDQojIDQuIEZhaXJlIGRlIGxhIHByw6l2aXNpb24NCg0KYGBge3J9DQpwcmV2LmNsYXNzLmxhc3NvIDwtIHByZWRpY3QoTGxhc3NvLG5ld3g9YXMubWF0cml4KHZhbGlkWywxOjU3XSksdHlwZT0iY2xhc3MiKQ0KcHJldi5jbGFzcy5yaWRnZSA8LSBwcmVkaWN0KExyaWRnZTEsbmV3eD1hcy5tYXRyaXgodmFsaWRbLDE6NTddKSx0eXBlPSJjbGFzcyIpDQpwcmV2LmNsYXNzIDwtIGRhdGEuZnJhbWUoTGFzc289YXMuY2hhcmFjdGVyKHByZXYuY2xhc3MubGFzc28pLFJpZGdlPWFzLmNoYXJhY3RlcihwcmV2LmNsYXNzLnJpZGdlKSxvYnM9dmFsaWQkdHlwZSkNCmhlYWQocHJldi5jbGFzcykNCmBgYA0KDQoNCg0KYGBge3J9DQpwcmV2Lmxhc3NvIDwtIHByZWRpY3QoTGxhc3NvLG5ld3g9YXMubWF0cml4KHZhbGlkWywxOjU3XSksdHlwZT0icmVzcG9uc2UiKQ0KcHJldi5yaWRnZSA8LSBwcmVkaWN0KExyaWRnZTEsbmV3eD1hcy5tYXRyaXgodmFsaWRbLDE6NTddKSx0eXBlPSJyZXNwb25zZSIpDQpwcmV2LnByb2IgPC0gZGF0YS5mcmFtZShMYXNzbz1hcy5udW1lcmljKHByZXYubGFzc28pLFJpZGdlPWFzLm51bWVyaWMocHJldi5yaWRnZSksb2JzPXZhbGlkJHR5cGUpDQpoZWFkKHByZXYucHJvYikNCmBgYA0KDQojIDUuIEVzdGltZXIgbGVzIHBlcmZvcm1hbmNlcyBk4oCZdW5lIHLDqWdyZXNzaW9uIHNvdXMgY29udHJhaW50ZXMNCg0KYGBge3IsbWVzc2FnZT1GQUxTRSx3YXJuaW5nPUZBTFNFfQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpwcmV2LmNsYXNzICU+JSBzdW1tYXJpc2VfYWxsKGZ1bnMoZXJyPW1lYW4ob2JzIT0uKSkpICU+JSBzZWxlY3QoLW9ic19lcnIpICU+JSByb3VuZCgzKQ0KYGBgDQoNCg0KYGBge3IsbWVzc2FnZT1GQUxTRSx3YXJuaW5nPUZBTFNFfQ0KbGlicmFyeShwbG90Uk9DKQ0KZGYucm9jIDwtIHByZXYucHJvYiAlPiUgZ2F0aGVyKGtleT1NZXRob2RlLHZhbHVlPXNjb3JlLExhc3NvLFJpZGdlKQ0KZ2dwbG90KGRmLnJvYykrYWVzKGQ9b2JzLG09c2NvcmUsY29sb3I9TWV0aG9kZSkrZ2VvbV9yb2MoKSt0aGVtZV9jbGFzc2ljKCkNCmBgYA0KDQoNCmBgYHtyLG1lc3NhZ2U9RkFMU0Usd2FybmluZz1GQUxTRX0NCmxpYnJhcnkocFJPQykNCmRmLnJvYyAlPiUgZ3JvdXBfYnkoTWV0aG9kZSkgJT4lIHN1bW1hcml6ZShBVUM9cFJPQzo6YXVjKG9icyxzY29yZSkpDQpnZ3Bsb3QoZGYucm9jKSthZXMoZD1vYnMsbT1zY29yZSxjb2xvcj1NZXRob2RlKStnZW9tX3JvYygpK3RoZW1lX2NsYXNzaWMoKQ0KYGBgDQoNCmBgYHtyLG1lc3NhZ2U9RkFMU0Usd2FybmluZz1GQUxTRX0NCmxpYnJhcnkocFJPQykNCmRmLnJvYyAlPiUgZ3JvdXBfYnkoTWV0aG9kZSkgJT4lIHN1bW1hcml6ZShBVUM9cFJPQzo6YXVjKG9icyxzY29yZSkpDQpgYGANCg0K