1. Importer les données
# Load the `spam` data set shipped with kernlab and summarise
# columns 56 to 58 (two capital-run-length variables and the class label).
library(kernlab)
data("spam", package = "kernlab")
summary(spam[, 56:58])
capitalLong capitalTotal type
Min. : 1.00 Min. : 1.0 nonspam:2788
1st Qu.: 6.00 1st Qu.: 35.0 spam :1813
Median : 15.00 Median : 95.0
Mean : 52.17 Mean : 283.3
3rd Qu.: 43.00 3rd Qu.: 266.0
Max. :9989.00 Max. :15841.0
2. Découper en une partie apprentissage et une partie validation
# Split the data: a random 80% of the rows form the training set (`app`),
# the remaining rows form the validation set (`valid`).
set.seed(1234)  # fixed seed so the split is reproducible
# Derive the row count from the data instead of hard-coding 4601.
perm <- sample(nrow(spam), round(nrow(spam) * 0.8))
app <- spam[perm, ]
valid <- spam[-perm, ]
3. Optimiser le(s) paramètre(s) de l’algorithme avec les fonctions prévues du package ou avec le package caret
# Tune the glmnet penalty `lambda` by 10-fold cross-validation on the
# training set, selecting on the misclassification rate.
library(glmnet)
set.seed(123)  # fixes the random fold assignment
optlasso <- cv.glmnet(
  as.matrix(app[, -58]),  # predictors: every column except column 58
  app[, 58],              # response: column 58 (`type`)
  family = "binomial",
  nfolds = 10,            # full argument name (was `nfold`, partial matching)
  type.measure = "class"
)
# Penalty value with the lowest cross-validated error.
optlasso$lambda.min
[1] 0.000302071
# Predicted class labels on the validation set, using the penalty that
# minimised the cross-validated error.
prevlasso <- predict(
  optlasso,
  newx = as.matrix(valid[, -58]),
  s = "lambda.min",
  type = "class"
)
# Tune the random forest `mtry` parameter with caret, using 10-fold
# cross-validation on the training set and 4 parallel worker processes.
library(caret)
ctrl <- trainControl(method = "cv", number = 10, classProbs = TRUE)
library(doParallel)  # parallel backend for foreach
cl <- makePSOCKcluster(4)
registerDoParallel(cl)
# NOTE(review): the seed is set in the master process only; whether the fits
# run on the workers are reproducible depends on trainControl(seeds = ) - confirm.
set.seed(123)
sel.mtry <- train(
  type ~ .,
  data = app,
  method = "rf",
  trControl = ctrl,
  tuneGrid = data.frame(mtry = seq(1, 51, by = 10)),
  # `type.measure` is a glmnet argument, not a caret one; the caret
  # selection criterion is `metric`.
  metric = "Accuracy"
)
# Shut the workers down as soon as tuning is done, then fall back to the
# sequential backend so later foreach calls do not hit the stopped cluster.
stopCluster(cl)
registerDoSEQ()
# Predict the validation set with the tuned random forest, then compare the
# misclassification rate of both methods against the observed labels.
prevforet <- predict(sel.mtry, valid)
prev.methode <- data.frame(
  lasso = as.vector(prevlasso),  # drop the matrix shape returned by predict()
  foret = prevforet,
  obs = valid$type
)
library(tidyverse)
# across() replaces the deprecated summarise_all(funs(...)); restricting it to
# the two prediction columns avoids computing (and then dropping) `obs_err`.
prev.methode %>%
  summarise(
    across(c(lasso, foret), ~ mean(obs != .x), .names = "{.col}_err")
  ) %>%
  round(3)
lasso_err foret_err
1 0.051 0.041
4. Proposer un modèle final
# Final model: refit the random forest on the complete data set, re-tuning
# `mtry` by 10-fold cross-validation over the same grid, on 4 parallel workers.
cl <- makePSOCKcluster(4)
registerDoParallel(cl)
ctrl <- trainControl(method = "cv", number = 10, classProbs = TRUE)
# NOTE(review): the seed is set in the master process only; whether the fits
# run on the workers are reproducible depends on trainControl(seeds = ) - confirm.
set.seed(123)
model_final <- train(
  type ~ .,
  data = spam,
  method = "rf",
  trControl = ctrl,
  tuneGrid = data.frame(mtry = seq(1, 51, by = 10)),
  # `type.measure` is a glmnet argument, not a caret one; the caret
  # selection criterion is `metric`.
  metric = "Accuracy"
)
# Shut the workers down as soon as training is done, then fall back to the
# sequential backend so later foreach calls do not hit the stopped cluster.
stopCluster(cl)
registerDoSEQ()
LS0tDQp0aXRsZTogIkNvbXBhcmFpc29uIGRlIG3DqXRob2RlcyINCmF1dGhvcjogIkh1c3NvbiBldCBhbC4iDQpkYXRlOiAiNiBzZXB0ZW1icmUgMjAxOCINCm91dHB1dDoNCiAgaHRtbF9ub3RlYm9vazoNCiAgICB0b2M6IHllcw0KICAgIHRvY19kZXB0aDogMw0KICAgIHRvY19mbG9hdDogeWVzDQogIGh0bWxfZG9jdW1lbnQ6DQogICAgdG9jOiB5ZXMNCiAgICB0b2NfZGVwdGg6ICczJw0KICAgIHRvY19mbG9hdDogeWVzDQotLS0NCg0KDQojIDEuIEltcG9ydGVyIGxlcyBkb25uw6llcw0KDQpgYGB7cixtZXNzYWdlPUZBTFNFLHdhcm5pbmc9RkFMU0V9DQpsaWJyYXJ5KGtlcm5sYWIpDQpkYXRhKHNwYW0pDQpzdW1tYXJ5KHNwYW1bLDU2OjU4XSkNCmBgYA0KDQojIDIuIETDqWNvdXBlciBlbiB1bmUgcGFydGllIGFwcHJlbnRpc3NhZ2UgZXQgdW5lIHBhcnRpZSB2YWxpZGF0aW9uDQoNCmBgYHtyfQ0Kc2V0LnNlZWQoMTIzNCkNCnBlcm0gPC0gc2FtcGxlKDQ2MDEscm91bmQoNDYwMSouOCkpDQphcHAgPC0gc3BhbVtwZXJtLF0NCnZhbGlkIDwtIHNwYW1bLXBlcm0sXQ0KYGBgDQoNCiMgMy4gT3B0aW1pc2VyIGxlKHMpIHBhcmFtw6h0cmUocykgZGUgbOKAmWFsZ29yaXRobWUgYXZlYyBsZXMgZm9uY3Rpb25zIHByw6l2dWVzIGR1IHBhY2thZ2Ugb3UgYXZlYyBsZSBwYWNrYWdlIGNhcmV0DQoNCmBgYHtyLG1lc3NhZ2U9RkFMU0Usd2FybmluZz1GQUxTRX0NCmxpYnJhcnkoZ2xtbmV0KQ0Kc2V0LnNlZWQoMTIzKQ0Kb3B0bGFzc28gPC0gY3YuZ2xtbmV0KGFzLm1hdHJpeChhcHBbLC01OF0pLGFwcFssNThdLGZhbWlseT0iYmlub21pYWwiLCBuZm9sZD0xMCwgdHlwZS5tZWFzdXJlPSJjbGFzcyIpDQpvcHRsYXNzbyRsYW1iZGEubWluDQpgYGANCg0KYGBge3J9DQpwcmV2bGFzc28gPC0gcHJlZGljdChvcHRsYXNzbyxuZXd4PWFzLm1hdHJpeCh2YWxpZFssLTU4XSksIHR5cGU9ImNsYXNzIixzPWMoImxhbWJkYS5taW4iKSkNCmBgYA0KDQpgYGB7cixtZXNzYWdlPUZBTFNFLHdhcm5pbmc9RkFMU0V9DQpsaWJyYXJ5KGNhcmV0KQ0KY3RybCA8LSB0cmFpbkNvbnRyb2wobWV0aG9kPSJjdiIsbnVtYmVyPTEwLGNsYXNzUHJvYnM9VFJVRSkNCmxpYnJhcnkoZG9QYXJhbGxlbCkgICAgICAgICMgcG91ciBwYXJhbGzDqWxpc2VyDQpjbCA8LSBtYWtlUFNPQ0tjbHVzdGVyKDQpDQpyZWdpc3RlckRvUGFyYWxsZWwoY2wpICAgICAjIyBsZXMgY2x1c3RlcnMgc2Vyb250IGZlcm3DqXMgZW4gZmluIGRlIHByb2dyYW1tZQ0Kc2V0LnNlZWQoMTIzKQ0Kc2VsLm10cnkgPC0gdHJhaW4odHlwZX4uLGRhdGE9YXBwLG1ldGhvZD0icmYiLHRyQ29udHJvbD1jdHJsLCB0dW5lR3JpZD1kYXRhLmZyYW1lKG10cnk9c2VxKDEsNTEsYnk9MTApKSwgdHlwZS5tZWFzdXJlPSJjbGFzcyIpDQpzdG9wQ2x1c3RlcihjbCkNCmBgYA0KDQpgYGB7cn0NCnByZXZmb3JldCA8LSBwcmVkaWN0KHNlbC5tdHJ5LCB2YWxpZCkNCmBgYA0KDQpgYGB7cixtZXNzYWdlPUZBTFNFLHdhcm5p
bmc9RkFMU0V9DQpwcmV2Lm1ldGhvZGUgPC0gZGF0YS5mcmFtZShsYXNzbz1hcy52ZWN0b3IocHJldmxhc3NvKSxmb3JldD1wcmV2Zm9yZXQsIG9icz12YWxpZCR0eXBlKQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpwcmV2Lm1ldGhvZGUgJT4lIHN1bW1hcmlzZV9hbGwoZnVucyhlcnI9bWVhbihvYnMhPS4pKSkgJT4lIHNlbGVjdCgtb2JzX2VycikgJT4lIHJvdW5kKDMpDQpgYGANCg0KIyA0LiBQcm9wb3NlciB1biBtb2TDqGxlIGZpbmFsDQoNCmBgYHtyfQ0KY2wgPC0gbWFrZVBTT0NLY2x1c3Rlcig0KQ0KcmVnaXN0ZXJEb1BhcmFsbGVsKGNsKSAgICAgIyMgbGVzIGNsdXN0ZXJzIHNlcm9udCBmZXJtw6lzIGVuIGZpbiBkZSBwcm9ncmFtbWUNCmN0cmwgPC0gdHJhaW5Db250cm9sKG1ldGhvZD0iY3YiLG51bWJlcj0xMCxjbGFzc1Byb2JzPVRSVUUpDQpzZXQuc2VlZCgxMjMpDQptb2RlbF9maW5hbCA8LSB0cmFpbih0eXBlfi4sZGF0YT1zcGFtLG1ldGhvZD0icmYiLHRyQ29udHJvbD1jdHJsLCB0dW5lR3JpZD1kYXRhLmZyYW1lKG10cnk9c2VxKDEsNTEsYnk9MTApKSwgdHlwZS5tZWFzdXJlPSJjbGFzcyIpDQpzdG9wQ2x1c3RlcihjbCkNCmBgYA0KDQo=