1. Importer les données
donnees <- read.table("https://r-stat-sc-donnees.github.io/ronfle.txt",header=TRUE)
summary(donnees)
age poids taille alcool sexe ronfle taba
Min. :23.00 Min. : 42.00 Min. :158.0 Min. : 0.00 F:25 N:65 N:36
1st Qu.:43.00 1st Qu.: 77.00 1st Qu.:166.0 1st Qu.: 0.00 H:75 O:35 O:64
Median :52.00 Median : 95.00 Median :186.0 Median : 2.00
Mean :52.27 Mean : 90.41 Mean :181.1 Mean : 2.95
3rd Qu.:62.25 3rd Qu.:107.00 3rd Qu.:194.0 3rd Qu.: 4.25
Max. :74.00 Max. :120.00 Max. :208.0 Max. :15.00
2. Construire le modèle
library(MASS)
mod.complet <- lda(ronfle~.,data=donnees)
mod.complet
Call:
lda(ronfle ~ ., data = donnees)
Prior probabilities of groups:
N O
0.65 0.35
Group means:
age poids taille alcool sexeH tabaO
N 50.26154 90.47692 180.9538 2.369231 0.6923077 0.6769231
O 56.00000 90.28571 181.3714 4.028571 0.8571429 0.5714286
Coefficients of linear discriminants:
LD1
age 0.05973655
poids -0.01620579
taille 0.01590170
alcool 0.24058822
sexeH 0.55413371
tabaO -1.14621434
plot(mod.complet)

3. Sélectionner des variables
mat.X <- model.matrix(ronfle~.,data=donnees)[,-1]
head(mat.X)
age poids taille alcool sexeH tabaO
1 47 71 158 0 1 1
2 56 58 164 7 1 0
3 46 116 208 3 1 1
4 70 96 186 3 1 1
5 51 91 195 2 1 1
6 46 98 188 0 0 0
library(klaR)
ronfle <- donnees$ronfle
set.seed(1234)
sel <- stepclass(mat.X,donnees$ronfle,method="lda",direction="both",maxvar=6)
correctness rate: 0.65; in: "poids"; variables (1): poids
correctness rate: 0.65; in: "age"; variables (2): poids, age
correctness rate: 0.68; in: "alcool"; variables (3): poids, age, alcool
hr.elapsed min.elapsed sec.elapsed
0.00 0.00 0.89
mod.sel <- lda(sel$formula,data=donnees)
mod.sel
Call:
lda(sel$formula, data = donnees)
Prior probabilities of groups:
N O
0.65 0.35
Group means:
age poids alcool
N 50.26154 90.47692 2.369231
O 56.00000 90.28571 4.028571
Coefficients of linear discriminants:
LD1
age 0.072815712
poids -0.004607728
alcool 0.245088681
4. Estimer le taux de mauvais classement
prev.complet <- lda(ronfle~.,data=donnees,CV=TRUE)$class
prev.sel <- lda(sel$formula,data=donnees,CV=TRUE)$class
table(prev.complet,donnees$ronfle)
prev.complet N O
N 53 22
O 12 13
table(prev.sel,donnees$ronfle)
prev.sel N O
N 55 23
O 10 12
mean(prev.complet!=donnees$ronfle)
[1] 0.34
mean(prev.sel!=donnees$ronfle)
[1] 0.33
5. Faire de la prévision
n_don1 <- matrix(c(42,55,169,0,58,94,185,4,35,70,180,6,67,63,166,3),ncol=4,byrow=T)
n_don2 <- matrix(c("F","N","H","O","H","O","F","N"),ncol=2,byrow=T)
n_donnees <- cbind.data.frame(n_don1,n_don2)
names(n_donnees) <- names(donnees)[-6]
predict(mod.sel,newdata=n_donnees)
$`class`
[1] N N N O
Levels: N O
$posterior
N O
1 0.8582230 0.1417770
2 0.5444034 0.4555966
3 0.7435646 0.2564354
4 0.4308341 0.5691659
$x
LD1
1 -1.3076693
2 0.6580354
3 -0.4159631
4 1.2111277
LS0tDQp0aXRsZTogIkFuYWx5c2UgZGlzY3JpbWluYW50ZSBsaW7DqWFpcmUiDQphdXRob3I6ICJIdXNzb24gZXQgYWwuIg0KZGF0ZTogIjA1LzA5LzIwMTgiDQpvdXRwdXQ6DQogIGh0bWxfbm90ZWJvb2s6DQogICAgdG9jOiB5ZXMNCiAgICB0b2NfZGVwdGg6IDMNCiAgICB0b2NfZmxvYXQ6IHllcw0KICBodG1sX2RvY3VtZW50Og0KICAgIHRvYzogeWVzDQogICAgdG9jX2RlcHRoOiAnMycNCiAgICB0b2NfZmxvYXQ6IHllcw0KLS0tDQoNCiMgMS4gSW1wb3J0ZXIgbGVzIGRvbm7DqWVzDQoNCmBgYHtyfQ0KZG9ubmVlcyA8LSByZWFkLnRhYmxlKCJodHRwczovL3Itc3RhdC1zYy1kb25uZWVzLmdpdGh1Yi5pby9yb25mbGUudHh0IixoZWFkZXI9VFJVRSkNCnN1bW1hcnkoZG9ubmVlcykNCmBgYA0KDQojIDIuIENvbnN0cnVpcmUgbGUgbW9kw6hsZQ0KDQpgYGB7cix3YXJuaW5nPUZBTFNFLG1lc3NhZ2U9RkFMU0V9DQpsaWJyYXJ5KE1BU1MpDQptb2QuY29tcGxldCA8LSBsZGEocm9uZmxlfi4sZGF0YT1kb25uZWVzKQ0KbW9kLmNvbXBsZXQNCmBgYA0KDQpgYGB7cn0NCnBsb3QobW9kLmNvbXBsZXQpDQpgYGANCg0KIyAzLiBTw6lsZWN0aW9ubmVyIGRlcyB2YXJpYWJsZXMNCg0KYGBge3J9DQptYXQuWCA8LSBtb2RlbC5tYXRyaXgocm9uZmxlfi4sZGF0YT1kb25uZWVzKVssLTFdDQpoZWFkKG1hdC5YKQ0KYGBgDQoNCmBgYHtyLHdhcm5pbmc9RkFMU0UsbWVzc2FnZT1GQUxTRX0NCmxpYnJhcnkoa2xhUikNCnJvbmZsZSA8LSBkb25uZWVzJHJvbmZsZQ0Kc2V0LnNlZWQoMTIzNCkNCnNlbCA8LSBzdGVwY2xhc3MobWF0LlgsZG9ubmVlcyRyb25mbGUsbWV0aG9kPSJsZGEiLGRpcmVjdGlvbj0iYm90aCIsbWF4dmFyPTYpDQpgYGANCg0KYGBge3J9DQptb2Quc2VsIDwtIGxkYShzZWwkZm9ybXVsYSxkYXRhPWRvbm5lZXMpDQptb2Quc2VsDQpgYGANCg0KIyA0LiBFc3RpbWVyIGxlIHRhdXggZGUgbWF1dmFpcyBjbGFzc2VtZW50DQoNCmBgYHtyfQ0KcHJldi5jb21wbGV0IDwtIGxkYShyb25mbGV+LixkYXRhPWRvbm5lZXMsQ1Y9VFJVRSkkY2xhc3MNCnByZXYuc2VsIDwtIGxkYShzZWwkZm9ybXVsYSxkYXRhPWRvbm5lZXMsQ1Y9VFJVRSkkY2xhc3MNCmBgYA0KDQoNCmBgYHtyfQ0KdGFibGUocHJldi5jb21wbGV0LGRvbm5lZXMkcm9uZmxlKQ0KdGFibGUocHJldi5zZWwsZG9ubmVlcyRyb25mbGUpDQpgYGANCg0KYGBge3J9DQptZWFuKHByZXYuY29tcGxldCE9ZG9ubmVlcyRyb25mbGUpDQptZWFuKHByZXYuc2VsIT1kb25uZWVzJHJvbmZsZSkNCmBgYA0KDQoNCiMgNS4gRmFpcmUgZGUgbGEgcHLDqXZpc2lvbg0KYGBge3J9DQpuX2RvbjEgPC0gbWF0cml4KGMoNDIsNTUsMTY5LDAsNTgsOTQsMTg1LDQsMzUsNzAsMTgwLDYsNjcsNjMsMTY2LDMpLG5jb2w9NCxieXJvdz1UKQ0Kbl9kb24yIDwtIG1hdHJpeChjKCJGIiwiTiIsIkgiLCJPIiwiSCIsIk8iLCJGIiwiTiIpLG5jb2w9MixieXJvdz1UKQ0Kbl9kb25uZWVzIDwtIGNiaW5kLmRhdGEuZnJhbWUobl9kb24xLG5fZG9uMikNCm5hbWVzKG5fZG9ubmVlcykgPC0gbmFtZXMoZG9ubmVlcylbLTZdDQpwcmVkaWN0KG1vZC5zZWwsbmV3ZGF0YT1uX2Rvbm5lZXMpDQoNCmBgYA0KDQo=