1. Importer les données

library(HDclassif)
data(crabs)
don <- crabs[ , 4:ncol(crabs)]
summary(don)
       FL              RW              CL              CW              BD       
 Min.   : 7.20   Min.   : 6.50   Min.   :14.70   Min.   :17.10   Min.   : 6.10  
 1st Qu.:12.90   1st Qu.:11.00   1st Qu.:27.27   1st Qu.:31.50   1st Qu.:11.40  
 Median :15.55   Median :12.80   Median :32.10   Median :36.80   Median :13.90  
 Mean   :15.58   Mean   :12.74   Mean   :32.11   Mean   :36.41   Mean   :14.03  
 3rd Qu.:18.05   3rd Qu.:14.30   3rd Qu.:37.23   3rd Qu.:42.00   3rd Qu.:16.60  
 Max.   :23.10   Max.   :20.20   Max.   :47.60   Max.   :54.60   Max.   :21.60  

2 Choix du modèle et du nombre de classes

library(mclust)
res.BIC <- mclustBIC(don, verbose = F)
plot(res.BIC)

summary(res.BIC)
Best BIC values:
             EEV,4       VEE,6       VEE,7
BIC      -2842.298 -2857.31968 -2863.98088
BIC diff     0.000   -15.02184   -21.68305

3. Construction de la classification

mod <- Mclust(don, x = res.BIC)
summary(mod)
---------------------------------------------------- 
Gaussian finite mixture model fitted by EM algorithm 
---------------------------------------------------- 

Mclust EEV (ellipsoidal, equal volume and shape) model with 4 components: 

 log.likelihood   n df       BIC      ICL
      -1241.006 200 68 -2842.298 -2854.29

Clustering table:
 1  2  3  4 
60 55 39 46 
table(mod$classification)

 1  2  3  4 
60 55 39 46 

4. Caractériser les classes

don.comp <- cbind.data.frame(don,classe=factor(mod$classification))
library(FactoMineR)
catdes(don.comp, num.var = 6)

Link between the cluster variable and the quantitative variables
================================================================
        Eta2      P-value
FL 0.3063318 1.700502e-15
RW 0.2819780 4.806351e-14
BD 0.2791240 7.055350e-14
CL 0.2262530 6.562003e-11
CW 0.2025411 1.198917e-09

Description of each cluster by quantitative variables
=====================================================
$`1`
      v.test Mean in category Overall mean sd in category Overall sd      p.value
RW -4.173975         11.57833      12.7385       2.587154   2.566898 2.993307e-05
CW -5.864227         31.42833      36.4145       6.924500   7.852251 4.512294e-09
CL -6.513962         27.09667      32.1055       5.998416   7.101163 7.319396e-11
BD -7.219174         11.36000      14.0305       2.758575   3.416200 5.230442e-13
FL -7.322878         12.81833      15.5830       2.668926   3.486576 2.427082e-13

$`2`
     v.test Mean in category Overall mean sd in category Overall sd    p.value
BD 2.567361            15.04      14.0305       3.542654     3.4162 0.01024758

$`3`
     v.test Mean in category Overall mean sd in category Overall sd     p.value
CW 3.131766         39.95641      36.4145       6.160036   7.852251 0.001737583
CL 2.611915         34.77692      32.1055       5.366490   7.101163 0.009003657

$`4`
     v.test Mean in category Overall mean sd in category Overall sd      p.value
RW 7.275734         15.16087      12.7385       2.027901   2.566898 3.445423e-13
FL 5.113948         17.89565      15.5830       2.741545   3.486576 3.154944e-07
BD 4.179923         15.88261      14.0305       2.574567   3.416200 2.916073e-05
CL 3.385706         35.22391      32.1055       5.302919   7.101163 7.099542e-04
CW 3.287811         39.76304      36.4145       5.834133   7.852251 1.009695e-03

Pour aller plus loin

load(url("https://r-stat-sc-donnees.github.io/USPS358.Rdata"))  ### charger le jeu de données sur votre ordinateur
image(matrix(t(X[5,]), ncol = 16))

res.hddc <- hddc(X, 3)
      model K threshold         BIC
1 AKJBKQKDK 3       0.2 -613,317.19

SELECTED: model  AKJBKQKDK  with  3  clusters.
Selection Criterion: BIC.
table(res.hddc$class,cls)
   cls
      1   2   3
  1  19   2 492
  2  43 549  26
  3 596   5  24
adjustedRandIndex(res.hddc$class,cls)
[1] 0.8055273
LS0tDQp0aXRsZTogIk1vZMOobGVzIGRlIG3DqWxhbmdlIg0KYXV0aG9yOiAiSHVzc29uIGV0IGFsLiINCmRhdGU6ICIwOS8wOS8yMDE4Ig0Kb3V0cHV0Og0KICBodG1sX25vdGVib29rOg0KICAgIHRvYzogeWVzDQogICAgdG9jX2RlcHRoOiAzDQogICAgdG9jX2Zsb2F0OiB5ZXMNCiAgaHRtbF9kb2N1bWVudDoNCiAgICB0b2M6IHllcw0KICAgIHRvY19kZXB0aDogJzMnDQogICAgdG9jX2Zsb2F0OiB5ZXMNCi0tLQ0KDQpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0NCmtuaXRyOjpvcHRzX2NodW5rJHNldChlY2hvID0gVFJVRSwgY2FjaGUgPSBUUlVFKQ0KYGBgDQoNCiMgMS4gSW1wb3J0ZXIgbGVzIGRvbm7DqWVzDQoNCmBgYHtyLG1lc3NhZ2U9RkFMU0Usd2FybmluZz1GQUxTRX0NCmxpYnJhcnkoSERjbGFzc2lmKQ0KZGF0YShjcmFicykNCmRvbiA8LSBjcmFic1sgLCA0Om5jb2woY3JhYnMpXQ0Kc3VtbWFyeShkb24pDQpgYGANCg0KIyAyIENob2l4IGR1IG1vZMOobGUgZXQgZHUgbm9tYnJlIGRlIGNsYXNzZXMNCg0KYGBge3IsbWVzc2FnZT1GQUxTRSx3YXJuaW5nPUZBTFNFfQ0KbGlicmFyeShtY2x1c3QpDQpyZXMuQklDIDwtIG1jbHVzdEJJQyhkb24sIHZlcmJvc2UgPSBGKQ0KcGxvdChyZXMuQklDKQ0Kc3VtbWFyeShyZXMuQklDKQ0KYGBgDQojIDMuIENvbnN0cnVjdGlvbiBkZSBsYSBjbGFzc2lmaWNhdGlvbg0KDQpgYGB7cn0NCm1vZCA8LSBNY2x1c3QoZG9uLCB4ID0gcmVzLkJJQykNCnN1bW1hcnkobW9kKQ0KdGFibGUobW9kJGNsYXNzaWZpY2F0aW9uKQ0KYGBgDQoNCiMgNC4gQ2FyYWN0w6lyaXNlciBsZXMgY2xhc3Nlcw0KDQpgYGB7cixtZXNzYWdlPUZBTFNFLHdhcm5pbmc9RkFMU0V9DQpkb24uY29tcCA8LSBjYmluZC5kYXRhLmZyYW1lKGRvbixjbGFzc2U9ZmFjdG9yKG1vZCRjbGFzc2lmaWNhdGlvbikpDQpgYGANCg0KYGBge3IsbWVzc2FnZT1GQUxTRSx3YXJuaW5nPUZBTFNFfQ0KbGlicmFyeShGYWN0b01pbmVSKQ0KY2F0ZGVzKGRvbi5jb21wLCBudW0udmFyID0gNikNCmBgYA0KDQojIFBvdXIgYWxsZXIgcGx1cyBsb2luDQoNCmBgYHtyLG1lc3NhZ2U9RkFMU0Usd2FybmluZz1GQUxTRX0NCmxvYWQodXJsKCJodHRwczovL3Itc3RhdC1zYy1kb25uZWVzLmdpdGh1Yi5pby9VU1BTMzU4LlJkYXRhIikpICAjIyMgY2hhcmdlIGxlIGpldSBkZSBkb25uw6llcyBzdXIgdm90cmUgb3JkaW5hdGV1cg0KaW1hZ2UobWF0cml4KHQoWFs1LF0pLCBuY29sID0gMTYpKQ0KYGBgDQoNCmBgYHtyfQ0KcmVzLmhkZGMgPC0gaGRkYyhYLCAzKQ0KdGFibGUocmVzLmhkZGMkY2xhc3MsY2xzKQ0KYWRqdXN0ZWRSYW5kSW5kZXgocmVzLmhkZGMkY2xhc3MsY2xzKQ0KYGBgDQoNCg0K