1. Visualiser le dispositif de données manquantes

don <- read.table("https://r-stat-sc-donnees.github.io/ozoneNA.csv",header=TRUE,sep=";",row.names=1)
nrow(na.omit(don))
library(VIM)
res<-summary(aggr(don,sortVar=TRUE))$combinations

matrixplot(don,sortby = 2)

marginplot(don[,c("T9","maxO3")])

tabNA <- matrix("p",nrow=nrow(don),ncol=ncol(don))
tabNA[is.na(don)] <- "a"
dimnames(tabNA) <- dimnames(don)
library(FactoMineR)
res.mca <- MCA(tabNA, graph=FALSE) 
plot(res.mca, invisible="ind")

2. Imputation simple

library(missForest)
missForest(don)
library(missMDA)
nb <- estim_ncpPCA(don)
imp <- imputePCA(don,ncp=nb$ncp)
imp$completeObs[1:3,1:8] # jeu imputé
         maxO3      T9      T12      T15 Ne9     Ne12     Ne15     Vx9
20010601    87 15.6000 18.50000 20.47146   4 4.000000 8.000000  0.6946
20010602    82 18.5047 20.86986 21.79932   5 5.000000 7.000000 -4.3301
20010603    92 15.3000 17.60000 19.50000   2 3.984066 3.812104  2.9544

3. Imputation multiple

library(mice)
imp.mice <- mice(don,m=100,defaultMethod="norm.boot")
library(Amelia)
res.amelia <- amelia(don,m=100)
library(missMDA)
nb <- estim_ncpPCA(don)
impMI <- MIPCA(don,ncp=nb$ncp,nboot=100)
compare.density(res.amelia, var = "T12")

plot(impMI, choice= "ind.supp")

plot(impMI, choice= "var")

4. Modélisation avec données manquantes

set.seed(1234)
imp <- mice(don, m=100, defaultMethod="norm.boot", printFlag=FALSE) 
res.lm.mice <- with(imp, lm(maxO3 ~ T9+T12+T15+Ne9+Ne12+Ne15+Vx9+Vx12+Vx15+maxO3v))
res.mi <- MIPCA(don, nboot=100) 
imp <- prelim(res.mi, don)
res.lm <- with(imp,
lm(maxO3 ~ T9+T12+T15+Ne9+Ne12+Ne15+Vx9+Vx12+Vx15+maxO3v))
resDF <- lapply(res.amelia$imputations, as.data.frame) 
res.lm <- lapply(resDF, lm,
formula="maxO3~ T9+T12+T15+Ne9+Ne12+Ne15+Vx9+Vx12+Vx15+maxO3v") 
res.lm <- as.mira(res.lm)
pool.res <- pool(res.lm.mice)
summary(pool.res, conf.int=TRUE)
              estimate  std.error   statistic       df     p.value       2.5 %     97.5 %
(Intercept) 24.5358851 18.9919600  1.29190906 39.70751 0.202740034 -13.8571072 62.9288774
T9          -0.2319135  2.5578928 -0.09066585 25.65731 0.928145532  -5.4931574  5.0293304
T12          3.2487674  2.8461215  1.14147176 24.13021 0.259487967  -2.6236618  9.1211965
T15         -0.3076715  1.6998690 -0.18099716 31.26000 0.857152358  -3.7734088  3.1580659
Ne9         -2.2444808  1.6245660 -1.38158794 25.18412 0.173666797  -5.5890972  1.1001356
Ne12        -1.5743673  3.1197510 -0.50464518 18.71642 0.616178999  -8.1107837  4.9620492
Ne15         0.1525583  1.9136439  0.07972138 24.18250 0.936799203  -3.7954319  4.1005486
Vx9          0.4255790  1.4041949  0.30307688 46.74180 0.763176109  -2.3997088  3.2508668
Vx12         0.7069736  1.7562157  0.40255510 39.16655 0.689110734  -2.8448250  4.2587722
Vx15         0.3524335  1.4364495  0.24535043 37.74792 0.807259170  -2.5561444  3.2610115
maxO3v       0.3215322  0.1050211  3.06159513 46.18596 0.003643674   0.1101586  0.5329058
pool.res

Pour aller plus loin

library(norm)
pre <- prelim.norm(as.matrix(don)) # manipulations préliminaires
thetahat <- em.norm(pre) # estimation par MV
getparam.norm(pre,thetahat)
LS0tDQp0aXRsZTogIkdlc3Rpb24gZGUgZG9ubsOpZXMgbWFucXVhbnRlcyINCmF1dGhvcjogIkh1c3NvbiBldCBhbC4iDQpkYXRlOiAiMDUvMDkvMjAxOCINCm91dHB1dDoNCiAgaHRtbF9ub3RlYm9vazoNCiAgICB0b2M6IHllcw0KICAgIHRvY19kZXB0aDogMw0KICAgIHRvY19mbG9hdDogeWVzDQogIGh0bWxfZG9jdW1lbnQ6DQogICAgdG9jOiB5ZXMNCiAgICB0b2NfZGVwdGg6ICczJw0KICAgIHRvY19mbG9hdDogeWVzDQotLS0NCg0KDQojIDEuIFZpc3VhbGlzZXIgbGUgZGlzcG9zaXRpZiBkZSBkb25uw6llcyBtYW5xdWFudGVzDQoNCmBgYHtyLHJlc3VsdHM9ImhpZGUiLG1lc3NhZ2U9RkFMU0Usd2FybmluZz1GQUxTRX0NCmRvbiA8LSByZWFkLnRhYmxlKCJodHRwczovL3Itc3RhdC1zYy1kb25uZWVzLmdpdGh1Yi5pby9vem9uZU5BLmNzdiIsaGVhZGVyPVRSVUUsc2VwPSI7Iixyb3cubmFtZXM9MSkNCm5yb3cobmEub21pdChkb24pKQ0KYGBgDQoNCmBgYHtyLHJlc3VsdHMgPSAiaGlkZSIsbWVzc2FnZT1GQUxTRSx3YXJuaW5nPUZBTFNFfQ0KbGlicmFyeShWSU0pDQpyZXM8LXN1bW1hcnkoYWdncihkb24sc29ydFZhcj1UUlVFKSkkY29tYmluYXRpb25zDQpgYGANCg0KYGBge3IscmVzdWx0cz0iaGlkZSIsbWVzc2FnZT1GQUxTRSx3YXJuaW5nPUZBTFNFfQ0KbWF0cml4cGxvdChkb24sc29ydGJ5ID0gMikNCm1hcmdpbnBsb3QoZG9uWyxjKCJUOSIsIm1heE8zIildKQ0KYGBgDQpgYGB7cixyZXN1bHRzPSJoaWRlIixtZXNzYWdlPUZBTFNFLHdhcm5pbmc9RkFMU0V9DQp0YWJOQSA8LSBtYXRyaXgoInAiLG5yb3c9bnJvdyhkb24pLG5jb2w9bmNvbChkb24pKQ0KdGFiTkFbaXMubmEoZG9uKV0gPC0gImEiDQpkaW1uYW1lcyh0YWJOQSkgPC0gZGltbmFtZXMoZG9uKQ0KbGlicmFyeShGYWN0b01pbmVSKQ0KcmVzLm1jYSA8LSBNQ0EodGFiTkEsIGdyYXBoPUZBTFNFKSANCnBsb3QocmVzLm1jYSwgaW52aXNpYmxlPSJpbmQiKQ0KYGBgDQoNCiMgMi4gSW1wdXRhdGlvbiBzaW1wbGUNCg0KYGBge3IscmVzdWx0cz0iaGlkZSIsbWVzc2FnZT1GQUxTRSx3YXJuaW5nPUZBTFNFfQ0KbGlicmFyeShtaXNzRm9yZXN0KQ0KbWlzc0ZvcmVzdChkb24pDQpgYGANCg0KYGBge3IsbWVzc2FnZT1GQUxTRSx3YXJuaW5nPUZBTFNFfQ0KbGlicmFyeShtaXNzTURBKQ0KbmIgPC0gZXN0aW1fbmNwUENBKGRvbikNCmltcCA8LSBpbXB1dGVQQ0EoZG9uLG5jcD1uYiRuY3ApDQppbXAkY29tcGxldGVPYnNbMTozLDE6OF0gIyBqZXUgaW1wdXTDqQ0KYGBgDQoNCiMgMy4gSW1wdXRhdGlvbiBtdWx0aXBsZQ0KDQpgYGB7cixyZXN1bHRzPSJoaWRlIixtZXNzYWdlPUZBTFNFLHdhcm5pbmc9RkFMU0V9DQpsaWJyYXJ5KG1pY2UpDQppbXAubWljZSA8LSBtaWNlKGRvbixtPTEwMCxkZWZhdWx0TWV0aG9kPSJub3JtLmJvb3QiKQ0KDQpsaWJyYXJ5KEFtZWxpYSkNCnJlcy5hbWVsaWEgPC0gYW1lbGlhKGRvbixtPTEwMCkNCg0KbGlicmFyeShtaXNzTURBKQ0KbmIgPC0gZXN0aW1fbmNwUENBKGRvbikNCmltcE1JIDwtIE1JUENBKGRvbixuY3A9bmIkbmNwLG5ib290PTEwMCkNCmBgYA0KDQpgYGB7cixyZXN1bHRzPSJoaWRlIixtZXNzYWdlPUZBTFNFLHdhcm5pbmc9RkFMU0V9DQpjb21wYXJlLmRlbnNpdHkocmVzLmFtZWxpYSwgdmFyID0gIlQxMiIpDQpgYGANCg0KYGBge3IscmVzdWx0cz0iaGlkZSIsbWVzc2FnZT1GQUxTRSx3YXJuaW5nPUZBTFNFfQ0KcGxvdChpbXBNSSwgY2hvaWNlPSAiaW5kLnN1cHAiKQ0KcGxvdChpbXBNSSwgY2hvaWNlPSAidmFyIikNCmBgYA0KDQojIDQuIE1vZMOpbGlzYXRpb24gYXZlYyBkb25uw6llcyBtYW5xdWFudGVzDQoNCmBgYHtyLHJlc3VsdHM9ImhpZGUiLG1lc3NhZ2U9RkFMU0Usd2FybmluZz1GQUxTRX0NCnNldC5zZWVkKDEyMzQpDQppbXAgPC0gbWljZShkb24sIG09MTAwLCBkZWZhdWx0TWV0aG9kPSJub3JtLmJvb3QiLCBwcmludEZsYWc9RkFMU0UpIA0KcmVzLmxtLm1pY2UgPC0gd2l0aChpbXAsIGxtKG1heE8zIH4gVDkrVDEyK1QxNStOZTkrTmUxMitOZTE1K1Z4OStWeDEyK1Z4MTUrbWF4TzN2KSkNCmBgYA0KDQpgYGB7cixtZXNzYWdlPUZBTFNFLHdhcm5pbmc9RkFMU0V9DQpyZXMubWkgPC0gTUlQQ0EoZG9uLCBuYm9vdD0xMDApIA0KaW1wIDwtIHByZWxpbShyZXMubWksIGRvbikNCnJlcy5sbSA8LSB3aXRoKGltcCwNCmxtKG1heE8zIH4gVDkrVDEyK1QxNStOZTkrTmUxMitOZTE1K1Z4OStWeDEyK1Z4MTUrbWF4TzN2KSkNCmBgYA0KDQpgYGB7cixtZXNzYWdlPUZBTFNFLHdhcm5pbmc9RkFMU0V9DQpyZXNERiA8LSBsYXBwbHkocmVzLmFtZWxpYSRpbXB1dGF0aW9ucywgYXMuZGF0YS5mcmFtZSkgDQpyZXMubG0gPC0gbGFwcGx5KHJlc0RGLCBsbSwNCmZvcm11bGE9Im1heE8zfiBUOStUMTIrVDE1K05lOStOZTEyK05lMTUrVng5K1Z4MTIrVngxNSttYXhPM3YiKSANCnJlcy5sbSA8LSBhcy5taXJhKHJlcy5sbSkNCmBgYA0KDQpgYGB7cixtZXNzYWdlPUZBTFNFLHdhcm5pbmc9RkFMU0V9DQpwb29sLnJlcyA8LSBwb29sKHJlcy5sbS5taWNlKQ0Kc3VtbWFyeShwb29sLnJlcywgY29uZi5pbnQ9VFJVRSkNCmBgYA0KDQpgYGB7cixyZXN1bHRzPSJoaWRlIixtZXNzYWdlPUZBTFNFLHdhcm5pbmc9RkFMU0V9DQpwb29sLnJlcw0KYGBgDQoNCiMgUG91ciBhbGxlciBwbHVzIGxvaW4NCg0KYGBge3IscmVzdWx0cz0iaGlkZSIsbWVzc2FnZT1GQUxTRSx3YXJuaW5nPUZBTFNFfQ0KbGlicmFyeShub3JtKQ0KcHJlIDwtIHByZWxpbS5ub3JtKGFzLm1hdHJpeChkb24pKSAjIG1hbmlwdWxhdGlvbnMgcHLDqWxpbWluYWlyZXMNCnRoZXRhaGF0IDwtIGVtLm5vcm0ocHJlKSAjIGVzdGltYXRpb24gcGFyIE1WDQpnZXRwYXJhbS5ub3JtKHByZSx0aGV0YWhhdCkNCmBgYA0KDQo=