```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE, cache = TRUE) ``` # 4.1.2 Les boucles (for ou while) ```{r,message=FALSE,warning=FALSE,results='hide'} for (i in 1:99) print(i) for (i in seq(1,99,by=2)) print(i) ``` ```{r,message=FALSE,warning=FALSE} vecteur <- c("lundi","mardi","mercredi") for (i in vecteur) print(i) ``` ```{r} i <- 1 while (i<3) { print(i) i <- i+1 } ``` # 4.1.3 Les conditions (if, else) ```{r} i <- 1 repeat { print(i) i <- i+1 if (i>3) break } ``` ```{r} X <- matrix(0,5,5) switch(class(X), "matrix" = print("X est une matrice"), "data.frame" = print("X est un data.frame"), "numeric" = print("X est de classe numérique") ) ``` # 4.2 Construire une fonction ```{r} som <- function(n) { resultat <- sum(1:n) return(resultat) } som(3) res <- som(3) res ``` ```{r} som <- function(n) { if (n<=0) stop("l’entier doit être strictement positif") if (ceiling(n)!=n) warning(paste("arrondi de",n,"en",ceiling(n))) resultat <- sum(1:ceiling(n)) return(resultat) } som(4.325) ``` ```{r} mafonc <- function(facteur1,facteur2) { res1 <- table(facteur1,facteur2) selection <- which(res1==0,arr.ind = TRUE) res2 <- matrix("",nrow=nrow(selection),ncol=2) res2[,1] <- levels(facteur1)[selection[,1]] res2[,2] <- levels(facteur2)[selection[,2]] return(list(tab=res1,niveau=res2)) } ``` ```{r} tension <- factor(c(rep("Faible",5),rep("Forte",5))) laine <- factor(c(rep("Mer",3),rep("Ang",3),rep("Tex",4))) mafonc(tension,laine) ``` # 4.3 La famille apply, des fonctions d’itération prédéfinies ```{r} set.seed(1234) X <- matrix(sample(1:20,20),ncol=4) X apply(X,MARGIN=2,FUN=mean) ``` ```{r} X[1,1] <- NA apply(X,MARGIN=2,FUN=mean) apply(X,MARGIN=2,FUN=mean,na.rm=TRUE) colMeans(X,na.rm=TRUE) ``` ```{r} set.seed(1234) Y <- array(sample(24),dim=c(3,4,2)) Y apply(Y,MARGIN=c(1,2),FUN=sum,na.rm=TRUE) ``` ```{r} MaFonction <- function(x,y) { z <- x**2 - y return(z) } set.seed(1234) X <- matrix(sample(12),ncol=4) X apply(X,MARGIN=c(1,2),FUN=MaFonction, y=2) ``` ```{r} Z <- 1:5 Z vec1 <- c(rep("A1",2),rep("A2",2),rep("A3",1)) vec1 vec2 <- c(rep("B1",3),rep("B2",2)) vec2 tapply(Z,vec1,sum) tapply(Z,list(vec1,vec2),sum) ``` ```{r} set.seed(545) mat1 <- matrix(sample(12),ncol=4) mat1 mat2 <- matrix(sample(4),ncol=2) mat2 liste <- list(matrice1=mat1,matrice2=mat2) lapply(liste,mean) lapply(liste,apply,2,sum,na.rm=T) ``` ```{r} Z <- 1:5 T <- 5:1 vec1 <- c(rep("A1",2),rep("A2",2),rep("A3",1)) vec2 <- c(rep("B1",3),rep("B2",2)) df <- data.frame(Z,T,vec1,vec2) df aggregate(df[,1:2],list(FacteurA=vec1),sum) aggregate(df[,1:2],list(FacteurA=vec1,FacteurB=vec2),sum) ``` ```{r} set.seed(1234) X <- matrix(sample(12),nrow=3) X mean.X <- apply(X,2,mean) mean.X sd.X <- apply(X,2,sd) sd.X Xc <- sweep(X,2,mean.X,FUN="-") Xc Xcr <- sweep(Xc,2,sd.X,FUN="/") Xcr ``` ```{r} set.seed(1234) T <- rnorm(50) Z <- rnorm(50)+3*T+5 vec1 <- c(rep("A1",20),rep("A2",30)) don <- data.frame(Z,T) by(don,list(FacteurA=vec1),summary) by(don,list(FacteurA=vec1),sum) ``` ```{r} mafonction <- function(x){ summary(lm(Z~T, data=x))$coef } by(don, vec1, mafonction) set.seed(1234) replicate(n=8, mean(rnorm(100))) Mois <- c("Jan","Fév","Mar") Année <- 2008:2010 outer(Mois,Année,FUN="paste") outer(Mois,Année,FUN="paste",sep="-") ``` # 4.4.2 Le package parallel ```{r, eval=FALSE} require(parallel) vignette("parallel") ``` ```{r, message=FALSE, warning=FALSE, eval=FALSE} require(parallel) nb_cores <- detectCores() nb_cores cl <- makeCluster(nb_cores - 1) res <- clusterCall(cl = cl, fun = function() return(1:4)) stopCluster(cl) str(res) ``` # 4.4.3 Le package foreach ```{r, message=FALSE, warning=FALSE, eval=FALSE} require(foreach) vignette("foreach") ``` ```{r, message=FALSE, warning=FALSE} require(foreach) x <- foreach(i = 1:3) %do% (round(sqrt(i),2)) str(x) ``` ```{r, message=FALSE, warning=FALSE} require(foreach) x <- foreach(i = 1:3, .combine = "+") %do% sqrt(i) x require(numbers) foreach(n = 1:50, .combine = "c") %:% when (isPrime(n)) %do% n ``` ```{r, message=FALSE, warning=FALSE} require(doParallel) cl <- makeCluster(3) registerDoParallel(cl) # enregistrement du cluster res <- foreach(n = 1:3) %dopar% rnorm(1000) stopCluster(cl) ``` # 4.4.4 Exemple avancé ```{r, message=FALSE, warning=FALSE} require(kernlab) data(spam) set.seed(125) spam$fold <- sample(1:4, nrow(spam), replace = TRUE) table(spam$type, spam$fold) cv_rf <- function(data_app, data_val){ rf <- randomForest(type ~ ., data=data_app) y_val <- data.frame(type=data_val$type, y=predict(rf, newdata=data_val)) list(rf=rf, y_val, err_rate=mean(y_val$y != y_val$type)) } ``` ```{r, message=FALSE, warning=FALSE} require(parallel) cl <- makeCluster(2) clusterSetRNGStream(cl,iseed=78) clusterExport(cl, varlist = c("spam", "cv_rf")) clusterEvalQ(cl, {require(randomForest)}) res <- clusterApply(cl = cl, x = 1:4, fun = function(fold){ spam_app <- spam[spam$fold != fold, ] # creation apprentissage spam_val <- spam[spam$fold == fold, ] # creation validation spam_app$fold <- NULL # suppression fold cv_rf(spam_app, spam_val) # calculs }) stopCluster(cl) sapply(res, function(x) x$err_rate) ``` ```{r, message=FALSE, warning=FALSE} require(foreach) require(doParallel) cl <- makeCluster(2) clusterSetRNGStream(cl,iseed=78) registerDoParallel(cl) res <- foreach(fold = 1:4, .packages = "randomForest", .noexport = setdiff(ls(), c("spam", "cv_rf"))) %dopar% { spam_app <- spam[spam$fold != fold, ] spam_val <- spam[spam$fold == fold, ] spam_app$fold <- NULL cv_rf(spam_app, spam_val) } stopCluster(cl) ```