4.3 La famille apply, des fonctions d’itération prédéfinies
set.seed(1234)
X <- matrix(sample(1:20,20),ncol=4)
X
[,1] [,2] [,3] [,4]
[1,] 3 10 7 9
[2,] 12 1 5 17
[3,] 11 4 20 16
[4,] 18 8 15 19
[5,] 14 6 2 13
apply(X,MARGIN=2,FUN=mean)
[1] 11.6 5.8 9.8 14.8
X[1,1] <- NA
apply(X,MARGIN=2,FUN=mean)
[1] NA 5.8 9.8 14.8
apply(X,MARGIN=2,FUN=mean,na.rm=TRUE)
[1] 13.75 5.80 9.80 14.80
colMeans(X,na.rm=TRUE)
[1] 13.75 5.80 9.80 14.80
set.seed(1234)
Y <- array(sample(24),dim=c(3,4,2))
Y
, , 1
[,1] [,2] [,3] [,4]
[1,] 3 22 1 8
[2,] 15 18 4 10
[3,] 14 13 11 23
, , 2
[,1] [,2] [,3] [,4]
[1,] 17 19 7 20
[2,] 16 21 6 9
[3,] 24 2 5 12
apply(Y,MARGIN=c(1,2),FUN=sum,na.rm=TRUE)
[,1] [,2] [,3] [,4]
[1,] 20 41 8 28
[2,] 31 39 10 19
[3,] 38 15 16 35
MaFonction <- function(x,y) {
z <- x**2 - y
return(z)
}
set.seed(1234)
X <- matrix(sample(12),ncol=4)
X
[,1] [,2] [,3] [,4]
[1,] 2 6 1 8
[2,] 7 10 12 4
[3,] 11 5 3 9
apply(X,MARGIN=c(1,2),FUN=MaFonction, y=2)
[,1] [,2] [,3] [,4]
[1,] 2 34 -1 62
[2,] 47 98 142 14
[3,] 119 23 7 79
Z <- 1:5
Z
[1] 1 2 3 4 5
vec1 <- c(rep("A1",2),rep("A2",2),rep("A3",1))
vec1
[1] "A1" "A1" "A2" "A2" "A3"
vec2 <- c(rep("B1",3),rep("B2",2))
vec2
[1] "B1" "B1" "B1" "B2" "B2"
tapply(Z,vec1,sum)
A1 A2 A3
3 7 5
tapply(Z,list(vec1,vec2),sum)
B1 B2
A1 3 NA
A2 3 4
A3 NA 5
set.seed(545)
mat1 <- matrix(sample(12),ncol=4)
mat1
[,1] [,2] [,3] [,4]
[1,] 9 4 1 11
[2,] 10 2 3 6
[3,] 7 5 8 12
mat2 <- matrix(sample(4),ncol=2)
mat2
[,1] [,2]
[1,] 4 3
[2,] 2 1
liste <- list(matrice1=mat1,matrice2=mat2)
lapply(liste,mean)
$`matrice1`
[1] 6.5
$matrice2
[1] 2.5
lapply(liste,apply,2,sum,na.rm=T)
$`matrice1`
[1] 26 11 12 29
$matrice2
[1] 6 4
Z <- 1:5
T <- 5:1
vec1 <- c(rep("A1",2),rep("A2",2),rep("A3",1))
vec2 <- c(rep("B1",3),rep("B2",2))
df <- data.frame(Z,T,vec1,vec2)
df
aggregate(df[,1:2],list(FacteurA=vec1),sum)
aggregate(df[,1:2],list(FacteurA=vec1,FacteurB=vec2),sum)
set.seed(1234)
X <- matrix(sample(12),nrow=3)
X
[,1] [,2] [,3] [,4]
[1,] 2 6 1 8
[2,] 7 10 12 4
[3,] 11 5 3 9
mean.X <- apply(X,2,mean)
mean.X
[1] 6.666667 7.000000 5.333333 7.000000
sd.X <- apply(X,2,sd)
sd.X
[1] 4.509250 2.645751 5.859465 2.645751
Xc <- sweep(X,2,mean.X,FUN="-")
Xc
[,1] [,2] [,3] [,4]
[1,] -4.6666667 -1 -4.333333 1
[2,] 0.3333333 3 6.666667 -3
[3,] 4.3333333 -2 -2.333333 2
Xcr <- sweep(Xc,2,sd.X,FUN="/")
Xcr
[,1] [,2] [,3] [,4]
[1,] -1.03490978 -0.3779645 -0.7395442 0.3779645
[2,] 0.07392213 1.1338934 1.1377602 -1.1338934
[3,] 0.96098765 -0.7559289 -0.3982161 0.7559289
set.seed(1234)
T <- rnorm(50)
Z <- rnorm(50)+3*T+5
vec1 <- c(rep("A1",20),rep("A2",30))
don <- data.frame(Z,T)
by(don,list(FacteurA=vec1),summary)
FacteurA: A1
Z T
Min. :-3.052 Min. :-2.3457
1st Qu.: 2.624 1st Qu.:-0.8504
Median : 4.539 Median :-0.5288
Mean : 4.417 Mean :-0.2507
3rd Qu.: 6.205 3rd Qu.: 0.3154
Max. :12.584 Max. : 2.4158
----------------------------------------------------------------------------------
FacteurA: A2
Z T
Min. :-0.9901 Min. :-2.1800
1st Qu.: 1.6098 1st Qu.:-1.0574
Median : 3.1922 Median :-0.6088
Mean : 3.3560 Mean :-0.5880
3rd Qu.: 5.1170 3rd Qu.:-0.3309
Max. : 9.2953 Max. : 1.4495
by(don,list(FacteurA=vec1),sum)
FacteurA: A1
[1] 83.32415
----------------------------------------------------------------------------------
FacteurA: A2
[1] 83.04172
mafonction <- function(x){
summary(lm(Z~T, data=x))$coef
}
by(don, vec1, mafonction)
vec1: A1
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.166501 0.2900179 17.81442 7.030010e-13
T 2.990575 0.2844888 10.51210 4.118736e-09
----------------------------------------------------------------------------------
vec1: A2
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.024887 0.2123503 23.66320 4.718878e-20
T 2.838283 0.2203563 12.88043 2.753530e-13
set.seed(1234)
replicate(n=8, mean(rnorm(100)))
[1] -0.1567617424 0.0412431799 0.1546036721 -0.0081051362 -0.0217858703 -0.1368770057 -0.0878617963
[8] -0.0008371926
Mois <- c("Jan","Fév","Mar")
Année <- 2008:2010
outer(Mois,Année,FUN="paste")
[,1] [,2] [,3]
[1,] "Jan 2008" "Jan 2009" "Jan 2010"
[2,] "Fév 2008" "Fév 2009" "Fév 2010"
[3,] "Mar 2008" "Mar 2009" "Mar 2010"
outer(Mois,Année,FUN="paste",sep="-")
[,1] [,2] [,3]
[1,] "Jan-2008" "Jan-2009" "Jan-2010"
[2,] "Fév-2008" "Fév-2009" "Fév-2010"
[3,] "Mar-2008" "Mar-2009" "Mar-2010"
---
title: "Chapitre 4 : programmer"
author: "Husson et al."
date: "09/09/2018"
output:
  html_notebook:
    toc: yes
    toc_depth: 3
    toc_float: yes
  html_document:
    toc: yes
    toc_depth: '3'
    toc_float: yes
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, cache = TRUE)
```

# 4.1.2 Les boucles (for ou while)

```{r,message=FALSE,warning=FALSE,results='hide'}
for (i in 1:99) print(i)
for (i in seq(1,99,by=2)) print(i)
```

```{r,message=FALSE,warning=FALSE}
vecteur <- c("lundi","mardi","mercredi")
for (i in vecteur) print(i)
```

```{r}
i <- 1
while (i<3) {
  print(i)
  i <- i+1 }
```

# 4.1.3 Les conditions (if, else)

```{r}
i <- 1
repeat {
  print(i)
  i <- i+1
  if (i>3) break }
```

```{r}
X <- matrix(0,5,5)
switch(class(X),
  "matrix" = print("X est une matrice"),
  "data.frame" = print("X est un data.frame"),
  "numeric" = print("X est de classe numérique")
)
```

# 4.2 Construire une fonction

```{r}
som <- function(n) {
  resultat <- sum(1:n)
    return(resultat)
}
som(3)
res <- som(3)
res
```
```{r}
som <- function(n) {
  if (n<=0) stop("l’entier doit être strictement positif")
  if (ceiling(n)!=n) warning(paste("arrondi de",n,"en",ceiling(n)))
  resultat <- sum(1:ceiling(n))
  return(resultat)
}
som(4.325)
```
```{r}
mafonc <- function(facteur1,facteur2) {
  res1 <- table(facteur1,facteur2)
  selection <- which(res1==0,arr.ind = TRUE)
  res2 <- matrix("",nrow=nrow(selection),ncol=2)
  res2[,1] <- levels(facteur1)[selection[,1]]
  res2[,2] <- levels(facteur2)[selection[,2]]
  return(list(tab=res1,niveau=res2))
}
```
```{r}
tension <- factor(c(rep("Faible",5),rep("Forte",5)))
laine <- factor(c(rep("Mer",3),rep("Ang",3),rep("Tex",4)))
mafonc(tension,laine)
```

# 4.3 La famille apply, des fonctions d’itération prédéfinies
 
```{r}
set.seed(1234)
X <- matrix(sample(1:20,20),ncol=4)
X
apply(X,MARGIN=2,FUN=mean)
```
```{r}
X[1,1] <- NA
apply(X,MARGIN=2,FUN=mean)
apply(X,MARGIN=2,FUN=mean,na.rm=TRUE)
colMeans(X,na.rm=TRUE)
```
```{r}
set.seed(1234)
Y <- array(sample(24),dim=c(3,4,2))
Y
apply(Y,MARGIN=c(1,2),FUN=sum,na.rm=TRUE)
```
```{r}
MaFonction <- function(x,y) {
  z <- x**2 - y
  return(z)
}
set.seed(1234)
X <- matrix(sample(12),ncol=4)
X
apply(X,MARGIN=c(1,2),FUN=MaFonction, y=2)
```
```{r}
Z <- 1:5
Z
vec1 <- c(rep("A1",2),rep("A2",2),rep("A3",1))
vec1
vec2 <- c(rep("B1",3),rep("B2",2))
vec2
tapply(Z,vec1,sum)
tapply(Z,list(vec1,vec2),sum)
```
```{r}
set.seed(545)
mat1 <- matrix(sample(12),ncol=4)
mat1
mat2 <- matrix(sample(4),ncol=2)
mat2
liste <- list(matrice1=mat1,matrice2=mat2)
lapply(liste,mean)
lapply(liste,apply,2,sum,na.rm=T)
```
```{r}
Z <- 1:5
T <- 5:1
vec1 <- c(rep("A1",2),rep("A2",2),rep("A3",1))
vec2 <- c(rep("B1",3),rep("B2",2))
df <- data.frame(Z,T,vec1,vec2)
df
aggregate(df[,1:2],list(FacteurA=vec1),sum)
aggregate(df[,1:2],list(FacteurA=vec1,FacteurB=vec2),sum)
```
```{r}
set.seed(1234)
X <- matrix(sample(12),nrow=3)
X
mean.X <- apply(X,2,mean)
mean.X
sd.X <- apply(X,2,sd)
sd.X
Xc <- sweep(X,2,mean.X,FUN="-")
Xc
Xcr <- sweep(Xc,2,sd.X,FUN="/")
Xcr
```
```{r}
set.seed(1234)
T <- rnorm(50)
Z <- rnorm(50)+3*T+5
vec1 <- c(rep("A1",20),rep("A2",30))
don <- data.frame(Z,T)
by(don,list(FacteurA=vec1),summary)
by(don,list(FacteurA=vec1),sum)
```
```{r}
mafonction <- function(x){
  summary(lm(Z~T, data=x))$coef
}
by(don, vec1, mafonction)
set.seed(1234)
replicate(n=8, mean(rnorm(100)))
Mois <- c("Jan","Fév","Mar")
Année <- 2008:2010
outer(Mois,Année,FUN="paste")
outer(Mois,Année,FUN="paste",sep="-")
```

# 4.4.2 Le package parallel

```{r, eval=FALSE}
require(parallel)
vignette("parallel")
```

```{r, message=FALSE, warning=FALSE, eval=FALSE}
require(parallel)
nb_cores <- detectCores()
nb_cores
cl <- makeCluster(nb_cores - 1)
res <- clusterCall(cl = cl, fun = function() return(1:4))
stopCluster(cl)
str(res)
```

# 4.4.3 Le package foreach

```{r, message=FALSE, warning=FALSE, eval=FALSE}
require(foreach)
vignette("foreach")
```
```{r, message=FALSE, warning=FALSE}
require(foreach)
x <- foreach(i = 1:3) %do% (round(sqrt(i),2))
str(x)
```

```{r, message=FALSE, warning=FALSE}
require(foreach)
x <- foreach(i = 1:3, .combine = "+") %do% sqrt(i)
x
require(numbers)
foreach(n = 1:50, .combine = "c") %:% when (isPrime(n)) %do% n
```

```{r, message=FALSE, warning=FALSE}
require(doParallel)
cl <- makeCluster(3)
registerDoParallel(cl) # enregistrement du cluster
res <- foreach(n = 1:3) %dopar% rnorm(1000)
stopCluster(cl)
```

# 4.4.4 Exemple avancé

```{r, message=FALSE, warning=FALSE}
require(kernlab)
data(spam)
set.seed(125)
spam$fold <- sample(1:4, nrow(spam), replace = TRUE)
table(spam$type, spam$fold)
cv_rf <- function(data_app, data_val){
  rf <- randomForest(type ~ ., data=data_app)
  y_val <- data.frame(type=data_val$type, y=predict(rf, newdata=data_val))
  list(rf=rf, y_val, err_rate=mean(y_val$y != y_val$type))
}
```
```{r, message=FALSE, warning=FALSE}
require(parallel)
cl <- makeCluster(2)
clusterSetRNGStream(cl,iseed=78)
clusterExport(cl, varlist = c("spam", "cv_rf"))
clusterEvalQ(cl, {require(randomForest)})
res <- clusterApply(cl = cl, x = 1:4, fun = function(fold){
  spam_app <- spam[spam$fold != fold, ] # creation apprentissage
  spam_val <- spam[spam$fold == fold, ] # creation validation
  spam_app$fold <- NULL # suppression fold
  cv_rf(spam_app, spam_val) # calculs
})
stopCluster(cl)
sapply(res, function(x) x$err_rate)
```

```{r, message=FALSE, warning=FALSE}
require(foreach)
require(doParallel)
cl <- makeCluster(2)
clusterSetRNGStream(cl,iseed=78)
registerDoParallel(cl)
res <- foreach(fold = 1:4, .packages = "randomForest",
  .noexport = setdiff(ls(), c("spam", "cv_rf"))) %dopar% {
    spam_app <- spam[spam$fold != fold, ]
    spam_val <- spam[spam$fold == fold, ]
    spam_app$fold <- NULL
    cv_rf(spam_app, spam_val)
  }
stopCluster(cl)
```


