4.1.2 Les boucles (for ou while)

for (i in 1:99) print(i)
for (i in seq(1,99,by=2)) print(i)
vecteur <- c("lundi","mardi","mercredi")
for (i in vecteur) print(i)
[1] "lundi"
[1] "mardi"
[1] "mercredi"
i <- 1
while (i<3) {
  print(i)
  i <- i+1 }
[1] 1
[1] 2

4.1.3 Les conditions (if, else)

i <- 1
repeat {
  print(i)
  i <- i+1
  if (i>3) break }
[1] 1
[1] 2
[1] 3
X <- matrix(0,5,5)
switch(class(X),
  "matrix" = print("X est une matrice"),
  "data.frame" = print("X est un data.frame"),
  "numeric" = print("X est de classe numérique")
)
[1] "X est une matrice"

4.2 Construire une fonction

som <- function(n) {
  resultat <- sum(1:n)
    return(resultat)
}
som(3)
[1] 6
res <- som(3)
res
[1] 6
som <- function(n) {
  if (n<=0) stop("l’entier doit être strictement positif")
  if (ceiling(n)!=n) warning(paste("arrondi de",n,"en",ceiling(n)))
  resultat <- sum(1:ceiling(n))
  return(resultat)
}
som(4.325)
arrondi de 4.325 en 5
[1] 15
mafonc <- function(facteur1,facteur2) {
  res1 <- table(facteur1,facteur2)
  selection <- which(res1==0,arr.ind = TRUE)
  res2 <- matrix("",nrow=nrow(selection),ncol=2)
  res2[,1] <- levels(facteur1)[selection[,1]]
  res2[,2] <- levels(facteur2)[selection[,2]]
  return(list(tab=res1,niveau=res2))
}
tension <- factor(c(rep("Faible",5),rep("Forte",5)))
laine <- factor(c(rep("Mer",3),rep("Ang",3),rep("Tex",4)))
mafonc(tension,laine)
$`tab`
        facteur2
facteur1 Ang Mer Tex
  Faible   2   3   0
  Forte    1   0   4

$niveau
     [,1]     [,2] 
[1,] "Forte"  "Mer"
[2,] "Faible" "Tex"

4.3 La famille apply, des fonctions d’itération prédéfinies

set.seed(1234)
X <- matrix(sample(1:20,20),ncol=4)
X
     [,1] [,2] [,3] [,4]
[1,]    3   10    7    9
[2,]   12    1    5   17
[3,]   11    4   20   16
[4,]   18    8   15   19
[5,]   14    6    2   13
apply(X,MARGIN=2,FUN=mean)
[1] 11.6  5.8  9.8 14.8
X[1,1] <- NA
apply(X,MARGIN=2,FUN=mean)
[1]   NA  5.8  9.8 14.8
apply(X,MARGIN=2,FUN=mean,na.rm=TRUE)
[1] 13.75  5.80  9.80 14.80
colMeans(X,na.rm=TRUE)
[1] 13.75  5.80  9.80 14.80
set.seed(1234)
Y <- array(sample(24),dim=c(3,4,2))
Y
, , 1

     [,1] [,2] [,3] [,4]
[1,]    3   22    1    8
[2,]   15   18    4   10
[3,]   14   13   11   23

, , 2

     [,1] [,2] [,3] [,4]
[1,]   17   19    7   20
[2,]   16   21    6    9
[3,]   24    2    5   12
apply(Y,MARGIN=c(1,2),FUN=sum,na.rm=TRUE)
     [,1] [,2] [,3] [,4]
[1,]   20   41    8   28
[2,]   31   39   10   19
[3,]   38   15   16   35
MaFonction <- function(x,y) {
  z <- x**2 - y
  return(z)
}
set.seed(1234)
X <- matrix(sample(12),ncol=4)
X
     [,1] [,2] [,3] [,4]
[1,]    2    6    1    8
[2,]    7   10   12    4
[3,]   11    5    3    9
apply(X,MARGIN=c(1,2),FUN=MaFonction, y=2)
     [,1] [,2] [,3] [,4]
[1,]    2   34   -1   62
[2,]   47   98  142   14
[3,]  119   23    7   79
Z <- 1:5
Z
[1] 1 2 3 4 5
vec1 <- c(rep("A1",2),rep("A2",2),rep("A3",1))
vec1
[1] "A1" "A1" "A2" "A2" "A3"
vec2 <- c(rep("B1",3),rep("B2",2))
vec2
[1] "B1" "B1" "B1" "B2" "B2"
tapply(Z,vec1,sum)
A1 A2 A3 
 3  7  5 
tapply(Z,list(vec1,vec2),sum)
   B1 B2
A1  3 NA
A2  3  4
A3 NA  5
set.seed(545)
mat1 <- matrix(sample(12),ncol=4)
mat1
     [,1] [,2] [,3] [,4]
[1,]    9    4    1   11
[2,]   10    2    3    6
[3,]    7    5    8   12
mat2 <- matrix(sample(4),ncol=2)
mat2
     [,1] [,2]
[1,]    4    3
[2,]    2    1
liste <- list(matrice1=mat1,matrice2=mat2)
lapply(liste,mean)
$`matrice1`
[1] 6.5

$matrice2
[1] 2.5
lapply(liste,apply,2,sum,na.rm=T)
$`matrice1`
[1] 26 11 12 29

$matrice2
[1] 6 4
Z <- 1:5
T <- 5:1
vec1 <- c(rep("A1",2),rep("A2",2),rep("A3",1))
vec2 <- c(rep("B1",3),rep("B2",2))
df <- data.frame(Z,T,vec1,vec2)
df
aggregate(df[,1:2],list(FacteurA=vec1),sum)
aggregate(df[,1:2],list(FacteurA=vec1,FacteurB=vec2),sum)
set.seed(1234)
X <- matrix(sample(12),nrow=3)
X
     [,1] [,2] [,3] [,4]
[1,]    2    6    1    8
[2,]    7   10   12    4
[3,]   11    5    3    9
mean.X <- apply(X,2,mean)
mean.X
[1] 6.666667 7.000000 5.333333 7.000000
sd.X <- apply(X,2,sd)
sd.X
[1] 4.509250 2.645751 5.859465 2.645751
Xc <- sweep(X,2,mean.X,FUN="-")
Xc
           [,1] [,2]      [,3] [,4]
[1,] -4.6666667   -1 -4.333333    1
[2,]  0.3333333    3  6.666667   -3
[3,]  4.3333333   -2 -2.333333    2
Xcr <- sweep(Xc,2,sd.X,FUN="/")
Xcr
            [,1]       [,2]       [,3]       [,4]
[1,] -1.03490978 -0.3779645 -0.7395442  0.3779645
[2,]  0.07392213  1.1338934  1.1377602 -1.1338934
[3,]  0.96098765 -0.7559289 -0.3982161  0.7559289
set.seed(1234)
T <- rnorm(50)
Z <- rnorm(50)+3*T+5
vec1 <- c(rep("A1",20),rep("A2",30))
don <- data.frame(Z,T)
by(don,list(FacteurA=vec1),summary)
FacteurA: A1
       Z                T          
 Min.   :-3.052   Min.   :-2.3457  
 1st Qu.: 2.624   1st Qu.:-0.8504  
 Median : 4.539   Median :-0.5288  
 Mean   : 4.417   Mean   :-0.2507  
 3rd Qu.: 6.205   3rd Qu.: 0.3154  
 Max.   :12.584   Max.   : 2.4158  
---------------------------------------------------------------------------------- 
FacteurA: A2
       Z                 T          
 Min.   :-0.9901   Min.   :-2.1800  
 1st Qu.: 1.6098   1st Qu.:-1.0574  
 Median : 3.1922   Median :-0.6088  
 Mean   : 3.3560   Mean   :-0.5880  
 3rd Qu.: 5.1170   3rd Qu.:-0.3309  
 Max.   : 9.2953   Max.   : 1.4495  
by(don,list(FacteurA=vec1),sum)
FacteurA: A1
[1] 83.32415
---------------------------------------------------------------------------------- 
FacteurA: A2
[1] 83.04172
mafonction <- function(x){
  summary(lm(Z~T, data=x))$coef
}
by(don, vec1, mafonction)
vec1: A1
            Estimate Std. Error  t value     Pr(>|t|)
(Intercept) 5.166501  0.2900179 17.81442 7.030010e-13
T           2.990575  0.2844888 10.51210 4.118736e-09
---------------------------------------------------------------------------------- 
vec1: A2
            Estimate Std. Error  t value     Pr(>|t|)
(Intercept) 5.024887  0.2123503 23.66320 4.718878e-20
T           2.838283  0.2203563 12.88043 2.753530e-13
set.seed(1234)
replicate(n=8, mean(rnorm(100)))
[1] -0.1567617424  0.0412431799  0.1546036721 -0.0081051362 -0.0217858703 -0.1368770057 -0.0878617963
[8] -0.0008371926
Mois <- c("Jan","Fév","Mar")
Année <- 2008:2010
outer(Mois,Année,FUN="paste")
     [,1]       [,2]       [,3]      
[1,] "Jan 2008" "Jan 2009" "Jan 2010"
[2,] "Fév 2008" "Fév 2009" "Fév 2010"
[3,] "Mar 2008" "Mar 2009" "Mar 2010"
outer(Mois,Année,FUN="paste",sep="-")
     [,1]       [,2]       [,3]      
[1,] "Jan-2008" "Jan-2009" "Jan-2010"
[2,] "Fév-2008" "Fév-2009" "Fév-2010"
[3,] "Mar-2008" "Mar-2009" "Mar-2010"

4.4.2 Le package parallel

require(parallel)
vignette("parallel")
require(parallel)
nb_cores <- detectCores()
nb_cores
cl <- makeCluster(nb_cores - 1)
res <- clusterCall(cl = cl, fun = function() return(1:4))
stopCluster(cl)
str(res)

4.4.3 Le package foreach

require(foreach)
vignette("foreach")
require(foreach)
x <- foreach(i = 1:3) %do% (round(sqrt(i),2))
str(x)
List of 3
 $ : num 1
 $ : num 1.41
 $ : num 1.73
require(foreach)
x <- foreach(i = 1:3, .combine = "+") %do% sqrt(i)
x
[1] 4.146264
require(numbers)
foreach(n = 1:50, .combine = "c") %:% when (isPrime(n)) %do% n
 [1]  2  3  5  7 11 13 17 19 23 29 31 37 41 43 47
require(doParallel)
cl <- makeCluster(3)
registerDoParallel(cl) # enregistrement du cluster
res <- foreach(n = 1:3) %dopar% rnorm(1000)
stopCluster(cl)

4.4.4 Exemple avancé

require(kernlab)
data(spam)
set.seed(125)
spam$fold <- sample(1:4, nrow(spam), replace = TRUE)
table(spam$type, spam$fold)
         
            1   2   3   4
  nonspam 669 700 688 731
  spam    452 489 439 433
cv_rf <- function(data_app, data_val){
  rf <- randomForest(type ~ ., data=data_app)
  y_val <- data.frame(type=data_val$type, y=predict(rf, newdata=data_val))
  list(rf=rf, y_val, err_rate=mean(y_val$y != y_val$type))
}
require(parallel)
cl <- makeCluster(2)
clusterSetRNGStream(cl,iseed=78)
clusterExport(cl, varlist = c("spam", "cv_rf"))
clusterEvalQ(cl, {require(randomForest)})
[[1]]
[1] TRUE

[[2]]
[1] TRUE
res <- clusterApply(cl = cl, x = 1:4, fun = function(fold){
  spam_app <- spam[spam$fold != fold, ] # creation apprentissage
  spam_val <- spam[spam$fold == fold, ] # creation validation
  spam_app$fold <- NULL # suppression fold
  cv_rf(spam_app, spam_val) # calculs
})
stopCluster(cl)
sapply(res, function(x) x$err_rate)
[1] 0.05441570 0.05130362 0.05057675 0.04381443
require(foreach)
require(doParallel)
cl <- makeCluster(2)
clusterSetRNGStream(cl,iseed=78)
registerDoParallel(cl)
res <- foreach(fold = 1:4, .packages = "randomForest",
  .noexport = setdiff(ls(), c("spam", "cv_rf"))) %dopar% {
    spam_app <- spam[spam$fold != fold, ]
    spam_val <- spam[spam$fold == fold, ]
    spam_app$fold <- NULL
    cv_rf(spam_app, spam_val)
  }
stopCluster(cl)
---
title: "Chapitre 4 : programmer"
author: "Husson et al."
date: "09/09/2018"
output:
  html_notebook:
    toc: yes
    toc_depth: 3
    toc_float: yes
  html_document:
    toc: yes
    toc_depth: '3'
    toc_float: yes
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, cache = TRUE)
```

# 4.1.2 Les boucles (for ou while)

```{r,message=FALSE,warning=FALSE,results='hide'}
for (i in 1:99) print(i)
for (i in seq(1,99,by=2)) print(i)
```

```{r,message=FALSE,warning=FALSE}
vecteur <- c("lundi","mardi","mercredi")
for (i in vecteur) print(i)
```

```{r}
i <- 1
while (i<3) {
  print(i)
  i <- i+1 }
```

# 4.1.3 Les conditions (if, else)

```{r}
i <- 1
repeat {
  print(i)
  i <- i+1
  if (i>3) break }
```

```{r}
X <- matrix(0,5,5)
switch(class(X),
  "matrix" = print("X est une matrice"),
  "data.frame" = print("X est un data.frame"),
  "numeric" = print("X est de classe numérique")
)
```

# 4.2 Construire une fonction

```{r}
som <- function(n) {
  resultat <- sum(1:n)
    return(resultat)
}
som(3)
res <- som(3)
res
```
```{r}
som <- function(n) {
  if (n<=0) stop("l’entier doit être strictement positif")
  if (ceiling(n)!=n) warning(paste("arrondi de",n,"en",ceiling(n)))
  resultat <- sum(1:ceiling(n))
  return(resultat)
}
som(4.325)
```
```{r}
mafonc <- function(facteur1,facteur2) {
  res1 <- table(facteur1,facteur2)
  selection <- which(res1==0,arr.ind = TRUE)
  res2 <- matrix("",nrow=nrow(selection),ncol=2)
  res2[,1] <- levels(facteur1)[selection[,1]]
  res2[,2] <- levels(facteur2)[selection[,2]]
  return(list(tab=res1,niveau=res2))
}
```
```{r}
tension <- factor(c(rep("Faible",5),rep("Forte",5)))
laine <- factor(c(rep("Mer",3),rep("Ang",3),rep("Tex",4)))
mafonc(tension,laine)
```

# 4.3 La famille apply, des fonctions d’itération prédéfinies
 
```{r}
set.seed(1234)
X <- matrix(sample(1:20,20),ncol=4)
X
apply(X,MARGIN=2,FUN=mean)
```
```{r}
X[1,1] <- NA
apply(X,MARGIN=2,FUN=mean)
apply(X,MARGIN=2,FUN=mean,na.rm=TRUE)
colMeans(X,na.rm=TRUE)
```
```{r}
set.seed(1234)
Y <- array(sample(24),dim=c(3,4,2))
Y
apply(Y,MARGIN=c(1,2),FUN=sum,na.rm=TRUE)
```
```{r}
MaFonction <- function(x,y) {
  z <- x**2 - y
  return(z)
}
set.seed(1234)
X <- matrix(sample(12),ncol=4)
X
apply(X,MARGIN=c(1,2),FUN=MaFonction, y=2)
```
```{r}
Z <- 1:5
Z
vec1 <- c(rep("A1",2),rep("A2",2),rep("A3",1))
vec1
vec2 <- c(rep("B1",3),rep("B2",2))
vec2
tapply(Z,vec1,sum)
tapply(Z,list(vec1,vec2),sum)
```
```{r}
set.seed(545)
mat1 <- matrix(sample(12),ncol=4)
mat1
mat2 <- matrix(sample(4),ncol=2)
mat2
liste <- list(matrice1=mat1,matrice2=mat2)
lapply(liste,mean)
lapply(liste,apply,2,sum,na.rm=T)
```
```{r}
Z <- 1:5
T <- 5:1
vec1 <- c(rep("A1",2),rep("A2",2),rep("A3",1))
vec2 <- c(rep("B1",3),rep("B2",2))
df <- data.frame(Z,T,vec1,vec2)
df
aggregate(df[,1:2],list(FacteurA=vec1),sum)
aggregate(df[,1:2],list(FacteurA=vec1,FacteurB=vec2),sum)
```
```{r}
set.seed(1234)
X <- matrix(sample(12),nrow=3)
X
mean.X <- apply(X,2,mean)
mean.X
sd.X <- apply(X,2,sd)
sd.X
Xc <- sweep(X,2,mean.X,FUN="-")
Xc
Xcr <- sweep(Xc,2,sd.X,FUN="/")
Xcr
```
```{r}
set.seed(1234)
T <- rnorm(50)
Z <- rnorm(50)+3*T+5
vec1 <- c(rep("A1",20),rep("A2",30))
don <- data.frame(Z,T)
by(don,list(FacteurA=vec1),summary)
by(don,list(FacteurA=vec1),sum)
```
```{r}
mafonction <- function(x){
  summary(lm(Z~T, data=x))$coef
}
by(don, vec1, mafonction)
set.seed(1234)
replicate(n=8, mean(rnorm(100)))
Mois <- c("Jan","Fév","Mar")
Année <- 2008:2010
outer(Mois,Année,FUN="paste")
outer(Mois,Année,FUN="paste",sep="-")
```

# 4.4.2 Le package parallel

```{r, eval=FALSE}
require(parallel)
vignette("parallel")
```

```{r, message=FALSE, warning=FALSE, eval=FALSE}
require(parallel)
nb_cores <- detectCores()
nb_cores
cl <- makeCluster(nb_cores - 1)
res <- clusterCall(cl = cl, fun = function() return(1:4))
stopCluster(cl)
str(res)
```

# 4.4.3 Le package foreach

```{r, message=FALSE, warning=FALSE, eval=FALSE}
require(foreach)
vignette("foreach")
```
```{r, message=FALSE, warning=FALSE}
require(foreach)
x <- foreach(i = 1:3) %do% (round(sqrt(i),2))
str(x)
```

```{r, message=FALSE, warning=FALSE}
require(foreach)
x <- foreach(i = 1:3, .combine = "+") %do% sqrt(i)
x
require(numbers)
foreach(n = 1:50, .combine = "c") %:% when (isPrime(n)) %do% n
```

```{r, message=FALSE, warning=FALSE}
require(doParallel)
cl <- makeCluster(3)
registerDoParallel(cl) # enregistrement du cluster
res <- foreach(n = 1:3) %dopar% rnorm(1000)
stopCluster(cl)
```

# 4.4.4 Exemple avancé

```{r, message=FALSE, warning=FALSE}
require(kernlab)
data(spam)
set.seed(125)
spam$fold <- sample(1:4, nrow(spam), replace = TRUE)
table(spam$type, spam$fold)
cv_rf <- function(data_app, data_val){
  rf <- randomForest(type ~ ., data=data_app)
  y_val <- data.frame(type=data_val$type, y=predict(rf, newdata=data_val))
  list(rf=rf, y_val, err_rate=mean(y_val$y != y_val$type))
}
```
```{r, message=FALSE, warning=FALSE}
require(parallel)
cl <- makeCluster(2)
clusterSetRNGStream(cl,iseed=78)
clusterExport(cl, varlist = c("spam", "cv_rf"))
clusterEvalQ(cl, {require(randomForest)})
res <- clusterApply(cl = cl, x = 1:4, fun = function(fold){
  spam_app <- spam[spam$fold != fold, ] # creation apprentissage
  spam_val <- spam[spam$fold == fold, ] # creation validation
  spam_app$fold <- NULL # suppression fold
  cv_rf(spam_app, spam_val) # calculs
})
stopCluster(cl)
sapply(res, function(x) x$err_rate)
```

```{r, message=FALSE, warning=FALSE}
require(foreach)
require(doParallel)
cl <- makeCluster(2)
clusterSetRNGStream(cl,iseed=78)
registerDoParallel(cl)
res <- foreach(fold = 1:4, .packages = "randomForest",
  .noexport = setdiff(ls(), c("spam", "cv_rf"))) %dopar% {
    spam_app <- spam[spam$fold != fold, ]
    spam_val <- spam[spam$fold == fold, ]
    spam_app$fold <- NULL
    cv_rf(spam_app, spam_val)
  }
stopCluster(cl)
```


