2.1.1 Importation d’un fichier texte

## Import the decathlon data: ";" separates fields, "." is the decimal mark,
## the first line holds the column names and the first column the row names.
## header=TRUE is written in full because T is an ordinary variable that can
## be reassigned, whereas TRUE is a reserved word.
decath <- read.table("https://r-stat-sc-donnees.github.io/decathlon.csv",
                     sep=";",dec=".",header=TRUE,row.names=1)
## Import a space-separated file with a header line; cells containing "."
## are read as missing values (NA).
tablo <- read.table("https://r-stat-sc-donnees.github.io/donnees2.csv",sep=" ",header=TRUE,
                    na.strings = ".")
summary(tablo)
     taille          poids          pointure     sexe 
 Min.   :175.5   Min.   :72.00   Min.   :40.00   F:1  
 1st Qu.:176.8   1st Qu.:75.00   1st Qu.:41.50   M:3  
 Median :178.0   Median :78.00   Median :42.50        
 Mean   :179.2   Mean   :76.67   Mean   :42.25        
 3rd Qu.:181.0   3rd Qu.:79.00   3rd Qu.:43.25        
 Max.   :184.0   Max.   :80.00   Max.   :44.00        
 NA's   :1       NA's   :1                            
data(iris)

2.3 Manipuler les variables

## Numeric vector: 10 three times, 12 twice, 13 four times.
X <- rep(c(10,12,13),times=c(3,2,4))
X
[1] 10 10 10 12 12 13 13 13 13
is.factor(X)
[1] FALSE
is.numeric(X)
[1] TRUE
summary(X)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  10.00   10.00   12.00   11.78   13.00   13.00 
Xqual <- factor(X)
Xqual
[1] 10 10 10 12 12 13 13 13 13
Levels: 10 12 13
summary(Xqual)
10 12 13 
 3  2  4 
## conversion that recodes the levels: returns the internal integer codes
as.numeric(Xqual)
[1] 1 1 1 2 2 3 3 3 3
## conversion that keeps the level labels as values: two steps,
## first to character, then to numeric
provisoire <- as.character(Xqual)
provisoire
[1] "10" "10" "10" "12" "12" "13" "13" "13" "13"
as.numeric(provisoire)
[1] 10 10 10 12 12 13 13 13 13
set.seed(654) ## fixes the seed of the pseudo-random number generator
X <- rnorm(15,mean=0,sd=1)
X
 [1] -0.76031762 -0.38970450  1.68962523 -0.09423560  0.09530146  0.81727228  1.06576755  0.93984563  0.74121222
[10] -0.43531214 -0.10726012 -0.83816833 -0.98260589 -0.82037099 -0.87143256
## Cut X into three classes at -0.2 and 0.2; the outer breaks are min(X) and
## max(X), and include.lowest=TRUE closes the first interval on the left so
## that the minimum itself is classified instead of becoming NA.
Xqual <- cut(X,breaks=c(min(X),-0.2,0.2,max(X)),include.lowest=TRUE)
Xqual
 [1] [-0.983,-0.2] [-0.983,-0.2] (0.2,1.69]    (-0.2,0.2]    (-0.2,0.2]    (0.2,1.69]    (0.2,1.69]    (0.2,1.69]   
 [9] (0.2,1.69]    [-0.983,-0.2] (-0.2,0.2]    [-0.983,-0.2] [-0.983,-0.2] [-0.983,-0.2] [-0.983,-0.2]
Levels: [-0.983,-0.2] (-0.2,0.2] (0.2,1.69]
table(Xqual)
Xqual
[-0.983,-0.2]    (-0.2,0.2]    (0.2,1.69] 
            7             3             5 
Xqual
 [1] [-0.983,-0.2] [-0.983,-0.2] (0.2,1.69]    (-0.2,0.2]    (-0.2,0.2]    (0.2,1.69]    (0.2,1.69]    (0.2,1.69]   
 [9] (0.2,1.69]    [-0.983,-0.2] (-0.2,0.2]    [-0.983,-0.2] [-0.983,-0.2] [-0.983,-0.2] [-0.983,-0.2]
Levels: [-0.983,-0.2] (-0.2,0.2] (0.2,1.69]
## Break points at the 0, 1/3, 2/3 and 1 quantiles of X, so the three classes
## hold about the same number of observations. length.out is written in full
## rather than relying on partial matching of the abbreviation "length".
decoupe <- quantile(X,probs=seq(0,1,length.out=4))
Xqual <- cut(X,breaks=decoupe,include.lowest=TRUE)
table(Xqual)
Xqual
[-0.983,-0.544]  (-0.544,0.311]    (0.311,1.69] 
              5               5               5 
levels(Xqual) <- c("niv1","niv2","niv3")
Xqual
 [1] niv1 niv2 niv3 niv2 niv2 niv3 niv3 niv3 niv3 niv2 niv2 niv1 niv1 niv1 niv1
Levels: niv1 niv2 niv3
## Giving the same label to the first and third levels merges them into one.
levels(Xqual) <- c("niv1+3","niv2","niv1+3")
Xqual
 [1] niv1+3 niv2   niv1+3 niv2   niv2   niv1+3 niv1+3 niv1+3 niv1+3 niv2   niv2   niv1+3 niv1+3 niv1+3 niv1+3
Levels: niv1+3 niv2
## Codes 1, 2, 3 turned into a factor with explicit labels.
## The argument is spelled labels= in full; the abbreviation label= only
## works through partial argument matching.
X <- c(1,1,2,2,2,3)
Xqual <- factor(X,labels=c("classique","nouveau","placebo"))
Xqual
[1] classique classique nouveau   nouveau   nouveau   placebo  
Levels: classique nouveau placebo
Xqual2 <- relevel(Xqual,ref="placebo")
Xqual2
[1] classique classique nouveau   nouveau   nouveau   placebo  
Levels: placebo classique nouveau
Xqual3 <- factor(Xqual,levels=c("placebo","nouveau","classique"))
Xqual3
[1] classique classique nouveau   nouveau   nouveau   placebo  
Levels: placebo nouveau classique
## Factor with three levels: A three times, B once, C four times.
facteur <- factor(rep(c("A","B","C"),times=c(3,1,4)))
facteur
[1] A A A B C C C C
Levels: A B C
facteur2 <- facteur[-4]
facteur2
[1] A A A C C C C
Levels: A B C
## Drop the now-unused level "B": convert to character, then rebuild the
## factor so that only the values actually present become levels.
facteur2 <- as.character(facteur2)
facteur2 <- factor(facteur2)
facteur2
[1] A A A C C C C
Levels: A C
## Remove the 4th element; drop=TRUE discards unused levels while subsetting.
facteur3 <- facteur[-4,drop=TRUE]
facteur3
[1] A A A C C C C
Levels: A C

2.4 Manipuler les individus

set.seed(23)
variable <- rnorm(10,mean=0,sd=1)
## Insert missing values at positions 3, 4 and 6.
variable[c(3,4,6)] <- NA
## Logical mask: TRUE where variable is missing.
select <- is.na(variable)
select
 [1] FALSE FALSE  TRUE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE
which(select)
[1] 3 4 6
variable2 <- variable[!select]
variable2
[1]  0.19321233 -0.43468211  0.99660511 -0.27808628  1.01920549  0.04543718  1.57577959
## Equivalent removal by negative indices. Caution: if select contained no
## TRUE, which(select) would be integer(0) and variable[-integer(0)] would
## return an empty vector; the logical form variable[!select] is safer.
variable3 <- variable[-which(select)]
all.equal(variable2,variable3)
[1] TRUE
varqual <- factor(c(rep("M",3),NA,NA,rep("F",5)))
don <- cbind.data.frame(variable,varqual)
don
      variable varqual
1   0.19321233       M
2  -0.43468211       M
3           NA       M
4           NA    <NA>
5   0.99660511    <NA>
6           NA       F
7  -0.27808628       F
8   1.01920549       F
9   0.04543718       F
10  1.57577959       F
summary(don)
    variable       varqual 
 Min.   :-0.4347   F   :5  
 1st Qu.:-0.1163   M   :3  
 Median : 0.1932   NA's:2  
 Mean   : 0.4454           
 3rd Qu.: 1.0079           
 Max.   : 1.5758           
 NA's   :3                 
select <- is.na(don)
select
      variable varqual
 [1,]    FALSE   FALSE
 [2,]    FALSE   FALSE
 [3,]     TRUE   FALSE
 [4,]     TRUE    TRUE
 [5,]    FALSE    TRUE
 [6,]     TRUE   FALSE
 [7,]    FALSE   FALSE
 [8,]    FALSE   FALSE
 [9,]    FALSE   FALSE
[10,]    FALSE   FALSE
## For each row of the logical matrix select, TRUE if at least one cell is NA.
aeliminer <- apply(select,MARGIN=1,FUN=any)
aeliminer
 [1] FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE
don2 <- don[!aeliminer,]
don2
      variable varqual
1   0.19321233       M
2  -0.43468211       M
7  -0.27808628       F
8   1.01920549       F
9   0.04543718       F
10  1.57577959       F
which(is.na(don),arr.ind=TRUE)
     row col
[1,]   3   1
[2,]   4   1
[3,]   6   1
[4,]   4   2
[5,]   5   2
library(rpart)
data(kyphosis)
boxplot(kyphosis[,"Number"])

## boxplot() also returns its statistics (invisibly); keep them to get the outliers.
resultat <- boxplot(kyphosis[,"Number"])

## out holds the values of the points drawn beyond the whiskers.
valaberrante <- resultat$out
valaberrante
[1]  9 10
which(kyphosis[,"Number"]%in%valaberrante)
[1] 43 53
X <- data.frame(C1=c("a","b","b","a","a"),C2=c(1,2,2,3,1))
X
  C1 C2
1  a  1
2  b  2
3  b  2
4  a  3
5  a  1
unique(X)
  C1 C2
1  a  1
2  b  2
4  a  3
duplicated(X)
[1] FALSE FALSE  TRUE FALSE  TRUE
X[duplicated(X),]
  C1 C2
3  b  2
5  a  1

2.5 Concaténer des tableaux de données

## 2 x 2 matrix filled column by column; rows are named ligne1, ligne2 and
## columns X1, X2. paste0() concatenates without a separator, replacing the
## longer paste(..., sep="").
X <- matrix(c(1,2,3,4),2,2)
rownames(X) <- paste0("ligne",1:2)
colnames(X) <- paste0("X",1:2)
X
       X1 X2
ligne1  1  3
ligne2  2  4
## 3 x 2 integer matrix filled column by column, with columns named Y1 and Y2.
## paste0() replaces paste(..., sep="").
Y <- matrix(11:16,3,2)
colnames(Y) <- paste0("Y",1:2)
Y
     Y1 Y2
[1,] 11 14
[2,] 12 15
[3,] 13 16
Z <- rbind(X,Y)
Z
       X1 X2
ligne1  1  3
ligne2  2  4
       11 14
       12 15
       13 16
Xd <- data.frame(X)
Yd <- data.frame(Y)
## Swap the names on purpose: for data frames, rbind() matches columns by
## name, not by position, so Yd's columns are reordered in the result.
colnames(Yd) <- c("X2","X1")
rbind(Xd,Yd)
       X1 X2
ligne1  1  3
ligne2  2  4
1      14 11
2      15 12
3      16 13
## Column-bind a data frame and a matrix that have the same number of rows.
## paste0() replaces paste(..., sep="") for the row names.
X <- matrix(c(1,2,3,4),2,2)
rownames(X) <- paste0("ligne",1:2)
Y <- matrix(11:16,2,3)
cbind(data.frame(X),Y)
       X1 X2  1  2  3
ligne1  1  3 11 13 15
ligne2  2  4 12 14 16
age <- c(7,38,32)
prenom <- c("arnaud","nicolas","laurent")
ville <- factor(c("rennes","rennes","marseille"))
## One row per person; the three vectors become the columns age, prenom, ville.
indiv <- cbind.data.frame(age,prenom,ville)
indiv
  age  prenom     ville
1   7  arnaud    rennes
2  38 nicolas    rennes
3  32 laurent marseille
## City-level table with columns ville and pop, named directly in the call.
population <- c(200,500,800)
caractvilles <- cbind.data.frame(ville=c("rennes","lyon","marseille"),
                                 pop=population)
caractvilles
      ville pop
1    rennes 200
2      lyon 500
3 marseille 800
## Join the two tables on ville. Only cities present in both tables are kept,
## so lyon (no individual) is absent from the result.
merge(indiv,caractvilles,by="ville")
      ville age  prenom pop
1 marseille  32 laurent 800
2    rennes   7  arnaud 200
3    rennes  38 nicolas 200

2.6 Tableau croisé

## Two-level factor: five "Faible" followed by five "Forte".
tension <- factor(rep(c("Faible","Forte"),each=5))
tension
 [1] Faible Faible Faible Faible Faible Forte  Forte  Forte  Forte  Forte 
Levels: Faible Forte
## Three-level factor: "Mer" three times, "Ang" three times, "Tex" four times.
laine <- factor(rep(c("Mer","Ang","Tex"),times=c(3,3,4)))
laine
 [1] Mer Mer Mer Ang Ang Ang Tex Tex Tex Tex
Levels: Ang Mer Tex
don <- cbind.data.frame(tension,laine)
don
   tension laine
1   Faible   Mer
2   Faible   Mer
3   Faible   Mer
4   Faible   Ang
5   Faible   Ang
6    Forte   Ang
7    Forte   Tex
8    Forte   Tex
9    Forte   Tex
10   Forte   Tex
table(don$tension,don$laine)
        
         Ang Mer Tex
  Faible   2   3   0
  Forte    1   0   4
## Contingency table built from a formula: counts for each tension x laine pair.
tabcroise <- xtabs(~tension+laine,data=don)
tabcroise
        laine
tension  Ang Mer Tex
  Faible   2   3   0
  Forte    1   0   4
## Back to a data frame in long format: one row per combination of levels,
## with the count in column Freq (zero counts are kept).
tabframe <- as.data.frame(tabcroise)
tabframe
  tension laine Freq
1  Faible   Ang    2
2   Forte   Ang    1
3  Faible   Mer    3
4   Forte   Mer    0
5  Faible   Tex    0
6   Forte   Tex    4
LS0tDQp0aXRsZTogIkNoYXBpdHJlIDIgOiBtYW5pcHVsZXIgbGVzIGRvbm7DqWVzIg0KYXV0aG9yOiAiSHVzc29uIGV0IGFsLiINCmRhdGU6ICIwOS8wOS8yMDE4Ig0Kb3V0cHV0Og0KICBodG1sX25vdGVib29rOg0KICAgIHRvYzogeWVzDQogICAgdG9jX2RlcHRoOiAzDQogICAgdG9jX2Zsb2F0OiB5ZXMNCiAgaHRtbF9kb2N1bWVudDoNCiAgICB0b2M6IHllcw0KICAgIHRvY19kZXB0aDogJzMnDQogICAgdG9jX2Zsb2F0OiB5ZXMNCi0tLQ0KDQpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0NCmtuaXRyOjpvcHRzX2NodW5rJHNldChlY2hvID0gVFJVRSwgY2FjaGUgPSBUUlVFKQ0KYGBgDQoNCiMgMi4xLjEgSW1wb3J0YXRpb24gZOKAmXVuIGZpY2hpZXIgdGV4dGUNCg0KYGBge3IsbWVzc2FnZT1GQUxTRSx3YXJuaW5nPUZBTFNFfQ0KZGVjYXRoIDwtIHJlYWQudGFibGUoImh0dHBzOi8vci1zdGF0LXNjLWRvbm5lZXMuZ2l0aHViLmlvL2RlY2F0aGxvbi5jc3YiLA0KICAgICAgICAgICAgICAgICAgICAgc2VwPSI7IixkZWM9Ii4iLGhlYWRlcj1ULHJvdy5uYW1lcz0xKQ0KYGBgDQoNCmBgYHtyfQ0KdGFibG8gPC0gcmVhZC50YWJsZSgiaHR0cHM6Ly9yLXN0YXQtc2MtZG9ubmVlcy5naXRodWIuaW8vZG9ubmVlczIuY3N2IixzZXA9IiAiLGhlYWRlcj1UUlVFLA0KICAgICAgICAgICAgICAgICAgICBuYS5zdHJpbmdzID0gIi4iKQ0Kc3VtbWFyeSh0YWJsbykNCmBgYA0KDQpgYGB7cn0NCmRhdGEoaXJpcykNCmBgYA0KDQojIDIuMyBNYW5pcHVsZXIgbGVzIHZhcmlhYmxlcw0KDQpgYGB7cn0NClggPC0gYyhyZXAoMTAsMykscmVwKDEyLDIpLHJlcCgxMyw0KSkNClgNCmlzLmZhY3RvcihYKQ0KaXMubnVtZXJpYyhYKQ0Kc3VtbWFyeShYKQ0KWHF1YWwgPC0gZmFjdG9yKFgpDQpYcXVhbA0Kc3VtbWFyeShYcXVhbCkNCmBgYA0KYGBge3J9DQojIyBjb252ZXJzaW9uIGF2ZWMgcmVjb2RhZ2UgZGVzIG1vZGFsaXTDqXMNCmFzLm51bWVyaWMoWHF1YWwpDQojIyBjb252ZXJzaW9uIHNhbnMgcmVjb2RhZ2UgZGVzIG1vZGFsaXTDqXMgOiAyIMOpdGFwZXMNCnByb3Zpc29pcmUgPC0gYXMuY2hhcmFjdGVyKFhxdWFsKQ0KcHJvdmlzb2lyZQ0KYXMubnVtZXJpYyhwcm92aXNvaXJlKQ0KYGBgDQpgYGB7cn0NCnNldC5zZWVkKDY1NCkgIyMgZml4ZSBsYSBncmFpbmUgZHUgZ8OpbsOpcmF0ZXVyIHBzZXVkby1hbMOpYXRvaXJlDQpYIDwtIHJub3JtKDE1LG1lYW49MCxzZD0xKQ0KWA0KWHF1YWwgPC0gY3V0KFgsYnJlYWtzPWMobWluKFgpLC0wLjIsMC4yLG1heChYKSksaW5jbHVkZS5sb3dlc3Q9VFJVRSkNClhxdWFsDQp0YWJsZShYcXVhbCkNClhxdWFsDQpkZWNvdXBlIDwtIHF1YW50aWxlKFgscHJvYnM9c2VxKDAsMSxsZW5ndGg9NCkpDQpYcXVhbCA8LSBjdXQoWCxicmVha3M9ZGVjb3VwZSxpbmNsdWRlLmxvd2VzdD1UUlVFKQ0KdGFibGUoWHF1YWwpDQpsZXZlbHMoWHF1YWwpIDwtIGMoIm5pdjEiLCJuaXYyIiwibml2MyIpDQpYcXVhbA0KbGV2ZWxz
KFhxdWFsKSA8LSBjKCJuaXYxKzMiLCJuaXYyIiwibml2MSszIikNClhxdWFsDQpYIDwtIGMoMSwxLDIsMiwyLDMpDQpYcXVhbCA8LSBmYWN0b3IoWCxsYWJlbD1jKCJjbGFzc2lxdWUiLCJub3V2ZWF1IiwicGxhY2VibyIpKQ0KWHF1YWwNClhxdWFsMiA8LSByZWxldmVsKFhxdWFsLHJlZj0icGxhY2VibyIpDQpYcXVhbDINClhxdWFsMyA8LSBmYWN0b3IoWHF1YWwsbGV2ZWxzPWMoInBsYWNlYm8iLCJub3V2ZWF1IiwiY2xhc3NpcXVlIikpDQpYcXVhbDMNCmZhY3RldXIgPC0gZmFjdG9yKGMocmVwKCJBIiwzKSwiQiIscmVwKCJDIiw0KSkpDQpmYWN0ZXVyDQpmYWN0ZXVyMiA8LSBmYWN0ZXVyWy00XQ0KZmFjdGV1cjINCmZhY3RldXIyIDwtIGFzLmNoYXJhY3RlcihmYWN0ZXVyMikNCmZhY3RldXIyIDwtIGZhY3RvcihmYWN0ZXVyMikNCmZhY3RldXIyDQpmYWN0ZXVyMyA8LSBmYWN0ZXVyWy00LGRyb3A9VFJVRV0NCmZhY3RldXIzDQpgYGANCg0KIyAyLjQgTWFuaXB1bGVyIGxlcyBpbmRpdmlkdXMNCg0KYGBge3J9DQpzZXQuc2VlZCgyMykNCnZhcmlhYmxlIDwtIHJub3JtKDEwLG1lYW49MCxzZD0xKQ0KdmFyaWFibGVbYygzLDQsNildIDwtIE5BDQpzZWxlY3QgPC0gaXMubmEodmFyaWFibGUpDQpzZWxlY3QNCndoaWNoKHNlbGVjdCkNCnZhcmlhYmxlMiA8LSB2YXJpYWJsZVshc2VsZWN0XQ0KdmFyaWFibGUyDQp2YXJpYWJsZTMgPC0gdmFyaWFibGVbLXdoaWNoKHNlbGVjdCldDQphbGwuZXF1YWwodmFyaWFibGUyLHZhcmlhYmxlMykNCnZhcnF1YWwgPC0gZmFjdG9yKGMocmVwKCJNIiwzKSxOQSxOQSxyZXAoIkYiLDUpKSkNCmRvbiA8LSBjYmluZC5kYXRhLmZyYW1lKHZhcmlhYmxlLHZhcnF1YWwpDQpkb24NCnN1bW1hcnkoZG9uKQ0Kc2VsZWN0IDwtIGlzLm5hKGRvbikNCnNlbGVjdA0KYWVsaW1pbmVyIDwtIGFwcGx5KHNlbGVjdCxNQVJHSU49MSxGVU49YW55KQ0KYWVsaW1pbmVyDQpkb24yIDwtIGRvblshYWVsaW1pbmVyLF0NCmRvbjINCndoaWNoKGlzLm5hKGRvbiksYXJyLmluZD1UUlVFKQ0KYGBgDQoNCmBgYHtyLG1lc3NhZ2U9RkFMU0Usd2FybmluZz1GQUxTRX0NCmxpYnJhcnkocnBhcnQpDQpkYXRhKGt5cGhvc2lzKQ0KYm94cGxvdChreXBob3Npc1ssIk51bWJlciJdKQ0KcmVzdWx0YXQgPC0gYm94cGxvdChreXBob3Npc1ssIk51bWJlciJdKQ0KdmFsYWJlcnJhbnRlIDwtIHJlc3VsdGF0JG91dA0KdmFsYWJlcnJhbnRlDQp3aGljaChreXBob3Npc1ssIk51bWJlciJdJWluJXZhbGFiZXJyYW50ZSkNClggPC0gZGF0YS5mcmFtZShDMT1jKCJhIiwiYiIsImIiLCJhIiwiYSIpLEMyPWMoMSwyLDIsMywxKSkNClgNCnVuaXF1ZShYKQ0KZHVwbGljYXRlZChYKQ0KWFtkdXBsaWNhdGVkKFgpLF0NCmBgYA0KDQojIDIuNSBDb25jYXTDqW5lciBkZXMgdGFibGVhdXggZGUgZG9ubsOpZXMNCg0KYGBge3J9DQpYIDwtIG1hdHJpeChjKDEsMiwzLDQpLDIsMikNCnJvd25hbWVzKFgpIDwtIHBhc3RlKCJsaWduZSIsMToyLHNlcD0iIikNCmNv
bG5hbWVzKFgpIDwtIHBhc3RlKCJYIiwxOjIsc2VwPSIiKQ0KWA0KWSA8LSBtYXRyaXgoMTE6MTYsMywyKQ0KY29sbmFtZXMoWSkgPC0gcGFzdGUoIlkiLDE6MixzZXA9IiIpDQpZDQpaIDwtIHJiaW5kKFgsWSkNCloNClhkIDwtIGRhdGEuZnJhbWUoWCkNCllkIDwtIGRhdGEuZnJhbWUoWSkNCmNvbG5hbWVzKFlkKSA8LSBjKCJYMiIsIlgxIikNCnJiaW5kKFhkLFlkKQ0KWCA8LSBtYXRyaXgoYygxLDIsMyw0KSwyLDIpDQpyb3duYW1lcyhYKSA8LSBwYXN0ZSgibGlnbmUiLDE6MixzZXA9IiIpDQpZIDwtIG1hdHJpeCgxMToxNiwyLDMpDQpjYmluZChkYXRhLmZyYW1lKFgpLFkpDQpgYGANCmBgYHtyfQ0KYWdlIDwtIGMoNywzOCwzMikNCnByZW5vbSA8LSBjKCJhcm5hdWQiLCJuaWNvbGFzIiwibGF1cmVudCIpDQp2aWxsZSA8LSBmYWN0b3IoYygicmVubmVzIiwicmVubmVzIiwibWFyc2VpbGxlIikpDQppbmRpdiA8LSBjYmluZC5kYXRhLmZyYW1lKGFnZSxwcmVub20sdmlsbGUpDQppbmRpdg0KcG9wdWxhdGlvbiA8LSBjKDIwMCw1MDAsODAwKQ0KY2FyYWN0dmlsbGVzIDwtIGNiaW5kLmRhdGEuZnJhbWUoYygicmVubmVzIiwibHlvbiIsIm1hcnNlaWxsZSIpLHBvcHVsYXRpb24pDQpuYW1lcyhjYXJhY3R2aWxsZXMpIDwtIGMoInZpbGxlIiwicG9wIikNCmNhcmFjdHZpbGxlcw0KbWVyZ2UoaW5kaXYsY2FyYWN0dmlsbGVzLGJ5PSJ2aWxsZSIpDQpgYGANCg0KIyAyLjYgVGFibGVhdSBjcm9pc8OpDQoNCmBgYHtyfQ0KdGVuc2lvbiA8LSBmYWN0b3IoYyhyZXAoIkZhaWJsZSIsNSkscmVwKCJGb3J0ZSIsNSkpKQ0KdGVuc2lvbg0KbGFpbmUgPC0gZmFjdG9yKGMocmVwKCJNZXIiLDMpLHJlcCgiQW5nIiwzKSxyZXAoIlRleCIsNCkpKQ0KbGFpbmUNCmRvbiA8LSBjYmluZC5kYXRhLmZyYW1lKHRlbnNpb24sbGFpbmUpDQpkb24NCnRhYmxlKGRvbiR0ZW5zaW9uLGRvbiRsYWluZSkNCnRhYmNyb2lzZSA8LSB4dGFicyh+dGVuc2lvbitsYWluZSxkYXRhPWRvbikNCnRhYmNyb2lzZQ0KdGFiZnJhbWUgPC0gYXMuZGF0YS5mcmFtZSh0YWJjcm9pc2UpDQp0YWJmcmFtZQ0KYGBgDQoNCg==