2.1.1 Importation d’un fichier texte
# Read decathlon.csv: semicolon-separated fields, "." as decimal mark, a
# header line, and the first column used as row names.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
decath <- read.table("https://r-stat-sc-donnees.github.io/decathlon.csv",
                     sep = ";", dec = ".", header = TRUE, row.names = 1)
# Read donnees2.csv: space-separated fields with a header line; a lone "."
# marks a missing value.
# stringsAsFactors = TRUE keeps text columns as factors. Since R 4.0.0 the
# default is FALSE, and summary() would then report `sexe` as a character
# column instead of the per-level counts (F / M).
tablo <- read.table("https://r-stat-sc-donnees.github.io/donnees2.csv",
                    sep = " ", header = TRUE, na.strings = ".",
                    stringsAsFactors = TRUE)
summary(tablo)
taille poids pointure sexe
Min. :175.5 Min. :72.00 Min. :40.00 F:1
1st Qu.:176.8 1st Qu.:75.00 1st Qu.:41.50 M:3
Median :178.0 Median :78.00 Median :42.50
Mean :179.2 Mean :76.67 Mean :42.25
3rd Qu.:181.0 3rd Qu.:79.00 3rd Qu.:43.25
Max. :184.0 Max. :80.00 Max. :44.00
NA's :1 NA's :1
data(iris)
2.3 Manipuler les variables
# Numeric vector holding 10 three times, 12 twice and 13 four times.
X <- rep(c(10, 12, 13), times = c(3, 2, 4))
X
[1] 10 10 10 12 12 13 13 13 13
is.factor(X)
[1] FALSE
is.numeric(X)
[1] TRUE
summary(X)
Min. 1st Qu. Median Mean 3rd Qu. Max.
10.00 10.00 12.00 11.78 13.00 13.00
Xqual <- factor(X)
Xqual
[1] 10 10 10 12 12 13 13 13 13
Levels: 10 12 13
summary(Xqual)
10 12 13
3 2 4
## conversion with recoding of the levels: as.numeric() applied to the factor
## returns the position of each level (1, 2, 3), not the original values
as.numeric(Xqual)
[1] 1 1 1 2 2 3 3 3 3
## conversion without recoding of the levels: 2 steps
## step 1: turn the factor into the character strings of its levels
provisoire <- as.character(Xqual)
provisoire
[1] "10" "10" "10" "12" "12" "13" "13" "13" "13"
as.numeric(provisoire)
[1] 10 10 10 12 12 13 13 13 13
## fix the seed of the pseudo-random generator so the draw is reproducible
set.seed(654)
## 15 draws from the normal distribution with mean 0 and standard deviation 1
X <- rnorm(n = 15, mean = 0, sd = 1)
X
[1] -0.76031762 -0.38970450 1.68962523 -0.09423560 0.09530146 0.81727228 1.06576755 0.93984563 0.74121222
[10] -0.43531214 -0.10726012 -0.83816833 -0.98260589 -0.82037099 -0.87143256
## split X into three classes with cut points at -0.2 and 0.2;
## include.lowest=TRUE closes the first interval on the left, so it contains
## min(X)
Xqual <- cut(X,breaks=c(min(X),-0.2,0.2,max(X)),include.lowest=TRUE)
Xqual
[1] [-0.983,-0.2] [-0.983,-0.2] (0.2,1.69] (-0.2,0.2] (-0.2,0.2] (0.2,1.69] (0.2,1.69] (0.2,1.69]
[9] (0.2,1.69] [-0.983,-0.2] (-0.2,0.2] [-0.983,-0.2] [-0.983,-0.2] [-0.983,-0.2] [-0.983,-0.2]
Levels: [-0.983,-0.2] (-0.2,0.2] (0.2,1.69]
table(Xqual)
Xqual
[-0.983,-0.2] (-0.2,0.2] (0.2,1.69]
7 3 5
Xqual
[1] [-0.983,-0.2] [-0.983,-0.2] (0.2,1.69] (-0.2,0.2] (-0.2,0.2] (0.2,1.69] (0.2,1.69] (0.2,1.69]
[9] (0.2,1.69] [-0.983,-0.2] (-0.2,0.2] [-0.983,-0.2] [-0.983,-0.2] [-0.983,-0.2] [-0.983,-0.2]
Levels: [-0.983,-0.2] (-0.2,0.2] (0.2,1.69]
## class boundaries taken at the 0, 1/3, 2/3 and 1 quantiles of X, so that the
## three classes receive the same number of observations
## (`length.out` is written in full instead of relying on partial matching of
## `length`)
decoupe <- quantile(X, probs = seq(0, 1, length.out = 4))
Xqual <- cut(X, breaks = decoupe, include.lowest = TRUE)
table(Xqual)
Xqual
[-0.983,-0.544] (-0.544,0.311] (0.311,1.69]
5 5 5
levels(Xqual) <- c("niv1","niv2","niv3")
Xqual
[1] niv1 niv2 niv3 niv2 niv2 niv3 niv3 niv3 niv3 niv2 niv2 niv1 niv1 niv1 niv1
Levels: niv1 niv2 niv3
## giving the first and the third level the same label merges them into a
## single level
levels(Xqual) <- c("niv1+3","niv2","niv1+3")
Xqual
[1] niv1+3 niv2 niv1+3 niv2 niv2 niv1+3 niv1+3 niv1+3 niv1+3 niv2 niv2 niv1+3 niv1+3 niv1+3 niv1+3
Levels: niv1+3 niv2
## numeric codes 1, 2, 3 turned into a factor with explicit labels
## (`labels` is written in full instead of relying on partial matching of
## `label`)
X <- c(1, 1, 2, 2, 2, 3)
Xqual <- factor(X, labels = c("classique", "nouveau", "placebo"))
Xqual
[1] classique classique nouveau nouveau nouveau placebo
Levels: classique nouveau placebo
## relevel() moves the level given in `ref` to the first position; the other
## levels keep their relative order
Xqual2 <- relevel(Xqual,ref="placebo")
Xqual2
[1] classique classique nouveau nouveau nouveau placebo
Levels: placebo classique nouveau
## passing `levels` to factor() sets the complete order of the levels
## explicitly; the observations themselves are unchanged
Xqual3 <- factor(Xqual,levels=c("placebo","nouveau","classique"))
Xqual3
[1] classique classique nouveau nouveau nouveau placebo
Levels: placebo nouveau classique
## factor with three "A", one "B" and four "C"
facteur <- factor(rep(c("A", "B", "C"), times = c(3, 1, 4)))
facteur
[1] A A A B C C C C
Levels: A B C
facteur2 <- facteur[-4]
facteur2
[1] A A A C C C C
Levels: A B C
## rebuild the factor from its character values so that the level "B", which
## no observation uses any more, disappears
facteur2 <- as.character(facteur2)
facteur2 <- factor(facteur2)
facteur2
[1] A A A C C C C
Levels: A C
## same result in one step: drop=TRUE removes the unused levels while
## subsetting
facteur3 <- facteur[-4,drop=TRUE]
facteur3
[1] A A A C C C C
Levels: A C
2.4 Manipuler les individus
## ten normal draws (mean 0, sd 1) with a fixed seed; positions 3, 4 and 6 are
## then set to missing
set.seed(23)
variable <- rnorm(n = 10, mean = 0, sd = 1)
is.na(variable) <- c(3, 4, 6)
## logical mask: TRUE where the value is missing
select <- is.na(variable)
select
[1] FALSE FALSE TRUE TRUE FALSE TRUE FALSE FALSE FALSE FALSE
which(select)
[1] 3 4 6
variable2 <- variable[!select]
variable2
[1] 0.19321233 -0.43468211 0.99660511 -0.27808628 1.01920549 0.04543718 1.57577959
## same selection written with negative indices, then compared with variable2
## NOTE(review): if `select` held no TRUE, which(select) would be empty and
## variable[-which(select)] would return an empty vector instead of the whole
## vector; the logical form variable[!select] does not have that problem
variable3 <- variable[-which(select)]
all.equal(variable2,variable3)
[1] TRUE
varqual <- factor(c(rep("M",3),NA,NA,rep("F",5)))
don <- cbind.data.frame(variable,varqual)
don
variable varqual
1 0.19321233 M
2 -0.43468211 M
3 NA M
4 NA <NA>
5 0.99660511 <NA>
6 NA F
7 -0.27808628 F
8 1.01920549 F
9 0.04543718 F
10 1.57577959 F
summary(don)
variable varqual
Min. :-0.4347 F :5
1st Qu.:-0.1163 M :3
Median : 0.1932 NA's:2
Mean : 0.4454
3rd Qu.: 1.0079
Max. : 1.5758
NA's :3
select <- is.na(don)
select
variable varqual
[1,] FALSE FALSE
[2,] FALSE FALSE
[3,] TRUE FALSE
[4,] TRUE TRUE
[5,] FALSE TRUE
[6,] TRUE FALSE
[7,] FALSE FALSE
[8,] FALSE FALSE
[9,] FALSE FALSE
[10,] FALSE FALSE
## a row has to be removed as soon as one of its cells is missing:
## count the TRUE cells of each row and flag the rows with at least one
aeliminer <- rowSums(select) > 0
aeliminer
[1] FALSE FALSE TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE
don2 <- don[!aeliminer,]
don2
variable varqual
1 0.19321233 M
2 -0.43468211 M
7 -0.27808628 F
8 1.01920549 F
9 0.04543718 F
10 1.57577959 F
which(is.na(don),arr.ind=TRUE)
row col
[1,] 3 1
[2,] 4 1
[3,] 6 1
[4,] 4 2
[5,] 5 2
## load the rpart package and its kyphosis data set
library(rpart)
data(kyphosis)
## first call: draws the boxplot of the Number column
boxplot(kyphosis[,"Number"])
## second call: draws it again and keeps the value returned by boxplot()
resultat <- boxplot(kyphosis[,"Number"])
## the `out` component of that value holds the outlying observations
valaberrante <- resultat$out
valaberrante
[1] 9 10
which(kyphosis[,"Number"]%in%valaberrante)
[1] 43 53
X <- data.frame(C1=c("a","b","b","a","a"),C2=c(1,2,2,3,1))
X
C1 C2
1 a 1
2 b 2
3 b 2
4 a 3
5 a 1
unique(X)
C1 C2
1 a 1
2 b 2
4 a 3
duplicated(X)
[1] FALSE FALSE TRUE FALSE TRUE
X[duplicated(X),]
C1 C2
3 b 2
5 a 1
2.5 Concaténer des tableaux de données
## 2 x 2 matrix filled column by column, with row names ligne1, ligne2 and
## column names X1, X2 (paste0() replaces paste(..., sep = ""))
X <- matrix(c(1, 2, 3, 4), nrow = 2, ncol = 2)
rownames(X) <- paste0("ligne", 1:2)
colnames(X) <- paste0("X", 1:2)
X
X1 X2
ligne1 1 3
ligne2 2 4
## 3 x 2 matrix filled column by column with 11..16; only the columns are
## named (Y1, Y2), the rows stay unnamed
## (paste0() replaces paste(..., sep = ""))
Y <- matrix(11:16, nrow = 3, ncol = 2)
colnames(Y) <- paste0("Y", 1:2)
Y
Y1 Y2
[1,] 11 14
[2,] 12 15
[3,] 13 16
Z <- rbind(X,Y)
Z
X1 X2
ligne1 1 3
ligne2 2 4
11 14
12 15
13 16
## with data frames, rbind() matches the columns by name and not by position:
## the columns of Yd are deliberately named in the reverse order ("X2", "X1")
## and are put back under the matching columns of Xd in the result
Xd <- data.frame(X)
Yd <- data.frame(Y)
colnames(Yd) <- c("X2","X1")
rbind(Xd,Yd)
X1 X2
ligne1 1 3
ligne2 2 4
1 14 11
2 15 12
3 16 13
## column-wise concatenation of a data frame built from X (2 x 2, named rows)
## and of the 2 x 3 matrix Y
## (paste0() replaces paste(..., sep = ""))
X <- matrix(c(1, 2, 3, 4), nrow = 2, ncol = 2)
rownames(X) <- paste0("ligne", 1:2)
Y <- matrix(11:16, nrow = 2, ncol = 3)
cbind(data.frame(X), Y)
X1 X2 1 2 3
ligne1 1 3 11 13 15
ligne2 2 4 12 14 16
## one row per individual: age, first name and city (city stored as a factor)
age <- c(7, 38, 32)
prenom <- c("arnaud", "nicolas", "laurent")
ville <- factor(c("rennes", "rennes", "marseille"))
## the column names are taken from the names of the vectors
indiv <- data.frame(age, prenom, ville, check.names = FALSE)
indiv
age prenom ville
1 7 arnaud rennes
2 38 nicolas rennes
3 32 laurent marseille
## one row per city with its population; the columns are named directly when
## the data frame is built
population <- c(200, 500, 800)
caractvilles <- cbind.data.frame(ville = c("rennes", "lyon", "marseille"),
                                 pop = population)
caractvilles
ville pop
1 rennes 200
2 lyon 500
3 marseille 800
## join the two tables on the key column "ville": each individual gets the
## population of his city; "lyon", which matches no individual, is absent from
## the result
merge(indiv,caractvilles,by="ville")
ville age prenom pop
1 marseille 32 laurent 800
2 rennes 7 arnaud 200
3 rennes 38 nicolas 200
2.6 Tableau croisé
## factor with five "Faible" followed by five "Forte"
tension <- factor(rep(c("Faible", "Forte"), each = 5))
tension
[1] Faible Faible Faible Faible Faible Forte Forte Forte Forte Forte
Levels: Faible Forte
## factor with three "Mer", three "Ang" and four "Tex"
laine <- factor(rep(c("Mer", "Ang", "Tex"), times = c(3, 3, 4)))
laine
[1] Mer Mer Mer Ang Ang Ang Tex Tex Tex Tex
Levels: Ang Mer Tex
don <- cbind.data.frame(tension,laine)
don
tension laine
1 Faible Mer
2 Faible Mer
3 Faible Mer
4 Faible Ang
5 Faible Ang
6 Forte Ang
7 Forte Tex
8 Forte Tex
9 Forte Tex
10 Forte Tex
table(don$tension,don$laine)
Ang Mer Tex
Faible 2 3 0
Forte 1 0 4
## same contingency table built from a formula; unlike the table() call on the
## two columns, the printed result carries the variable names as headers
tabcroise <- xtabs(~tension+laine,data=don)
tabcroise
laine
tension Ang Mer Tex
Faible 2 3 0
Forte 1 0 4
## back to a data frame: one row per combination of levels, with the count in
## the column Freq (combinations with a count of 0 are kept)
tabframe <- as.data.frame(tabcroise)
tabframe
tension laine Freq
1 Faible Ang 2
2 Forte Ang 1
3 Faible Mer 3
4 Forte Mer 0
5 Faible Tex 0
6 Forte Tex 4
LS0tDQp0aXRsZTogIkNoYXBpdHJlIDIgOiBtYW5pcHVsZXIgbGVzIGRvbm7DqWVzIg0KYXV0aG9yOiAiSHVzc29uIGV0IGFsLiINCmRhdGU6ICIwOS8wOS8yMDE4Ig0Kb3V0cHV0Og0KICBodG1sX25vdGVib29rOg0KICAgIHRvYzogeWVzDQogICAgdG9jX2RlcHRoOiAzDQogICAgdG9jX2Zsb2F0OiB5ZXMNCiAgaHRtbF9kb2N1bWVudDoNCiAgICB0b2M6IHllcw0KICAgIHRvY19kZXB0aDogJzMnDQogICAgdG9jX2Zsb2F0OiB5ZXMNCi0tLQ0KDQpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0NCmtuaXRyOjpvcHRzX2NodW5rJHNldChlY2hvID0gVFJVRSwgY2FjaGUgPSBUUlVFKQ0KYGBgDQoNCiMgMi4xLjEgSW1wb3J0YXRpb24gZOKAmXVuIGZpY2hpZXIgdGV4dGUNCg0KYGBge3IsbWVzc2FnZT1GQUxTRSx3YXJuaW5nPUZBTFNFfQ0KZGVjYXRoIDwtIHJlYWQudGFibGUoImh0dHBzOi8vci1zdGF0LXNjLWRvbm5lZXMuZ2l0aHViLmlvL2RlY2F0aGxvbi5jc3YiLA0KICAgICAgICAgICAgICAgICAgICAgc2VwPSI7IixkZWM9Ii4iLGhlYWRlcj1ULHJvdy5uYW1lcz0xKQ0KYGBgDQoNCmBgYHtyfQ0KdGFibG8gPC0gcmVhZC50YWJsZSgiaHR0cHM6Ly9yLXN0YXQtc2MtZG9ubmVlcy5naXRodWIuaW8vZG9ubmVlczIuY3N2IixzZXA9IiAiLGhlYWRlcj1UUlVFLA0KICAgICAgICAgICAgICAgICAgICBuYS5zdHJpbmdzID0gIi4iKQ0Kc3VtbWFyeSh0YWJsbykNCmBgYA0KDQpgYGB7cn0NCmRhdGEoaXJpcykNCmBgYA0KDQojIDIuMyBNYW5pcHVsZXIgbGVzIHZhcmlhYmxlcw0KDQpgYGB7cn0NClggPC0gYyhyZXAoMTAsMykscmVwKDEyLDIpLHJlcCgxMyw0KSkNClgNCmlzLmZhY3RvcihYKQ0KaXMubnVtZXJpYyhYKQ0Kc3VtbWFyeShYKQ0KWHF1YWwgPC0gZmFjdG9yKFgpDQpYcXVhbA0Kc3VtbWFyeShYcXVhbCkNCmBgYA0KYGBge3J9DQojIyBjb252ZXJzaW9uIGF2ZWMgcmVjb2RhZ2UgZGVzIG1vZGFsaXTDqXMNCmFzLm51bWVyaWMoWHF1YWwpDQojIyBjb252ZXJzaW9uIHNhbnMgcmVjb2RhZ2UgZGVzIG1vZGFsaXTDqXMgOiAyIMOpdGFwZXMNCnByb3Zpc29pcmUgPC0gYXMuY2hhcmFjdGVyKFhxdWFsKQ0KcHJvdmlzb2lyZQ0KYXMubnVtZXJpYyhwcm92aXNvaXJlKQ0KYGBgDQpgYGB7cn0NCnNldC5zZWVkKDY1NCkgIyMgZml4ZSBsYSBncmFpbmUgZHUgZ8OpbsOpcmF0ZXVyIHBzZXVkby1hbMOpYXRvaXJlDQpYIDwtIHJub3JtKDE1LG1lYW49MCxzZD0xKQ0KWA0KWHF1YWwgPC0gY3V0KFgsYnJlYWtzPWMobWluKFgpLC0wLjIsMC4yLG1heChYKSksaW5jbHVkZS5sb3dlc3Q9VFJVRSkNClhxdWFsDQp0YWJsZShYcXVhbCkNClhxdWFsDQpkZWNvdXBlIDwtIHF1YW50aWxlKFgscHJvYnM9c2VxKDAsMSxsZW5ndGg9NCkpDQpYcXVhbCA8LSBjdXQoWCxicmVha3M9ZGVjb3VwZSxpbmNsdWRlLmxvd2VzdD1UUlVFKQ0KdGFibGUoWHF1YWwpDQpsZXZlbHMoWHF1YWwpIDwtIGMoIm5pdjEiLCJuaXYyIiwibml2MyIpDQpYcXVhbA0KbGV2ZWxz
KFhxdWFsKSA8LSBjKCJuaXYxKzMiLCJuaXYyIiwibml2MSszIikNClhxdWFsDQpYIDwtIGMoMSwxLDIsMiwyLDMpDQpYcXVhbCA8LSBmYWN0b3IoWCxsYWJlbD1jKCJjbGFzc2lxdWUiLCJub3V2ZWF1IiwicGxhY2VibyIpKQ0KWHF1YWwNClhxdWFsMiA8LSByZWxldmVsKFhxdWFsLHJlZj0icGxhY2VibyIpDQpYcXVhbDINClhxdWFsMyA8LSBmYWN0b3IoWHF1YWwsbGV2ZWxzPWMoInBsYWNlYm8iLCJub3V2ZWF1IiwiY2xhc3NpcXVlIikpDQpYcXVhbDMNCmZhY3RldXIgPC0gZmFjdG9yKGMocmVwKCJBIiwzKSwiQiIscmVwKCJDIiw0KSkpDQpmYWN0ZXVyDQpmYWN0ZXVyMiA8LSBmYWN0ZXVyWy00XQ0KZmFjdGV1cjINCmZhY3RldXIyIDwtIGFzLmNoYXJhY3RlcihmYWN0ZXVyMikNCmZhY3RldXIyIDwtIGZhY3RvcihmYWN0ZXVyMikNCmZhY3RldXIyDQpmYWN0ZXVyMyA8LSBmYWN0ZXVyWy00LGRyb3A9VFJVRV0NCmZhY3RldXIzDQpgYGANCg0KIyAyLjQgTWFuaXB1bGVyIGxlcyBpbmRpdmlkdXMNCg0KYGBge3J9DQpzZXQuc2VlZCgyMykNCnZhcmlhYmxlIDwtIHJub3JtKDEwLG1lYW49MCxzZD0xKQ0KdmFyaWFibGVbYygzLDQsNildIDwtIE5BDQpzZWxlY3QgPC0gaXMubmEodmFyaWFibGUpDQpzZWxlY3QNCndoaWNoKHNlbGVjdCkNCnZhcmlhYmxlMiA8LSB2YXJpYWJsZVshc2VsZWN0XQ0KdmFyaWFibGUyDQp2YXJpYWJsZTMgPC0gdmFyaWFibGVbLXdoaWNoKHNlbGVjdCldDQphbGwuZXF1YWwodmFyaWFibGUyLHZhcmlhYmxlMykNCnZhcnF1YWwgPC0gZmFjdG9yKGMocmVwKCJNIiwzKSxOQSxOQSxyZXAoIkYiLDUpKSkNCmRvbiA8LSBjYmluZC5kYXRhLmZyYW1lKHZhcmlhYmxlLHZhcnF1YWwpDQpkb24NCnN1bW1hcnkoZG9uKQ0Kc2VsZWN0IDwtIGlzLm5hKGRvbikNCnNlbGVjdA0KYWVsaW1pbmVyIDwtIGFwcGx5KHNlbGVjdCxNQVJHSU49MSxGVU49YW55KQ0KYWVsaW1pbmVyDQpkb24yIDwtIGRvblshYWVsaW1pbmVyLF0NCmRvbjINCndoaWNoKGlzLm5hKGRvbiksYXJyLmluZD1UUlVFKQ0KYGBgDQoNCmBgYHtyLG1lc3NhZ2U9RkFMU0Usd2FybmluZz1GQUxTRX0NCmxpYnJhcnkocnBhcnQpDQpkYXRhKGt5cGhvc2lzKQ0KYm94cGxvdChreXBob3Npc1ssIk51bWJlciJdKQ0KcmVzdWx0YXQgPC0gYm94cGxvdChreXBob3Npc1ssIk51bWJlciJdKQ0KdmFsYWJlcnJhbnRlIDwtIHJlc3VsdGF0JG91dA0KdmFsYWJlcnJhbnRlDQp3aGljaChreXBob3Npc1ssIk51bWJlciJdJWluJXZhbGFiZXJyYW50ZSkNClggPC0gZGF0YS5mcmFtZShDMT1jKCJhIiwiYiIsImIiLCJhIiwiYSIpLEMyPWMoMSwyLDIsMywxKSkNClgNCnVuaXF1ZShYKQ0KZHVwbGljYXRlZChYKQ0KWFtkdXBsaWNhdGVkKFgpLF0NCmBgYA0KDQojIDIuNSBDb25jYXTDqW5lciBkZXMgdGFibGVhdXggZGUgZG9ubsOpZXMNCg0KYGBge3J9DQpYIDwtIG1hdHJpeChjKDEsMiwzLDQpLDIsMikNCnJvd25hbWVzKFgpIDwtIHBhc3RlKCJsaWduZSIsMToyLHNlcD0iIikNCmNv
bG5hbWVzKFgpIDwtIHBhc3RlKCJYIiwxOjIsc2VwPSIiKQ0KWA0KWSA8LSBtYXRyaXgoMTE6MTYsMywyKQ0KY29sbmFtZXMoWSkgPC0gcGFzdGUoIlkiLDE6MixzZXA9IiIpDQpZDQpaIDwtIHJiaW5kKFgsWSkNCloNClhkIDwtIGRhdGEuZnJhbWUoWCkNCllkIDwtIGRhdGEuZnJhbWUoWSkNCmNvbG5hbWVzKFlkKSA8LSBjKCJYMiIsIlgxIikNCnJiaW5kKFhkLFlkKQ0KWCA8LSBtYXRyaXgoYygxLDIsMyw0KSwyLDIpDQpyb3duYW1lcyhYKSA8LSBwYXN0ZSgibGlnbmUiLDE6MixzZXA9IiIpDQpZIDwtIG1hdHJpeCgxMToxNiwyLDMpDQpjYmluZChkYXRhLmZyYW1lKFgpLFkpDQpgYGANCmBgYHtyfQ0KYWdlIDwtIGMoNywzOCwzMikNCnByZW5vbSA8LSBjKCJhcm5hdWQiLCJuaWNvbGFzIiwibGF1cmVudCIpDQp2aWxsZSA8LSBmYWN0b3IoYygicmVubmVzIiwicmVubmVzIiwibWFyc2VpbGxlIikpDQppbmRpdiA8LSBjYmluZC5kYXRhLmZyYW1lKGFnZSxwcmVub20sdmlsbGUpDQppbmRpdg0KcG9wdWxhdGlvbiA8LSBjKDIwMCw1MDAsODAwKQ0KY2FyYWN0dmlsbGVzIDwtIGNiaW5kLmRhdGEuZnJhbWUoYygicmVubmVzIiwibHlvbiIsIm1hcnNlaWxsZSIpLHBvcHVsYXRpb24pDQpuYW1lcyhjYXJhY3R2aWxsZXMpIDwtIGMoInZpbGxlIiwicG9wIikNCmNhcmFjdHZpbGxlcw0KbWVyZ2UoaW5kaXYsY2FyYWN0dmlsbGVzLGJ5PSJ2aWxsZSIpDQpgYGANCg0KIyAyLjYgVGFibGVhdSBjcm9pc8OpDQoNCmBgYHtyfQ0KdGVuc2lvbiA8LSBmYWN0b3IoYyhyZXAoIkZhaWJsZSIsNSkscmVwKCJGb3J0ZSIsNSkpKQ0KdGVuc2lvbg0KbGFpbmUgPC0gZmFjdG9yKGMocmVwKCJNZXIiLDMpLHJlcCgiQW5nIiwzKSxyZXAoIlRleCIsNCkpKQ0KbGFpbmUNCmRvbiA8LSBjYmluZC5kYXRhLmZyYW1lKHRlbnNpb24sbGFpbmUpDQpkb24NCnRhYmxlKGRvbiR0ZW5zaW9uLGRvbiRsYWluZSkNCnRhYmNyb2lzZSA8LSB4dGFicyh+dGVuc2lvbitsYWluZSxkYXRhPWRvbikNCnRhYmNyb2lzZQ0KdGFiZnJhbWUgPC0gYXMuZGF0YS5mcmFtZSh0YWJjcm9pc2UpDQp0YWJmcmFtZQ0KYGBgDQoNCg==