library(kernlab)
data(spam)
# Data formatting for gbm: recode the factor `type` as a numeric 0/1 response
# Recode the response: as.numeric() on the factor yields its level codes
# (1, 2, ...); subtracting 1 shifts them to start at 0.
# NOTE(review): presumably `type` has exactly two levels, so the result is
# 0/1 as required by the adaboost/bernoulli losses used later -- confirm.
spam$type <- as.numeric(spam$type) - 1

# Random train/validation split: 3000 rows for training, the rest for
# validation. The row count is taken from the data instead of being
# hard-coded, so the split stays valid if the dataset changes.
set.seed(5678)
n.app <- 3000
perm <- sample(nrow(spam), n.app)
app <- spam[perm, ]
valid <- spam[-perm, ]
library(gbm)
# Fit a boosted model with the adaboost loss on the training sample and
# print it (the fit is not stored). The seed fixes gbm's internal randomness.
set.seed(1234)
gbm(
  formula = type ~ .,
  distribution = "adaboost",
  data = app,
  n.trees = 3000,
  shrinkage = 0.01
)
gbm(formula = type ~ ., distribution = "adaboost", data = app,
n.trees = 3000, shrinkage = 0.01)
A gradient boosted model with adaboost loss function.
3000 iterations were performed.
There were 57 predictors of which 36 had non-zero influence.
# Boosted model with the adaboost loss, fitted with 5-fold cross-validation
# so that the number of iterations can be selected afterwards.
set.seed(1234)
mod.ada <- gbm(
  formula = type ~ .,
  distribution = "adaboost",
  data = app,
  n.trees = 3000,
  shrinkage = 0.01,
  cv.folds = 5
)

# Same setup with the bernoulli (logistic) loss; note the larger shrinkage.
set.seed(567)
mod.logit <- gbm(
  formula = type ~ .,
  distribution = "bernoulli",
  data = app,
  n.trees = 3000,
  shrinkage = 0.05,
  cv.folds = 5
)
# Number of iterations selected by cross-validation for the adaboost model.
Mopt.ada <- gbm.perf(mod.ada, method = "cv")
print(Mopt.ada)
[1] 1740
# Number of iterations selected by cross-validation for the bernoulli model.
Mopt.logit <- gbm.perf(mod.logit, method = "cv")
print(Mopt.logit)
[1] 1007
# Response-scale predictions of the adaboost model on the validation set,
# using the CV-selected number of trees for that model.
prev.ada <- predict(
  mod.ada,
  newdata = valid,
  n.trees = Mopt.ada,
  type = "response"
)
head(round(prev.ada, digits = 3))
[1] 0.998 0.421 0.996 0.746 0.963 0.998
# Response-scale predictions of the bernoulli model on the validation set.
# Use this model's own CV-selected number of trees (Mopt.logit); the
# original call passed Mopt.ada, the optimum of the adaboost model.
# NOTE: the outputs recorded in this transcript were produced with the
# original call and may differ slightly once the script is re-run.
prev.logit <- predict(
  mod.logit,
  newdata = valid,
  n.trees = Mopt.logit,
  type = "response"
)
head(round(prev.logit, digits = 3))
[1] 0.997 0.812 0.999 0.953 0.997 0.999
# Gather both models' predicted scores next to the observed 0/1 response.
prev.prob <- data.frame(
  ada = prev.ada,
  logit = prev.logit,
  obs = valid[["type"]]
)
round(head(prev.prob), digits = 3)
ada logit obs
1 0.998 0.997 1
2 0.421 0.812 1
3 0.996 0.999 1
4 0.746 0.953 1
5 0.963 0.997 1
6 0.998 0.999 1
# Turn scores into 0/1 class predictions by rounding to the nearest integer
# (i.e. a 0.5 threshold); the `obs` column is already 0/1 and is unchanged.
prev.class <- round(prev.prob, digits = 0)
print(head(prev.class))
ada logit obs
1 1 1 1
2 0 1 1
3 1 1 1
4 1 1 1
5 1 1 1
6 1 1 1
library(tidyverse)
# Misclassification rate of each method on the validation set: proportion of
# predicted classes that differ from the observed class.
# across() replaces the deprecated summarise_all(funs(...)) idiom and only
# touches the two prediction columns, so no `obs_err` column has to be
# dropped afterwards (requires dplyr >= 1.0.0).
prev.class %>%
  summarise(
    across(c(ada, logit), ~ mean(obs != .x), .names = "{.col}_err")
  ) %>%
  round(3)
ada_err logit_err
1 0.069 0.062
library(plotROC)
# Long format for plotting: one row per (observation, method), with the
# method name in `Methode` and its predicted score in `score`.
df.roc <- gather(prev.prob, key = "Methode", value = "score", ada, logit)

# ROC curves of both methods on the same axes, one colour per method.
ggplot(df.roc, aes(d = obs, m = score, color = Methode)) +
  geom_roc() +
  theme_classic()
library(pROC)
# Area under the ROC curve on the validation set, computed per method.
df.roc %>%
  group_by(Methode) %>%
  summarise(AUC = pROC::auc(obs, score))
# A tibble: 2 x 2
Methode AUC
<chr> <dbl>
1 ada 0.978
2 logit 0.981
# First ten rows of the variable-importance table (columns `var`, `rel.inf`)
# returned by summary() for the bernoulli model.
summary(mod.logit)[seq_len(10), ]
var rel.inf
charExclamation charExclamation 22.107888
charDollar charDollar 18.328872
remove remove 12.903802
free free 7.229789
your your 6.039477
hp hp 4.981167
capitalAve capitalAve 4.909642
capitalLong capitalLong 4.014214
report report 2.208803
our our 2.091343