1. Importer les données

# Load kernlab, which ships the classic `spam` dataset (4601 emails described
# by 57 numeric predictors plus the class label `type`), and inspect the last
# three columns (two capital-letter run-length features and the target).
library(kernlab)
data(spam)
summary(spam[,56:58])
  capitalLong       capitalTotal          type     
 Min.   :   1.00   Min.   :    1.0   nonspam:2788  
 1st Qu.:   6.00   1st Qu.:   35.0   spam   :1813  
 Median :  15.00   Median :   95.0                 
 Mean   :  52.17   Mean   :  283.3                 
 3rd Qu.:  43.00   3rd Qu.:  266.0                 
 Max.   :9989.00   Max.   :15841.0                 
# Reproducible split of the 4601 rows: 3000 go to the training set `app`,
# the remaining 1601 to the validation set `valid`.
set.seed(5678)
idx.train <- sample(4601, 3000)
app <- spam[idx.train, ]
valid <- spam[-idx.train, ]

2. Construire et analyser un algorithme de SVM

# Fit a linear ("vanilladot") kernel SVM on the training set with cost C = 1.
mod.svm <- ksvm(type~.,data=app,kernel="vanilladot",C=1)
 Setting default kernel parameters  
# Print the fitted model: SV type, cost, number of support vectors,
# objective function value and training error.
mod.svm
Support Vector Machine object of class "ksvm" 

SV type: C-svc  (classification) 
 parameter : cost C = 1 

Linear (vanilla) kernel function. 

Number of Support Vectors : 599 

Objective Function Value : -546.7826 
Training error : 0.067 

3. Sélectionner les paramètres d’un SVM

# Candidate hyper-parameter values for tuning.
# C: cost values; degree: polynomial degrees; scale: held fixed at 1;
# sigma: RBF kernel bandwidths (used for the radial grid below).
C <- c(0.1,1,10,100)
degree <- c(1,2,3)
scale <- 1
sigma <- c(0.0001,0.001,0.01,0.1,1)
library(caret)
# Full (C, degree, scale) grid for the polynomial-kernel SVM.
gr.poly <- expand.grid(C=C,degree=degree,scale=scale)
# 3-fold cross-validation for model selection.
ctrl <- trainControl(method="cv",number=3)
set.seed(123)
# Tune the polynomial-kernel SVM over the grid, selecting by CV accuracy.
sel.poly <- train(type~.,data=app,method="svmPoly",trControl=ctrl,tuneGrid=gr.poly)
sel.poly
Support Vector Machines with Polynomial Kernel 

3000 samples
  57 predictor
   2 classes: 'nonspam', 'spam' 

No pre-processing
Resampling: Cross-Validated (3 fold) 
Summary of sample sizes: 2000, 2000, 2000 
Resampling results across tuning parameters:

  C      degree  Accuracy   Kappa    
    0.1  1       0.9270000  0.8461040
    0.1  2       0.9100000  0.8105364
    0.1  3       0.9083333  0.8077676
    1.0  1       0.9280000  0.8479742
    1.0  2       0.8936667  0.7774023
    1.0  3       0.8920000  0.7745405
   10.0  1       0.9270000  0.8460247
   10.0  2       0.8853333  0.7608912
   10.0  3       0.8830000  0.7553775
  100.0  1       0.9240000  0.8395595
  100.0  2       0.8806667  0.7518021
  100.0  3       0.8763333  0.7413648

Tuning parameter 'scale' was held constant at a value of 1
Accuracy was used to select the optimal model using the largest value.
The final values used for the model were degree = 1, scale = 1 and C = 1.
# Full (C, sigma) grid for the RBF-kernel SVM, tuned with the same
# 3-fold CV control object and selected by accuracy.
gr.radial <- expand.grid(C=C,sigma=sigma)
set.seed(345)
sel.radial <- train(type~.,data=app,method="svmRadial",trControl=ctrl,tuneGrid=gr.radial)
sel.radial
Support Vector Machines with Radial Basis Function Kernel 

3000 samples
  57 predictor
   2 classes: 'nonspam', 'spam' 

No pre-processing
Resampling: Cross-Validated (3 fold) 
Summary of sample sizes: 2000, 2000, 2000 
Resampling results across tuning parameters:

  C      sigma  Accuracy   Kappa    
    0.1  1e-04  0.6060000  0.0000000
    0.1  1e-03  0.7663333  0.4578780
    0.1  1e-02  0.8990000  0.7825103
    0.1  1e-01  0.7610000  0.4447726
    0.1  1e+00  0.6060000  0.0000000
    1.0  1e-04  0.7723333  0.4734765
    1.0  1e-03  0.9000000  0.7855698
    1.0  1e-02  0.9276667  0.8470048
    1.0  1e-01  0.9050000  0.7958078
    1.0  1e+00  0.7576667  0.4321881
   10.0  1e-04  0.8970000  0.7790195
   10.0  1e-03  0.9280000  0.8477454
   10.0  1e-02  0.9353333  0.8636455
   10.0  1e-01  0.9040000  0.7948708
   10.0  1e+00  0.7626667  0.4475051
  100.0  1e-04  0.9203333  0.8315819
  100.0  1e-03  0.9333333  0.8594359
  100.0  1e-02  0.9283333  0.8486409
  100.0  1e-01  0.8966667  0.7797074
  100.0  1e+00  0.7603333  0.4430619

Accuracy was used to select the optimal model using the largest value.
The final values used for the model were sigma = 0.01 and C = 10.
# Refit the two SVMs with the CV-selected hyper-parameters, requesting a
# probability model (Platt scaling) so class probabilities can be predicted.
# Polynomial kernel: degree = 1, scale = 1, C = 1 (best values from sel.poly).
mod.poly <- ksvm(type~.,data=app,kernel="polydot",kpar=list(degree=1,scale=1,offset=1),C=1,prob.model = TRUE)
# Radial kernel: sigma = 0.01, C = 10 (best values from sel.radial).
mod.radial <- ksvm(type~.,data=app,kernel="rbfdot",kpar=list(sigma=0.01),C=10,prob.model = TRUE)

4. Faire de la prévision

# Predicted classes on the validation set for both models.
prev.class.poly <- predict(mod.poly,newdata=valid)
prev.class.radial <- predict(mod.radial,newdata=valid)
prev.class.poly[1:10]
 [1] spam    spam    spam    spam    spam    spam    spam    spam    nonspam spam   
Levels: nonspam spam
prev.class.radial[1:10]
 [1] spam    spam    spam    spam    spam    spam    spam    spam    nonspam spam   
Levels: nonspam spam
# Class-membership probabilities (available because prob.model = TRUE);
# one column per class, here "nonspam" and "spam".
prev.prob.poly <- predict(mod.poly,newdata=valid,type="probabilities")
prev.prob.radial <- predict(mod.radial,newdata=valid,type="probabilities")
round(head(prev.prob.poly),3)
     nonspam  spam
[1,]   0.056 0.944
[2,]   0.412 0.588
[3,]   0.000 1.000
[4,]   0.172 0.828
[5,]   0.005 0.995
[6,]   0.066 0.934

5. Estimer les performances de l’algorithme

library(tidyverse)
# Misclassification rate of each method on the validation set.
prev.class <- data.frame(poly=prev.class.poly,radial=prev.class.radial,obs=valid$type)
# funs() is deprecated (dplyr >= 0.8, defunct in 1.1): use across() with a
# lambda instead, restricting to the prediction columns so the spurious
# obs-vs-obs error (and the select(-obs_err) step) is no longer needed.
prev.class %>%
  summarise(across(c(poly, radial), ~ mean(obs != .x), .names = "{.col}_err")) %>%
  round(3)
  poly_err radial_err
1    0.082      0.077
library(plotROC)
# Score = estimated P(spam) (second probability column) for each method.
prev.prob <- data.frame(poly=prev.prob.poly[,2],radial=prev.prob.radial[,2],obs=valid$type)
# gather() is superseded; pivot_longer() is the current tidyr idiom for
# reshaping to long format (one row per observation x method).
df.roc <- prev.prob %>%
  pivot_longer(cols = c(poly, radial), names_to = "Methode", values_to = "score")
# ROC curves: d = observed class, m = score, one curve per method.
ggplot(df.roc)+aes(d=obs,m=score,color=Methode)+geom_roc()+theme_classic()

# Area under the ROC curve for each method on the validation sample;
# pROC:: is qualified explicitly because pROC and other attached packages
# can mask each other's auc().
library(pROC)
df.roc %>% group_by(Methode) %>% summarize(AUC=pROC::auc(obs,score))
# A tibble: 2 x 2
  Methode   AUC
  <chr>   <dbl>
1 poly    0.966
2 radial  0.973
LS0tDQp0aXRsZTogIlNWTSINCmF1dGhvcjogIkh1c3NvbiBldCBhbC4iDQpkYXRlOiAiNiBzZXB0ZW1icmUgMjAxOCINCm91dHB1dDoNCiAgaHRtbF9ub3RlYm9vazoNCiAgICB0b2M6IHllcw0KICAgIHRvY19kZXB0aDogMw0KICAgIHRvY19mbG9hdDogeWVzDQogIGh0bWxfZG9jdW1lbnQ6DQogICAgdG9jOiB5ZXMNCiAgICB0b2NfZGVwdGg6ICczJw0KICAgIHRvY19mbG9hdDogeWVzDQotLS0NCg0KIyAxLiBJbXBvcnRlciBsZXMgZG9ubsOpZXMNCg0KYGBge3IsbWVzc2FnZT1GQUxTRSx3YXJuaW5nPUZBTFNFfQ0KbGlicmFyeShrZXJubGFiKQ0KZGF0YShzcGFtKQ0Kc3VtbWFyeShzcGFtWyw1Njo1OF0pDQpgYGANCg0KYGBge3J9DQpzZXQuc2VlZCg1Njc4KQ0KcGVybSA8LSBzYW1wbGUoNDYwMSwzMDAwKQ0KYXBwIDwtIHNwYW1bcGVybSxdDQp2YWxpZCA8LSBzcGFtWy1wZXJtLF0NCmBgYA0KDQojIDIuIENvbnN0cnVpcmUgZXQgYW5hbHlzZXIgdW4gYWxnb3JpdGhtZSBkZSBTVk0NCg0KYGBge3IsbWVzc2FnZT1GQUxTRSx3YXJuaW5nPUZBTFNFfQ0KbW9kLnN2bSA8LSBrc3ZtKHR5cGV+LixkYXRhPWFwcCxrZXJuZWw9InZhbmlsbGFkb3QiLEM9MSkNCm1vZC5zdm0NCmBgYA0KDQojIDMuIFPDqWxlY3Rpb25uZXIgbGVzIHBhcmFtw6h0cmVzIGTigJl1biBTVk0NCg0KYGBge3J9DQpDIDwtIGMoMC4xLDEsMTAsMTAwKQ0KZGVncmVlIDwtIGMoMSwyLDMpDQpzY2FsZSA8LSAxDQpzaWdtYSA8LSBjKDAuMDAwMSwwLjAwMSwwLjAxLDAuMSwxKQ0KYGBgDQoNCg0KYGBge3IsbWVzc2FnZT1GQUxTRSx3YXJuaW5nPUZBTFNFfQ0KbGlicmFyeShjYXJldCkNCmdyLnBvbHkgPC0gZXhwYW5kLmdyaWQoQz1DLGRlZ3JlZT1kZWdyZWUsc2NhbGU9c2NhbGUpDQpjdHJsIDwtIHRyYWluQ29udHJvbChtZXRob2Q9ImN2IixudW1iZXI9MykNCnNldC5zZWVkKDEyMykNCnNlbC5wb2x5IDwtIHRyYWluKHR5cGV+LixkYXRhPWFwcCxtZXRob2Q9InN2bVBvbHkiLHRyQ29udHJvbD1jdHJsLHR1bmVHcmlkPWdyLnBvbHkpDQpzZWwucG9seQ0KDQpnci5yYWRpYWwgPC0gZXhwYW5kLmdyaWQoQz1DLHNpZ21hPXNpZ21hKQ0Kc2V0LnNlZWQoMzQ1KQ0Kc2VsLnJhZGlhbCA8LSB0cmFpbih0eXBlfi4sZGF0YT1hcHAsbWV0aG9kPSJzdm1SYWRpYWwiLHRyQ29udHJvbD1jdHJsLHR1bmVHcmlkPWdyLnJhZGlhbCkNCnNlbC5yYWRpYWwNCmBgYA0KDQpgYGB7cn0NCm1vZC5wb2x5IDwtIGtzdm0odHlwZX4uLGRhdGE9YXBwLGtlcm5lbD0icG9seWRvdCIsa3Bhcj1saXN0KGRlZ3JlZT0xLHNjYWxlPTEsb2Zmc2V0PTEpLEM9MSxwcm9iLm1vZGVsID0gVFJVRSkNCm1vZC5yYWRpYWwgPC0ga3N2bSh0eXBlfi4sZGF0YT1hcHAsa2VybmVsPSJyYmZkb3QiLGtwYXI9bGlzdChzaWdtYT0wLjAxKSxDPTEwLHByb2IubW9kZWwgPSBUUlVFKQ0KDQpgYGANCg0KIyA0LiBGYWlyZSBkZSBsYSBwcsOpdmlzaW9uDQoNCmBgYHtyfQ0KcHJldi5jbGFzcy5wb2x5IDwtIHByZWRpY3QobW9k
LnBvbHksbmV3ZGF0YT12YWxpZCkNCnByZXYuY2xhc3MucmFkaWFsIDwtIHByZWRpY3QobW9kLnJhZGlhbCxuZXdkYXRhPXZhbGlkKQ0KcHJldi5jbGFzcy5wb2x5WzE6MTBdDQpwcmV2LmNsYXNzLnJhZGlhbFsxOjEwXQ0KYGBgDQoNCmBgYHtyfQ0KcHJldi5wcm9iLnBvbHkgPC0gcHJlZGljdChtb2QucG9seSxuZXdkYXRhPXZhbGlkLHR5cGU9InByb2JhYmlsaXRpZXMiKQ0KcHJldi5wcm9iLnJhZGlhbCA8LSBwcmVkaWN0KG1vZC5yYWRpYWwsbmV3ZGF0YT12YWxpZCx0eXBlPSJwcm9iYWJpbGl0aWVzIikNCnJvdW5kKGhlYWQocHJldi5wcm9iLnBvbHkpLDMpDQpgYGANCg0KIyA1LiBFc3RpbWVyIGxlcyBwZXJmb3JtYW5jZXMgZGUgbOKAmWFsZ29yaXRobWUNCg0KYGBge3IsbWVzc2FnZT1GQUxTRSx3YXJuaW5nPUZBTFNFfQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpwcmV2LmNsYXNzIDwtIGRhdGEuZnJhbWUocG9seT1wcmV2LmNsYXNzLnBvbHkscmFkaWFsPXByZXYuY2xhc3MucmFkaWFsLG9icz12YWxpZCR0eXBlKQ0KcHJldi5jbGFzcyAlPiUgc3VtbWFyaXNlX2FsbChmdW5zKGVycj1tZWFuKG9icyE9LikpKSAlPiUgc2VsZWN0KC1vYnNfZXJyKSAlPiUgcm91bmQoMykNCmBgYA0KDQpgYGB7cixtZXNzYWdlPUZBTFNFLHdhcm5pbmc9RkFMU0V9DQpsaWJyYXJ5KHBsb3RST0MpDQpwcmV2LnByb2IgPC0gZGF0YS5mcmFtZShwb2x5PXByZXYucHJvYi5wb2x5WywyXSxyYWRpYWw9cHJldi5wcm9iLnJhZGlhbFssMl0sb2JzPXZhbGlkJHR5cGUpDQpkZi5yb2MgPC0gcHJldi5wcm9iICU+JSBnYXRoZXIoa2V5PU1ldGhvZGUsdmFsdWU9c2NvcmUscG9seSxyYWRpYWwpDQpnZ3Bsb3QoZGYucm9jKSthZXMoZD1vYnMsbT1zY29yZSxjb2xvcj1NZXRob2RlKStnZW9tX3JvYygpK3RoZW1lX2NsYXNzaWMoKQ0KDQpgYGANCg0KYGBge3IsbWVzc2FnZT1GQUxTRSx3YXJuaW5nPUZBTFNFfQ0KbGlicmFyeShwUk9DKQ0KZGYucm9jICU+JSBncm91cF9ieShNZXRob2RlKSAlPiUgc3VtbWFyaXplKEFVQz1wUk9DOjphdWMob2JzLHNjb3JlKSkNCmBgYA0KDQo=