# 8. 5-fold cross-validation for SUPPORT VECTOR MACHINE 1
# Load the cleaned data set. The colClasses vector drops the first CSV column
# ("NULL") and lets read.csv infer the type of the following 13 columns (NA).
data <- read.csv(
  "data/CleanedData.csv",
  header = TRUE,
  colClasses = c("NULL", rep(NA, 13))
)

# Recode Sex as a numeric indicator: 1 for "m", 0 for any other value.
data$Sex <- ifelse(data$Sex == "m", 1, 0)
# V-fold cross-validation of an SVM fitted to `data`.
# For every fold: the predictors are standardised with statistics from the
# training rows only, gamma and cost are tuned by an inner cross-validation on
# the training rows, and the AUC and misclassification rate are recorded on
# the held-out rows.
set.seed(700)
V <- 5
n <- NROW(data)
n0 <- sum(data$Category == 0)
# Every row that is not Category 0 is counted as Category 1.
n1 <- n - n0

# One slot per fold, filled inside the loop.
mis.class.rate <- numeric(V)
discri.rate <- numeric(V)

# Fold labels are drawn separately within each Category value, so rows of both
# classes are spread across the folds.
id.fold <- seq_len(n)
id.fold[data$Category == 0] <- sample(
  x = seq_len(V), size = n0, replace = TRUE
)
id.fold[data$Category == 1] <- sample(
  x = seq_len(V), size = n1, replace = TRUE
)

for (v in seq_len(V)) {
  train.v <- data[id.fold != v, ]
  test.v <- data[id.fold == v, ]

  # Column 1 is read back as the observed response below (presumably
  # Category -- confirm against the CSV layout); the rest are predictors.
  X.train <- train.v[, -1]
  X.test <- test.v[, -1]

  # Standardise with training-set centres and scales, then apply those same
  # values to the test rows so no test information enters the scaling.
  # The result is written back to exactly the columns it was taken from.
  scale.train <- scale(X.train, center = TRUE, scale = TRUE)
  train.v[, -1] <- as.data.frame(scale.train)
  test.v[, -1] <- as.data.frame(scale(
    X.test,
    center = attr(scale.train, "scaled:center"),
    scale = attr(scale.train, "scaled:scale")
  ))
  yobs <- test.v[, 1]

  ## Tuning parameters and best parameters
  # Grid search over gamma and cost with an inner V-fold cross-validation.
  tunpar <- tune.svm(
    Category ~ .,
    data = train.v,
    gamma = 10^(-10:-5),
    cost = 10^(-4:4),
    nrepeat = 1,
    tunecontrol = tune.control(sampling = "cross", cross = V)
  )
  # Select by name so the values cannot be swapped if the column order of
  # best.parameters ever differs.
  bestGamma <- tunpar$best.parameters[["gamma"]]
  bestC <- tunpar$best.parameters[["cost"]]

  fit.svm <- svm(Category ~ ., data = train.v, cost = bestC, gamma = bestGamma)

  # 0/1 prediction: predictions above 0.5 are counted as class 1.
  pred.svm <- predict(fit.svm, test.v[, -1])

  discri.rate[v] <- auc(yobs, pred.svm)
  print(paste("AUC for fold", v, ":", discri.rate[v]))

  mis.class.rate[v] <- mean(yobs != (pred.svm > 0.5))
  print(paste("Missclassification rate for fold", v, ":", mis.class.rate[v]))
}
## [1] "AUC for fold 1 : 0.978538579458355"
## [1] "Missclassification rate for fold 1 : 0.0819672131147541"
## [1] "AUC for fold 2 : 0.878434065934066"
## [1] "Missclassification rate for fold 2 : 0.08"
## [1] "AUC for fold 3 : 0.971296296296296"
## [1] "Missclassification rate for fold 3 : 0.0593220338983051"
## [1] "AUC for fold 4 : 0.993362831858407"
## [1] "Missclassification rate for fold 4 : 0.0852713178294574"
## [1] "AUC for fold 5 : 0.96210407239819"
## [1] "Missclassification rate for fold 5 : 0.0909090909090909"
# pred.svm is reassigned on every loop iteration, so this plots the
# predictions from the last fold only.
plot(pred.svm)

# AVERAGE AUC AND MISCLASSIFICATION RATE
# Numeric averages over the folds, kept for later use.
AUC.SVM1 <- mean(discri.rate)
Miss.rate.SVM1 <- mean(mis.class.rate)

# print() returns its argument invisibly, so these hold the printed strings.
Average.AUC.svm1 <- print(paste("Average of AUC:", AUC.SVM1))
## [1] "Average of AUC: 0.956747169189063"
Average.Misclas.svm1 <- print(paste("Average of Miss:", Miss.rate.SVM1))
## [1] "Average of Miss: 0.0794939311503215"
# Note 5
# SVM AUC: 0.96, SVM Misclassification Rate: 0.079