Chapter 7 支持向量机(SMO算法)

require(magrittr)
require(purrrlyr)
require(ggplot2)
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:dplyr':
## 
##     vars
rm(list = ls())

dataset_created <- function(start = -2, end = 2, 
                            length = 2500, digits = 4,
                            sd = 2, seed = 201709) {
  set.seed(seed)
  kb <- rnorm(2, 0, 2) %>% round(., 2)
  k <- kb[1]
  b <- kb[2]
  x1 <- sample(seq(start, end, length.out = length))
  x1 <- round(x1, digits)
  x2 <- k * x1 + b 
  mean <- 0  #mean 必须为0 
  x_mat1 <- NULL
  x_mat2 <- NULL
  
  for (i in x1) {
    x2_ <- k * i + b + rnorm(1, mean = mean, sd = sd)
    x2_ <- round(x2_, digits)
    x2 <- k * i + b
    if (x2_ > x2 + 1) {
      x_mat1_ <- matrix(c(i, x2_, 1), 1,3)
      x_mat1 <- rbind(x_mat1, x_mat1_)
    } else if (x2_ < x2 - 1) {
      x_mat2_ <- matrix(c(i, x2_, -1), 1,3)
      x_mat2 <- rbind(x_mat2, x_mat2_)
    }
  }
  dataset <- rbind(x_mat1, x_mat2)
  return(dataset)
}
########################
#先判断输入的totalInputs是否是矩阵,
#如果是,再根据type的不同类型计算Output;
#如果不是,也根据type的不同类型计算Output;

kernel <- function(totalInputs, input, 
                   type = "gaussian") {
  degree <- 2
  sigma <- 1
  if (is.matrix(totalInputs)) {
    nInputs <- nrow(totalInputs)
    Output <- matrix(NA, nInputs, 1)
    input <- as.matrix(input)
    if (type == "linear") {
      for (i in seq(nInputs)) {
        totalInputs_i <- as.matrix(totalInputs[i, ])
        Output[i, 1] <- t(totalInputs_i) %*% input
      }
    } else if (type == "poly") {
      for (i in seq(nInputs)) {
        totalInputs_i <- as.matrix(totalInputs[i, ])
        Output[i, 1] <- (t(totalInputs_i) %*% input + 1)^degree
      }
    } else if (type == "gaussian") {
      for (i in seq(nInputs)) {
        totalInputs_i <- as.matrix(totalInputs[i, ])
        normvalue <- norm(totalInputs_i - input, type = "2")
        Output[i, 1] <- exp(-normvalue^2 / (2 * sigma^2))
      }
    }
  } else {
    totalInputs <- as.matrix(totalInputs)
    input <- as.matrix(input)
    if (type == "linear") {
      Output <- t(totalInputs) %*% input
      Output <- as.numeric(Output)
    } else if (type == "poly") {
      Output <- (t(totalInputs) %*% input + 1)^degree
      Output <- as.numeric(Output)
    } else if (type == "gaussian") {
      normvalue <- norm(totalInputs - input, type = "2")
      Output <- exp(-normvalue^2 / (2 * sigma^2))
      Output <- as.numeric(Output)
    }
  }
  return(Output)
}
# SMO算法
dataset <- dataset_created(length = 50)
class(dataset)
## [1] "matrix"
dataset[, 1:2] <- scale(dataset[,1:2]) %>% 
  round(.,2)
dataset
##        [,1]  [,2] [,3]
##  [1,] -0.62  1.77    1
##  [2,] -0.76  0.53    1
##  [3,] -1.23 -0.10    1
##  [4,]  0.13  0.81    1
##  [5,]  0.26  0.46    1
##  [6,] -1.16  0.94    1
##  [7,] -0.69  0.85    1
##  [8,]  1.01  0.90    1
##  [9,]  1.35  0.98    1
## [10,]  1.48  1.40    1
## [11,] -1.30  0.11    1
## [12,]  0.81  1.38    1
## [13,]  1.21  0.98    1
## [14,] -0.42  1.16    1
## [15,]  0.94  0.77    1
## [16,]  0.33  1.00    1
## [17,] -0.15  0.32    1
## [18,]  1.28  0.86    1
## [19,] -1.10  0.18    1
## [20,] -1.64 -0.13    1
## [21,]  1.14 -0.52   -1
## [22,]  1.08 -1.23   -1
## [23,] -0.89 -0.98   -1
## [24,] -0.82 -1.70   -1
## [25,] -0.28 -0.75   -1
## [26,]  0.47 -0.78   -1
## [27,] -0.48 -1.08   -1
## [28,]  1.62 -0.86   -1
## [29,] -0.35 -0.84   -1
## [30,] -1.03 -1.14   -1
## [31,] -0.55 -0.99   -1
## [32,] -1.71 -1.20   -1
## [33,]  1.42 -0.12   -1
## [34,]  0.74 -1.62   -1
## [35,] -0.08 -1.38   -1
original_data <- dataset

C <- 2 # C的取值很关键
tolerance <- 0.000001
alpha <- matrix(0, nrow(dataset), 1)
b <- 0
max_iter <- 50
counter <- 0

while (length(which(alpha[,1] == 0)) > 0 &&
       counter < max_iter) {
  print(counter)
  input <- dataset[, -ncol(dataset)]
  target <- dataset[, ncol(dataset)] %>% as.matrix()
  samplesAmout <- nrow(input)
  kernelval <- matrix(NA, samplesAmout, samplesAmout)
  # kernelval[, 1] <- kernel(input, input[1, ], "linear")
  # kernelval[, 2] <- kernel(input, input[2, ], "linear")
  for (i in seq(samplesAmout)) {
    kernelval[, i] <- kernel(input, input[i, ], "linear")
  }
  alpha <- matrix(0, samplesAmout, 1)
  for (i in seq(samplesAmout)) {
    Ei <- sum(alpha * target * kernelval[, i]) + b - target[i, 1]
    if (((Ei * target[i, 1] < -tolerance) && (alpha[i, 1] < C)) ||
        ((Ei * target[i, 1] > tolerance) && (alpha[i, 1] > 0))) {
      for (j in seq(samplesAmout)) {
        if (j != i) {
          Ej <- sum(alpha * target * kernelval[, j]) + b - target[j, 1]
          alpha_i_old <- as.numeric(alpha[i, 1])
          alpha_j_old <- as.numeric(alpha[j, 1])
          if (as.numeric(target[i, 1]) == as.numeric(target[j, 1])) {
            L <- max(0, alpha[i, 1] + alpha[j, 1] - C)
            H <- min(C, alpha[i, 1] + alpha[j, 1])
          } else {
            L <- max(0, alpha[j, 1] - alpha[i, 1])
            H <- min(C, C + alpha[j, 1] - alpha[i, 1])
          }
          if (H == L) {
            next
          }
          eta <- 2 * kernelval[j, i] - kernelval[i, i] - kernelval[j, j]
          if (eta >= 0) {
            next
          }
          alpha[j, 1] <- alpha[j, 1] - (target[j, 1] * (Ei - Ej)) / eta
          if (alpha[j, 1] > H) {
            alpha[j, 1] <- H
          } else if (alpha[j, 1] < L) {
            alpha[j, 1] <- L
          }
          if (abs(alpha[j, 1] - alpha_j_old) < tolerance) {
            next
          }
          alpha[i, 1] <- alpha[i, 1] + target[i, 1] * 
            target[j, 1] * (alpha_j_old - alpha[j, 1])
          
          b1 <- b - Ei - target[i, 1] * 
            (alpha[i, 1] - alpha_i_old) * kernelval[i, i] - 
            target[j, 1] * (alpha[j, 1] - alpha_j_old) * 
            kernelval[i, j]
          
          b2 <- b - Ej - target[i, 1] * 
            (alpha[i, 1] - alpha_i_old) * kernelval[i, j] - 
            target[j, 1] * (alpha[j, 1] - alpha_j_old) * 
            kernelval[j, j]
          
          if ((0 < alpha[i, 1]) && (alpha[i, 1] < C)) {
            b <- b1
          }else if ((0 < alpha[j, 1]) && (alpha[j, 1] < C)) {
            b <- b2
          } else {
            b <- (b1 + b2) / 2
          }
        }
      }
    }
  }
  counter <- counter + 1
  dataset <- dataset[which(alpha[, 1] != 0), ]
}
## [1] 0
## [1] 1
## [1] 2
## [1] 3
totalSum <- 0
input <- dataset[, -ncol(dataset)] #dataset 已经被缩减
samplesAmout <- nrow(input)
for (i in seq(samplesAmout)) {
  totalSum <- totalSum + alpha[i, 1] * target[i, 1] * input[i, ]
}

W <- totalSum %>% as.matrix()
b <- target[1, 1] - input[1, ] %*% W  #要取0 < alphai < C 对应的样本点

##输出结果
alpha
##      [,1]
## [1,]    2
## [2,]    2
W
##       [,1]
## [1,] -0.68
## [2,]  1.76
b
##        [,1]
## [1,] 0.3396
y <- original_data[, 2]
x <- vector("numeric", length = length(y))
for (i in seq(length(y))) {
  # 与kernel函数的type参数有关
  x[i] <- (-W[2,1] * y[i] - b) / W[1,1]  
}

dataset_df <- as.data.frame(original_data)
label <- factor(dataset_df[, 3])  
ggplot() + 
  geom_point(aes(dataset_df[, 1], dataset_df[, 2], col = label)) +
  geom_line(aes(x = x, y = y))