# Chapter 9 EM算法

``````rm(list = ls())
y <- c(-67, -48, 6, 8, 14, 16, 23, 24, 28, 29, 41, 49, 56, 60, 75)

setClass("GAUSS", representation(mu = "numeric",
sig2 = "numeric",
yi = "numeric",
value = "numeric"))

GAUSSFUN_k <- function(mu = c(100, 81),
sig2 = c(6400, 3600),
yi, k) {
GAUSSFUNobj <- new("GAUSS", mu = as.numeric(mu)[k],
sig2 = as.numeric(sig2)[k],
yi = as.numeric(yi))
GAUSSFUNobj@value <- 1 / (sqrt(2 * pi) * sqrt(GAUSSFUNobj@sig2)) *
exp(-(GAUSSFUNobj@yi - GAUSSFUNobj@mu)^2 / (2 * GAUSSFUNobj@sig2))
GAUSSFUNobj
}

GAUSSFUN_k(yi = 64, k = 1)``````
``````## An object of class "GAUSS"
## Slot "mu":
## [1] 100
##
## Slot "sig2":
## [1] 6400
##
## Slot "yi":
## [1] 64
##
## Slot "value":
## [1] 0.004506587``````
``class(GAUSSFUN_k(yi = 64, k = 1))``
``````## [1] "GAUSS"
## attr(,"package")
## [1] ".GlobalEnv"``````
``````setClass("gammajk",
representation(alphak = "numeric",
density_fun = "GAUSS",
gammaval = "numeric"))

gammajkFun <- function(mu=c(100,81), sig2=c(6400,3600),
yi, alphak=c(0.5,0.5), k) {
sumval <- 0
for (i in 1:2) {
gaussobj <- GAUSSFUN_k(mu, sig2, yi = yi, k = i)
sumval_ <- alphak[i] * gaussobj@value
sumval <- sumval + sumval_
}
gaussobj <- NULL
gaussobj2 <- GAUSSFUN_k(mu, sig2, yi = yi, k = k)
gammaval <- alphak[k]*gaussobj2@value / sumval
gammajkobj <- new("gammajk", alphak = alphak,
density_fun = gaussobj2,
gammaval = gammaval)
return(gammajkobj)
}

setValidity("gammajk", function(obj) {
error <- 0.001
if (abs(1 - sum(obj@alphak)) > error) {
return("all the alphak not sum to 1")
}
return(TRUE)
})``````
``## NOTE: arguments in definition for validity method for class 'gammajk' changed from (obj) to (object)``
``````## Class "gammajk" [in ".GlobalEnv"]
##
## Slots:
##
## Name:       alphak density_fun    gammaval
## Class:     numeric       GAUSS     numeric``````
``gammajkFun(yi = 164, k = 2)``
``````## An object of class "gammajk"
## Slot "alphak":
## [1] 0.5 0.5
##
## Slot "density_fun":
## An object of class "GAUSS"
## Slot "mu":
## [1] 81
##
## Slot "sig2":
## [1] 3600
##
## Slot "yi":
## [1] 164
##
## Slot "value":
## [1] 0.002554015
##
##
## Slot "gammaval":
## [1] 0.413595``````
``class(gammajkFun(yi = 164, k = 2))``
``````## [1] "gammajk"
## attr(,"package")
## [1] ".GlobalEnv"``````
``````nk_fun <- function(y, k,
mu = c(100,81),
sig2 = c(6400,3600),
alphak = c(0.5,0.5)) {
nk <- 0
for (i in seq(length(y))) {
gammajkobj <- gammajkFun(yi = y[i], k = k,
mu = mu, sig2 = sig2,
alphak = alphak)
gammaval_ <- gammajkobj@gammaval
nk <- nk + gammaval_
}
nk
}

nk_fun(y = y, k = 2)``````
``## [1] 8.225333``
``nk_fun(y = y, k = 1)``
``## [1] 6.774667``
``````completedata_loglikefun <- function(y,
mu = c(100,81),
sig2 = c(6400,3600),
alphak = c(0.5,0.5)) {
sum_outter <- 0
for (k in 1:2) {
nk_val <- nk_fun(y = y, k = k,
mu = mu, sig2 = sig2,
alphak = alphak)
sum_inner <- 0
for (i in seq(length(y))) {
gammajkobj <- gammajkFun(mu = mu, sig2 = sig2,
yi = y[i], alphak = alphak,
k = k)
sum_inner_ <- gammajkobj@gammaval*(log(1/sqrt(2*pi)) -
log(sqrt(gammajkobj@density_fun@sig2)) -
1/(2*gammajkobj@density_fun@sig2) *
(y[i] - gammajkobj@density_fun@mu)^2)
sum_inner <- sum_inner + sum_inner_
}
sum_outter <- sum_outter + nk_val * log(alphak[k]) + sum_inner
}
sum_outter
}
completedata_loglikefun(y = y)``````
``## [1] -96.95065``
``````old_loglike <- -Inf
diff_loglike <- Inf
e <- 0.00001
y <- c(-67, -48, 6, 8, 14, 16, 23, 24, 28, 29, 41, 49, 56, 60, 75)
mu <- c(16, 20)
sig2 <- c(1210, 1440)
alphak <- c(0.5, 0.5)
step <- 1

# old_loglike <- -Inf
# diff_loglike <- Inf
# e <- 0.00001
# set.seed(7)
# y1 <- rnorm(50, mean = 23, sd = 3)
# y2 <- rnorm(50, mean = 7,  sd = 10)
# y <- sample(c(y1, y2))
# mu <- c(18, 6)
# sig2 <- c(8, 90)
# alphak <- c(0.5, 0.5)
# step <- 1
#
# old_loglike <- -Inf
# diff_loglike <- Inf
# e <- 0.00001
# set.seed(7)
# y1 <- rnorm(50, mean = 23, sd = 3)
# y2 <- rnorm(50, mean = 7,  sd = 10)
# y <- sample(c(y1, y2))
# mu <- c(23, 7)
# sig2 <- c(9, 100)
# alphak <- c(0.5, 0.5)
# step <- 1
#
# old_loglike <- -Inf
# diff_loglike <- Inf
# e <- 0.00001
# set.seed(7)
# y1 <- rnorm(50, mean = 23, sd = 3)
# y2 <- rnorm(50, mean = 7,  sd = 10)
# y <- sample(c(y1, y2))
# mu <- c(23, 7)
# sig2 <- c(9, 100)
# alphak <- c(0.25, 0.75)
# step <- 1

while (diff_loglike > e) {
cat("step = ", step)
cat("\n")
cat("alphak = ", alphak)
cat("\n")
matx_response <- matrix(NA, length(y), 2)
for (k in 1:2) {
for (i in 1:length(y)) {
gammajkobj <- gammajkFun(mu = mu, sig2 = sig2,
yi = y[i], alphak = alphak,
k = k)
matx_response[i,k] <- gammajkobj@gammaval
}
#cat(sum(as.numeric(matx_response[, k])))
#cat("\n")
#cat(nk_fun(y = y, k = k, mu = mu, sig2 = sig2, alphak = alphak))
#cat("\n")
}
model_list <- vector("list", length = 2)
for (kk in 1:2) {
#初始化y[1]给yi,
#gaussobj对象的mu, sig2,与yi 无关的
#gaussobj对象的value与yi有关,
#但下文代码用不到与具体yi有关的value, 所以, 随意初始化yi
gaussobj <- GAUSSFUN_k(mu, sig2, yi = y[1], k = kk) #
old_mu <- gaussobj@mu
gaussobj@mu <- as.numeric(t(matx_response[, kk]) %*%
as.matrix(y) /
sum(as.numeric(matx_response[, kk])))
gaussobj@sig2 <- as.numeric(t(matx_response[, kk]) %*%
as.matrix((y - old_mu)^2) /
sum(as.numeric(matx_response[, kk])))
mu[kk] <- gaussobj@mu
sig2[kk] <- gaussobj@sig2
alphak[kk] <- sum(as.numeric(matx_response[, kk])) / length(y)
model_list[[kk]] <- gaussobj
}
newloglike <- completedata_loglikefun(y = y,
mu = mu, sig2 = sig2,
alphak = alphak)
diff_loglike <- newloglike - old_loglike
old_loglike <- newloglike
step <- step + 1
cat("diff_loglike = ", diff_loglike)
cat("\n")
}``````
``````## step =  1
## alphak =  0.5 0.5
## diff_loglike =  Inf
## step =  2
## alphak =  0.4974163 0.5025837
## diff_loglike =  0.0103793
## step =  3
## alphak =  0.4974829 0.5025171
## diff_loglike =  0.03200098
## step =  4
## alphak =  0.4974934 0.5025066
## diff_loglike =  0.1081907
## step =  5
## alphak =  0.4975559 0.5024441
## diff_loglike =  0.345021
## step =  6
## alphak =  0.4979327 0.5020673
## diff_loglike =  0.9073021
## step =  7
## alphak =  0.5001129 0.4998871
## diff_loglike =  1.484635
## step =  8
## alphak =  0.5104773 0.4895227
## diff_loglike =  1.106963
## step =  9
## alphak =  0.5400065 0.4599935
## diff_loglike =  0.6099247
## step =  10
## alphak =  0.5780281 0.4219719
## diff_loglike =  0.5194065
## step =  11
## alphak =  0.61224 0.38776
## diff_loglike =  0.5047184
## step =  12
## alphak =  0.6424424 0.3575576
## diff_loglike =  0.494532
## step =  13
## alphak =  0.6690101 0.3309899
## diff_loglike =  0.4815958
## step =  14
## alphak =  0.6922987 0.3077013
## diff_loglike =  0.4669713
## step =  15
## alphak =  0.712696 0.287304
## diff_loglike =  0.4533132
## step =  16
## alphak =  0.7306164 0.2693836
## diff_loglike =  0.4437248
## step =  17
## alphak =  0.7464884 0.2535116
## diff_loglike =  0.4413349
## step =  18
## alphak =  0.7607415 0.2392585
## diff_loglike =  0.4492733
## step =  19
## alphak =  0.7737935 0.2262065
## diff_loglike =  0.4709836
## step =  20
## alphak =  0.7860463 0.2139537
## diff_loglike =  0.5107317
## step =  21
## alphak =  0.7978866 0.2021134
## diff_loglike =  0.5741655
## step =  22
## alphak =  0.8096907 0.1903093
## diff_loglike =  0.6688869
## step =  23
## alphak =  0.8218194 0.1781806
## diff_loglike =  0.8050885
## step =  24
## alphak =  0.8345676 0.1654324
## diff_loglike =  0.9845402
## step =  25
## alphak =  0.8479253 0.1520747
## diff_loglike =  1.053584
## step =  26
## alphak =  0.8604795 0.1395205
## diff_loglike =  0.604048
## step =  27
## alphak =  0.8666458 0.1333542
## diff_loglike =  0.01206063
## step =  28
## alphak =  0.8668273 0.1331727
## diff_loglike =  -9.772145e-07``````
``purrr::modify_depth(model_list, 1 ,"mu")``
``````## [[1]]
## [1] 32.98489
##
## [[2]]
## [1] -57.51108``````
``purrr::modify_depth(model_list, 1 ,"sig2")``
``````## [[1]]
## [1] 429.4583
##
## [[2]]
## [1] 90.24988``````
``````# 增加到400个样本点
# old_loglike <- -Inf
# diff_loglike <- Inf
# e <- 0.0001
# set.seed(7)
# y1 <- rnorm(200, mean = 23, sd = 3)
# y2 <- rnorm(200, mean = 7,  sd = 10)
# y <- sample(c(y1, y2))
# mu <- c(23, 7)
# sig2 <- c(9, 100)
# alphak <- c(0.25, 0.75)
# step <- 1
#
# step =  103
# alphak =  0.5331849 0.4668151
# diff_loglike =  9.319626e-05
#
# > purrr::modify_depth(model_list, 1 ,"mu")
# [[1]]
# [1] 23.33774
#
# [[2]]
# [1] 5.449705
#
# > purrr::modify_depth(model_list, 1 ,"sig2")
# [[1]]
# [1] 10.16659
#
# [[2]]
# [1] 96.77137``````