Chapter 6 逻辑斯谛回归与最大熵模型

require(magrittr)

iris1 <- iris[1:100, 1:4]
iris1[, "Classes"] <- c(rep(1,50), rep(0,50))
head(iris1)

##逻辑斯谛回归模型的梯度下降算法
#全部向量用列向量表示
nc <- ncol(iris1)
nr <- nrow(iris1)
lambda <- 0.0005
eps <- 0.0003
set.seed(2017)
wi <- matrix(rnorm(nc - 1, 0, 0.5), nc - 1, 1)
gradient <- matrix(NA, nc - 1, 1)
k <- 1  #记录迭代步数

while (TRUE) {
  print(paste0("迭代解:", wi))
  print(paste0("迭代步数:", k))
  for (i in seq(nc - 1)) {
    grad <- 0
    for (j in seq(nr)) {
      exp_ <- exp(as.matrix(iris1[j, 1:(nc - 1)]) %*% wi)
      grad_  <- 1 / (exp_ + 1) * 
        exp_ * iris1[j, i] - iris1[j, nc] * iris1[j, i]
      grad <- grad + grad_
    }
    gradient[i,1] <- grad
  }
  
  #梯度的模
  #mod_grad <- norm(gradient,type = c("2")) 
  #print(mod_grad)
  print(paste0("梯度:", gradient))
  p <- -gradient
  
  #更新w
  w <- wi + lambda * p
  m <- norm(w - wi, type = c("2"))
  print(paste0("m:", m))
  if (m < eps) {
    w_wanted <- w
    print(paste0("w_wanted:", w_wanted))
    break
  }
  wi <- w
  k <- k + 1
}

proba <- c()
for (i in seq(nr)) {
  nominator <- exp(as.matrix(iris1[i, 1:(nc - 1)]) %*% w_wanted)
  denominator <- 1 + exp(as.matrix(iris1[i, 1:(nc - 1)]) %*%
                           w_wanted)
  p <- nominator / denominator
  proba <- append(proba, p)
}

which(proba > 0.5)
rm(list = ls())
sapply(c("tm",             #version: 0.7-1  重要!!!!!!!!!
         "SnowballC",      #version: 0.5.1
         "stringr"),       #version: 1.2.0
       require, 
       character.only = TRUE)

options(stringsAsFactors = FALSE)
datapath <- "~/Reuters25178/training"
category <- c("gnp", "gold", "moneysupply")

#训练数据集
gnp.train.dir <- paste0(getwd(),"/Reuters21578/training/gnp")
gold.train.dir <- paste0(getwd(),"/Reuters21578/training/gold")
money.train.dir <- paste0(getwd(),"/Reuters21578/training/money-supply")

# 测试数据集
gnptest.dir <- paste0(getwd(),"/Reuters21578/test/gnp")
goldtest.dir <- paste0(getwd(),"/Reuters21578/test/gold")
moneysupplytest.dir <- paste0(getwd(),"/Reuters21578/test/money-supply")

cleanupDataCorpus <- function(dataCorpus) {
  cleanDataCorpus <- tm_map(dataCorpus, removePunctuation)
  cleanDataCorpus <- tm_map(cleanDataCorpus, removeNumbers)
  cleanDataCorpus <- tm_map(cleanDataCorpus, 
                            content_transformer(str_replace_all),
                            pattern = "[[:punct:]]", replacement = " ")
  cleanDataCorpus <- tm_map(cleanDataCorpus, content_transformer(tolower))
  cleanDataCorpus <- tm_map(cleanDataCorpus, removeWords, 
                            c("said", "u.s", stopwords("english")))
  cleanDataCorpus <- tm_map(cleanDataCorpus, stemDocument)
  #tm包version为0.7-1,该句可注释掉
  #cleanDataCorpus <- tm_map(cleanDataCorpus, PlainTextDocument)
  return(cleanDataCorpus)
}

gnp.corpus <- Corpus(DirSource(directory = gnp.train.dir, 
                               encoding = "ASCII"))
gol.corpus <- Corpus(DirSource(directory = gold.train.dir, 
                               encoding = "ASCII"))
mon.corpus <- Corpus(DirSource(directory = money.train.dir, 
                               encoding = "ASCII"))

gnp.cleancorpus <- cleanupDataCorpus(gnp.corpus)
gol.cleancorpus <- cleanupDataCorpus(gol.corpus)
mon.cleancorpus <- cleanupDataCorpus(mon.corpus)

gnp.tdm <- TermDocumentMatrix(gnp.cleancorpus)
gol.tdm <- TermDocumentMatrix(gol.cleancorpus)
mon.tdm <- TermDocumentMatrix(mon.cleancorpus)

inspect(gnp.tdm[1:10,1:10])

gnp.tdm <- removeSparseTerms(gnp.tdm, 0.7)
gol.tdm <- removeSparseTerms(gol.tdm, 0.7)
mon.tdm <- removeSparseTerms(mon.tdm, 0.7)

gnp.matx <- t(data.matrix(gnp.tdm))
gol.matx <- t(data.matrix(gol.tdm))
mon.matx <- t(data.matrix(mon.tdm))

head(gnp.matx)
head(gol.matx)
head(mon.matx)

gnp.df <- as.data.frame(gnp.matx)
gol.df <- as.data.frame(gol.matx)
mon.df <- as.data.frame(mon.matx)

gnptest.corpus <- Corpus(DirSource(directory = gnptest.dir, 
                                   encoding = "ASCII"))
goltest.corpus <- Corpus(DirSource(directory = goldtest.dir, 
                                   encoding = "ASCII"))
montest.corpus <- Corpus(DirSource(directory = moneysupplytest.dir, 
                                   encoding = "ASCII"))

gnptest.cleancorpus <- cleanupDataCorpus(gnptest.corpus)
goltest.cleancorpus <- cleanupDataCorpus(goltest.corpus)
montest.cleancorpus <- cleanupDataCorpus(montest.corpus)

gnptest.tdm <- TermDocumentMatrix(gnptest.cleancorpus)
goltest.tdm <- TermDocumentMatrix(goltest.cleancorpus)
montest.tdm <- TermDocumentMatrix(montest.cleancorpus)

gnptest.tdm <- removeSparseTerms(gnptest.tdm, 0.7)
goltest.tdm <- removeSparseTerms(goltest.tdm, 0.7)
montest.tdm <- removeSparseTerms(montest.tdm, 0.7)

gnptest.matx <- t(data.matrix(gnptest.tdm))
goltest.matx <- t(data.matrix(goltest.tdm))
montest.matx <- t(data.matrix(montest.tdm))

head(gnptest.matx)
head(goltest.matx)
head(montest.matx)

gnptest.df <- as.data.frame(gnptest.matx)
goltest.df <- as.data.frame(goltest.matx)
montest.df <- as.data.frame(montest.matx)

##转换为矩阵,gnp.df / gnptest.df 不允许重复的行名
gnp.df.mat <- as.matrix(gnp.df)
gnptest.df.mat <- as.matrix(gnptest.df)

gnptestfiltered1 <- data.frame(gnptest.df.mat[, intersect(colnames(gnptest.df.mat), 
                                                            colnames(gnp.df.mat))])
gnptestfiltered1[1, ]

gnptestfiltered2 <- read.table(textConnection(""), 
                               col.names = colnames(gnp.df.mat))

gnptestfiltered <- rbind.fill(gnptestfiltered1, gnptestfiltered2)
gnptestfiltered[1,]

ncol(gnptestfiltered)

##转换为矩阵,gol.df / goltest.df 不允许重复的行名
gol.df.mat <- as.matrix(gol.df)
goltest.df.mat <- as.matrix(goltest.df)

goltestfiltered1 <- data.frame(goltest.df.mat[, intersect(colnames(goltest.df.mat), 
                                                          colnames(gol.df.mat))])

goltestfiltered2 <- read.table(textConnection(""),
                               col.names = colnames(gol.df.mat))

goltestfiltered <- rbind.fill(goltestfiltered1, 
                              goltestfiltered2)

ncol(goltestfiltered)


##转换为矩阵,mon.df / montest.df 不允许重复的行名
mon.df.mat <- as.matrix(mon.df)
montest.df.mat <- as.matrix(montest.df)

montestfiltered1 <- data.frame(montest.df.mat[, intersect(colnames(montest.df.mat), 
                                                          colnames(mon.df.mat))])

montestfiltered2 <- read.table(textConnection(""), 
                               col.names = colnames(mon.df.mat))

montestfiltered <- rbind.fill(montestfiltered1, montestfiltered2)

ncol(montestfiltered)

gnp.df <- cbind(gnp.df, category = rep("gnp"))
gol.df <- cbind(gol.df, category = rep("gold"))
mon.df <- cbind(mon.df, category = rep("money_supply"))

gnptestfiltered <- cbind(gnptestfiltered, category = rep("gnp"))
goltestfiltered <- cbind(goltestfiltered, category = rep("gold"))
montestfiltered <- cbind(montestfiltered, category = rep("money_supply"))

##############训练用
tdmStackCateg <- rbind.fill(gnp.df, gol.df, mon.df)

nrow(gnp.df); nrow(gol.df); nrow(mon.df)

nrow(tdmStackCateg)

tdmStackCateg[is.na(tdmStackCateg)] <- 0

ncol(gnp.df); ncol(gol.df); ncol(mon.df)

ncol(tdmStackCateg)

####算法输入用
which(colnames(tdmStackCateg) == "category") #[1] 48
ncol(tdmStackCateg) #[1] 68
#将"category"列放置最后一列
tdmStackCateg <- tdmStackCateg[, c(1:47, 49:68, 48)] 
tdmStackCateg[,"rowid"] <- rownames(tdmStackCateg)
head(tdmStackCateg)
require(magrittr)
require(tibble)
nr_tdm <- nrow(tdmStackCateg)
len_lab <- tdmStackCateg[, "category"] %>% 
  unique(.) %>% 
  length(.)

#定义特征函数, 0-1特征函数, 词频特征函数
nc_tdm <- ncol(tdmStackCateg)
#最后两个是"category", "rowid" 
voc <- colnames(tdmStackCateg)[-c(nc_tdm - 1, nc_tdm)]

#voc属于哪个lab?
voc_lab <- matrix(NA ,length(voc),2)
for (i in seq(length(voc))) {
  idx_cat <- which(colnames(tdmStackCateg) == "category")
  sub <- tdmStackCateg[, c(i,idx_cat)]
  idx <- which(sub[, 1] != 0)
  sub <- sub[idx, ]
  colnames(sub) <- c("voc","categ")
  dd <- ddply(sub, ~ categ, summarize,sum = sum(voc))
  voc_lab[i, 1] <- voc[i]
  voc_lab[i, 2] <- dd[which.max(dd[,"sum"]), "categ"]
}
voc_lab

#####特征函数1
featfun1 <- function(x = tdmStackCateg[1, 1:length(voc)],
                     y = tdmStackCateg[1, "category"],
                     i = 1) {  
  lab <- voc_lab[which(voc_lab[, 1] == voc[i]), 2]
  if (y == lab) {
    fvalue <- x[, voc[i]]
    if (fvalue != 0) {
      fvalue <- x[, voc[i]]   #如果词频不为0,则特征值取词频
    } else {                  
      fvalue <- 0.1           #如果词频为0,则特征值取0.1,"绝对折扣"
    }
  } else {
    fvalue <- 0
  }
  fvalue
}

featfun1(x = tdmStackCateg[11, 1:length(voc)],
         y = tdmStackCateg[11, "category"],
         i = 40)  #第11个样本点,第40个特征函数
featfun1(x = tdmStackCateg[11, 1:length(voc)],
         y = tdmStackCateg[11, "category"],
         i = 43)  #第11个样本点,第43个特征函数

colnames(tdmStackCateg)
voc

matx_tdmSC <- as.matrix(tdmStackCateg[, 1:length(voc)])

#假设计算f(doc1,lab310)的第一至第length(voc)个特征函数的值
val <- c()
for (k in seq(length(voc))) {
  val_ <- featfun1(x = tdmStackCateg[1, 1:length(voc)],
                   y = tdmStackCateg[310, "category"],
                   i = k)
  val <- append(val, val_)
}
val

#对于所有的样本,计算f#
f_sharp_matx <- matrix(NA, nr_tdm, 1)
for (j in seq(nr_tdm)) {
  feature_sharp <- 0
  for (k in seq(length(voc))) {
    feature_ <- featfun1(x = tdmStackCateg[j, 1:length(voc)],
                         y = tdmStackCateg[j, "category"], 
                         i = k)
    feature_sharp <- feature_sharp + feature_
  }
  f_sharp_matx[j, 1] <- feature_sharp
}

f_sharp_matx
max(f_sharp_matx) #[1] 163.3

###定义函数pxy,求联合分布概率
pxy <- function(x = tdmStackCateg[1, "rowid"],
                y = tdmStackCateg[1, "category"]) {
  v <- tdmStackCateg %>% 
    subset(., rowid == x & category == y) %>% 
    nrow(.)
  p <- v / nr_tdm 
  p
}

pxy(x = tdmStackCateg[1, "rowid"],
    y = tdmStackCateg[1, "category"])

###定义函数px,求边缘分布px
px <- function(x=tdmStackCateg[1, "rowid"]) {
  v <- tdmStackCateg %>% 
    subset(., rowid == x) %>% 
    nrow(.)
  p <- v / nr_tdm
  p
}

px(x = tdmStackCateg[10, "rowid"])

###定义函数pyx,求最大熵模型pyx
wi <- matrix(0, length(voc), 1)
lab <- tdmStackCateg[, "category"]
pyx <- function(xx = tdmStackCateg[1, 1:length(voc)],
                yy = tdmStackCateg[1, "category"], 
                w = wi) {
  fea <- matrix(NA, length(voc), 1)
  for (k in seq(length(voc))) {
    fea[k, 1] <- featfun1(x = xx, y = yy, i = k)
  }
  nominator <- exp(t(w) %*% fea)
  denominator <- 0
  feat <- matrix(NA, length(voc), 1)
  for (j in seq(length(lab))) {
    for (v in seq(length(voc))) {
      feat[v, 1] <- featfun1(x = xx, y = lab[j], i = v)
    }
    denominator_ <- exp(t(w) %*% feat)
    denominator <- denominator + denominator_
  }
  p <- (nominator / denominator) %>% 
    as.numeric()
  p
}

pyx(xx = tdmStackCateg[1, 1:length(voc)],
    yy = tdmStackCateg[1, "category"], 
    w = wi)

#定义函数EPF_nomin
#k=1,则计算针对第1个特征函数的EPF_nomin
EPF_nomin <- function(k) {    
  valu <- 0
  for (j in seq(nr_tdm)) {
    pxy <- pxy(x = tdmStackCateg[j, "rowid"],
               y = tdmStackCateg[j, "category"])
    feat <- featfun1(x = tdmStackCateg[j, 1:length(voc)],
                     y = tdmStackCateg[j, "category"], 
                     i = k)
    valu_ <- pxy * feat
    valu <- valu + valu_ 
  }
  valu
}

EPF_nomin(1)

#定义函数EPF_denomin
#k=1, W=matrix(0, length(voc), 1) 则计算针对第1个特征函数的EPF_denomin
EPF_denomin <- function(k, W) {   
  valu <- 0
  for (j in seq(nr_tdm)) {
    px <- px(x = tdmStackCateg[j, "rowid"])
    pyx <- pyx(xx = tdmStackCateg[j, 1:length(voc)],
               yy = tdmStackCateg[j, "category"], 
               w = W)
    feat <- featfun1(x = tdmStackCateg[j, 1:length(voc)],
                     y = tdmStackCateg[j, "category"], 
                     i = k)
    valu_ <- px * pyx * feat
    valu <- valu + valu_ 
  }
  valu
}

len_voc <- length(voc)
mat_voc <- matrix(0, len_voc, 1)
EPF_denomin(1, mat_voc)
chap6_1

chap6_1

###算法6.1,改进的迭代尺度算法IIS
eps <- 0.001
bigM <- max(f_sharp_matx)
W <- matrix(0, len_voc, 1)
k <- 1  #记录迭代步数
nominator_ <- matrix(NA, len_voc, 1)
for (i in seq(len_voc)) {
  nominator_[i, 1] <- EPF_nomin(i) %>% 
    round(., 6)  ##放while循环之外
}

while (TRUE) {
  print(paste0("迭代解:", W))
  print(paste0("迭代步数:", k))
  theta <- matrix(NA, len_voc, 1)
  for (i in seq(len_voc)) {
    nominator <- nominator_[i, 1]   
    denominator <- EPF_denomin(i, W)
    theta[i, 1] <- (log(nominator / denominator) / bigM) %>% 
      round(., 6)
    print(paste0("i = ", i))
  }
  WI <- W + theta
  if (all(WI - W < eps)) {
    W_wanted <- WI
    print(paste0("W_wanted:", W_wanted))
    break
  }
  W <- WI
  k <- k + 1
}
chap6_2

chap6_2

######算法6.2,最大熵模型学习的BFGS算法
require(MASS)
#ginv() 求逆
###开始执行,顺序执行,从k <- 1 开始
k <- 1
eps <- 0.001
lambda <- 0.0005
len_voc <- length(voc)
W <- matrix(0, len_voc, 1)
#B 是用于逼近海赛矩阵H的,B应该是 W 的函数,且B是正定对称矩阵,
#那么,B <- diag(1, len_voc, len_voc)是否有问题? 详见220页,92页
B <- diag(1, len_voc, len_voc)  
g <- matrix(NA, len_voc, 1)
##计算算法6.2第2步的gk
for (j in seq(len_voc)) {
  g[j, 1] <- EPF_denomin(j, W) - EPF_nomin(j) #计算梯度
}

gk <- norm(g, type = c("2"))
if (gk < eps) {
  W_wanted <- W
  print(paste0("W_wanted: ", W_wanted))
  break
} 

while (TRUE) {
  pk <- -(ginv(B) %*% g)
  W2 <- W + lambda * pk
  ##计算算法6.2第6步的gk2
  g2 <- matrix(NA, len_voc, 1)
  for (j in seq(len_voc)) {
    g2[j, 1] <- EPF_denomin(j, W2) - EPF_nomin(j) #计算梯度
  gk2 <- norm(g2, type = c("2"))
  if (gk2 < eps) {
    W_wanted2 <- W2
    print(paste0("W_wanted2: ", W_wanted2))
    break
  } 
  yk <- g2 - g
  thetak <- W2 - W
  nomi_y <- t(yk) %*% yk        #得到一个数
  denomi_y <- t(yk) %*% thetak  #得到一个数
  nomi_theta <- B %*% thetak %*% t(thetak) %*% B#得到一个矩阵
  denomi_theta <- t(thetak) %*% B %*% thetak    #得到一个数
  
  part1 <- nomi_y / denomi_y
  part2 <- nomi_theta / denomi_theta
  
  if (denomi_y > 0) {   #校正公式
    B <- B + part1 - part2
  } else {
    B <- B
  }
  g <- g2
  k <- k + 1
}
// 算法6.1, 改进的迭代尺度算法IIS, IIS.cpp

// [[Rcpp::depends(RcppArmadillo)]]
#include <RcppArmadillo.h>
using namespace Rcpp;

// [[Rcpp::plugins("cpp11")]]

double featfun1_cpp01(CharacterMatrix voc_lab, 
                      CharacterVector voc,
                      NumericVector x, 
                      String y, int i) {
  
  CharacterVector col0 = voc_lab(_, 0);
  CharacterVector st(1);
  st[0] = voc[i - 1]; // index, - 1
  IntegerVector A = match(st, col0); 
  String lab = voc_lab(A[0] - 1, 1);  // index, - 1
  double fvalue(NA_REAL);
  if (lab == y) {
    fvalue = x[i - 1];
    if (fvalue != 0.0) {
      fvalue = x[i - 1];
    } else {
      fvalue = 0.1;
    }
  } else {
    fvalue = 0.0;
  }
  return fvalue;
}


double pxy_cpp(DataFrame tdmStackCateg, 
               String x, String y) {
  double nr_tdm = tdmStackCateg.nrows();
  CharacterVector rowID = tdmStackCateg["rowid"];
  CharacterVector Category = tdmStackCateg["category"];
  double counT = 0;
  for (int i = 0; i < rowID.length(); i++) {
    bool b1 = x == rowID[i];
    bool b2 = y == Category[i];
    if (b1 && b2) {
      counT = counT + 1;
    } else {
      counT = counT + 0;
    }
  }
  double p = counT / nr_tdm;
  return p;
}

double px_cpp(DataFrame tdmStackCateg, String x) {
  double nr_tdm = tdmStackCateg.nrows();
  CharacterVector rowID = tdmStackCateg["rowid"];
  double counT = 0;
  for (int i = 0; i < rowID.length(); i++) {
    bool b1 = x == rowID[i];
    if (b1) {
      counT = counT + 1;
    } else {
      counT = counT + 0;
    }
  }
  double p = counT / nr_tdm;
  return p;
}


double pyx_cpp01(DataFrame tdmStackCateg,
                 CharacterMatrix voc_lab, 
                 CharacterVector voc, 
                 NumericVector xx, 
                 String yy, 
                 NumericMatrix w) {
  CharacterVector lab = tdmStackCateg["category"];
  int vocLen = voc.length();
  NumericVector fea(vocLen);
  
  int wnr = w.nrow(), wnc = w.ncol();
  arma::mat W(w.begin(), wnr, wnc, false);
  arma::colvec FEA(fea.begin(), vocLen, false);
  
  for (int k = 0; k < voc.length(); k++) {
    FEA[k] = featfun1_cpp01(voc_lab, voc, xx, yy, k + 1);
  }
  arma::colvec nominator_1 = exp(arma::trans(W) * FEA);
  double nominator = nominator_1[0];
  
  double denominator = 0.0;
  String stri = NA_STRING;
  double denominator_1 = NA_REAL;
  
  NumericVector feat(vocLen);
  arma::colvec FEAT(feat.begin(), vocLen, false);
  for (int j = 0; j < lab.length(); j++) {
    stri = lab[j];
    for (int v = 0; v < vocLen; v++) {
      FEAT[v] = featfun1_cpp01(voc_lab, voc, xx, stri, v + 1);
    }
    arma::colvec DenoMinator = exp(arma::trans(W) * FEAT);
    denominator_1 = DenoMinator[0];
    denominator = denominator + denominator_1;
  }
  double p = nominator / denominator;
  return p;
}

double EPF_nomin_cpp(DataFrame tdmStackCateg, 
                     NumericMatrix matx_tdmSC, 
                     CharacterMatrix voc_lab, 
                     CharacterVector voc, 
                     int k) {
  
  double valu = 0.0;
  double valu_1 = NA_REAL;
  
  int nr_tdm = tdmStackCateg.nrows();
  CharacterVector rowID = tdmStackCateg["rowid"];
  CharacterVector lab = tdmStackCateg["category"];
  
  double pxy = NA_REAL;
  double feat = NA_REAL;
  
  String stri_rid = NA_STRING;
  String stri_lab = NA_STRING;
  
  for (int j = 0; j < nr_tdm; j++) {
    stri_rid = rowID[j];
    stri_lab = lab[j];
    pxy = pxy_cpp(tdmStackCateg, stri_rid, stri_lab);
    NumericVector oneVector = matx_tdmSC(j, _);
    feat = featfun1_cpp01(voc_lab, voc, oneVector, stri_lab, k);
    valu_1 = pxy * feat;
    valu = valu + valu_1;
  }
  return valu;
}

// [[Rcpp::export]]
double EPF_denomin_cpp01(DataFrame tdmStackCateg, 
                         NumericMatrix matx_tdmSC, 
                         CharacterMatrix voc_lab, 
                         CharacterVector voc,
                         int k, 
                         NumericMatrix W) {
  double valu = 0.0;
  double valu_1 = NA_REAL;
  
  int nr_tdm = tdmStackCateg.nrows();
  CharacterVector rowID = tdmStackCateg["rowid"];
  CharacterVector lab = tdmStackCateg["category"];
  
  double px = NA_REAL;
  double pyx = NA_REAL;
  double feat = NA_REAL;
  String stri_rid = NA_STRING;
  String stri_lab = NA_STRING;
  
  for (int j = 0; j < nr_tdm; j++) {
    stri_rid = rowID[j];
    stri_lab = lab[j];
    NumericVector oneVector = matx_tdmSC(j, _);
    px = px_cpp(tdmStackCateg, stri_rid);
    pyx = pyx_cpp01(tdmStackCateg, voc_lab, voc, oneVector, stri_lab, W);
    feat = featfun1_cpp01(voc_lab, voc, oneVector, stri_lab, k);
    valu_1 = px * pyx * feat;
    valu = valu + valu_1;
  }
  return valu;
}

// [[Rcpp::export]]
NumericMatrix final(double eps, 
                    double bigM, 
                    DataFrame tdmStackCateg, 
                    NumericMatrix matx_tdmSC, 
                    CharacterMatrix voc_lab, 
                    CharacterVector voc,
                    NumericMatrix W) {
  int k = 1;
  int len_voc = voc.length();
  NumericMatrix nominator_(len_voc, 1);
  
  for (int i = 0; i < len_voc; i++) {
    nominator_(i, 0) = EPF_nomin_cpp(tdmStackCateg, 
                                     matx_tdmSC, 
                                     voc_lab, 
                                     voc,
                                     i + 1);
  }
  
  NumericMatrix theta(len_voc, 1);
  double nominator = NA_REAL;
  double denominator = NA_REAL;
  NumericMatrix WI = clone(W);
  NumericMatrix W_wanted = clone(W);
  LogicalMatrix THETA(len_voc, 1);
  bool THETA_single = NA_LOGICAL;
  
  while (true) {
    Rcpp::Rcout << "the value of theta: " << theta;
    Rcpp::Rcout << "k = " << k;
    for (int j = 0; j < len_voc; j++) {
      nominator = nominator_(j, 0);
      denominator = EPF_denomin_cpp01(tdmStackCateg, 
                                      matx_tdmSC, 
                                      voc_lab, 
                                      voc,
                                      j + 1, 
                                      W);
      theta(j, 0) = log(nominator / denominator) / bigM; 
    }
    for (int k = 0; k < W.nrow(); k++) {
      for (int h = 0; h < W.ncol(); h++) {
        WI(k, h) = W(k, h) + theta(k, h);
      }
    }
    
    for (int m = 0; m < len_voc; m++) {
      THETA_single = theta(m, 0) < eps;
      THETA(m, 0) = THETA_single;
    }
    
    if (is_true(all(THETA))) {
      W_wanted = WI;
      break;
    }

    W = WI;
    k = k + 1;
  }
  return W_wanted;
}
rbenchmark::benchmark(EPF_denomin_cpp01(tdmStackCateg = tdmStackCateg, 
                                        matx_tdmSC = matx_tdmSC,
                                        voc_lab = voc_lab,
                                        voc = voc,
                                        k = 1, 
                                        W = mat_voc),
                      EPF_denomin(1, mat_voc), 
                      replications = 1)

#                                                                                                                         test
# 2                                                                                                     EPF_denomin(1, mat_voc)
# 1 EPF_denomin_cpp01(tdmStackCateg = tdmStackCateg, matx_tdmSC = matx_tdmSC, voc_lab = voc_lab, voc = voc, k = 1, W = mat_voc)
#   replications elapsed relative user.self sys.self user.child sys.child
# 2            1  113.91    7.242    112.29     0.02         NA        NA
# 1            1   15.73    1.000     15.65     0.01         NA        NA

eps <- 0.001
bigM <- max(f_sharp_matx)
final(eps, 
      bigM, 
      tdmStackCateg, 
      matx_tdmSC, 
      voc_lab, 
      voc,
      wi)