# Chapter 11 条件随机场

``````featfun <- function(yi_1=NA, yi, i) {
all_fea <- list(c(1, 2, 2),
c(1, 2, 3),
c(1, 1, 2),
c(2, 1, 3),
c(2, 1, 2),
c(2, 2, 3),
c(   1, 1),
c(   2, 1),
c(   2, 2),
c(   1, 2),
c(   1, 3),
c(   2, 3))
# weights 是与all_fea互相对应的权值
weights <- c(1,1,0.5,1,1,0.2,1,0.5,0.5,0.8,0.8,0.5)

idx1 <- 0; idx2 <- 0
if (list(c(yi_1, yi, i)) %in% all_fea) {
idx1 <- which(all_fea %in% list(c(yi_1, yi, i)))
}
if (list(c(yi, i)) %in% all_fea) {
idx2 <- which(all_fea %in% list(c(yi, i)))
}

if (idx1 != 0 && idx2 != 0) {
return(list(c(1, weights[idx1]), c(1, weights[idx2])))
} else if (idx1 != 0 && idx2 == 0) {
return(list(c(1, weights[idx1])))
} else if (idx1 == 0 && idx2 != 0) {
return(list(c(1, weights[idx2])))
} else {
return(NA)
}
}``````
``````all_fea <- list(c(1, 2, 2),
c(1, 2, 3),
c(1, 1, 2),
c(2, 1, 3),
c(2, 1, 2),
c(2, 2, 3),
c(   1, 1),
c(   2, 1),
c(   2, 2),
c(   1, 2),
c(   1, 3),
c(   2, 3))
list(c(2,1,3)) %in% all_fea``````
``## [1] TRUE``
``all_fea %in% list(c(2,1,3))``
``````##  [1] FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12] FALSE``````
``list(c(2L,1L,3L)) %in% all_fea ##注意！！！！``
``## [1] TRUE``
``all_fea %in% list(c(2L,1L,3L))``
``````##  [1] FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12] FALSE``````
``````#测试featfun()
featfun(2,1,3); featfun(1,1,2)``````
``````## [[1]]
## [1] 1 1
##
## [[2]]
## [1] 1.0 0.8``````
``````## [[1]]
## [1] 1.0 0.5
##
## [[2]]
## [1] 1.0 0.8``````
``featfun(yi = 2, i = 2); featfun(yi = 1, i = 3)``
``````## [[1]]
## [1] 1.0 0.5``````
``````## [[1]]
## [1] 1.0 0.8``````
``featfun(2,2,4); featfun(NA,NA,2)``
``## [1] NA``
``## [1] NA``
``featfun(NA,NA,3); featfun(1,NA,2)``
``## [1] NA``
``## [1] NA``
``is.na(featfun(2,1,3)[1])``
``## [1] FALSE``
``is.na(featfun(2,1,3)[[1]])``
``## [1] FALSE FALSE``

``````prob_obsy <- function(vecy) {
len_y <- length(vecy)
vecvalue <- matrix(NA, len_y - 1, 2)
vecvalue[, 1] <- vecy[seq(1, len_y - 1)]
vecvalue[, 2] <- vecy[seq(2, len_y)]

sumvec <- vector("numeric", length = len_y - 1)
for (i in seq(len_y - 1)) {
result <- featfun(vecvalue[i, 1], vecvalue[i, 2], i + 1)
if (all(is.na(result[[1]]))) {
sum <- NA
} else {
sum <- 0
for (j in seq(length(result))) {
sum_ <- result[[j]][2] * result[[j]][1]
sum <- sum + sum_
}
}
sumvec[i] <- sum
}

if (all(is.na(sumvec))) {
sum1 <- NA
} else {
sum1 <- sum(sumvec, na.rm = TRUE)
}

#再补一个s特征函数, 针对第一个y元素的
sfeat_first <- featfun(yi = vecy[1], i = 1)
if (is.na(sfeat_first)) {
sum2 <- NA
} else {
sum2 <- sfeat_first[[1]][2]*sfeat_first[[1]][1]
}

if (!is.na(sum1) & !is.na(sum2)) {
expsum <- paste0("exp(", sum1 + sum2, ")")
return(c(sum1,sum2,expsum))
} else if (!is.na(sum1) & is.na(sum2)) {
expsum <- paste0("exp(", sum1, ")")
return(c(sum1,sum2,expsum))
} else if (is.na(sum1) & !is.na(sum2)) {
expsum <- paste0("exp(", sum2, ")")
return(c(sum1,sum2,expsum))
} else {
return("exp(NA)")
}
}

# 例11.1
prob_obsy(c(1,2,2))``````
``## [1] "2.2"      "1"        "exp(3.2)"``
``````# 测试其他取值
prob_obsy(c(1,1,1)) # 0.6+1+0.8+0.8``````
``## [1] "2.1"      "1"        "exp(3.1)"``
``prob_obsy(c(2,1,2)) # 1+1+0.5+0.8+0.5``
``## [1] "3.3"      "0.5"      "exp(3.8)"``
``prob_obsy(c(NA,2,1))``
``## [1] "2.3"      NA         "exp(2.3)"``
``prob_obsy(c(1,NA,2))``
``## [1] "0.5"      "1"        "exp(1.5)"``
``prob_obsy(c(1,2,NA))``
``## [1] "1.5"      "1"        "exp(2.5)"``
``prob_obsy(c(1,NA,NA))``
``## [1] NA       "1"      "exp(1)"``
``prob_obsy(c(NA,NA,NA))``
``## [1] "exp(NA)"``

``````WF <- function(fv) {
lenfv <- length(fv)
vecfv <- unlist(fv)
sum <- 0
for (kk in seq(from = 1, by = 2, length = lenfv)) {
sum_ <- vecfv[kk]*vecfv[kk + 1]
sum <- sum + sum_
}
sum
}

#测试WF()
WF(featfun(1,1,2))``````
``## [1] 1.3``
``WF(featfun(1,2,3))``
``## [1] 1.5``
``WF(NA)``
``## [1] NA``

delta_matxFun()返回值是一个长度为2的列表，分别是delta_matx(存放不同位置取值不同标记时特征函数与对应权重的乘积)，delta_max_idx(存放取了max()中的哪个元素，路径回溯之用)

``````delta_matxFun <- function(num_location, level_label) {

delta_matx <- matrix(NA, num_location, level_label)
delta_max_idx <- matrix(NA, num_location, level_label)

#维特比算法
#初始化
for (j in 1:level_label) {
delta_matx[1, j] <- WF(featfun(yi = j, i = 1))
delta_max_idx[1, j] <- 0
}
#递推
for (k in as.numeric(2:num_location)) {  #注意, as.numeric()
for (i in as.numeric(1:level_label)) {  #注意, as.numeric()
#枚举delta_matx[k-1, ]及featfun()
delta_matx[k, i] <- max(
delta_matx[k - 1, 1] + WF(featfun(1, i, k)),
delta_matx[k - 1, 2] + WF(featfun(2, i, k))
)
delta_max_idx[k, i] <- which.max(
c(delta_matx[k - 1, 1] + WF(featfun(1, i, k)),
delta_matx[k - 1, 2] + WF(featfun(2, i, k)))
)
}
}
list(delta_matx = delta_matx,
delta_max_idx = delta_max_idx)
}

delta_matxFun(3, 2)``````
``````## \$delta_matx
##      [,1] [,2]
## [1,]  1.0  0.5
## [2,]  2.3  2.5
## [3,]  4.3  3.8
##
## \$delta_max_idx
##      [,1] [,2]
## [1,]    0    0
## [2,]    1    1
## [3,]    2    1``````
``````# #返回最优路径
# best_path
bestPath <- function(num_location, level_label) {
delta_matx_list <- delta_matxFun(num_location, level_label)
best_path <- vector("numeric", length = num_location)
best_path[num_location] <- which.max(delta_matx_list\$delta_matx[num_location, ])

for (t in (num_location - 1):1) {
best_path[t] <- delta_matx_list\$delta_max_idx[t + 1, best_path[t + 1]]
}
best_path
}

# 例11.3
bestPath(3, 2)``````
``## [1] 1 2 1``
``````######## 11.4 page 210
M1 <- matrix(c(0,0.5,0,0.5),2,2); M1``````
``````##      [,1] [,2]
## [1,]  0.0  0.0
## [2,]  0.5  0.5``````
``M2 <- matrix(c(0.3,0.7,0.7,0.3),2,2); M2``
``````##      [,1] [,2]
## [1,]  0.3  0.7
## [2,]  0.7  0.3``````
``M3 <- matrix(c(0.5,0.6,0.5,0.4),2,2); M3``
``````##      [,1] [,2]
## [1,]  0.5  0.5
## [2,]  0.6  0.4``````
``M4 <- matrix(c(0,0,1,1),2,2); M4``
``````##      [,1] [,2]
## [1,]    0    1
## [2,]    0    1``````
``````# 在每个位置(1,2,3,4), 均有两个标签(值)
rownames(M1) <-
rownames(M2) <-
rownames(M3) <-
rownames(M4) <- c("one", "two")

colnames(M1) <-
colnames(M2) <-
colnames(M3) <-
colnames(M4) <- c("one", "two")

# 如果start = 2, stop = 2
# 例如, 对于路径path = c(2,1,1,1,2), 有
path <- c(2,1,1,1,2)
nonstd_prob <-
M1[path[1], path[2]] *
M2[path[2], path[3]] *
M3[path[3], path[4]] *
M4[path[4], path[5]]
nonstd_prob  ``````
``## [1] 0.075``
``````# 或者这样算
newpath <- sjmisc::rec(path, rec = "1 = one; 2 = two")
nonstd_prob1 <-
M1[newpath[1], newpath[2]] *
M2[newpath[2], newpath[3]] *
M3[newpath[3], newpath[4]] *
M4[newpath[4], newpath[5]]
nonstd_prob1  ``````
``## [1] 0.075``
``````# 所以, 尝试写一个函数来计算特定路径(作为输入)的非规范化概率
nonstd_prob <- function(Matx1=M1, Matx2=M2,
Matx3=M3, Matx4=M4, path) {
M1 <- Matx1
M2 <- Matx2
M3 <- Matx3
M4 <- Matx4
nonstd_prob <-
M1[path[1], path[2]] *
M2[path[2], path[3]] *
M3[path[3], path[4]] *
M4[path[4], path[5]]

return(nonstd_prob)
}

# 枚举多条路径,
# 这里是除了起点和终点的其他3个节点的标签(值)
path_df <- expand.grid(1:2, 1:2, 1:2)
path_df``````
``````##   Var1 Var2 Var3
## 1    1    1    1
## 2    2    1    1
## 3    1    2    1
## 4    2    2    1
## 5    1    1    2
## 6    2    1    2
## 7    1    2    2
## 8    2    2    2``````
``````# 以start = 2为起点, stop = 2为终点
expand <- function(begin=2, vec, end=2) {
return(unlist(c(begin,vec,end)))
}

prob <- vector("numeric", length = nrow(path_df))
for (i in seq(nrow(path_df))) {
path_ <- expand(vec = path_df[i, ])
prob[i] <- nonstd_prob(path = path_)
}

prob; max(prob)``````
``## [1] 0.075 0.175 0.210 0.090 0.075 0.175 0.140 0.060``
``## [1] 0.21``
``````idx_max <- which.max(prob)
expand(vec = path_df[idx_max, ])``````
``````##      Var1 Var2 Var3
##    2    1    2    1    2``````
``````## 规范化因子
M <- M1 %*% M2 %*% M3 %*% M4
# 其中M[2, 2] 是所有(start = 2为起点, stop = 2为终点) 路径的规范化因子
M[2, 2] == sum(prob)``````
``## [1] TRUE``
``````//  chap11.cpp
#include <Rcpp.h>
using namespace Rcpp;

// [[Rcpp::plugins("cpp11")]]

// [[Rcpp::export]]
List featfun_cpp(int yi_1, int yi, int k) {

IntegerVector v1 = {1,2,2}, v2 = {1,2,3}, v3 = {1,1,2},
v4 = {2,1,3}, v5 = {2,1,2}, v6 = {2,2,3},
v7 = {  1,1}, v8 = {  2,1}, v9 = {  2,2},
v10 = {  1,2}, v11 = {  1,3}, v12 = {  2,3};

List all_fea = List::create(
_["v1"] = v1, _["v2"] = v2, _["v3"] = v3,
_["v4"] = v4, _["v5"] = v5, _["v6"] = v6,
_["v7"] = v7, _["v8"] = v8, _["v9"] = v9,
_["v10"] = v10, _["v11"] = v11, _["v12"] = v12
);

NumericVector weights = {1,1,0.5,1,1,0.2,1,0.5,0.5,0.8,0.8,0.5};

int idx1 = -1, idx2 = -1;

IntegerVector a1(3), a2(2);
a1 = {yi_1, yi, k};
a2 = {yi, k};

IntegerVector fea_i(3);
LogicalVector TF(3);
int len_fea_i = NA_INTEGER, len_a1 = NA_INTEGER;
bool len_equal = NA_LOGICAL, b1 = NA_LOGICAL, bothT = NA_LOGICAL;

for (int i = 0; i < all_fea.size(); ++i) {
fea_i = all_fea[i];
TF = fea_i == a1; //{2,2}=={2,2,4}?
len_fea_i = fea_i.size();
len_a1 = a1.size();
len_equal = len_fea_i == len_a1;
b1 = is_true(all(TF));
bothT = len_equal && b1;
if (bothT) {
idx1 = i;
//Rcpp::Rcout<<"idx1 = "<<idx1<<"\n";
break;
}
}

IntegerVector fea_j(3);
LogicalVector TF2(3);
int len_fea_j = NA_INTEGER, len_a2 = NA_INTEGER;
bool len_equal_ = NA_LOGICAL, b2 = NA_LOGICAL, bothT_ = NA_LOGICAL;

for (int j = 0; j < all_fea.size(); ++j) {
fea_j = all_fea[j];
TF2 = fea_j == a2;
len_fea_j = fea_j.size();
len_a2 = a2.size();
len_equal_ = len_fea_j == len_a2;
b2 = is_true(all(TF2));
bothT_ = len_equal_ && b2;
if (bothT_) {
idx2 = j;
//Rcpp::Rcout<<"idx2 = "<<idx2<<"\n";
break;
}
}

List retLi(2);
double d1 = NA_REAL, d2 = NA_REAL;
NumericVector c1(2), c2(2), c_na(2);

if (idx1 != -1 && idx2 != -1) {
d1 = weights[idx1], d2 = weights[idx2];
c1 = {1, d1};
c2 = {1, d2};
retLi = List::create(_["c1"] = c1, _["c2"] = c2);
} else if (idx1 != -1 && idx2 == -1) {
d1 = weights[idx1];
c1 = {1, d1};
retLi = List::create(_["c1"] = c1);
} else if (idx1 == -1 && idx2 != -1) {
d2 = weights[idx2];
c2 = {1, d2};
retLi = List::create(_["c2"] = c2);
} else {
c_na = {NA_REAL, NA_REAL};
retLi = List::create(_["c_na"] = c_na);
}
return retLi;
}

/*** R
featfun_cpp(1,2,3)
featfun_cpp(2,1,3)
featfun_cpp(1,1,2)
featfun_cpp(NA_integer_, 2, 2)
featfun_cpp(NA_integer_, 1, 3)
featfun_cpp(2,2,4)
featfun_cpp(NA_integer_, NA_integer_, 2)
featfun_cpp(NA_integer_, NA_integer_, 3)
featfun_cpp(1, NA_integer_, 2)
*/

// [[Rcpp::export]]
void prob_obsy_cpp(IntegerVector vecy) {

int len_y = vecy.size();
IntegerMatrix vecvalue(len_y - 1, 2);
IntegerVector idx1 = seq(1, len_y - 1), idx2 = seq(2, len_y);
IntegerMatrix::Column col0 = vecvalue(_, 0), col1 = vecvalue(_, 1);
IntegerVector vecy1 = vecy[idx1 - 1], vecy2 = vecy[idx2 - 1];  // index error
vecvalue(_, 0) = vecy1;
vecvalue(_, 1) = vecy2;

int para1 = NA_INTEGER, para2 = NA_INTEGER;
List result(2);
NumericVector result_0(2);
LogicalVector b1(2);
bool b11 = NA_LOGICAL;
double summ = NA_REAL, sum_ = NA_REAL;
NumericVector sumvec(len_y - 1);
NumericVector result_elem(2);

for (int i = 0; i < len_y - 1; ++i) {
para1 = vecvalue(i, 0);
para2 = vecvalue(i, 1);
result = as<List>(featfun_cpp(para1,para2,i+2));

result_0 = result[0];
b1 = is_na(result_0);
b11 = is_true(all(b1));

if (b11) {
summ = NA_REAL;
} else {
summ = 0.0;
for (int j = 0; j < result.length(); ++j) {
result_elem = result[j];
sum_ = result_elem[1] * result_elem[0];
summ = summ + sum_;
}
}
sumvec[i] = summ;
}

LogicalVector b2 = is_na(sumvec);
bool b21 = is_true(all(b2));
NumericVector sum1(1);
if (b21) {
sum1[0] = NA_REAL;
} else {
NumericVector sumvec_noNA = sumvec[!is_na(sumvec)];
sum1[0] = sum(sumvec_noNA);
}

List sfeat_first(1);
int yf = vecy[0];
sfeat_first = as<List>(featfun_cpp(NA_INTEGER, yf, 1));
NumericVector sfeat_first_0 = sfeat_first[0];
LogicalVector b3 = is_na(sfeat_first_0);
bool b31 = is_true(all(b3));
NumericVector sum2(1);
if (b31) {
sum2[0] = NA_REAL;
} else {
sum2[0] = sfeat_first_0[1] * sfeat_first_0[0];
}

LogicalVector b4 = is_na(sum1);
LogicalVector b5 = is_na(sum2);
bool b41 = is_true(all(b4));
bool b51 = is_true(all(b5));

double sum12 = NA_REAL;
if (b41 == false && b51 == false) {
sum12 = sum1[0] + sum2[0];
Rcpp::Rcout<<sum1<<", "<<sum2<<", "
<<"exp("<<sum12<<")"<<"\n";
} else if (b41 == false && b51 == true) {
Rcpp::Rcout<<sum1<<", "<<sum2<<", "
<<"exp("<<sum1<<")"<<"\n";
} else if (b41 == true && b51 == false) {
Rcpp::Rcout<<sum1<<", "<<sum2<<", "
<<"exp("<<sum2<<")"<<"\n";
} else {
Rcpp::Rcout<<"exp(NA)"<<"\n";
}
}

/***R
prob_obsy_cpp(c(1,2,2))
prob_obsy_cpp(c(1,1,1)) # 0.6+1+0.8+0.8
prob_obsy_cpp(c(2,1,2)) # 1+1+0.5+0.8+0.5
prob_obsy_cpp(c(NA,2,1))
prob_obsy_cpp(c(1,NA,2))
prob_obsy_cpp(c(1,2,NA))
prob_obsy_cpp(c(1,NA,NA))
prob_obsy_cpp(c(NA,NA,NA))
prob_obsy_cpp(sample(1:3,100,replace = TRUE))

*/

// [[Rcpp::export]]
double WF_cpp(int para1, int para2, int k) {

List fv_res = as<List>(featfun_cpp(para1, para2, k));

int lenfv = fv_res.size();
NumericVector vecfv(1);

// unlist() in Rcpp
for (int i = 0; i < lenfv; ++i) {
NumericVector fv_res_ele = fv_res[i];
for (int j = 0; j < fv_res_ele.size(); ++j) {
vecfv.push_back(fv_res_ele[j]);
}
}
vecfv.erase(0);

int vecfv_len = vecfv.size();
IntegerVector intseq = seq(1, vecfv_len);
IntegerVector vecfv_idx_half(vecfv_len / 2);

for (int L = 0; L < vecfv_len / 2; ++L) {
vecfv_idx_half[L] = intseq[L*2];
}
vecfv_idx_half = vecfv_idx_half - 1; // c(1,3) - 1 == c(0,2)

int idx = 0;
double Sum = 0.0;
double Sum_ = 0.0;
for (int kk = 0; kk < vecfv_idx_half.size(); ++kk) {
idx = vecfv_idx_half[kk];
Sum_ = vecfv[idx] * vecfv[idx + 1];
Sum = Sum + Sum_;
}
return Sum;
}

/***R
WF_cpp(1,1,2)
WF_cpp(1,2,3)
WF_cpp(1,NA_integer_, NA_integer_)
*/

// [[Rcpp::export]]
List delta_matxFun_cpp(int num_location, int level_label) {
NumericMatrix delta_matx(num_location, level_label);
IntegerMatrix delta_max_idx(num_location, level_label);

double WF_cpp_res = NA_REAL;
for (int j = 0; j < level_label; ++j) {
WF_cpp_res = WF_cpp(NA_INTEGER, j+1, 1);
delta_matx(0, j) = WF_cpp_res;
delta_max_idx(0, j) = 0;
}

double b1 = NA_REAL, b2 = NA_REAL, b12 = NA_REAL,
b3 = NA_REAL, b4 = NA_REAL, b34 = NA_REAL,
max_b1234 = NA_REAL;

int idx = NA_INTEGER;
NumericVector b1234(2);

for (int k = 1; k < num_location; ++k) {
for (int i = 0; i < level_label; ++i) {
b1 = delta_matx(k - 1, 0);
b2 = WF_cpp(1, i+1, k+1);
b12 = b1 + b2;

b3 = delta_matx(k - 1, 1);
b4 = WF_cpp(2, i+1, k+1);
b34 = b3 + b4;

b1234 = {b12, b34};

max_b1234 = max(b1234);
delta_matx(k, i) = max_b1234;

idx = which_max(b1234);
delta_max_idx(k, i) = idx;
}
}

return List::create(_["delta_matx"] = delta_matx,
_["delta_max_idx"] = delta_max_idx);
}

/***R
delta_matxFun_cpp(3,2)
*/

// [[Rcpp::export]]
IntegerVector bestPath_cpp(int num_location, int level_label) {

List res = as<List>(delta_matxFun_cpp(num_location,
level_label));
IntegerVector best_path(num_location);

NumericMatrix res_dmx = res["delta_matx"];
NumericVector res_dmx_sub = res_dmx(num_location-1,_);
int idx = which_max(res_dmx_sub);
best_path[num_location-1] = idx; //

IntegerMatrix res_dmi = res["delta_max_idx"];
int bp = NA_INTEGER;
for (int t = 1; t >= 0; --t) {
bp = best_path[t+1];
best_path[t] = res_dmi(t+1, bp); //
}
best_path = best_path + 1; // idx in R and idx in C++
return best_path;
}

/***R
bestPath_cpp(3,2)
*/``````