# 11 Basic: control flow

Control flow 可以分为 conditional flow （条件流）和 loop （循环）两大类。

（本节内所有的流程图都来自 GeeksforGeeks）

## 11.1 Conditional flow

### 11.1.1 if else

if (cond) {
cons.expr
} else {
alt.expr
}

age <- 12
# age <- c(11, 12)
if (age < 12) {
"elementary school"
} else {
"middle school"
}
#> [1] "middle school"

if else 有以下 4 种情况：

#### 11.1.1.1 Single cond without else

if (cond) {
expr
}

expr和cond都很简短时，可以进一步简化为：

if (cond) expr
if (cond) cons.expr else alt.expr

input <- 1
if (input == 1) "yes"
#> [1] "yes"
if (input == 2) "yes" else "no"
#> [1] "no"

#### 11.1.1.2 Multiple conds with else if

if (cond1) {
cond1.expr
} else if (cond2) {
cond2.expr
} else if (cond3) {
cond3.expr
}

input <- 1
if (input == 1) {
"a"
} else if (input == 2) {
"b"
} else if (input == 3) {
"c"
}
#> [1] "a"

input <- 4
if (input == 1) {
"a"
} else if (input == 2) {
"b"
} else if (input == 3) {
"c"
} else {
stop(paste("Exception encountered,", input, "does not satisfy any condition!"))
}
#> Error in eval(expr, envir, enclos): Exception encountered, 4 does not satisfy any condition!

#### 11.1.1.3 Complex cond

if结构中的cond可以是比较复杂的关系 + 逻辑运算的结果，具体又分为两种情况：

1. 结果是长度为 1 的 logical vector
# `&&` and `||` are the scalar, short-circuiting operators intended for `if`;
# `&` and `|` work element-wise and belong in vector masks.
# `&&` binds tighter than `||`; the parentheses only make the grouping explicit:
# the condition is TRUE when a lies in (-3, -1) or in (1, 3).
a <- 0
if ((a > -3 && a < -1) || (a > 1 && a < 3)) {
  "yes"
}
2. 结果是长度大于 1 的 logical vector

cond的结果是长度大于 1 的 logical vector，在 4.2.0 以下版本的 R 中会得到一个 warning，例如

rnd <- runif(3)
rnd
if (rnd > 0.95) {
"you are awfully lucky"
}

[1] 0.5021010 0.8256499 0.1971661
Warning message:
In if (rnd > 0.95) { :
the condition has length > 1 and only the first element will be used

而在 4.2.0 及以上版本的 R 中则会直接报错：

Error in if (rnd > 0.95) { : the condition has length > 1

解决方法是用all()或any()把cond转换成长度为 1 的 logical vector：

rnd <- runif(3)
rnd
#> [1] 0.9956224 0.6844746 0.5258825
if (all(rnd > 0.95)) {
"you are awfully lucky"
} else if (any(rnd > 0.95)) {
"you are lucky"
} else {
"sorry, you are out of luck"
}
#> [1] "you are lucky"

#### 11.1.1.4 Vectorized if else: ifelse()

ifelse(test, yes, no)

m1 <- matrix(sample(1:100, 16), 4)
m1
#>      [,1] [,2] [,3] [,4]
#> [1,]   27   80   79   87
#> [2,]   56   58   69   99
#> [3,]   89   42   53   44
#> [4,]   41   35   50    8
print("检查每个元素是奇数还是偶数")
#> [1] "检查每个元素是奇数还是偶数"
ifelse(m1 %% 2 == 0, "even", "odd")
#>      [,1]   [,2]   [,3]   [,4]
#> [1,] "odd"  "even" "odd"  "odd"
#> [2,] "even" "even" "odd"  "odd"
#> [3,] "odd"  "even" "odd"  "even"
#> [4,] "odd"  "odd"  "even" "even"
# 等价于 ifelse(m1 %% 2 == 0, matrix("even", 4), matrix("odd", 4))
print("将所有奇数变成偶数，偶数维持不变")
#> [1] "将所有奇数变成偶数，偶数维持不变"
ifelse(m1 %% 2 == 0, m1, m1 + 1)
#>      [,1] [,2] [,3] [,4]
#> [1,]   28   80   80   88
#> [2,]   56   58   70  100
#> [3,]   90   42   54   44
#> [4,]   42   36   50    8

### 11.1.2 switch (optional)

switch结构可以视作是包含多个cond的if结构的等价写法，其基本语法结构如下：

switch (object,
case1 = expr1,
case2 = expr2
)

# switch() matches the labels ("1", "2", "3") only when its first argument is
# a character string. A numeric first argument selects the alternative by
# POSITION and ignores the labels, and an out-of-range number returns NULL
# invisibly instead of reaching the fallback. Converting the input to
# character makes the labels and the unnamed fallback behave as written.
input <- 1
switch(as.character(input),
  "1" = print("a"),
  "2" = print("b"),
  "3" = print("c"),
  stop("No pre-set output for current input")
)
#> [1] "a"

switch结构也可以通过 Snippets 快速键入：

## 11.2 Loop

Loop 结构包括三种：for、while、repeat。

### 11.2.1 for

for(var in seq) {
expr
}

for (i in c("A", "d", "r")) {
print(i)  # 必须用 print 才能输出
}
#> [1] "A"
#> [1] "d"
#> [1] "r"

for结构也可以通过 Snippets 快速键入：

for结构有以下使用技巧/注意事项：

#### 11.2.1.1 seq can be any object

# character
df <- data.frame(score_last = c(100, 88, 93), score_current = c(99, 96, 77))
for (i in names(df)) {
print(i)
print(mean(df[, i]))
}
#> [1] "score_last"
#> [1] 93.66667
#> [1] "score_current"
#> [1] 90.66667
# numeric
df <- data.frame(score_last = c(100, 88, 93), score_current = c(99, 96, 77))
for (i in 1:length(df)) {
print(i)
print(mean(df[, i]))
}
#> [1] 1
#> [1] 93.66667
#> [1] 2
#> [1] 90.66667
# data.frame: looping over a data.frame visits its columns one at a time,
# so in each iteration i is a whole column (a numeric vector here), as the
# printed output below shows.
df <- data.frame(score_last = c(100, 88, 93), score_current = c(99, 96, 77))
for (i in df) {
print(i)
print(mean(i))
}
#> [1] 100  88  93
#> [1] 93.66667
#> [1] 99 96 77
#> [1] 90.66667
# matrix: looping over a matrix visits its individual elements one by one
# (1, 2, ..., 16 here, i.e. down the first column, then the second, ...).
# cumsum_m1 accumulates the running total of the visited elements.
m1 <- matrix(1:16, 4, 4)
cumsum_m1 <- 0
for (i in m1) {
print(i)
cumsum_m1 <- cumsum_m1 + i
}
#> [1] 1
#> [1] 2
#> [1] 3
#> [1] 4
#> [1] 5
#> [1] 6
#> [1] 7
#> [1] 8
#> [1] 9
#> [1] 10
#> [1] 11
#> [1] 12
#> [1] 13
#> [1] 14
#> [1] 15
#> [1] 16
print(cumsum_m1)
#> [1] 136
# list: looping over a list yields one list element per iteration, whatever
# its type (an integer vector, a character string, and a nested list here).
l1 <- list(1:2, "a", list(1, 2, 3))
for (i in l1) {
print(i)
}
#> [1] 1 2
#> [1] "a"
#> [[1]]
#> [1] 1
#>
#> [[2]]
#> [1] 2
#>
#> [[3]]
#> [1] 3

#### 11.2.1.2 seq is a fixed anonymous object within for

seq的本质是一个短暂存在的anonymous object，随着for结构的开始而出现，随着for结构的结束而消亡。这也就意味着，如果使用一个已经存在的 object 作为seq，一旦for结构开始执行，所使用的seq就固定了，在for结构的内部改动该 object 不会改变seq，因为二者是两个完全不同的 objects。

vec_num <- 1:5
for (i in vec_num) {
vec_num <- 100
cat("i =", i, "vec_num =", vec_num, "\n")
}
#> i = 1 vec_num = 100
#> i = 2 vec_num = 100
#> i = 3 vec_num = 100
#> i = 4 vec_num = 100
#> i = 5 vec_num = 100

#### 11.2.1.3 Use seq_along()

seq_along(x)产生一个和x等长的自然数序列，起点为 1，终点为x的长度。

vec <- runif(3)
seq_along(vec)
#> [1] 1 2 3
vec <- NULL
seq_along(vec)
#> integer(0)

for结构的常见用途是重复某个操作一定次数，并且将这些重复操作所得结果储存，例如

means <- c(80, 90, 100)
out_colon <- vector("list", length(means))
for (i in 1:length(means)) {
out_colon[[i]] <- rnorm(10, means[[i]])
}
out_colon
#> [[1]]
#>  [1] 81.25748 79.27110 79.43598 80.06847 79.65719 80.22927
#>  [7] 80.12015 81.19501 79.91482 79.74794
#>
#> [[2]]
#>  [1] 89.79321 90.52250 91.62035 90.21092 89.23495 88.46115
#>  [7] 88.72513 90.88614 90.20188 90.15424
#>
#> [[3]]
#>  [1]  99.61752  98.20974 100.49171  98.48389 101.50276
#>  [6] 100.23364  99.78485  99.53711 100.08421  99.63447

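length(x)中的x长度大于 0 时，使用1:length(x)和使用seq_along(x)效果一样；但x长度为 0 时，1:length(x)得到的是c(1, 0)而不是空序列，for结构仍会执行并出错，而seq_along(x)返回integer(0)，for结构不会执行。先看长度大于 0 的情况：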

means <- c(80, 90, 100)
out_seqalong <- vector("list", length(means))
for (i in seq_along(means)) {
out_seqalong[[i]] <- rnorm(10, means[[i]])
}
out_seqalong
#> [[1]]
#>  [1] 81.91241 80.40467 80.59630 79.90342 79.75576 80.01332
#>  [7] 81.22786 80.47327 82.11356 79.95009
#>
#> [[2]]
#>  [1] 89.82772 91.18811 89.19908 89.86698 89.70423 88.48312
#>  [7] 90.70163 87.93544 90.93267 89.47287
#>
#> [[3]]
#>  [1] 100.34902  99.75869  99.00645  99.06292 100.31556
#>  [6]  97.65313  99.71816 100.48908  99.44960 100.07632

# Empty input: length(means) is 0, so 1:length(means) is 1:0, i.e. c(1, 0),
# and the loop body is still entered with i = 1.
means <- c()
# NOTE(review): this initializes out_seqalong, but the loop below writes to
# out_colon. Presumably a copy-paste slip; confirm which object was intended.
out_seqalong <- vector("list", length(means))
for (i in 1:length(means)) {
out_colon[[i]] <- rnorm(10, means[[i]])
}
#> Error in rnorm(10, means[[i]]): invalid arguments
# The loop stopped with an error before assigning anything, so out_colon
# still holds the values produced by the earlier example.
out_colon
#> [[1]]
#>  [1] 81.25748 79.27110 79.43598 80.06847 79.65719 80.22927
#>  [7] 80.12015 81.19501 79.91482 79.74794
#>
#> [[2]]
#>  [1] 89.79321 90.52250 91.62035 90.21092 89.23495 88.46115
#>  [7] 88.72513 90.88614 90.20188 90.15424
#>
#> [[3]]
#>  [1]  99.61752  98.20974 100.49171  98.48389 101.50276
#>  [6] 100.23364  99.78485  99.53711 100.08421  99.63447

length(c())
#> [1] 0
1:length(c())  # equivalent to 1:0
#> [1] 1 0

means <- c()
out_seqalong <- vector("list", length(means))
seq_along(means)
#> integer(0)
for (i in seq_along(means)) {
out_seqalong[[i]] <- rnorm(10, means[[i]])
}
out_seqalong
#> list()

#### 11.2.1.4 var is a named object

与seq不同，var不是一个 anonymous object，它随着for结构的开始而出现，但不会随着for结构的结束而消亡，所以有两个特点：

• var在当次 loop 内可以更改，但是不会影响下次 loop。
for (i in 1:5) {
cat("the var used in the current loop is", i, "\n")
i <- i + 5
cat("the var now has been changed to", i, "\n")
}
#> the var used in the current loop is 1
#> the var now has been changed to 6
#> the var used in the current loop is 2
#> the var now has been changed to 7
#> the var used in the current loop is 3
#> the var now has been changed to 8
#> the var used in the current loop is 4
#> the var now has been changed to 9
#> the var used in the current loop is 5
#> the var now has been changed to 10
• var在for结构执行完毕后会储存在该for结构所在的 environment 里，其 value 为seq的最后一个 element。
for (i in 1:5) {
}
i
#> [1] 5

#### 11.2.1.5 Use var as subscript

#  前序代码
library(openxlsx)
data_ori <- read.xlsx("F:/Nutstore backup/R/codes/RBA/data/Arena of Valor_midterm dataset.xlsx")
#>     角色名 职业 移速 攻击范围 生存能力 攻击伤害 上手难度
#> 1       曜 战士  370     近程        5        7        6
#> 2     西施 法师  360     远程        6        8        6
#> 3     嫦娥 法师  395     远程       10       10        5
#> 4     盘古 战士  380     近程       10        8        6
#> 5       瑶 辅助  360     远程       10        7        5
#> 6 上官婉儿 法师  396     远程        5        7        6
#>   Pick场数 总击杀 场均击杀 场均死亡 场均助攻 Ban场数 胜率
#> 1       11     16     1.45     2.00     3.45       1 0.45
#> 2      175    222     1.27     1.62     6.90     161 0.54
#> 3       64    168     2.58     1.86     5.11      80 0.46
#> 4       95    248     2.61     2.61     5.41      50 0.49
#> 5       53     24     0.45     2.49     7.19      66 0.40
#> 6       55    163     2.96     3.24     3.78      74 0.45
#>   Ban率 热度
#> 1  0.00 0.01
#> 2  0.20 0.42
#> 3  0.10 0.18
#> 4  0.06 0.18
#> 5  0.08 0.15
#> 6  0.09 0.16
data_30 <- data_ori[c(-7, -13, -15, -16)]
data_30 <- data_30[data_30[7] >= 30, ]
# Mean total kills per hero class, first written out by hand (one mean() call
# per class) and then with a for loop that uses the loop variable as subscript.
type_hero <- names(table(data_30$职业))
ave_kill_1 <- c(
  mean(data_30$"总击杀"[data_30$职业 == type_hero[1]]),
  mean(data_30$"总击杀"[data_30$职业 == type_hero[2]]),
  mean(data_30$"总击杀"[data_30$职业 == type_hero[3]]),
  mean(data_30$"总击杀"[data_30$职业 == type_hero[4]]),
  mean(data_30$"总击杀"[data_30$职业 == type_hero[5]]),
  mean(data_30$"总击杀"[data_30$职业 == type_hero[6]])
)
# Same computation rewritten with a for loop
ave_kill_2 <- rep(0, length(type_hero))
for (i in 1:length(type_hero)) {
  ave_kill_2[i] <- mean(data_30$"总击杀"[data_30$"职业" == type_hero[i]])
}
ave_kill_1 == ave_kill_2
#> [1] TRUE TRUE TRUE TRUE TRUE TRUE

但凡是采用这个使用技巧时，如果for结构里发现有语句是通过subscript取子集，但又没使用该for结构中的var作为下标，很有可能就是出错了。

例 1：

rm(list=ls())
set.seed(1)
J<-1000
I<-30
K<-30
D<-1.7
X<-matrix(NA,J,I)
P<-matrix(NA,J,I)
theta<-rnorm(J,0,1)
b<-rnorm(I,0,1)
theta[theta>3]<-3
# NOTE(review): without a space, `theta<-3` parses as the assignment
# `theta <- 3`, not the comparison `theta < -3` (same for `b<-3` below).
# Left as in the original example; confirm whether this is intended.
theta[theta<-3]<- -3
b[b>3]<-3
b[b<-3]<- -3
for(j in 1:J){
  for(i in 1:I){
    P[j,i]<-1/(1+exp(-D*(theta[j]-b[i])))
    r<-runif(1,0,1)
    if(P[j,i]<r){
      X[j,i]<-0
    }else{
      X[j,i]<-1
    }
  }
}
theta_k<-seq(-3,3,length.out=K)
theta_end<-matrix(NA,J,1)
L_k<-matrix(NA,K,1)
for(j in 1:J){
  for(k in 1:K){
    p_k<-1/(1+exp(-D*(theta_k[k]-b)))
    for(i in 1:I){
      p_k[i]<-ifelse(X[j,i]==0, 1-p_k[i], p_k[i])
      L_k[k]<-prod(p_k)
    }
    fenzi<-sum(theta_k*L_k*((1/sqrt(2*pi))*exp(-(theta_k)^2/2)))
    fenmu<-sum(L_k*((1/sqrt(2*pi))*exp(-(theta_k)^2/2)))
  }
  theta_end[j]<-fenzi/fenmu
}
print(mean(abs(theta_end-theta)))

例 2：

# Hello everyone : )
#
#
#
# I was trying to write a function for evaluating the p-values of the t-test of a lm model, I know is a little bit silly and probably useless but I want to practice.
#
# The issue here is that it only evaluates the first variable.
#
# Here is the code:

#Data
library(ISLR2)
Auto = tibble(Auto)

#Model
lm.fit = lm(mpg ~ horsepower, data = Auto)

#Function for evaluate p-values
tStest = function(x) {
  x = as.numeric(x)
  a = rep(0,length(x))
  for(i in seq_along(x)) {
    if (x[i] > 0.025) {
      a[i] = 'Accept Ho'
    } else {
      a = 'Reject Ho'
    }
  }
  print(a)
}

pv = summary(lm.fit)$coefficients[, 4] #p-values

tStest(pv) #only returns one value

#But it works with a simple vector

v = c(1,2,3)

tStest(v)

# Does anyone know where is the problem? Also I'm interested in other approaches to achieve the same objective
#
# Sorry about my broken english, and thank you in advance

#### 11.2.1.6 Initialize output object

library(openxlsx)
data_ori <- read.xlsx("F:/Nutstore backup/R/codes/RBA/data/Arena of Valor_midterm dataset.xlsx")
data_30 <- data_ori[c(-7, -13, -15, -16)]
data_30 <- data_30[data_30[7] >= 30, ]
type_hero <- names(table(data_30$"职业"))
# NOTE(review): ave_kill_2 is not created before the loop in this snippet.
# Presumably deliberate, to show why the output object must be initialized
# first (e.g. ave_kill_2 <- rep(0, length(type_hero))); confirm.
for (i in 1:length(type_hero)) {
  ave_kill_2[i] <- mean(data_30$"总击杀"[data_30$"职业" == type_hero[i]])
}

#### 11.2.1.7 Alter the process of loop structure

• next：跳过当前 loop
for (i in 1:3) {
if (i == 2) next
print(i)
}
#> [1] 1
#> [1] 3
• break：跳出当前 loop 结构
for (i in 1:3) {
if (i == 3) break
print(i)
}
#> [1] 1
#> [1] 2

next和break适用于所有三种 loop 结构。

#### 11.2.1.9 The best scenario to use for

for结构最适合的任务情境应当是前一次 loop 和后一次 loop 有依赖关系，这种情况只有按顺序执行的 loop 结构才能够处理。不同次 loop 之间没有任何联系彼此独立时，都可以有替代for结构的写法。

Loops 间存在联系的示例：

num_ite <- 20
a <- rep(0, num_ite)
for (i in 1:num_ite) {
if (i != 1) {
a[i] <- a[i - 1] + i
}
}
a
#>  [1]   0   2   5   9  14  20  27  35  44  54  65  77  90 104
#> [15] 119 135 152 170 189 209

### 11.2.2 while

while结构可以视作是for+if

while(cond) {
expr
}

while结构执行前，当cond的执行结果为TRUE时，才会进入while结构，否则会直接跳过。进入while结构后，执行expr，然后会检查cond的执行结果是否为TRUE，是则继续下一次 loop，否则跳出while结构。

# forever loop
i <- 1
while (i <= 5) {
print(i)
}
# make sure:
#   1. cond is modified within each loop
#   2. loop can always be jumped out of
set.seed(123)
rnd_unif <- runif(1, -1, 1)
cum_sum_rnd_unif <- 0
count <- 1
while (cum_sum_rnd_unif <= 1) {
cum_sum_rnd_unif <- cum_sum_rnd_unif + rnd_unif
print(cum_sum_rnd_unif)
count <- count + 1
if (count > 20) break
}
#> [1] -0.424845
#> [1] -0.8496899
#> [1] -1.274535
#> [1] -1.69938
#> [1] -2.124225
#> [1] -2.54907
#> [1] -2.973915
#> [1] -3.39876
#> [1] -3.823605
#> [1] -4.24845
#> [1] -4.673295
#> [1] -5.09814
#> [1] -5.522984
#> [1] -5.947829
#> [1] -6.372674
#> [1] -6.797519
#> [1] -7.222364
#> [1] -7.647209
#> [1] -8.072054
#> [1] -8.496899
print(count)
#> [1] 21

while结构也可以通过 Snippets 快速键入：

### 11.2.3 repeat

repeat结构同样也可以视作是for+if

repeat {
expr
}

i <- 1
repeat {
print(i)
i <- i + 1
if (i > 5) {
break
}
}
#> [1] 1
#> [1] 2
#> [1] 3
#> [1] 4
#> [1] 5

set.seed(123)
rnd_unif <- runif(1, -1, 1)
cum_sum_rnd_unif <- 0
n_loop <- 20
for (i in 1:n_loop) {
cum_sum_rnd_unif <- cum_sum_rnd_unif + rnd_unif
print(cum_sum_rnd_unif)
}
#> [1] -0.424845
#> [1] -0.8496899
#> [1] -1.274535
#> [1] -1.69938
#> [1] -2.124225
#> [1] -2.54907
#> [1] -2.973915
#> [1] -3.39876
#> [1] -3.823605
#> [1] -4.24845
#> [1] -4.673295
#> [1] -5.09814
#> [1] -5.522984
#> [1] -5.947829
#> [1] -6.372674
#> [1] -6.797519
#> [1] -7.222364
#> [1] -7.647209
#> [1] -8.072054
#> [1] -8.496899

## 11.3 Recap

1. 不论是使用if结构还是switch结构，在构建包含多个互斥conds 的结构时都需要将所有的情况都考虑到；
2. if结构中，cond为 logical vector 时，根据需要运用all()（相当于&）和any()（相当于|）；
3. ifelse()适用于快速检验 vector、matrix、array、data.frame 中的所有 elements，是if else的向量化写法；
4. for结构中，输出结果需初始化；
5. loop 结构中，next跳过当前 loop，break跳出整个 loop 结构；
6. 必须要使用 loop 结构时，尽量使用for结构。