## 1.5 group_by() combined with other functions

group_by() 与 mutate() 函数结合使用时，摘要函数（summary functions，如 mean()、median() 等）将自动以分组为单位进行计算；一些非摘要函数也会受到分组的影响，如偏移函数 lead()、lag()，排秩函数 min_rank()、row_number()，以及累计函数 cumsum() 等。而普通的算术运算符 +、-，逻辑比较运算符 <、==，对数运算 log()，以及整除与取余运算 %/%、%% 等将无视分组。

arrange() 默认无视分组；设置 .by_group = TRUE 可以让它先按分组变量排序，再在组内排序。

# mutate() before and after grouping.
# Example data: x is 1..9 and `group` labels each consecutive run of three
# rows as "a", "b" and "c".
df <- tibble(
  x = 1:9,
  group = rep(c("a", "b", "c"), each = 3)
)

# Grouped version of the same data, one group per value of `group`.
gf <- group_by(df, group)

# Summary functions such as mean() are evaluated per group on grouped data.
# Ungrouped: every row receives the mean of the whole column.
df %>%
  mutate(mean(x))
#> # A tibble: 9 x 3
#>       x group mean(x)
#>   <int> <chr>     <dbl>
#> 1     1 a             5
#> 2     2 a             5
#> 3     3 a             5
#> 4     4 b             5
#> 5     5 b             5
#> 6     6 b             5
#> # ... with 3 more rows
# Grouped: every row receives the mean of its own group.
gf %>%
  mutate(mean(x))
#> # A tibble: 9 x 3
#> # Groups:   group [3]
#>       x group mean(x)
#>   <int> <chr>     <dbl>
#> 1     1 a             2
#> 2     2 a             2
#> 3     3 a             2
#> 4     4 b             5
#> 5     5 b             5
#> 6     6 b             5
#> # ... with 3 more rows
# Arithmetic operators +, -, *, /, ^ are not affected by group_by():
# the ungrouped and grouped results below are identical row by row.
df %>%
  mutate(y = x + 2)
#> # A tibble: 9 x 3
#>       x group     y
#>   <int> <chr> <dbl>
#> 1     1 a         3
#> 2     2 a         4
#> 3     3 a         5
#> 4     4 b         6
#> 5     5 b         7
#> 6     6 b         8
#> # ... with 3 more rows
gf %>%
  mutate(z = x + 2)
#> # A tibble: 9 x 3
#> # Groups:   group [3]
#>       x group     z
#>   <int> <chr> <dbl>
#> 1     1 a         3
#> 2     2 a         4
#> 3     3 a         5
#> 4     4 b         6
#> 5     5 b         7
#> 6     6 b         8
#> # ... with 3 more rows
# The offset functions lag() and lead() respect the grouping set by
# group_by(): on grouped data they only return values from within the same
# group, so the first lag() and the last lead() of every group are NA.
# Ungrouped: the offsets run across the whole column.
df %>%
  mutate(
    lag_x = lag(x),
    lead_x = lead(x)
  )
#> # A tibble: 9 x 4
#>       x group lag_x lead_x
#>   <int> <chr> <int>  <int>
#> 1     1 a        NA      2
#> 2     2 a         1      3
#> 3     3 a         2      4
#> 4     4 b         3      5
#> 5     5 b         4      6
#> 6     6 b         5      7
#> # ... with 3 more rows
# Grouped: the offsets restart at every group boundary.
gf %>%
  mutate(
    lag_x = lag(x),
    lead_x = lead(x)
  )
#> # A tibble: 9 x 4
#> # Groups:   group [3]
#>       x group lag_x lead_x
#>   <int> <chr> <int>  <int>
#> 1     1 a        NA      2
#> 2     2 a         1      3
#> 3     3 a         2     NA
#> 4     4 b        NA      5
#> 5     5 b         4      6
#> 6     6 b         5     NA
#> # ... with 3 more rows
# The cumulative and rolling aggregate functions cumsum(), cumprod(),
# cummin(), cummax(), and cummean() calculate values within each group.
# Ungrouped: the running total keeps accumulating over all rows.
df %>%
  mutate(cumsum(x))
#> # A tibble: 9 x 3
#>       x group cumsum(x)
#>   <int> <chr>       <int>
#> 1     1 a               1
#> 2     2 a               3
#> 3     3 a               6
#> 4     4 b              10
#> 5     5 b              15
#> 6     6 b              21
#> # ... with 3 more rows
# Grouped: the running total restarts at the first row of every group.
gf %>%
  mutate(cumsum(x))
#> # A tibble: 9 x 3
#> # Groups:   group [3]
#>       x group cumsum(x)
#>   <int> <chr>       <int>
#> 1     1 a               1
#> 2     2 a               3
#> 3     3 a               6
#> 4     4 b               4
#> 5     5 b               9
#> 6     6 b              15
#> # ... with 3 more rows
# Logical comparisons, <, <=, >, >=, !=, and == are not affected by
# group_by(): the ungrouped and grouped results below are identical.
df %>%
  mutate(x > 0.5)
#> # A tibble: 9 x 3
#>       x group x > 0.5
#>   <int> <chr> <lgl>
#> 1     1 a     TRUE
#> 2     2 a     TRUE
#> 3     3 a     TRUE
#> 4     4 b     TRUE
#> 5     5 b     TRUE
#> 6     6 b     TRUE
#> # ... with 3 more rows
gf %>%
  mutate(x > 0.5)
#> # A tibble: 9 x 3
#> # Groups:   group [3]
#>       x group x > 0.5
#>   <int> <chr> <lgl>
#> 1     1 a     TRUE
#> 2     2 a     TRUE
#> 3     3 a     TRUE
#> 4     4 b     TRUE
#> 5     5 b     TRUE
#> 6     6 b     TRUE
#> # ... with 3 more rows
# Ranking functions like min_rank() work within each group when used with
# group_by().
# Ungrouped: ranks are assigned over the whole column.
df %>%
  mutate(min_rank(x))
#> # A tibble: 9 x 3
#>       x group min_rank(x)
#>   <int> <chr>         <int>
#> 1     1 a                 1
#> 2     2 a                 2
#> 3     3 a                 3
#> 4     4 b                 4
#> 5     5 b                 5
#> 6     6 b                 6
#> # ... with 3 more rows
# Grouped: ranks restart at 1 inside every group.
gf %>%
  mutate(min_rank(x))
#> # A tibble: 9 x 3
#> # Groups:   group [3]
#>       x group min_rank(x)
#>   <int> <chr>         <int>
#> 1     1 a                 1
#> 2     2 a                 2
#> 3     3 a                 3
#> 4     4 b                 1
#> 5     5 b                 2
#> 6     6 b                 3
#> # ... with 3 more rows
# filter() is affected by grouping in the same way: on grouped data
# min_rank() is evaluated within each group, so one row per group is kept
# instead of a single row overall.
df %>%
  filter(min_rank(x) == 1)
#> # A tibble: 1 x 2
#>       x group
#>   <int> <chr>
#> 1     1 a
gf %>%
  filter(min_rank(x) == 1)
#> # A tibble: 3 x 2
#> # Groups:   group [3]
#>       x group
#>   <int> <chr>
#> 1     1 a
#> 2     4 b
#> 3     7 c
# arrange() ignores groups when sorting values.
# This replaces the `df` defined earlier with random x values. No seed is set
# in this snippet, so the exact numbers printed below vary from run to run.
df <- tibble(
  x = runif(9),
  group = rep(c("a", "b", "c"), each = 3)
)

# Rows come back ordered by x alone; the groups end up interleaved.
df %>%
  group_by(group) %>%
  arrange(x)
#> # A tibble: 9 x 2
#> # Groups:   group [3]
#>        x group
#>    <dbl> <chr>
#> 1 0.0562 c
#> 2 0.337  a
#> 3 0.514  b
#> 4 0.523  b
#> 5 0.525  a
#> 6 0.557  b
#> # ... with 3 more rows

# With .by_group = TRUE, arrange() sorts by the grouping variable first and
# then by x within each group.
df %>%
  group_by(group) %>%
  arrange(x, .by_group = TRUE)
#> # A tibble: 9 x 2
#> # Groups:   group [3]
#>       x group
#>   <dbl> <chr>
#> 1 0.337 a
#> 2 0.525 a
#> 3 0.785 a
#> 4 0.514 b
#> 5 0.523 b
#> 6 0.557 b
#> # ... with 3 more rows