3.6 扩展数据框

expand_grid() 受 base::expand.grid() 启发而创建,它返回由所有输入的全部组合构成的数据框(tibble)。

用法(参数)如下:

# Usage (signature only; `...` cannot be evaluated at top level, so it is
# shown as a comment rather than as a runnable call):
#   expand_grid(..., .name_repair = "check_unique")

# Every combination of the inputs is returned, one row per combination;
# the first input (x) varies slowest, the last input (y) varies fastest.
expand_grid(x = 1:3, y = 1:2)
#> # A tibble: 6 x 2
#>       x     y
#>   <int> <int>
#> 1     1     1
#> 2     1     2
#> 3     2     1
#> 4     2     2
#> 5     3     1
#> 6     3     2

# NA is treated like any other element and takes part in the combinations.
# `df` is reused by the unite()/separate() examples further down.
df <- expand_grid(x = c("a", NA), y = c("b", NA))
df
#> # A tibble: 4 x 2
#>   x     y    
#>   <chr> <chr>
#> 1 a     b    
#> 2 a     <NA> 
#> 3 <NA>  b    
#> 4 <NA>  <NA>

当 expand_grid() 的输入只有两个向量时,其结果类似于 dplyr::full_join() 在 by = character() 时得到的笛卡尔乘积,如下所示:

# Cartesian product of the lowercase and uppercase alphabets:
# 26 * 26 = 676 rows, column names taken from the input expressions.
expand_grid(letters, LETTERS) %>%
  glimpse()
#> Rows: 676
#> Columns: 2
#> $ letters <chr> "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a~
#> $ LETTERS <chr> "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M~

# The same 676 combinations obtained through a join with no key columns.
# NOTE(review): dplyr >= 1.1.0 deprecates `by = character()` in favour of
# cross_join() -- confirm which dplyr version this text targets.
full_join(
  tibble(letters),
  tibble(LETTERS),
  by = character()
) %>%
  glimpse()
#> Rows: 676
#> Columns: 2
#> $ letters <chr> "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a~
#> $ LETTERS <chr> "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M~

合并列(unite)时移除缺失值

# Default behaviour: missing values are pasted in as the literal text "NA",
# so the combined column contains "a_NA", "NA_b" and "NA_NA".
# remove = FALSE keeps the original x and y columns next to the new one.
df %>%
  unite(col = "z", x:y, remove = FALSE)
#> # A tibble: 4 x 3
#>   z     x     y    
#>   <chr> <chr> <chr>
#> 1 a_b   a     b    
#> 2 a_NA  a     <NA> 
#> 3 NA_b  <NA>  b    
#> 4 NA_NA <NA>  <NA>

# With na.rm = TRUE the missing values are dropped before pasting; a row
# whose inputs are all missing ends up as the empty string "".
df %>%
  unite(col = "z", x:y, remove = FALSE, na.rm = TRUE)
#> # A tibble: 4 x 3
#>   z     x     y    
#>   <chr> <chr> <chr>
#> 1 "a_b" a     b    
#> 2 "a"   a     <NA> 
#> 3 "b"   <NA>  b    
#> 4 ""    <NA>  <NA>

合并后再拆分

# Round trip: combine x and y into a single column, then split it again.
# The result is not identical to `df`: the missing values were pasted as the
# text "NA" by unite(), so separate() returns the string "NA" (printed as NA,
# not <NA>) instead of a real missing value.
df %>%
  unite(col = "xy", x:y) %>%
  separate(col = xy, into = c("x", "y"))
#> # A tibble: 4 x 2
#>   x     y    
#>   <chr> <chr>
#> 1 a     b    
#> 2 a     NA   
#> 3 NA    b    
#> 4 NA    NA