3.7 缺失值

3.7.1 replace_na

replace_na()用特定值替换缺失值。

3.7.1.1 参数

replace_na(data, replace, ...)

如果参数 data 是数据框(data.frame),replace_na() 返回数据框;如果 data 是向量(vector),则返回向量。

3.7.1.2 案例

# Example tibble: every column contains one missing value.
df <- tibble(
  x = c(1, 2, NA),
  y = c("a", NA, "b"),
  z = c(3, 4, NA)
)

# Data-frame form: `replace` is a named list with one replacement per column.
# z is not listed, so its NA is left in place.
df %>%
  replace_na(list(x = 0, y = "unknown"))
#> # A tibble: 3 x 3
#>       x y           z
#>   <dbl> <chr>   <dbl>
#> 1     1 a           3
#> 2     2 unknown     4
#> 3     0 b          NA

# Vector form: replace the NA of a single column inside mutate().
df %>%
  dplyr::mutate(x = replace_na(x, 0))
#> # A tibble: 3 x 3
#>       x y         z
#>   <dbl> <chr> <dbl>
#> 1     1 a         3
#> 2     2 <NA>      4
#> 3     0 b        NA

数据框中批量替换多列

# Replace NA with 0 in every numeric column at once.
# The selection is limited to numeric columns because replace_na() casts the
# replacement to the column's type (tidyr >= 1.2.0), so 0 cannot be used on the
# character column y. A lambda is used because passing extra arguments through
# across() is deprecated (dplyr >= 1.1.0).
df %>%
  mutate(across(where(is.numeric), ~ replace_na(.x, 0)))
#> # A tibble: 3 x 3
#>       x y         z
#>   <dbl> <chr> <dbl>
#> 1     1 a         3
#> 2     2 <NA>      4
#> 3     0 b         0

3.7.2 fill

fill() 用上一个或下一个非缺失值填充选定列中的缺失值(NA)。

3.7.2.1 参数

fill(data, ..., .direction = c("down", "up", "downup", "updown"))

其中 .direction 参数指定填充方向:down(默认值)向下填充;up 向上填充;downup 先向下填充,再向上填充;updown 先向上填充,再向下填充。

3.7.2.2 案例

# Example tibble with NA values scattered through every column.
df <- tibble(
  x = c(NA, 1, NA, 2, NA),
  y = c(NA, "a", NA, NA, "b"),
  z = c(NA, 3, NA, 4, NA)
)

# Default direction ("down"): the leading NA in z stays, since no value
# precedes it.
df %>%
  fill(z)
#> # A tibble: 5 x 3
#>       x y         z
#>   <dbl> <chr> <dbl>
#> 1    NA <NA>     NA
#> 2     1 a         3
#> 3    NA <NA>      3
#> 4     2 <NA>      4
#> 5    NA b         4

# "up": the trailing NA in z stays, since no value follows it.
df %>%
  fill(z, .direction = "up")
#> # A tibble: 5 x 3
#>       x y         z
#>   <dbl> <chr> <dbl>
#> 1    NA <NA>      3
#> 2     1 a         3
#> 3    NA <NA>      4
#> 4     2 <NA>      4
#> 5    NA b        NA

# "downup": fill down first, then up, so no NA is left in z.
df %>%
  fill(z, .direction = "downup")
#> # A tibble: 5 x 3
#>       x y         z
#>   <dbl> <chr> <dbl>
#> 1    NA <NA>      3
#> 2     1 a         3
#> 3    NA <NA>      3
#> 4     2 <NA>      4
#> 5    NA b         4

# "updown": fill up first, then down; row 3 therefore takes 4 rather than 3.
df %>%
  fill(z, .direction = "updown")
#> # A tibble: 5 x 3
#>       x y         z
#>   <dbl> <chr> <dbl>
#> 1    NA <NA>      3
#> 2     1 a         3
#> 3    NA <NA>      4
#> 4     2 <NA>      4
#> 5    NA b         4

结合dplyr::group_by()使用

# Each group has a single row (the scorekeeper) with a recorded count;
# the other rows of the group have NA.
squirrels <- tibble::tribble(
  ~group, ~name,      ~role,         ~n_squirrels,
  1,      "Sam",      "Observer",    NA,
  1,      "Mara",     "Scorekeeper", 8,
  1,      "Jesse",    "Observer",    NA,
  1,      "Tom",      "Observer",    NA,
  2,      "Mike",     "Observer",    NA,
  2,      "Rachael",  "Observer",    NA,
  2,      "Sydekea",  "Scorekeeper", 14,
  2,      "Gabriela", "Observer",    NA,
  3,      "Derrick",  "Observer",    NA,
  3,      "Kara",     "Scorekeeper", 9,
  3,      "Emily",    "Observer",    NA,
  3,      "Danielle", "Observer",    NA
)

# Fill in both directions within each group, so every member gets the
# count of its own group and no value is carried across a group boundary.
# ungroup() afterwards so later steps are not affected by the grouping.
squirrels %>%
  dplyr::group_by(group) %>%
  fill(n_squirrels, .direction = "downup") %>%
  dplyr::ungroup()
#> # A tibble: 12 x 4
#>   group name    role        n_squirrels
#>   <dbl> <chr>   <chr>             <dbl>
#> 1     1 Sam     Observer              8
#> 2     1 Mara    Scorekeeper           8
#> 3     1 Jesse   Observer              8
#> 4     1 Tom     Observer              8
#> 5     2 Mike    Observer             14
#> 6     2 Rachael Observer             14
#> # ... with 6 more rows

3.7.3 drop_na

drop_na() 删除包含缺失值的行。不指定列时检查所有列;也可以在参数中指定列,只删除这些列中含有缺失值的行。

# Example tibble: x is missing in row 3, y is missing in row 2.
df <- tibble(
  x = c(1, 2, NA),
  y = c("a", NA, "b")
)

# No columns given: drop every row that has an NA in any column.
df %>%
  drop_na()
#> # A tibble: 1 x 2
#>       x y    
#>   <dbl> <chr>
#> 1     1 a

# Only x is checked, so the row whose y is missing is kept.
df %>%
  drop_na(x)
#> # A tibble: 2 x 2
#>       x y    
#>   <dbl> <chr>
#> 1     1 a    
#> 2     2 <NA>

# Column names stored in a character vector are selected with any_of().
vars <- "y"
df %>%
  drop_na(x, any_of(vars))
#> # A tibble: 1 x 2
#>       x y    
#>   <dbl> <chr>
#> 1     1 a