## 3.7 缺失值

### 3.7.1 replace_na

replace_na()用特定值替换缺失值。

#### 3.7.1.1 参数

replace_na(data, replace, ...)

#### 3.7.1.2 案例

# Example data: a numeric, a character and a second numeric column,
# each containing exactly one NA.
df <- tibble(x = c(1, 2, NA), y = c("a", NA, "b"), z = c(3, 4, NA))

# Data-frame form: `replace` is a named list holding one replacement per
# column. Columns not named in the list (here `z`) keep their NAs.
df %>% replace_na(list(x = 0, y = "unknown"))
#> # A tibble: 3 x 3
#>       x y           z
#>   <dbl> <chr>   <dbl>
#> 1     1 a           3
#> 2     2 unknown     4
#> 3     0 b          NA

# Vector form: replace the NAs of a single column inside mutate().
df %>% dplyr::mutate(x = replace_na(x, 0))
#> # A tibble: 3 x 3
#>       x y         z
#>   <dbl> <chr> <dbl>
#> 1     1 a         3
#> 2     2 <NA>      4
#> 3     0 b        NA

# Several columns at once with across().
# - The replacement must have the same type as the column: recent tidyr
#   (>= 1.2.0) casts `replace` to the column type, so a numeric 0 cannot be
#   put into the character column `y`; it gets the string "0" instead.
# - The replacement is supplied through a lambda because passing extra
#   arguments via across(...) is deprecated in dplyr >= 1.1.0.
# - dplyr functions are namespace-qualified like every other dplyr call in
#   this file, so the example does not depend on dplyr being attached.
df %>%
  dplyr::mutate(
    dplyr::across(c(x, z), ~ replace_na(.x, 0)),
    dplyr::across(y, ~ replace_na(.x, "0"))
  )
#> # A tibble: 3 x 3
#>       x y         z
#>   <dbl> <chr> <dbl>
#> 1     1 a         3
#> 2     2 0         4
#> 3     0 b         0

### 3.7.2 fill

fill()用上一个或下一个非缺失值填充选定列中的缺失值(NA),填充方向由 .direction 参数控制。

#### 3.7.2.1 参数

fill(data, ..., .direction = c("down", "up", "downup", "updown"))

#### 3.7.2.2 案例

# Sample data: every column starts and ends with NA and has gaps in between.
df <- tibble(
  x = c(NA, 1, NA, 2, NA),
  y = c(NA, "a", NA, NA, "b"),
  z = c(NA, 3, NA, 4, NA)
)

# Default direction ("down"): each NA in `z` takes the last value above it.
# The leading NA has nothing above it and therefore stays NA.
fill(df, z)
#> # A tibble: 5 x 3
#>       x y         z
#>   <dbl> <chr> <dbl>
#> 1    NA <NA>     NA
#> 2     1 a         3
#> 3    NA <NA>      3
#> 4     2 <NA>      4
#> 5    NA b         4

# "up": each NA takes the next value below it; the trailing NA stays NA.
fill(df, z, .direction = "up")
#> # A tibble: 5 x 3
#>       x y         z
#>   <dbl> <chr> <dbl>
#> 1    NA <NA>      3
#> 2     1 a         3
#> 3    NA <NA>      4
#> 4     2 <NA>      4
#> 5    NA b        NA

# "downup": fill downwards first, then fill what is still missing upwards.
fill(df, z, .direction = "downup")
#> # A tibble: 5 x 3
#>       x y         z
#>   <dbl> <chr> <dbl>
#> 1    NA <NA>      3
#> 2     1 a         3
#> 3    NA <NA>      3
#> 4     2 <NA>      4
#> 5    NA b         4

# "updown": fill upwards first, then fill what is still missing downwards.
fill(df, z, .direction = "updown")
#> # A tibble: 5 x 3
#>       x y         z
#>   <dbl> <chr> <dbl>
#> 1    NA <NA>      3
#> 2     1 a         3
#> 3    NA <NA>      4
#> 4     2 <NA>      4
#> 5    NA b         4

# Survey records: three groups of four people; within each group only the
# "Scorekeeper" row carries the squirrel count, every other row is NA.
squirrels <- tibble::tibble(
  group = rep(c(1, 2, 3), each = 4),
  name = c(
    "Sam", "Mara", "Jesse", "Tom",
    "Mike", "Rachael", "Sydekea", "Gabriela",
    "Derrick", "Kara", "Emily", "Danielle"
  ),
  role = c(
    "Observer", "Scorekeeper", "Observer", "Observer",
    "Observer", "Observer", "Scorekeeper", "Observer",
    "Observer", "Scorekeeper", "Observer", "Observer"
  ),
  n_squirrels = c(NA, 8, NA, NA, NA, NA, 14, NA, NA, 9, NA, NA)
)

# Grouping first keeps the fill inside each group, so a count never leaks
# from one group into the next; "downup" covers rows on both sides of the
# scorekeeper. The grouping is removed again once the fill is done.
squirrels %>%
  dplyr::group_by(group) %>%
  fill(n_squirrels, .direction = "downup") %>%
  dplyr::ungroup()
#> # A tibble: 12 x 4
#>   group name    role        n_squirrels
#>   <dbl> <chr>   <chr>             <dbl>
#> 1     1 Sam     Observer              8
#> 2     1 Mara    Scorekeeper           8
#> 3     1 Jesse   Observer              8
#> 4     1 Tom     Observer              8
#> 5     2 Mike    Observer             14
#> 6     2 Rachael Observer             14
#> # ... with 6 more rows

### 3.7.3 drop_na

drop_na()删除包含缺失值的行;若在参数中指定了列,则只检查这些列中的缺失值。

# Sample data: row 2 is missing `y`, row 3 is missing `x`.
df <- tibble(
  x = c(1, 2, NA),
  y = c("a", NA, "b")
)

# No columns given: a row is dropped if any of its columns is NA.
drop_na(df)
#> # A tibble: 1 x 2
#>       x y
#>   <dbl> <chr>
#> 1     1 a

# Only `x` is inspected, so the row whose `y` is NA is kept.
drop_na(df, x)
#> # A tibble: 2 x 2
#>       x y
#>   <dbl> <chr>
#> 1     1 a
#> 2     2 <NA>

# Column names held in a character vector are passed through any_of().
vars <- "y"
drop_na(df, x, any_of(vars))
#> # A tibble: 1 x 2
#>       x y
#>   <dbl> <chr>
#> 1     1 a