3.7 缺失值
3.7.1 replace_na
replace_na()
用特定值替换缺失值。
3.7.1.1 参数
replace_na(data, replace, ...)
data 为数据框(data.frame)时,replace 需传入命名列表(每列对应一个替换值),replace_na() 返回数据框;
data 为向量(vector)时,replace 传入单个替换值,replace_na() 返回向量。
3.7.1.2 案例
df <- tibble(x = c(1, 2, NA), y = c("a", NA, "b"),z = c(3,4,NA))
df %>% replace_na(list(x = 0, y = "unknown"))
#> # A tibble: 3 x 3
#> x y z
#> <dbl> <chr> <dbl>
#> 1 1 a 3
#> 2 2 unknown 4
#> 3 0 b NA
df %>% dplyr::mutate(x = replace_na(x, 0))
#> # A tibble: 3 x 3
#> x y z
#> <dbl> <chr> <dbl>
#> 1 1 a 3
#> 2 2 <NA> 4
#> 3 0 b NA
数据框中批量替换多列
(注:tidyr 1.2.0 及以上版本要求替换值与列的类型一致,对字符列 y 使用 0 替换可能会报错;下面的输出来自较早版本。)
df %>%
  mutate(across(x:z,replace_na,0))
#> # A tibble: 3 x 3
#> x y z
#> <dbl> <chr> <dbl>
#> 1 1 a 3
#> 2 2 0 4
#> 3 0 b 0
3.7.2 fill
fill()
用上一个或下一个值填充选定列的空值(NA)。
3.7.2.1 参数
fill(data, ..., .direction = c("down", "up", "downup", "updown"))
其中 .direction 参数指定填充的方向:down(默认值)向下填充,up 向上填充,downup 先向下再向上填充,updown 先向上再向下填充。
3.7.2.2 案例
df <- tibble(x = c(NA,1,NA, 2, NA), y = c(NA,"a",NA, NA, "b"),z = c(NA,3,NA,4,NA))
df %>% fill(z)
#> # A tibble: 5 x 3
#> x y z
#> <dbl> <chr> <dbl>
#> 1 NA <NA> NA
#> 2 1 a 3
#> 3 NA <NA> 3
#> 4 2 <NA> 4
#> 5 NA b 4
df %>% fill(z,.direction = 'up')
#> # A tibble: 5 x 3
#> x y z
#> <dbl> <chr> <dbl>
#> 1 NA <NA> 3
#> 2 1 a 3
#> 3 NA <NA> 4
#> 4 2 <NA> 4
#> 5 NA b NA
df %>% fill(z,.direction = 'downup')
#> # A tibble: 5 x 3
#> x y z
#> <dbl> <chr> <dbl>
#> 1 NA <NA> 3
#> 2 1 a 3
#> 3 NA <NA> 3
#> 4 2 <NA> 4
#> 5 NA b 4
df %>% fill(z,.direction = 'updown')
#> # A tibble: 5 x 3
#> x y z
#> <dbl> <chr> <dbl>
#> 1 NA <NA> 3
#> 2 1 a 3
#> 3 NA <NA> 4
#> 4 2 <NA> 4
#> 5 NA b 4
结合 dplyr::group_by() 使用(按组分别填充):
squirrels <- tibble::tribble(
  ~group, ~name, ~role, ~n_squirrels,
  1, "Sam", "Observer", NA,
  1, "Mara", "Scorekeeper", 8,
  1, "Jesse", "Observer", NA,
  1, "Tom", "Observer", NA,
  2, "Mike", "Observer", NA,
  2, "Rachael", "Observer", NA,
  2, "Sydekea", "Scorekeeper", 14,
  2, "Gabriela", "Observer", NA,
  3, "Derrick", "Observer", NA,
  3, "Kara", "Scorekeeper", 9,
  3, "Emily", "Observer", NA,
  3, "Danielle", "Observer", NA
)
squirrels %>%
  dplyr::group_by(group) %>%
  fill(n_squirrels, .direction = "downup") %>%
  dplyr::ungroup()
#> # A tibble: 12 x 4
#> group name role n_squirrels
#> <dbl> <chr> <chr> <dbl>
#> 1 1 Sam Observer 8
#> 2 1 Mara Scorekeeper 8
#> 3 1 Jesse Observer 8
#> 4 1 Tom Observer 8
#> 5 2 Mike Observer 14
#> 6 2 Rachael Observer 14
#> # ... with 6 more rows
3.7.3 drop_na
drop_na()
删除包含缺失值的行。
df <- tibble(x = c(1, 2, NA), y = c("a", NA, "b"))
df %>% drop_na()
#> # A tibble: 1 x 2
#> x y
#> <dbl> <chr>
#> 1 1 a
df %>% drop_na(x)
#> # A tibble: 2 x 2
#> x y
#> <dbl> <chr>
#> 1 1 a
#> 2 2 <NA>
vars <- "y"
df %>% drop_na(x, any_of(vars))
#> # A tibble: 1 x 2
#> x y
#> <dbl> <chr>
#> 1 1 a