3.4 嵌套数据
我目前接触这类数据比较少,想了解详情请查看手册 `vignette("nest")`。
library(tidyr)
library(dplyr)
library(purrr)
3.4.1 基础
嵌套数据即:数据框中嵌套数据框,如下所示:
# Build a nested tibble by hand: `data` is a list-column whose elements are
# themselves tibbles, one per value of `g`. The third element has no `y`
# column, which is why it prints as [1 x 1].
df1 <- tibble(
  g = c(1, 2, 3),
  data = list(
    tibble(x = 1, y = 2),
    tibble(x = 4:5, y = 6:7),
    tibble(x = 10)
  )
)
df1
#> # A tibble: 3 x 2
#>       g data
#>   <dbl> <list>
#> 1     1 <tibble [1 x 2]>
#> 2     2 <tibble [2 x 2]>
#> 3     3 <tibble [1 x 1]>
因为 `data.frame()` 的列特性【每一列都可以是列表】,可以做如下操作:
# Start from a flat table and let nest() pack the x and y columns into a
# list-column named `data`, leaving one row per distinct value of g.
df2 <- tribble(
  ~g, ~x, ~y,
   1,  1,  2,
   2,  4,  6,
   2,  5,  7,
   3, 10, NA
) %>%
  nest(data = c(x, y))
df2
#> # A tibble: 3 x 2
#>       g data
#>   <dbl> <list>
#> 1     1 <tibble [1 x 2]>
#> 2     2 <tibble [2 x 2]>
#> 3     3 <tibble [1 x 2]>
# Same nesting as above, written with group_by(). Here df2 would have to be
# the flat tribble (before the nest() call), and the result stays grouped by g.
# df2 %>% group_by(g) %>% nest()
`nest()` 的反向操作是 `unnest()`:
# unnest() expands the list-column back into ordinary rows and columns.
# The nested tibble for g = 3 has no y column, so y is NA in that row.
df1 %>%
  unnest(data)
#> # A tibble: 4 x 3
#>       g     x     y
#>   <dbl> <dbl> <dbl>
#> 1     1     1     2
#> 2     2     4     6
#> 3     2     5     7
#> 4     3    10    NA
3.4.2 嵌套数据和模型
# Split mtcars into one nested tibble per cylinder count: group_by() marks the
# key, and nest() packs every other column into the `data` list-column.
# The result is still grouped by cyl (see the "Groups" line in the output).
mtcars_nested <- mtcars %>%
  group_by(cyl) %>%
  nest()
mtcars_nested
#> # A tibble: 3 x 2
#> # Groups:   cyl [3]
#>     cyl data
#>   <int> <list>
#> 1     6 <tibble [7 x 10]>
#> 2     4 <tibble [11 x 10]>
#> 3     8 <tibble [14 x 10]>
# Fit one linear model (mpg explained by wt) per nested tibble. map() returns
# a list, so the fitted lm objects are stored in a new list-column `model`.
mtcars_nested <- mtcars_nested %>%
  mutate(model = map(data, function(df) lm(mpg ~ wt, data = df)))
mtcars_nested
#> # A tibble: 3 x 3
#> # Groups:   cyl [3]
#>     cyl data               model
#>   <int> <list>             <list>
#> 1     6 <tibble [7 x 10]>  <lm>
#> 2     4 <tibble [11 x 10]> <lm>
#> 3     8 <tibble [14 x 10]> <lm>
# Apply predict() to each fitted model. This overwrites the `model` column,
# so it now holds one numeric vector of predictions per group, not lm objects.
mtcars_nested <- mtcars_nested %>%
  mutate(model = map(model, predict))
mtcars_nested
#> # A tibble: 3 x 3
#> # Groups:   cyl [3]
#>     cyl data               model
#>   <int> <list>             <list>
#> 1     6 <tibble [7 x 10]>  <dbl [7]>
#> 2     4 <tibble [11 x 10]> <dbl [11]>
#> 3     8 <tibble [14 x 10]> <dbl [14]>