3.4 嵌套数据

我目前接触这类数据比较少,想了解详情请查看 tidyr 手册:vignette("nest")。

library(tidyr)
library(dplyr)
library(purrr)

3.4.1 基础

嵌套数据即:数据框中嵌套数据框,如下所示:

# A nested data frame built by hand: `g` labels each row and `data` is a
# list-column holding one tibble per row. The third tibble has only an `x`
# column, so the nested tibbles do not all share the same shape.
df1 <- tibble(
  g = as.numeric(1:3),
  data = list(
    tibble(x = 1, y = 2),
    tibble(x = c(4L, 5L), y = c(6L, 7L)),
    tibble(x = 10)
  )
)
print(df1)
#> # A tibble: 3 x 2
#>       g data            
#>   <dbl> <list>          
#> 1     1 <tibble [1 x 2]>
#> 2     2 <tibble [2 x 2]>
#> 3     3 <tibble [1 x 1]>

因为数据框本质上是由各列组成的列表,而列本身也可以是列表(即列表列,list-column),所以可以做如下操作:

# The same data in flat form: one row per observation, with `g` repeated for
# the rows that belong to the same group.
df2 <- tibble(
  g = c(1, 2, 2, 3),
  x = c(1, 4, 5, 10),
  y = c(2, 6, 7, NA)
)
# Pack `x` and `y` into a list-column named `data`, giving one row per
# distinct value of the remaining column `g`.
nest(df2, data = c(x, y))
#> # A tibble: 3 x 2
#>       g data            
#>   <dbl> <list>          
#> 1     1 <tibble [1 x 2]>
#> 2     2 <tibble [2 x 2]>
#> 3     3 <tibble [1 x 2]>

# same as above (but the result stays grouped by g)
#df2 %>% group_by(g) %>% nest()

nest 的逆操作是 unnest:

# Expand the `data` list-column back into ordinary columns; a nested tibble
# that lacks a column (here `y` in the third one) is filled with NA.
unnest(df1, cols = data)
#> # A tibble: 4 x 3
#>       g     x     y
#>   <dbl> <dbl> <dbl>
#> 1     1     1     2
#> 2     2     4     6
#> 3     2     5     7
#> 4     3    10    NA

3.4.2 嵌套数据和模型

# Nest mtcars by cylinder count: every column except `cyl` is packed into the
# `data` list-column, one tibble per group. The result stays grouped by `cyl`.
mtcars_nested <- nest(group_by(mtcars, cyl))

print(mtcars_nested)
#> # A tibble: 3 x 2
#> # Groups:   cyl [3]
#>     cyl data              
#>   <dbl> <list>            
#> 1     6 <tibble [7 x 10]> 
#> 2     4 <tibble [11 x 10]>
#> 3     8 <tibble [14 x 10]>
# Fit one linear model (mpg explained by wt) per nested tibble and store the
# fitted `lm` objects in a new list-column `model`.
mtcars_nested <- mutate(
  mtcars_nested,
  model = map(data, function(df) lm(mpg ~ wt, data = df))
)
print(mtcars_nested)
#> # A tibble: 3 x 3
#> # Groups:   cyl [3]
#>     cyl data               model 
#>   <dbl> <list>             <list>
#> 1     6 <tibble [7 x 10]>  <lm>  
#> 2     4 <tibble [11 x 10]> <lm>  
#> 3     8 <tibble [14 x 10]> <lm>
# Replace each fitted model with the result of predict() on it. The `model`
# column is overwritten, so afterwards it holds numeric vectors, not `lm` objects.
mtcars_nested <- mutate(
  mtcars_nested,
  model = map(model, predict)
)
print(mtcars_nested)
#> # A tibble: 3 x 3
#> # Groups:   cyl [3]
#>     cyl data               model     
#>   <dbl> <list>             <list>    
#> 1     6 <tibble [7 x 10]>  <dbl [7]> 
#> 2     4 <tibble [11 x 10]> <dbl [11]>
#> 3     8 <tibble [14 x 10]> <dbl [14]>