7.4 group functions
7.4.1 group_map、group_modify
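group_map(), group_modify() and group_walk() are purrr-style functions that can be used to iterate on grouped tibbles. group_map() returns a list of per-group results, group_modify() returns a grouped tibble, and group_walk() is called only for its side effects.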
# Fit Sepal.Length ~ Sepal.Width separately for each Species and tidy each fit.
# bind_rows() stacks the per-group results into a single tibble; note that the
# printed result has no Species column.
iris %>%
group_by(Species) %>%
group_map(~ broom::tidy(lm(Sepal.Length ~ Sepal.Width, data = .x))) %>%
bind_rows()
#> # A tibble: 6 x 5
#> term estimate std.error statistic p.value
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 (Intercept) 2.64 0.310 8.51 3.74e-11
#> 2 Sepal.Width 0.690 0.0899 7.68 6.71e-10
#> 3 (Intercept) 3.54 0.563 6.29 9.07e- 8
#> 4 Sepal.Width 0.865 0.202 4.28 8.77e- 5
#> 5 (Intercept) 3.91 0.757 5.16 4.66e- 6
#> 6 Sepal.Width 0.902 0.253 3.56 8.43e- 4
# Same per-species fit as above, but group_modify() returns a grouped tibble
# that keeps the Species key next to each coefficient row.
iris %>%
group_by(Species) %>%
group_modify(~ broom::tidy(lm(Sepal.Length ~ Sepal.Width, data = .x)))
#> # A tibble: 6 x 6
#> # Groups: Species [3]
#> Species term estimate std.error statistic p.value
#> <fct> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 setosa (Intercept) 2.64 0.310 8.51 3.74e-11
#> 2 setosa Sepal.Width 0.690 0.0899 7.68 6.71e-10
#> 3 versicolor (Intercept) 3.54 0.563 6.29 9.07e- 8
#> 4 versicolor Sepal.Width 0.865 0.202 4.28 8.77e- 5
#> 5 virginica (Intercept) 3.91 0.757 5.16 4.66e- 6
#> 6 virginica Sepal.Width 0.902 0.253 3.56 8.43e- 4
This is similar to calling split() and then map():
# Equivalent of the group_map() example using split() + map_dfr(): split iris
# into one data frame per Species, regress Sepal.Length on Sepal.Width in each
# piece, and row-bind the tidied coefficient tables.
# The predictor must be Sepal.Width: writing Sepal.Length on both sides of the
# formula drops the predictor and yields intercept-only fits.
iris %>%
  split(.$Species) %>%
  map_dfr(~ broom::tidy(lm(Sepal.Length ~ Sepal.Width, data = .x)))
#> # A tibble: 6 x 5
#> term estimate std.error statistic p.value
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 (Intercept) 2.64 0.310 8.51 3.74e-11
#> 2 Sepal.Width 0.690 0.0899 7.68 6.71e-10
#> 3 (Intercept) 3.54 0.563 6.29 9.07e- 8
#> 4 Sepal.Width 0.865 0.202 4.28 8.77e- 5
#> 5 (Intercept) 3.91 0.757 5.16 4.66e- 6
#> 6 Sepal.Width 0.902 0.253 3.56 8.43e- 4
7.4.2 group_nest、group_split、group_keys、group_data
group_nest() is similar to group_by() followed by tidyr::nest():
# One row per Species; the other four columns are packed into a list-column
# `data` holding one tibble per group.
iris %>%
as_tibble() %>%
group_nest(Species)
#> # A tibble: 3 x 2
#> Species data
#> <fct> <list>
#> 1 setosa <tibble [50 x 4]>
#> 2 versicolor <tibble [50 x 4]>
#> 3 virginica <tibble [50 x 4]>
group_split() is a tidy version of base::split(). In particular, it respects a group_by()-like grouping specification, and refuses to name its result.
# Split into an unnamed list of tibbles, one per Species; each piece keeps all
# five columns, including Species itself.
iris %>%
as_tibble() %>%
group_split(Species)
#> [[1]]
#> # A tibble: 50 x 5
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#> <dbl> <dbl> <dbl> <dbl> <fct>
#> 1 5.1 3.5 1.4 0.2 setosa
#> 2 4.9 3 1.4 0.2 setosa
#> 3 4.7 3.2 1.3 0.2 setosa
#> 4 4.6 3.1 1.5 0.2 setosa
#> 5 5 3.6 1.4 0.2 setosa
#> 6 5.4 3.9 1.7 0.4 setosa
#> # ... with 44 more rows
#>
#> [[2]]
#> # A tibble: 50 x 5
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#> <dbl> <dbl> <dbl> <dbl> <fct>
#> 1 7 3.2 4.7 1.4 versicolor
#> 2 6.4 3.2 4.5 1.5 versicolor
#> 3 6.9 3.1 4.9 1.5 versicolor
#> 4 5.5 2.3 4 1.3 versicolor
#> 5 6.5 2.8 4.6 1.5 versicolor
#> 6 5.7 2.8 4.5 1.3 versicolor
#> # ... with 44 more rows
#>
#> [[3]]
#> # A tibble: 50 x 5
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#> <dbl> <dbl> <dbl> <dbl> <fct>
#> 1 6.3 3.3 6 2.5 virginica
#> 2 5.8 2.7 5.1 1.9 virginica
#> 3 7.1 3 5.9 2.1 virginica
#> 4 6.3 2.9 5.6 1.8 virginica
#> 5 6.5 3 5.8 2.2 virginica
#> 6 7.6 3 6.6 2.1 virginica
#> # ... with 44 more rows
#>
#> attr(,"ptype")
#> # A tibble: 0 x 5
#> # ... with 5 variables: Sepal.Length <dbl>, Sepal.Width <dbl>,
#> # Petal.Length <dbl>, Petal.Width <dbl>, Species <fct>
# group_data() exposes the grouping structure of a grouped tibble: one row per
# Species key plus a list-column `.rows` of integer vectors (50 entries per
# group here).
iris %>%
as_tibble() %>%
group_by(Species) %>%
group_data()
#> # A tibble: 3 x 2
#> Species .rows
#> <fct> <list>
#> 1 setosa <int [50]>
#> 2 versicolor <int [50]>
#> 3 virginica <int [50]>
group_keys() returns only the grouping variables, one row per group:
# Group first, then ask for the keys: passing grouping columns directly to
# group_keys() via `...` is deprecated in dplyr >= 1.0. The result is the same
# one-column tibble with one row per Species.
iris %>%
  as_tibble() %>%
  group_by(Species) %>%
  group_keys()
#> # A tibble: 3 x 1
#> Species
#> <fct>
#> 1 setosa
#> 2 versicolor
#> 3 virginica
group_rows() returns only the row indices that belong to each group:
# Returns an unnamed list with one integer vector per group; here the three
# species occupy rows 1-50, 51-100 and 101-150 of iris.
iris %>%
as_tibble() %>%
group_by(Species) %>%
group_rows()
#> [[1]]
#> [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
#> [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
#>
#> [[2]]
#> [1] 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
#> [20] 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88
#> [39] 89 90 91 92 93 94 95 96 97 98 99 100
#>
#> [[3]]
#> [1] 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
#> [20] 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138
#> [39] 139 140 141 142 143 144 145 146 147 148 149 150