9 分组计数、求和

9.1 aggregate

aggregate函数可以对R对象按某列分组处理,比如计数或者求和。[5]

# Sample data: one grouping label and one numeric measurement per observation.
type <- c(
  "a", "b", "c", "a", "c",
  "d", "b", "a", "c", "b"
)
value <- c(
  53, 15, 8, 99, 76,
  22, 46, 56, 34, 54
)
# Bind the two vectors into a data frame; the columns keep the vector names.
df <- data.frame(type = type, value = value)
df
##    type value
## 1     a    53
## 2     b    15
## 3     c     8
## 4     a    99
## 5     c    76
## 6     d    22
## 7     b    46
## 8     a    56
## 9     c    34
## 10    b    54

分组求和,参数by表示按什么分组,要赋一个list对象。

# Sum `value` within each level of `type`; `by` must be a list of grouping vectors.
aggregate(x = df$value, by = list(df$type), FUN = sum)
##   Group.1   x
## 1       a 208
## 2       b 115
## 3       c 118
## 4       d  22

分组计数

# Count the observations in each `type` group by applying `length`.
aggregate(x = df$value, by = list(df$type), FUN = length)
##   Group.1 x
## 1       a 3
## 2       b 3
## 3       c 3
## 4       d 1

平均值

# Average `value` within each `type` group.
aggregate(x = df$value, by = list(df$type), FUN = mean)
##   Group.1        x
## 1       a 69.33333
## 2       b 38.33333
## 3       c 39.33333
## 4       d 22.00000

按两个标准分组

# Second grouping variable, one label per observation.
type_2 <- c(
  "F", "M", "M", "F", "F",
  "M", "M", "F", "M", "M"
)
# Append it to the existing data frame as a third column.
df <- data.frame(df, type_2 = type_2)
df
##    type value type_2
## 1     a    53      F
## 2     b    15      M
## 3     c     8      M
## 4     a    99      F
## 5     c    76      F
## 6     d    22      M
## 7     b    46      M
## 8     a    56      F
## 9     c    34      M
## 10    b    54      M
# Sum `value` for every observed (type, type_2) combination.
aggregate(x = df$value, by = list(df$type, df$type_2), FUN = sum)
##   Group.1 Group.2   x
## 1       a       F 208
## 2       c       F  76
## 3       b       M 115
## 4       c       M  42
## 5       d       M  22

aggregate函数支持formula:

# Formula interface: response on the left, grouping variables on the right,
# with the columns looked up in `data`.
aggregate(value ~ type + type_2, data = df, FUN = sum)
##   type type_2 value
## 1    a      F   208
## 2    c      F    76
## 3    b      M   115
## 4    c      M    42
## 5    d      M    22

用formula形式时,第二个位置参数是data;如果省略data,FUN必须写成命名参数(如FUN = sum),否则sum会被当作data而报错:

# Deliberate failure: `sum` is the second positional argument, so the formula
# method receives it as `data` instead of `FUN`, which triggers the error below.
aggregate(df$value ~ df$type + df$type_2, sum)
## Error in model.frame.default(formula = df$value ~ df$type + df$type_2, : 'data' must be a data.frame, environment, or list

如果不用formula形式,则不能传入data参数(多余的data会被当作额外参数传给FUN,从而报错):

# Deliberate failure: the non-formula method has no `data` parameter, so
# `data = df` is forwarded through `...` to `sum`, which cannot sum a list.
aggregate(x = value, by = list(type, type_2), sum, data = df)
## Error in FUN(X[[i]], ...): invalid 'type' (list) of argument

9.2 table

table函数也可以用于计数

# Frequency count of each distinct value in the `type` column.
table(df[["type"]])
## 
## a b c d 
## 3 3 3 1

9.3 tapply [6]

# Cross-tabulated sums: rows are `type`, columns are `type_2`;
# combinations with no observations are reported as NA.
tapply(X = df$value, INDEX = list(df$type, df$type_2), FUN = sum)
##     F   M
## a 208  NA
## b  NA 115
## c  76  42
## d  NA  22

tapply不支持formula