## 6.2 提取子集

# Usage of subset() for data frames and matrices (signature only, not meant
# to be evaluated: `x` is a placeholder and `...` is invalid at top level):
#   subset(x, subset, select, drop = FALSE, ...)
#   - x:      the object to be subsetted
#   - subset: logical expression selecting the rows to keep
#   - select: expression selecting the columns to keep
#   - drop:   passed on to the `[` indexing operator

# Keep the iris rows that are virginica AND have Sepal.Length above 7.5.
subset(iris, Species == "virginica" & Sepal.Length > 7.5)
##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 106          7.6         3.0          6.6         2.1 virginica
## 118          7.7         3.8          6.7         2.2 virginica
## 119          7.7         2.6          6.9         2.3 virginica
## 123          7.7         2.8          6.7         2.0 virginica
## 132          7.9         3.8          6.4         2.0 virginica
## 136          7.7         3.0          6.1         2.3 virginica
# To inspect the column first:
#   summary(iris$Sepal.Length); mean(iris$Sepal.Length)
# Logical AND of two conditions. Variant with a fixed cutoff:
#   subset(iris, Species == "virginica" & Sepal.Length > 5.8)
# Here median(Sepal.Length) is evaluated on the whole iris column, not only
# on the virginica rows.
subset(
  iris,
  subset = Species == "virginica" & Sepal.Length == median(Sepal.Length)
)
##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 102          5.8         2.7          5.1         1.9 virginica
## 115          5.8         2.8          5.1         2.4 virginica
## 143          5.8         2.7          5.1         1.9 virginica
# Restrict the rows to a set of species with %in%.
subset(
  iris,
  subset = Species %in% c("virginica", "versicolor") &
    Sepal.Length == median(Sepal.Length)
)
##     Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 68           5.8         2.7          4.1         1.0 versicolor
## 83           5.8         2.7          3.9         1.2 versicolor
## 93           5.8         2.6          4.0         1.2 versicolor
## 102          5.8         2.7          5.1         1.9  virginica
## 115          5.8         2.8          5.1         2.4  virginica
## 143          5.8         2.7          5.1         1.9  virginica
# Restrict the columns as well: `select` picks the columns to keep.
subset(
  iris,
  subset = Sepal.Length == median(Sepal.Length),
  select = c(Sepal.Length, Species)
)
##     Sepal.Length    Species
## 15           5.8     setosa
## 68           5.8 versicolor
## 83           5.8 versicolor
## 93           5.8 versicolor
## 102          5.8  virginica
## 115          5.8  virginica
## 143          5.8  virginica

## subset() needs a logical 'subset' argument, which can be a nuisance: the
## row-name pattern is first turned into a logical vector (TRUE for names
## starting with "M").
nm <- rownames(state.x77)
start_with_M <- nm %in% grep("^M", nm, value = TRUE)
subset(state.x77, subset = start_with_M, select = Illiteracy:Murder)
##               Illiteracy Life Exp Murder
## Maine                0.7    70.39    2.7
## Maryland             0.9    70.22    8.5
## Massachusetts        1.1    71.83    3.3
## Michigan             0.9    70.63   11.1
## Minnesota            0.6    72.96    2.3
## Mississippi          2.4    68.09   12.5
## Missouri             0.8    70.69    9.3
## Montana              0.6    70.56    5.0
# Simplified: build the logical filter inline with grepl().
subset(
  state.x77,
  subset = grepl("^M", rownames(state.x77)),
  select = Illiteracy:Murder
)
##               Illiteracy Life Exp Murder
## Maine                0.7    70.39    2.7
## Maryland             0.9    70.22    8.5
## Massachusetts        1.1    71.83    3.3
## Michigan             0.9    70.63   11.1
## Minnesota            0.6    72.96    2.3
## Mississippi          2.4    68.09   12.5
## Missouri             0.8    70.69    9.3
## Montana              0.6    70.56    5.0
# Simplified further: pass the row filter and column range positionally.
subset(state.x77, grepl("^M", rownames(state.x77)), Illiteracy:Murder)
##               Illiteracy Life Exp Murder
## Maine                0.7    70.39    2.7
## Maryland             0.9    70.22    8.5
## Massachusetts        1.1    71.83    3.3
## Michigan             0.9    70.63   11.1
## Minnesota            0.6    72.96    2.3
## Mississippi          2.4    68.09   12.5
## Missouri             0.8    70.69    9.3
## Montana              0.6    70.56    5.0

# The same kind of filters written with `[` indexing instead of subset():
# the row condition goes before the comma, the column selection after it.
# Columns must be referenced explicitly as iris$... inside the brackets.

# virginica rows whose Sepal.Length equals the literal 5.8; all columns kept.
iris[iris$Species == "virginica" & iris$Sepal.Length == 5.8, ]
##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 102          5.8         2.7          5.1         1.9 virginica
## 115          5.8         2.8          5.1         2.4 virginica
## 143          5.8         2.7          5.1         1.9 virginica
# Same filter with the value computed as the median of the whole column.
iris[
  iris$Species == "virginica" &
    iris$Sepal.Length == median(iris$Sepal.Length),
]
##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 102          5.8         2.7          5.1         1.9 virginica
## 115          5.8         2.8          5.1         2.4 virginica
## 143          5.8         2.7          5.1         1.9 virginica
# Same row filter, keeping only two columns selected by name.
iris[
  iris$Species == "virginica" &
    iris$Sepal.Length == median(iris$Sepal.Length),
  c("Sepal.Length", "Species")
]
##     Sepal.Length   Species
## 102          5.8 virginica
## 115          5.8 virginica
## 143          5.8 virginica
# Columns can also be chosen with a logical vector: here every column whose
# name contains "Sepal".
iris[
  iris$Species == "setosa" & iris$Sepal.Length > 5.5,
  grepl("Sepal", colnames(iris))
]
##    Sepal.Length Sepal.Width
## 15          5.8         4.0
## 16          5.7         4.4
## 19          5.7         3.8
# subset() equivalent of the previous call.
subset(iris,
  subset = Species == "setosa" & Sepal.Length > 5.5,
  select = grepl("Sepal", colnames(iris))
)
##    Sepal.Length Sepal.Width
## 15          5.8         4.0
## 16          5.7         4.4
## 19          5.7         3.8

library(data.table)
# Copy the car names from the row names into a regular column `cars`
# (as.data.table() does not keep row names unless keep.rownames is set).
mtcars$cars <- rownames(mtcars)
# data.table: convert mtcars, select the mpg and disp columns in `j` with
# .(), and print the first six rows (rows are numbered, not named).
mtcars_df <- data.table::as.data.table(mtcars)
head(mtcars_df[, .(mpg, disp)], n = 6L)
##     mpg disp
## 1: 21.0  160
## 2: 21.0  160
## 3: 22.8  108
## 4: 21.4  258
## 5: 18.7  360
## 6: 18.1  225
# dplyr: the same column selection on the plain data frame; the row names
# are still shown in the printed result.
mtcars |>
  dplyr::select(c(mpg, disp)) |>
  head(n = 6L)
##                    mpg disp
## Mazda RX4         21.0  160
## Mazda RX4 Wag     21.0  160
## Datsun 710        22.8  108
## Hornet 4 Drive    21.4  258
## Hornet Sportabout 18.7  360
## Valiant           18.1  225