4.1 Select a subset of variables
To limit your dataset to a subset of variables in base R, use brackets [ ] or subset().
# List the names of all variables in the full dataset
names(mydat)
## [1] "ID" "Age" "AgeGp" "Sex" "Yrs_From_Dx"
## [6] "CDAI" "CDAI_YN" "DAS_28" "DAS28_YN" "Steroids_GT_5"
## [11] "DMARDs" "Biologics" "sDMARDS" "OsteopScreen" "FIPS"
# Select a subset of variables using [ ]
# (the empty row index before the comma keeps all rows;
# columns are chosen by quoted name)
subdat <- mydat[, c("ID", "Age", "CDAI")]
names(subdat)
## [1] "ID" "Age" "CDAI"
# Select a subset of variables using subset()
# (variable names are given unquoted in the select argument)
subdat <- subset(mydat,
                 select = c(ID, Age, CDAI))
names(subdat)
## [1] "ID" "Age" "CDAI"
# Select a range of variables using a colon
# (ID:CDAI keeps every variable from ID through CDAI, in dataset order)
subdat <- subset(mydat,
                 select = ID:CDAI)
names(subdat)
## [1] "ID" "Age" "AgeGp" "Sex" "Yrs_From_Dx"
## [6] "CDAI"
# Exclude a variable using a minus sign
# (this starts from the existing subdat, not from mydat, and overwrites it)
subdat <- subset(subdat,
                 select = -CDAI)
names(subdat)
## [1] "ID" "Age" "AgeGp" "Sex" "Yrs_From_Dx"
In the tidyverse, use select(). As with subset(), you name the variables you want to keep, without quotes, or precede with a minus sign the names of variables you want to drop.
# To keep only a subset of variables
# (pipe the tibble into select() and list the variables unquoted)
subdat <- mydat_tibble %>%
  select(ID, Age, CDAI)
names(subdat)
# Select a range of variables using a colon
# (ID:CDAI keeps every variable from ID through CDAI)
subdat <- mydat_tibble %>%
  select(ID:CDAI)
names(subdat)
# Exclude a variable using a minus sign
# (this starts from the existing subdat and overwrites it)
subdat <- subdat %>%
  select(-CDAI)
names(subdat)