4.1 Select a subset of variables

To limit your dataset to a subset of variables in base R, use brackets [ ] or subset().

# Base R examples: keep or drop columns of mydat by name.
# NOTE: mydat is not created in this snippet; it is assumed to be loaded earlier.

# List every column name so we know what is available to select
names(mydat)
##  [1] "ID"            "Age"           "AgeGp"         "Sex"           "Yrs_From_Dx"  
##  [6] "CDAI"          "CDAI_YN"       "DAS_28"        "DAS28_YN"      "Steroids_GT_5"
## [11] "DMARDs"        "Biologics"     "sDMARDS"       "OsteopScreen"  "FIPS"
# Select a subset of variables using [ ]
# The row index (before the comma) is left empty, so no rows are filtered;
# columns are picked with a character vector of quoted names.
subdat <- mydat[, c("ID", "Age", "CDAI")]
names(subdat)
## [1] "ID"   "Age"  "CDAI"
# Select a subset of variables using subset()
# Inside select =, column names are written without quotes.
subdat <- subset(mydat,
                 select = c(ID, Age, CDAI))
names(subdat)
## [1] "ID"   "Age"  "CDAI"
# Select a range of variables using a colon
# ID:CDAI keeps ID, CDAI, and every column positioned between them in mydat
# (see the output below: six columns, in their original order).
subdat <- subset(mydat,
                 select = ID:CDAI)
names(subdat)
## [1] "ID"          "Age"         "AgeGp"       "Sex"         "Yrs_From_Dx"
## [6] "CDAI"
# Exclude a variable using a minus sign
# Note the input here is subdat (the six-column result of the previous step),
# not mydat, so the result is those six columns minus CDAI.
subdat <- subset(subdat,
                 select = -CDAI)
names(subdat)
## [1] "ID"          "Age"         "AgeGp"       "Sex"         "Yrs_From_Dx"

In the tidyverse, use select(). As with subset(), you name the variables you want to keep, without quotes, or put a minus sign in front of the names of the variables you want to drop.

# Tidyverse examples: keep or drop columns of mydat_tibble with select().
# NOTE: mydat_tibble, %>% and select() are not defined in this snippet;
# presumably mydat_tibble is a tibble version of mydat and the tidyverse
# (dplyr) has been loaded earlier -- confirm before running.

# To keep only a subset of variables
# Column names are passed to select() unquoted, separated by commas.
subdat <- mydat_tibble %>% 
  select(ID, Age, CDAI)

# Check which columns were kept
names(subdat)

# Select a range of variables using a colon
# ID:CDAI asks for the columns from ID through CDAI.
subdat <- mydat_tibble %>% 
  select(ID:CDAI)

# Check which columns were kept
names(subdat)

# Exclude a variable using a minus sign
# Note the input here is subdat (the result of the previous step),
# not mydat_tibble, so CDAI is dropped from the ID:CDAI range selected above.
subdat <- subdat %>% 
  select(-CDAI)

# Check which columns remain
names(subdat)