Subsetting Data

Selecting variable(s)/observation(s)

data$group     # extracts the values of the 'group' variable
##  [1] 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2
is.vector(data$group)      # TRUE here: a single variable extracted with $ is a vector
## [1] TRUE
data[, "group"]     # same values, selected by column name with [rows, columns] indexing
##  [1] 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2
data[, c("group", "score1", "score2")]     # returns the three named columns for every row
##    group score1 score2
## 1      1     35     45
## 2      1     23     14
## 3      1     14     26
## 4      1     17     25
## 5      1     23     27
## 6      1     35     47
## 7      1     27     37
## 8      1     33     50
## 9      1     32     15
## 10     1     31     37
## 11     2     34     48
## 12     2     27     16
## 13     2     51     45
## 14     2     36     26
## 15     2     39     37
## 16     2     45     41
## 17     2     31     25
## 18     2     40     17
## 19     2     25     15
## 20     2     32     27
# Store the two score columns in 'scores'; the outer parentheses also print the result
(scores <- data[, c("score1", "score2")])
##    score1 score2
## 1      35     45
## 2      23     14
## 3      14     26
## 4      17     25
## 5      23     27
## 6      35     47
## 7      27     37
## 8      33     50
## 9      32     15
## 10     31     37
## 11     34     48
## 12     27     16
## 13     51     45
## 14     36     26
## 15     39     37
## 16     45     41
## 17     31     25
## 18     40     17
## 19     25     15
## 20     32     27
data[, 1:2]     # returns the first two columns (here 'ID' and 'group')
##    ID group
## 1   1     1
## 2   2     1
## 3   3     1
## 4   4     1
## 5   5     1
## 6   6     1
## 7   7     1
## 8   8     1
## 9   9     1
## 10 10     1
## 11 11     2
## 12 12     2
## 13 13     2
## 14 14     2
## 15 15     2
## 16 16     2
## 17 17     2
## 18 18     2
## 19 19     2
## 20 20     2
data[c(2, 4), ]       # returns the second and fourth rows, with all columns
##   ID group score1 score2
## 2  2     1     23     14
## 4  4     1     17     25
data[19, 3]     # returns the value in the 19th row and 3rd column
## [1] 25

Selecting subgroup(s)/subset(s)

data[data$group==1, ]     # returns the rows that satisfy 'group == 1'
##    ID group score1 score2
## 1   1     1     35     45
## 2   2     1     23     14
## 3   3     1     14     26
## 4   4     1     17     25
## 5   5     1     23     27
## 6   6     1     35     47
## 7   7     1     27     37
## 8   8     1     33     50
## 9   9     1     32     15
## 10 10     1     31     37
subset(data, group==1)    # returns the subset of rows that satisfy 'group == 1'
##    ID group score1 score2
## 1   1     1     35     45
## 2   2     1     23     14
## 3   3     1     14     26
## 4   4     1     17     25
## 5   5     1     23     27
## 6   6     1     35     47
## 7   7     1     27     37
## 8   8     1     33     50
## 9   9     1     32     15
## 10 10     1     31     37
data[data$score1 > 30, ]    # returns the observations (rows) whose 'score1' is larger than 30
##    ID group score1 score2
## 1   1     1     35     45
## 6   6     1     35     47
## 8   8     1     33     50
## 9   9     1     32     15
## 10 10     1     31     37
## 11 11     2     34     48
## 13 13     2     51     45
## 14 14     2     36     26
## 15 15     2     39     37
## 16 16     2     45     41
## 17 17     2     31     25
## 18 18     2     40     17
## 20 20     2     32     27