4.5 Append (stack)

To combine two datasets that have the same variables (columns) but different observations (rows), append, or stack, them using rbind() (“row bind”). To use rbind(), the two datasets must have the same number of columns and those columns must have the same names or R will return an error (which is a good thing as it prevents a mistake). To avoid this, first check your datasets to be sure the columns match. You want them to match in type, as well, to ensure that you are appending values that belong together (e.g., you might not want to append numeric values onto character values).

# Load example data
load("Data/dat_append_1.RData")
load("Data/dat_append_2.RData")

dim(dat_append_1)
## [1] 300  14
dim(dat_append_2)
## [1] 230  14
names(dat_append_1)
##  [1] "ID"            "Age"           "AgeGp"         "Sex"           "Yrs_From_Dx"  
##  [6] "CDAI"          "CDAI_YN"       "DAS_28"        "DAS28_YN"      "Steroids_GT_5"
## [11] "DMARDs"        "Biologics"     "sDMARDS"       "OsteopScreen"
names(dat_append_2)
##  [1] "ID"            "Age"           "AgeGp"         "Sex"           "Yrs_From_Dx"  
##  [6] "CDAI"          "CDAI_YN"       "DAS_28"        "DAS28_YN"      "Steroids_GT_5"
## [11] "DMARDs"        "Biologics"     "sDMARDS"       "OsteopScreen"
str(dat_append_1)
## 'data.frame':    300 obs. of  14 variables:
##  $ ID           : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Age          : int  85 86 83 83 85 79 90 90 87 82 ...
##  $ AgeGp        : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ Sex          : int  0 0 0 0 0 1 0 0 0 0 ...
##  $ Yrs_From_Dx  : int  27 27 10 9 NA NA 51 11 36 4 ...
##  $ CDAI         : num  NA 23 14.5 NA NA NA NA 40 6 NA ...
##  $ CDAI_YN      : int  1 2 2 1 1 1 1 2 2 1 ...
##  $ DAS_28       : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ DAS28_YN     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Steroids_GT_5: int  0 1 1 1 0 0 0 1 0 0 ...
##  $ DMARDs       : int  1 1 1 1 0 0 1 0 0 1 ...
##  $ Biologics    : int  0 0 1 0 0 0 1 0 1 0 ...
##  $ sDMARDS      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ OsteopScreen : int  0 1 1 1 0 0 0 1 1 1 ...
str(dat_append_2)
## 'data.frame':    230 obs. of  14 variables:
##  $ ID           : int  330 331 332 333 334 335 336 337 338 339 ...
##  $ Age          : int  54 69 44 58 65 57 58 67 69 54 ...
##  $ AgeGp        : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Sex          : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Yrs_From_Dx  : int  12 5 23 7 4 2 2 28 11 10 ...
##  $ CDAI         : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ CDAI_YN      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ DAS_28       : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ DAS28_YN     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Steroids_GT_5: int  1 0 0 0 0 1 0 0 0 0 ...
##  $ DMARDs       : int  1 0 0 1 0 1 0 1 1 0 ...
##  $ Biologics    : int  0 0 0 0 0 1 0 1 1 0 ...
##  $ sDMARDS      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ OsteopScreen : int  1 0 1 0 1 1 0 1 0 1 ...
# All the same except the number of rows
# So OK to stack

dat_append <- rbind(dat_append_1,
                    dat_append_2)

dim(dat_append)
## [1] 530  14
names(dat_append)
##  [1] "ID"            "Age"           "AgeGp"         "Sex"           "Yrs_From_Dx"  
##  [6] "CDAI"          "CDAI_YN"       "DAS_28"        "DAS28_YN"      "Steroids_GT_5"
## [11] "DMARDs"        "Biologics"     "sDMARDS"       "OsteopScreen"

In tidyverse, append using bind_rows():

# Everything the same as above except replace rbind() with bind_rows().
dat_append <- bind_rows(dat_append_1,
                        dat_append_2)

NOTE: Unlike rbind(), bind_rows() has an optional argument .id. From ?bind_rows: “When .id is supplied, a new column of identifiers is created to link each row to its original data frame. The labels are taken from the named arguments to bind_rows(). When a list of data frames is supplied, the labels are taken from the names of the list. If no names are found a numeric sequence is used instead.”

# Add an id variable identifying which source data each observation came from

# No names provided
dat_append <- bind_rows(dat_append_1,
                        dat_append_2,
                        .id = "source.data")

dat_append %>% 
  count(source.data)
##   source.data   n
## 1           1 300
## 2           2 230
# Names provided (use list())
dat_append <- bind_rows(list("dat_append_1" = dat_append_1,
                             "dat_append_2" = dat_append_2),
                        .id = "source.data")

dat_append %>% 
  count(source.data)
##    source.data   n
## 1 dat_append_1 300
## 2 dat_append_2 230