7 How to create contingency tables

We can use table(), addmargins(), prop.table() and as.data.frame.matrix() to create the contingency tables that we want. See this example:

# start from an empty workspace (removes the objects defined so far)
rm(list = ls())

# attach dplyr, which supplies the %>% pipe used further down
library(dplyr)

# simulate 20 observations of two categorical variables:
# x1 takes the values a-e and x2 the values A-E, drawn with replacement
# (no seed is set here, so the values drawn differ from run to run)
fk_data <- data.frame(
  x1 = sample(letters[1:5], size = 20, replace = TRUE),
  x2 = sample(LETTERS[1:5], size = 20, replace = TRUE)
)

# display every row of the simulated data
print(fk_data)

##    x1 x2
## 1   a  C
## 2   d  A
## 3   d  C
## 4   b  C
## 5   b  C
## 6   c  C
## 7   a  A
## 8   a  B
## 9   b  A
## 10  c  D
## 11  a  B
## 12  d  D
## 13  c  C
## 14  d  D
## 15  d  E
## 16  c  E
## 17  a  C
## 18  d  C
## 19  a  A
## 20  c  B

# cross-tabulate x1 (rows) against x2 (columns)
my_table_0 <- table(fk_data[["x1"]], fk_data[["x2"]])
print(my_table_0)

##    
##     A B C D E
##   a 2 2 2 0 0
##   b 1 0 2 0 0
##   c 0 1 2 1 1
##   d 1 0 2 2 1

# append the row totals and the column totals (the "Sum" margins)
my_table_01 <- addmargins(my_table_0, margin = 1:2)
print(my_table_01)

##      
##        A  B  C  D  E Sum
##   a    2  2  2  0  0   6
##   b    1  0  2  0  0   3
##   c    0  1  2  1  1   5
##   d    1  0  2  2  1   6
##   Sum  4  3  8  3  2  20

# convert the count table to a data frame that keeps the wide layout:
# one row per x1 value and one column per x2 value
my_table_1 <- as.data.frame.matrix(my_table_0)
print(my_table_1)

##   A B C D E
## a 2 2 2 0 0
## b 1 0 2 0 0
## c 0 1 2 1 1
## d 1 0 2 2 1

# row proportions: every count is divided by its row total (margin = 1),
# and the result is converted to a wide data frame
my_table_2 <- my_table_0 %>%
  prop.table(margin = 1) %>%
  as.data.frame.matrix()
# print the proportions with 2 significant digits
print(my_table_2, digits = 2)

##      A    B    C    D    E
## a 0.33 0.33 0.33 0.00 0.00
## b 0.33 0.00 0.67 0.00 0.00
## c 0.00 0.20 0.40 0.20 0.20
## d 0.17 0.00 0.33 0.33 0.17

# column proportions: every count is divided by its column total
# (margin = 2), and the result is converted to a wide data frame
my_table_3 <- my_table_0 %>%
  prop.table(margin = 2) %>%
  as.data.frame.matrix()
# print the proportions with 2 significant digits
print(my_table_3, digits = 2)

##      A    B    C    D   E
## a 0.50 0.67 0.25 0.00 0.0
## b 0.25 0.00 0.25 0.00 0.0
## c 0.00 0.33 0.25 0.33 0.5
## d 0.25 0.00 0.25 0.67 0.5

Remark: By default, table() drops NA’s (its useNA argument defaults to "no"), so observations with a missing value do not appear in the table. In base R they can be included with table(..., useNA = "ifany"). Alternatively, we can use dplyr::tally() plus tidyr::spread(); the following example shows how to do this. For more details about dplyr::tally(), see the next chapter, How to tally.

# start this second example from an empty workspace
rm(list = ls())

# attach the packages used in this example
library(dplyr) # for %>%, group_by() and tally()
library(tidyr) # for spread()

# build a small data set in which both columns contain missing values
fk_data <- data.frame(
  category_1 = c("A", "A", "A", "B", "C", "C", NA, NA),
  category_2 = c("a", "a", "b", "b", NA, NA, NA, "c")
)

# show the table created by table(): the NA's get no row or column
print(table(fk_data$category_1, fk_data$category_2))

##    
##     a b c
##   A 2 1 0
##   B 0 1 0
##   C 0 0 0

# create a contingency table using dplyr::tally and tidyr::spread;
# unlike table(), group_by() keeps NA as a category of its own
a_table <-
  fk_data %>%
  group_by(category_1, category_2) %>%
  tally() %>% # n = number of rows for each pair of categories
  ungroup() %>% # tally() leaves the result grouped by category_1
  # fill = 0L: a pair that never occurs has a count of 0, not a missing count
  spread(key = category_2, value = n, fill = 0L)
# a_table is a tibble; print.data.frame() prints it in the plain layout
print.data.frame(a_table)

##   category_1 a b c <NA>
## 1          A 2 1 0    0
## 2          B 0 1 0    0
## 3          C 0 0 0    2
## 4       <NA> 0 0 1    1