7 How to create contingency tables
We can use table(), addmargins(), prop.table() and as.data.frame.matrix() to create the contingency tables that we want. See this example:
# NOTE: the workspace is deliberately not wiped with rm(list = ls()) here;
# deleting every object in the reader's session is a destructive side effect,
# and every object used in this example is created before it is used.
# load packages
library(dplyr) # provides the %>% pipe used further down
# simulate a small data set: two categorical columns, each holding
# 20 draws (with replacement) from five letters
fk_data <- data.frame(
  x1 = sample(letters[1:5], size = 20, replace = TRUE),
  x2 = sample(LETTERS[1:5], size = 20, replace = TRUE)
)
# inspect the simulated data
print(fk_data)
## x1 x2
## 1 a C
## 2 d A
## 3 d C
## 4 b C
## 5 b C
## 6 c C
## 7 a A
## 8 a B
## 9 b A
## 10 c D
## 11 a B
## 12 d D
## 13 c C
## 14 d D
## 15 d E
## 16 c E
## 17 a C
## 18 d C
## 19 a A
## 20 c B
# cross-tabulate the two columns: x1 supplies the rows, x2 the columns
my_table_0 <- table(fk_data[["x1"]], fk_data[["x2"]])
print(my_table_0)
##
## A B C D E
## a 2 2 2 0 0
## b 1 0 2 0 0
## c 0 1 2 1 1
## d 1 0 2 2 1
# append a "Sum" row and a "Sum" column holding the marginal totals
my_table_01 <- addmargins(A = my_table_0)
print(my_table_01)
##
## A B C D E Sum
## a 2 2 2 0 0 6
## b 1 0 2 0 0 3
## c 0 1 2 1 1 5
## d 1 0 2 2 1 6
## Sum 4 3 8 3 2 20
# convert the table to a data frame that keeps the same row/column layout
my_table_1 <- as.data.frame.matrix(x = my_table_0)
# inspect the converted table
print(my_table_1)
## A B C D E
## a 2 2 2 0 0
## b 1 0 2 0 0
## c 0 1 2 1 1
## d 1 0 2 2 1
# row-wise proportions (margin = 1): each cell is divided by its row total,
# then the result is converted to a data frame
my_table_2 <- as.data.frame.matrix(prop.table(my_table_0, margin = 1))
# inspect the proportions, printed with digits = 2
print(my_table_2, digits = 2)
## A B C D E
## a 0.33 0.33 0.33 0.00 0.00
## b 0.33 0.00 0.67 0.00 0.00
## c 0.00 0.20 0.40 0.20 0.20
## d 0.17 0.00 0.33 0.33 0.17
# column-wise proportions (margin = 2): each cell is divided by its column
# total, then the result is converted to a data frame
my_table_3 <- as.data.frame.matrix(prop.table(my_table_0, margin = 2))
# inspect the proportions, printed with digits = 2
print(my_table_3, digits = 2)
## A B C D E
## a 0.50 0.67 0.25 0.00 0.0
## b 0.25 0.00 0.25 0.00 0.0
## c 0.00 0.33 0.25 0.33 0.5
## d 0.25 0.00 0.25 0.67 0.5
Remark: By default, table() ignores NA’s (its useNA argument defaults to "no"). If we want to include NA’s in the table, we can use dplyr::tally() plus tidyr::spread(); the following example shows how to do this. (In base R, table(..., useNA = "ifany") also keeps them.) For more details about dplyr::tally(), see the next chapter, How to tally.
# NOTE: the workspace is deliberately not wiped with rm(list = ls()) here;
# deleting every object in the reader's session is a destructive side effect,
# and every object used in this example is created before it is used.
# load packages
library(dplyr) # for group_by(), tally() and the %>% pipe
library(tidyr) # for spread()
# build a toy data set whose two categorical columns both contain NA's
fk_data <- data.frame(
  category_1 = rep(c("A", "B", "C", NA), times = c(3, 1, 2, 2)),
  category_2 = rep(c("a", "b", NA, "c"), times = c(2, 2, 3, 1))
)
# show the table created by table(): rows with an NA in either column
# are not counted
print(table(fk_data[["category_1"]], fk_data[["category_2"]]))
##
## a b c
## A 2 1 0
## B 0 1 0
## C 0 0 0
# create a contingency table that keeps NA's, using dplyr::tally() and
# tidyr::spread()
a_table <- fk_data %>%
  group_by(category_1, category_2) %>%
  # one row per (category_1, category_2) combination, with its count in n
  tally() %>%
  # tally() leaves the result grouped by category_1; drop that leftover
  # grouping so a_table is a plain, ungrouped tibble
  ungroup() %>%
  # widen: one column per category_2 value, filled with the counts.
  # NOTE(review): spread() is superseded by tidyr::pivot_wider(); it is kept
  # here because pivot_wider() may order/label the columns differently
  spread(key = category_2, value = n)
# print as a plain data frame; combinations that never occur show up as NA
print.data.frame(a_table)
## category_1 a b c <NA>
## 1 A 2 1 NA NA
## 2 B NA 1 NA NA
## 3 C NA NA NA 2
## 4 <NA> NA NA 1 1