10 How to plot data
How to plot data? This is a big question, and here I can only give a brief answer, which is the following two-step procedure. Step 1: Get the data ready. Step 2: Use the ggplot2 package (or another package, e.g. the treemap package, for a specific kind of plot). In the following 13 sections, I will use examples to illustrate the two-step procedure.
10.1 Creating basic bar charts
Essentially, a basic bar chart is a plot of a categorical variable on x-axis and a numerical variable on y-axis.
Example 1: a basic bar chart.
rm(list = ls())

# Packages ----
library(ggplot2)

# Step 1: get the data ready ----
# Draw 100 fruits at random and tabulate them. as.data.frame.table() turns the
# table into a data frame with the columns `the_fruits` and `Freq`.
fruits <- c("apple", "orange", "banana")
the_fruits <- sample(fruits, 100, replace = TRUE)
fruit_counts <- table(the_fruits)
plotting_df <- as.data.frame.table(fruit_counts)

# Step 2: plot ----
p <- ggplot(plotting_df, aes(x = the_fruits, weight = Freq)) +
  # NB: the bar heights are supplied via "weight = Freq", not "y = Freq";
  # "width" and "fill" replace the default bar width and colour
  geom_bar(width = 0.5, fill = "blue") +
  # NB: geom_text() writes each count one unit above the top of its bar
  geom_text(aes(x = the_fruits, y = Freq + 1, label = Freq)) +
  labs(
    x = "",
    y = "Frequency",
    title = "A basic bar chart for a basket of fruits"
  ) +
  # NB: hjust = 0.5 centers the title
  theme(plot.title = element_text(hjust = 0.5))
print(p)
Example 2: still a basic bar chart but making the bars horizontal and based on percentage
rm(list = ls())

# Packages ----
library(ggplot2)
library(dplyr)

# Step 1: get the data ready ----
fruits <- c(
  "apple", "orange", "banana", "pear", "plum",
  "kiwi fruit", "peach", "mango", "lemon"
)
the_fruits <- sample(fruits, 1000, replace = TRUE)
a_table <- table(the_fruits)
# one row per fruit: its count (Freq) and its share of the basket (proportion)
plotting_df <- mutate(
  as.data.frame.table(a_table),
  proportion = Freq / sum(Freq)
)

## fruits sorted by increasing proportion; this fixes the order of the bars
for_sorting <- arrange(plotting_df, proportion)
fruits_order <- for_sorting$the_fruits

## upper end of the proportion axis: a little beyond the longest bar so that
## the percentage labels fit inside the panel
axis_top <- max(plotting_df$proportion) + 0.015

# Step 2: plot ----
p <- ggplot(plotting_df, aes(x = the_fruits, weight = proportion)) +
  # NB: bar lengths come from "weight = proportion", not "y = proportion"
  geom_bar(width = 0.5, fill = "blue") +
  # NB: write the percentage just past the end of each bar
  geom_text(
    aes(
      x = the_fruits,
      y = proportion + 0.006,
      label = scales::percent(proportion)
    )
  ) +
  # NB: coord_flip() swaps the axes, which makes the bars horizontal
  coord_flip() +
  # NB: "limits" sets the order of the bars
  scale_x_discrete(limits = fruits_order) +
  # NB: make the plotting range slightly larger than the default one
  scale_y_continuous(limits = c(0, axis_top)) +
  labs(
    x = "",
    y = "",
    title = "A 'horizontal' bar chart for a basket of fruits"
  ) +
  # NB: center the title; drop the axis text of the (flipped) value axis and
  # all axis ticks
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_blank(),
    axis.ticks = element_blank()
  )
print(p)
10.2 Creating side-by-side and stacked bar charts
Example 3
rm(list = ls())

# Packages ----
library(ggplot2)
library(dplyr)

#-------------------
# Aim: To plot numbers of all kinds of fruits in "local" and "imported" groups
#-------------------

# Step 1: get the data ready ----
fruits <- c(
  "apple", "orange", "banana", "pear", "plum",
  "kiwi fruit", "peach", "mango", "lemon"
)
origin <- c("local", "imported")
a_df <- data.frame(
  the_fruits = sample(fruits, 1000, replace = TRUE),
  the_origin = sample(origin, 1000, replace = TRUE)
)
# one row per (origin, fruit) pair with its count in `Freq`
plotting_df <- summarise(group_by(a_df, the_origin, the_fruits), Freq = n())

# Pieces used by both charts ----
count_axis_title <- "Number of fruits in the 'basket'"
shared_theme <- theme(
  legend.position = "bottom",
  legend.title = element_blank(),
  legend.text = element_text(size = 9),
  axis.text = element_text(size = 12),
  axis.title = element_text(size = 14),
  plot.title = element_text(size = 14),
  panel.background = element_rect(fill = "grey90")
)

# ------------ side-by-side bar chart --------------
## order the bars by the counts of the local fruits
local_counts <- arrange(filter(plotting_df, the_origin == "local"), Freq)
the_order <- local_counts$the_fruits

## plot a side-by-side bar chart
p1 <- ggplot(
  plotting_df,
  aes(x = the_fruits, weight = Freq, fill = the_origin)
) +
  # NB: position = "dodge" puts the two origins next to each other
  geom_bar(position = "dodge", width = 0.75) +
  coord_flip() +
  # NB: "limits" plots the bars in the chosen order
  scale_x_discrete(limits = the_order) +
  # NB: "breaks" and "palette" change the default legend order and colors
  scale_fill_brewer(breaks = c("local", "imported"), palette = "Set1") +
  labs(x = "", y = count_axis_title) +
  shared_theme
print(p1)

# ------------ stacked bar chart --------------
## order the bars by the total count of each fruit
fruit_totals <- arrange(
  summarise(group_by(a_df, the_fruits), the_count = n()),
  the_count
)
the_order_2 <- fruit_totals$the_fruits

## plot a stacked bar chart
p2 <- ggplot(
  plotting_df,
  # NB: here we use "y = Freq" (with stat = "identity") instead of
  # "weight = Freq"
  aes(x = the_fruits, y = Freq, group = the_origin, fill = the_origin)
) +
  geom_bar(stat = "identity", position = "stack", width = 0.75) +
  coord_flip() +
  # NB: "limits" plots the bars in the chosen order
  scale_x_discrete(limits = the_order_2) +
  # NB: "breaks" and "palette" change the default legend order and colors
  scale_fill_brewer(breaks = c("local", "imported"), palette = "Set1") +
  labs(x = "", y = count_axis_title) +
  shared_theme
print(p2)
10.3 Creating back-to-back bar charts
Example 4
rm(list = ls())

# Packages ----
library(dplyr)
library(ggplot2)

# Step 1: create a fake data set ----
## unequal sampling probabilities for the ten job types
set.seed(123)
ten_positive_rand_numbers <- abs(rnorm(10)) + 0.1
the_prob <- ten_positive_rand_numbers / sum(ten_positive_rand_numbers)
fk_data <- data.frame(
  job_type = sample(LETTERS[1:10], 1000, replace = TRUE, prob = the_prob),
  gender = sample(c("Male", "Female"), 1000, replace = TRUE)
)

# Step 2: get the data ready ----
gender_counts <- summarise(group_by(fk_data, job_type, gender), Freq = n())
# a trick! Negating the male counts makes the male bars grow in the opposite
# direction from the female bars.
plotting_df <- mutate(
  gender_counts,
  Freq = if_else(gender == "Male", -Freq, Freq)
)

## order the job types by the female counts
female_counts <- arrange(filter(plotting_df, gender == "Female"), Freq)
the_order <- female_counts$job_type

# Step 3: plot ----
## another trick! The tick marks sit at signed positions but are labelled with
## their absolute values, so both sides read as positive counts.
count_breaks <- seq(-150, 150, 50)

p <- ggplot(
  plotting_df,
  aes(x = job_type, y = Freq, group = gender, fill = gender)
) +
  geom_bar(stat = "identity", width = 0.75) +
  coord_flip() +
  scale_x_discrete(limits = the_order) +
  scale_y_continuous(breaks = count_breaks, labels = abs(count_breaks)) +
  # change the default colors of bars
  scale_fill_manual(
    values = c("red", "blue"),
    name = "",
    breaks = c("Male", "Female"),
    labels = c("Male", "Female")
  ) +
  # reverse the order of items in legend
  # guides(fill = guide_legend(reverse = TRUE)) +
  labs(x = "Job type", y = "Count", title = "Back-to-back bar chart") +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    plot.title = element_text(hjust = 0.5),
    panel.background = element_rect(fill = "grey90")
  )
print(p)
Remark: We can use
scale_x_discrete(limits = rev(the_order))
to replace
scale_x_discrete(limits = the_order)
and the resulting chart is also called a pyramid chart. If you google “population pyramid”, you can find more examples of pyramid charts.
10.4 Creating Pareto charts
A Pareto chart basically is a bar chart (with the bars ordered) plus a frequency polygon (i.e. a line chart). It is useful for revealing something like the 80-20 rule—e.g. 80% of the accidents are due to 20% of the possible reasons. See https://en.wikipedia.org/wiki/Pareto_chart for more details. The following example shows how to make a Pareto chart. Please pay attention to how the layers are built up.
Example 5
rm(list = ls())

# Packages ----
library(dplyr)
library(ggplot2)

# Step 1: create a fake data set ----
# Fix the seed so that the simulated accidents, and hence the chart, are
# reproducible.
set.seed(2017)
# "Reason A", ..., "Reason F", all capitalised the same way so that the axis
# labels are consistent
reasons <- paste("Reason", LETTERS[1:6])
set_prob <- c(0.1, 0.2, 0.6, 0.05, 0.02, 0.03)
fk_data <- data.frame(
  accident_NO = 1:1000,
  reason = sample(reasons, 1000, replace = TRUE, prob = set_prob)
)

# Step 2: get the data ready ----
# One row per reason, sorted from the most to the least frequent, with the
# relative frequency and its running total.
plotting_df <-
  fk_data %>%
  group_by(reason) %>%
  summarise(freq = n()) %>%
  arrange(desc(freq)) %>%
  mutate(
    relative_freq = freq / sum(freq),
    cumulative_freq = cumsum(relative_freq)
  )

## the bars are drawn in this (descending) order
the_order <- plotting_df$reason

# Step 3: plot, layer by layer ----
p <-
  plotting_df %>%
  ggplot(aes(x = reason, weight = relative_freq)) +
  # layer 1: the ordered bars
  geom_bar(width = 0.5, fill = "blue") +
  # layers 2 and 3: the cumulative frequency as points joined by a line
  geom_point(aes(x = reason, y = cumulative_freq)) +
  # NB: Must use "group = 1" so that the points on the discrete x-axis are
  # joined into a single line
  geom_line(aes(x = reason, y = cumulative_freq, group = 1)) +
  scale_x_discrete(limits = the_order) +
  # NB: the argument is spelled out as "labels" (not "label") to avoid
  # relying on partial argument matching
  scale_y_continuous(labels = scales::percent) +
  labs(
    x = "",
    y = "Relative frequency",
    title = "A Pareto diagram for reasons of 1000 accidents"
  ) +
  # NB: Use theme to center the title
  theme(plot.title = element_text(hjust = 0.5))
print(p)
10.5 Creating lollipop charts
Notice that \[ \hbox{a lollipop} = \hbox{a segment} + \hbox{a point}, \] thus it is natural to use geom_segment() and geom_point() to create lollipop charts.
Example 6
rm(list = ls())

# Packages ----
library(ggplot2)
library(dplyr)

# Step 1: create a fake data set ----
# 10000 letters drawn with unequal probabilities
set.seed(9072017)
rand_numbers <- abs(rnorm(26))
the_prob <- rand_numbers / sum(rand_numbers)
fk_data <- data.frame(
  x = sample(LETTERS, 10000, replace = TRUE, prob = the_prob)
)

# Step 2: get the data ready ----
# count each letter, convert the counts to proportions and sort by proportion
letter_counts <- summarise(group_by(fk_data, x), Freq = n())
plotting_df <- arrange(
  mutate(letter_counts, proportion = Freq / sum(Freq)),
  proportion
)
the_order <- plotting_df$x

# Step 3: plot ----
p <- ggplot(plotting_df, aes(x = x, y = proportion)) +
  # the "stick": a segment from 0 up to the proportion
  geom_segment(aes(x = x, xend = x, y = 0, yend = proportion)) +
  # the "candy": a point at the proportion
  geom_point() +
  scale_x_discrete(limits = the_order) +
  scale_y_continuous(labels = scales::percent) +
  labs(
    x = "Category",
    y = "Proportion",
    title = "A lollipop chart"
  ) +
  # center the title
  theme(plot.title = element_text(hjust = 0.5))
print(p)
10.6 Creating treemaps
A treemap can show three variables by using labels, sizes of rectangles, and colors. Below is a treemap of the 15 most populous cities in New Zealand, based on the 2016 data. The original data comes from: https://en.wikipedia.org/wiki/List_of_cities_in_New_Zealand
Example 7
rm(list = ls())

# Packages ----
library(treemap)
library(readr) # for read_csv

# Step 1: read the data in ----
# NB: skip = 1 skips the first line of the file before reading
the_url <- "https://raw.githubusercontent.com/LarryZhang2016/Data/master/NZ_cities.csv"
NZ_cities <- read_csv(the_url, skip = 1)

# Step 2: make a tree map ----
# labels = city names, rectangle sizes = population, colors = population
# density
treemap(
  dtf = NZ_cities,
  index = c("City_name"),
  vSize = "Population",
  vColor = "Population_density",
  type = "value",
  algorithm = "pivotSize",
  palette = "Spectral",
  border.col = c("grey70", "grey90"),
  title = "Treemap of the top 15 NZ's most populous cities",
  title.legend = "Population density (people/km^2)",
  fontsize.title = 18
)
10.7 Creating scatter plots
A scatter plot is very useful for exploring the relationship between two continuous variables. With the following example, we show how to create a scatter plot. We want to emphasize the details, that is,
- label properly
- mark the outliers
- add in the regression line
- refit data and add in the new regression line
Example 8
rm(list = ls())

# Packages ----
library(readr) # for read_csv
library(ggplot2)

# Step 1: read the data in ----
the_url <- "https://raw.githubusercontent.com/LarryZhang2016/Data/master/NZ_cities.csv"
NZ_cities <- read_csv(the_url, skip = 1)

# Step 2: the basic scatter plot ----
p1 <-
  ggplot(NZ_cities, aes(x = Area_in_km2, y = Population)) +
  geom_point() +
  # NB: scales::comma puts thousands separators in the large numbers
  scale_y_continuous(labels = scales::comma) +
  # NB: paste0 plus "\n" breaks the long title into two lines
  labs(
    x = "Area (in km^2)",
    title = paste0(
      "Population vs. area for the \n",
      "top 15 NZ's most populous cities"
    )
  ) +
  theme(plot.title = element_text(hjust = 0.5))
print(p1)

# Step 3: label and color the biggest cities, add the regression line ----
# Next, we want to label the points for
# Auckland, Wellington, Christchurch, and Hamilton with their names and red
# Also, add the regression line in
library(dplyr)
library(ggrepel) # for geom_text_repel()

# add two new columns to NZ_cities: the text label (empty for the other
# cities) and the point color
biggest_cities <- c("Auckland", "Wellington", "Christchurch", "Hamilton")
NZ_cities_1 <-
  NZ_cities %>%
  mutate(
    the_label = if_else(City_name %in% biggest_cities, City_name, ""),
    the_color = if_else(City_name %in% biggest_cities, "red", "black")
  )

p2 <-
  p1 +
  geom_text_repel(data = NZ_cities_1, aes(label = the_label)) +
  # NB: the colors are passed as a plain vector, which relies on NZ_cities_1
  # having the same row order as NZ_cities (mutate() keeps the row order)
  geom_point(color = NZ_cities_1$the_color) +
  # add in the regression line
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE)
print(p2)

# Step 4: refit without the three largest cities ----
# Finally, refit the data after removing "Auckland", "Wellington",
# "Christchurch" and add in the new regression line
NZ_cities_2 <-
  NZ_cities %>%
  filter(!(City_name %in% biggest_cities[1:3])) %>%
  select(City_name, Population, Area_in_km2)

## find the regression equations
line_1 <- lm(Population ~ Area_in_km2, NZ_cities)
line_2 <- lm(Population ~ Area_in_km2, NZ_cities_2)

# Format a fitted simple regression as "<name>: Population = a + b * Area",
# with both coefficients rounded to two decimals. A negative slope is written
# as "a - |b| * Area" rather than "a + -b * Area".
format_line_eq <- function(fit, line_name) {
  est <- round(unname(coef(fit)), 2)
  slope_sign <- if (isTRUE(est[2] < 0)) " - " else " + "
  paste0(
    line_name, ": ", "Population = ",
    est[1], slope_sign, abs(est[2]), " * Area"
  )
}
line_1_eq <- format_line_eq(line_1, "Line 1")
line_2_eq <- format_line_eq(line_2, "Line 2")

p3 <-
  p1 +
  # NB: the labels are taken from a column of the layer's data (as in p2)
  # instead of writing NZ_cities_1$the_label inside aes()
  geom_text_repel(data = NZ_cities_1, aes(label = the_label)) +
  geom_point(color = NZ_cities_1$the_color) +
  # regression line for all cities
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "blue") +
  # regression line for the data without the three largest cities
  geom_smooth(
    data = NZ_cities_2, method = "lm", formula = y ~ x,
    se = FALSE, color = "purple"
  ) +
  # print the two equations in the colors of their lines
  annotate(
    geom = "text", x = 400, y = 1400000,
    label = line_1_eq, color = "blue"
  ) +
  annotate(
    geom = "text", x = 400, y = 1250000,
    label = line_2_eq, color = "purple"
  )
print(p3)
10.8 Creating side-by-side box plots
Roughly speaking, a box plot shows the five-number summary—i.e. minimum, first quartile, second quartile, third quartile, and maximum—of data. Plotting several box plots together, we have the so-called side-by-side box plot, which is useful for comparison of data among groups.
In the following example, we will create a side-by-side box plot for random numbers drawn from the standard normal distribution, the t distribution with five degrees of freedom, the uniform distribution on \((-3, 3)\), and the double exponential distribution with the probability density \[ f(y)=\frac{1}{2}\lambda e ^{-\lambda |y|}\ \hbox{for}\ -\infty <y<+\infty, \] where \(\lambda=\sqrt{\frac{2}{\pi}}\).
For our purpose, we need this
Technical note: We can show that if \(X\sim \hbox{Exp}(\lambda)\), \(U\sim \hbox{Uniform}(0, 1)\), and \(X\) and \(U\) are independent, then \[ Y=\left\{ \begin{array}{rl} -X, & \hbox{if}\ U\le 0.5,\\ X, & \hbox{if}\ U > 0.5, \end{array} \right. \] has a double exponential distribution; that is, the probability density function of \(Y\) is \[ f(y) = \frac{\lambda}{2}e^{-\lambda |y|}\ \hbox{for}\ -\infty < y < +\infty. \]
Example 9
rm(list = ls())

# Packages ----
library(dplyr)
library(tidyr) # for gather()
library(ggplot2)

# Step 1: create a fake data set ----
# The random draws are made in the order: normal, t, uniform on (-3, 3),
# exponential, uniform on (0, 1).
set.seed(1234567)
normal_draws <- rnorm(1000)
t_draws <- rt(1000, df = 5)
unif_draws <- runif(1000, -3, 3)
exp_draws <- rexp(1000, rate = sqrt(2 / pi))
coin_draws <- runif(1000, 0, 1)
# Following the technical note: flip the sign of the exponential draw when
# the uniform draw is at most 0.5, which gives the double exponential sample.
sign_flip <- if_else(coin_draws <= 0.5, -1, 1)
fk_data <- data.frame(
  Normal = normal_draws,
  t_df_5 = t_draws,
  Unif = unif_draws,
  Double_exp = exp_draws * sign_flip
)

# Step 2: get the data ready ----
# long format: one row per random number, labelled with its distribution
plotting_df <- gather(
  fk_data,
  key = distribution, value = rand_number, Normal:Double_exp
)

# Step 3: plot ----
p <- ggplot(
  plotting_df,
  aes(x = distribution, y = rand_number, group = distribution)
) +
  geom_boxplot() +
  coord_flip() +
  # NB: "breaks" and "labels" replace the column names on the axis with
  # readable tick labels
  scale_x_discrete(
    breaks = c("Double_exp", "Normal", "t_df_5", "Unif"),
    labels = c(
      "Double Exponential",
      "Standard Normal",
      "t with df=5",
      "Uniform on (-3, 3)"
    )
  ) +
  labs(x = "Distribution", y = "", title = "Side-by-side box plot") +
  theme(plot.title = element_text(hjust = 0.5))
print(p)
10.9 Creating grid plots
Grid plots allow us to show several (e.g. four) variables in one plot, and certainly they are useful. The key here is to use facet_grid().
Example 10
rm(list = ls())
# load packages
library(dplyr)
library(tidyr)
library(ggplot2)
# create a fake data set
## a helper function
set.seed(21072017)
# Simulate the records of `n` students for a single `year`.
#
# Arguments:
#   year  value repeated in the `year` column (default 2015)
#   n     number of students, i.e. rows (default 20)
#
# Returns a data frame with the columns `year`, `gender` ("male"/"female",
# sampled with replacement), `stats_grade` (normal, mean 55, sd 10) and
# `math_grade` (normal, mean 60, sd 10).
create_year_data <- function(year = 2015, n = 20) {
  # the draws are made in the order gender, stats grades, maths grades
  gender_draws <- sample(c("male", "female"), n, replace = TRUE)
  stats_draws <- rnorm(n, mean = 55, sd = 10)
  math_draws <- rnorm(n, mean = 60, sd = 10)
  data.frame(
    year = rep(year, n),
    gender = gender_draws,
    stats_grade = stats_draws,
    math_grade = math_draws
  )
}
# simulate 20 students for each year (2016 first, then 2015, then 2014) and
# stack the three data frames
yearly_data <- lapply(c(2016, 2015, 2014), create_year_data, n = 20)
fk_data <- bind_rows(yearly_data)

# prepare data for plotting
# Average grade per year and gender, then reshaped to a long table with one
# row per (year, gender, subject).
plotting_df <-
  fk_data %>%
  group_by(year, gender) %>%
  summarise(
    Stats = mean(stats_grade),
    Maths = mean(math_grade)
  ) %>%
  ungroup() %>%
  # make a long table
  gather(key = subject, value = grade, -year, -gender) %>%
  arrange(year)

# plot
# NB: paste0 plus "\n" breaks the long title into two lines
the_title <- paste0(
  "Average maths and stats grades for\n",
  "female and male students in 2014-2016"
)
p <-
  plotting_df %>%
  ggplot(aes(y = grade, color = gender)) +
  # a lollipop per gender: a segment from 0 to the average grade plus a point
  geom_segment(aes(x = gender, xend = gender, y = 0, yend = grade)) +
  geom_point(aes(x = gender, y = grade)) +
  coord_flip() +
  scale_x_discrete(limits = c("male", "female")) +
  # NB: facet_grid() makes one panel per year (rows) and subject (columns)
  facet_grid(year ~ subject) +
  labs(
    x = "", y = "Average Grade",
    title = the_title
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank(),
    panel.background = element_rect(fill = "grey90")
  )
print(p)
10.10 Creating a simple PCA plot
When we have an \(n\)-variate (\(n\ge 3\)) data set, where each column contains continuous type data, we often want to look at the cluster relationship among the \(m\) observations (or rows). For this purpose, we can make a PCA (Principal Component Analysis) plot. The fundamental idea here is that we map the \(n\)-dimensional data to 2-dimensional (PC1 and PC2) data and then make a scatter plot of the 2-dimensional data.
Example 11
rm(list = ls())

# Packages ----
library(dplyr, quietly = TRUE)
library(ggplot2, quietly = TRUE)

# NB: We will use iris, which is a data set from R
(head(iris))
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa

# Step 1: do the mapping ----
## step 1a: covariance matrix of the four measurement columns
the_cov <- cov(iris[, 1:4])
## step 1b: its eigenvalues and eigenvectors
the_eigen <- eigen(the_cov)
## Remark: steps 1a and 1b together are equivalent to
## PC <- prcomp(iris[, 1:4])
## step 1c: project the (uncentered) measurements onto the first two
## eigenvectors; the unnamed result columns become V1 and V2
pc_scores <- as.matrix(iris[, 1:4]) %*% the_eigen$vectors[, 1:2]
map_2_PC1_PC2 <- as.data.frame(pc_scores)

# check how much of the variance is expressed by PC1 and PC2
(the_proportions <- the_eigen$values[1:2] / sum(the_eigen$values))
## [1] 0.92461872 0.05306648

# Step 2: prepare the data frame for plotting ----
temp_df <- data.frame(Species = iris[, 5])
plotting_df <- rename(
  bind_cols(map_2_PC1_PC2, temp_df),
  PC1 = V1, PC2 = V2
)

# Step 3: plot ----
ggplot(plotting_df, aes(x = PC1, y = PC2, color = Species)) +
  geom_point()
10.11 Creating time series plots
It is easy to create a time series plot. Here we pay attention to some “small” things.
- Make sure the time is labelled correctly on x-axis.
- If the values are for money and large, then we should show the $ sign and use “,” in the numbers for labeling y-axis.
- We often want to get the points connected to show that the points are related.
Example 12
rm(list = ls())

# Packages ----
library(ggplot2)
library(lubridate) # for dealing with data related to time

# Step 1: create a fake data set ----
## one observation per year, dated 31 December
for_year <- 1997:2016
n_years <- length(for_year)
col_1 <- paste0(for_year, "-12-31")

## simulated data from a random walk: each value is the previous value plus
## a normal shock, starting from x0; cumsum() accumulates the shocks, so no
## index loop is needed and x0 keeps its starting value
set.seed(20170805)
epsilon <- rnorm(n_years, mean = 0, sd = 10000)
x0 <- 5 * 1e5
col_2 <- x0 + cumsum(epsilon)

fk_data <-
  data.frame(
    EndOfYear = ymd(col_1),
    Value_in_dollar = round(col_2, 0)
  )

# Step 2: plot ----
p <-
  # Note the use of function year(): the x-axis shows the year of each date
  ggplot(fk_data, aes(x = year(EndOfYear), y = Value_in_dollar)) +
  geom_point() +
  # connect the points to show that they are related
  geom_line() +
  # show the $ sign and "," in the y-axis labels
  scale_y_continuous(labels = scales::dollar) +
  labs(x = "Year", y = "Market value", title = "Plot of a time series") +
  theme(plot.title = element_text(hjust = 0.5))
print(p)
10.12 Showing pop-up’s
For exploratory data analysis, we may want our plot to show some information in a pop-up when we hover the mouse over it. In the following example, I will show how to do this with plotly::ggplotly() (thanks to Chris Hansen for pointing this function out to me). Of course, there are other useful R packages for showing pop-ups, such as googleVis and highcharter; interested readers can explore them.
Example 13
rm(list = ls())

# Packages ----
library(readr) # for read_csv
library(ggplot2)
library(plotly)

# Step 1: read the data in ----
the_url <- "https://raw.githubusercontent.com/LarryZhang2016/Data/master/NZ_cities.csv"
NZ_cities <- read_csv(the_url, skip = 1)

# Step 2: build the static plot ----
# NOTE(review): "fill = City_name" is presumably mapped so that the city name
# appears in the pop-up; the legend it would create is switched off below.
plot_title <- paste0(
  "Population vs. area for the ",
  "top 15 NZ's most populous cities"
)
p1 <- ggplot(
  NZ_cities,
  aes(x = Area_in_km2, y = Population, fill = City_name)
) +
  geom_point() +
  # NB: scales::comma puts thousands separators in the large numbers
  scale_y_continuous(labels = scales::comma) +
  # NB: paste0 only splits the title string across two source lines here;
  # the title itself stays on one line
  labs(x = "Area (in km^2)", y = "PopSize", title = plot_title) +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  )

# Step 3: make it interactive ----
# ggplotly() converts the ggplot; config() hides the mode bar
config(ggplotly(p1), displayModeBar = FALSE)
10.13 Putting plots in one panel
We create a few plots and want to put them together. It is handy to do so with gridExtra::grid.arrange(). (I thank Peter Ellis for pointing me to this function.)
Example 14
rm(list = ls())
# load packages
library(gridExtra)
library(ggplot2)
# Plot a probability density function as a red curve.
#
# Arguments:
#   func_name    the density function to draw (default dnorm)
#   para         list of further arguments passed to `func_name`
#   domain       data frame whose column `x` gives the range to draw over
#   title_lable  plot title
#
# Returns the ggplot object.
plot_density <- function(func_name = dnorm, para = list(mean = 0, sd = 1),
                         domain = data.frame(x = c(-3, 3)),
                         title_lable = "PDF of N(0, 1)") {
  density_curve <- stat_function(fun = func_name, args = para, color = "red")
  axis_labels <- labs(x = "x", y = "f(x)", title = title_lable)
  # hjust = 0.5 puts the title in the center
  centered_title <- theme(plot.title = element_text(hjust = 0.5))
  ggplot(domain, aes(x)) + density_curve + axis_labels + centered_title
}
# build the four probability density plots
## standard normal (all defaults)
p1 <- plot_density()
## t distribution
p2 <- plot_density(
  func_name = dt,
  para = list(df = 30),
  title_lable = "PDF of t distribution with df=30"
)
## the last two densities are drawn on (0, 10)
positive_domain <- data.frame(x = c(0, 10))
p3 <- plot_density(
  func_name = dexp,
  para = list(rate = 1),
  domain = positive_domain,
  title_lable = "PDF of Exp(1) distribution"
)
p4 <- plot_density(
  func_name = dchisq,
  para = list(df = 5),
  domain = positive_domain,
  title_lable = "PDF of Chisq distribution with df=5"
)

# put the four plots together in a 2 x 2 layout, filled row by row
panel_layout <- matrix(1:4, nrow = 2, ncol = 2, byrow = TRUE)
grid.arrange(p1, p2, p3, p4, newpage = TRUE, layout_matrix = panel_layout)