10 How to plot data

How to plot data? This is a big question, and here I can give a quick/brief answer, which is this two-step procedure. Step 1: Get the data ready. Step 2: Use ggplot2 package (or another package, e.g. treemap package, for some a specific plot). In the following 13 sections, I will use examples to illustrate the two-step procedure.

10.1 Creating basic bar charts

Essentially, a basic bar chart is a plot of a categorical variable on x-axis and a numerical variable on y-axis.

Example 1: a basic bar chart.

rm(list = ls())

# load packages
library(ggplot2)

# prepare a dataframe for plotting
fruits <- c("apple", "orange", "banana")
the_fruits <- sample(fruits, 100, replace = TRUE)

plotting_df <- as.data.frame.table(table(the_fruits))

# plotting
p <- ggplot(plotting_df, aes(x = the_fruits, weight = Freq)) +
  # NB: use "weight = Freq" instead of "y = Freq"
  geom_bar(width = 0.5, fill = "blue") +
  # NB: use "width" and "fill" to change the default bar width and color
  labs(x = "", y = "Frequency", 
       title = "A basic bar chart for a basket of fruits") +
  theme(plot.title = element_text(hjust = 0.5)) +
  # NB: use theme to center the title
  geom_text(aes(x = the_fruits, y = Freq + 1, label = Freq)) 
  # NB: use "geom_text" to put the the numbers to indicate heights of bars

print(p)

Example 2: still a basic bar chart but making the bars horizontal and based on percentage

rm(list = ls())

# load packages
library(ggplot2)
library(dplyr)

# prepare a dataframe for plotting
fruits <- c("apple", "orange", "banana", "pear", "plum", 
            "kiwi fruit", "peach", "mango", "lemon")
the_fruits <- sample(fruits, 1000, replace = TRUE)

a_table <- table(the_fruits)
plotting_df <- 
  as.data.frame.table(a_table) %>% 
  mutate(proportion = Freq / sum(Freq))

## Create a vector to order the fruits in terms of proportion 
for_sorting <- plotting_df %>%
  arrange(proportion)
fruits_order <- for_sorting$the_fruits

# plotting
p <- ggplot(plotting_df, aes(x = the_fruits, weight = proportion)) +
  # NB: use "weight = proportion" instead of "y = proportion"
  geom_bar(width = 0.5, fill = "blue") +
  # NB: use "width" and "fill" to change the default bar width and color
  labs(x = "", y = "", 
       title = "A 'horizontal' bar chart for a basket of fruits") +
  coord_flip() +
  # NB: use "coord_flip" to flip coordinates
  scale_x_discrete(limits = fruits_order) +
  # NB: use the above to set the order of bars
  scale_y_continuous(limits = c(0, max(plotting_df$proportion)+0.015)) +
  # NB: use the above to make the plot slightly bigger than the default one
  geom_text(aes(x = the_fruits, y = proportion + 0.006, 
                label = scales::percent(proportion))) +
  # NB: use the above to put the the pentage numbers to indicate lengths of bars
  theme(plot.title = element_text(hjust = 0.5), 
        axis.text.x = element_blank(),
        axis.ticks = element_blank()) 
  # NB: use theme to center the title, to remove axis text and ticks
  

print(p)

10.2 Creating side-by-side and stacked bar charts

Example 3

rm(list = ls())

# load packages
library(ggplot2)
library(dplyr)

#-------------------
# Aim: To plot numbers of all kinds of fruits in "local" and "imported" groups
#-------------------


# prepare a dataframe for plotting
fruits <- c("apple", "orange", "banana", "pear", "plum", 
            "kiwi fruit", "peach", "mango", "lemon")
origin <- c("local", "imported")


a_df <- data.frame(the_fruits = sample(fruits, 1000, replace = TRUE),
                   the_origin = sample(origin, 1000, replace = TRUE)) 

plotting_df <-
  a_df %>%  
  group_by(the_origin, the_fruits) %>% 
  summarise(Freq = n())
 
# ------------ side-by-side bar chart --------------
## set the order of bars according to local fruits   
temp_df_1 <-
  plotting_df %>% 
  filter(the_origin == "local") %>% 
  arrange(Freq)
the_order <- temp_df_1$the_fruits

## plot a side-by-side bar chart
p1 <- 
  plotting_df %>% 
  ggplot(aes(x = the_fruits, weight = Freq, fill = the_origin)) +
  geom_bar(position = "dodge", width = 0.75) +
  # NB: use the above to plot bars in the certain order
  coord_flip() +
  scale_x_discrete(limits = the_order) +
  labs(x = "", y = "Number of fruits in the 'basket'") +
  scale_fill_brewer(breaks=c("local", "imported"), palette = "Set1") +
  # NB: use the above to change the default order and color of legend
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        axis.text = element_text(size=12),
        axis.title = element_text(size=14), 
        plot.title = element_text(size=14),
        legend.text = element_text(size=9),
        panel.background = element_rect(fill =  "grey90"))

print(p1)

# ------------ stacked bar chart --------------
## set the order according to totals
temp_df_2 <-
  a_df %>% 
  group_by(the_fruits) %>% 
  summarise(the_count = n()) %>% 
  arrange(the_count)
the_order_2 <- temp_df_2$the_fruits

## plot a stacked bar chart
p2 <- 
  plotting_df %>% 
  ggplot(aes(x = the_fruits, y = Freq, group = the_origin, fill = the_origin)) +
  # NB: use "y = Freq" instead of "weight = Freq"
  geom_bar(stat = "identity", position = "stack", width = 0.75) +
  coord_flip() +
  scale_x_discrete(limits = the_order_2) +
  # NB: use the above to plot the bars in order
  labs(x = "", y = "Number of fruits in the 'basket'") +
  scale_fill_brewer(breaks=c("local", "imported"), palette = "Set1") +
  # NB: use the above to change the default order and color of legend
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        axis.text = element_text(size=12),
        axis.title = element_text(size=14), 
        plot.title = element_text(size=14),
        legend.text = element_text(size=9),
        panel.background = element_rect(fill =  "grey90"))

print(p2)

10.3 Creating back-to-back bar charts

Example 4

rm(list = ls())

# load packages
library(dplyr)
library(ggplot2)

# create a fake data set
## some preparation
set.seed(123)
ten_positive_rand_numbers <- abs(rnorm(10)) + 0.1
the_prob <- ten_positive_rand_numbers / sum(ten_positive_rand_numbers)

fk_data <- data.frame(job_type = sample(LETTERS[1:10], 1000, 
                                        replace = TRUE, prob = the_prob),
                      gender = sample(c("Male", "Female"), 1000, 
                                      replace = TRUE))

# prepare data for plotting
plotting_df <-
  fk_data %>% 
  group_by(job_type, gender) %>% 
  summarise(Freq = n()) %>% 
  # a trick!
  mutate(Freq = if_else(gender == "Male", -Freq, Freq))
## find the order
temp_df <-
  plotting_df %>% 
  filter(gender == "Female") %>% 
  arrange(Freq)
the_order <- temp_df$job_type


# plot
p <- 
  plotting_df %>% 
  ggplot(aes(x = job_type, y = Freq, group = gender, fill = gender)) +
  geom_bar(stat = "identity", width = 0.75) +
  coord_flip() +
  scale_x_discrete(limits = the_order) +
  # another trick!
  scale_y_continuous(breaks = seq(-150, 150, 50), 
                     labels = abs(seq(-150, 150, 50))) +
  labs(x = "Job type", y = "Count", title = "Back-to-back bar chart") +
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        plot.title = element_text(hjust = 0.5),
        panel.background = element_rect(fill =  "grey90")) +
  # reverse the order of items in legend
  # guides(fill = guide_legend(reverse = TRUE)) +
  # change the default colors of bars
  scale_fill_manual(values=c("red", "blue"),
                    name="",
                    breaks=c("Male", "Female"),
                    labels=c("Male", "Female")) 

print(p)

Remark: We can use

scale_x_discrete(limits = rev(the_order))

to replace

scale_x_discrete(limits = the_order)

and the resulted chart is also called pyramid chart. If you google “population pyramid” you can find more examples of pyramid charts.

10.4 Creating Pareto charts

A Pareto chart basically is a bar chart (with the bars ordered) plus a frequency polygon (i.e. a line chart). It is useful for revealing something like the 80-20 rule—e.g. 80% of the accidents are due to 20% of the possible reasons. See https://en.wikipedia.org/wiki/Pareto_chart for more details. The following example shows how to make a Pareto chart. Please pay attention to how the layers are built up.

Example 5

rm(list = ls())

# load packages
library(dplyr)
library(ggplot2)

# create a fake data set
reasons <- c("Reason A", "Reason B", "reason C",
             "Reason D", "Reason E", "reason F")
set_prob <- c(0.1, 0.2, 0.6, 0.05, 0.02, 0.03)

fk_data <-
  data.frame(accident_NO = 1:1000,
             reason = sample(reasons, 1000, replace = TRUE, prob = set_prob))

# prepare the data for plotting
plotting_df <-
  fk_data %>% 
  group_by(reason) %>% 
  summarise(freq = n()) %>% 
  arrange(desc(freq)) %>%
  mutate(relative_freq = freq / sum(freq),
         cumulative_freq = cumsum(relative_freq)) 
## get the order of bars
the_order <- plotting_df$reason

# plot
p <- 
  plotting_df %>% 
  ggplot(aes(x = reason, weight = relative_freq)) +
  geom_bar(width = 0.5, fill = "blue") +
  scale_x_discrete(limits = the_order) +
  scale_y_continuous(label = scales::percent) +
  geom_point(aes(x = reason, y = cumulative_freq)) +
  geom_line(aes(x = reason, y = cumulative_freq, group = 1)) +
  # NB: Must use "group = 1"
  labs(x = "", y = "Relative frequency", 
       title = "A Pareto diagram for reasons of 1000 accidents") +
  theme(plot.title = element_text(hjust = 0.5))
  # NB: Use theme to center the title

print(p)

10.5 Creating lollipop charts

Notice that \[ \hbox{a lollipo} = \hbox{a segment} + \hbox{a point}, \] thus it is natural to use geom_segment() and geom_point() to create lollipop charts.

Example 6

rm(list = ls())

library(ggplot2)
library(dplyr)

# create a fake data set
set.seed(9072017)
rand_numbers <- abs(rnorm(26))
the_prob <- rand_numbers/sum(rand_numbers)
fk_data <- data.frame(x = sample(LETTERS, 10000, replace = TRUE,
                                 prob = the_prob))

# prepare data for plotting
plotting_df <-
  fk_data %>% 
  group_by(x) %>% 
  summarise(Freq = n()) %>% 
  mutate(proportion = Freq/sum(Freq)) %>% 
  arrange(proportion)

the_order <- plotting_df$x

# plotting

p <- 
  plotting_df %>% 
  ggplot(aes(x = x, y = proportion)) +
  geom_segment(aes(x = x, xend = x, y = 0, yend = proportion)) +
  # use the above to plot segments
  geom_point() +
  # use the above to plot points
  scale_x_discrete(limits = the_order) +
  scale_y_continuous(labels = scales::percent) +
  labs(x = "Category", y = "Proportion", 
       title = "A lollipop chart") +
  theme(plot.title = element_text(hjust = 0.5))
  # use the above to center the title

print(p)

10.6 Creating treemaps

A treepmap can show three variables by using lables, sizes of rectangles and colors. Below is a treemap of the top 15 NZ’s most populous cities based on the 2016 data. The original data comes from: https://en.wikipedia.org/wiki/List_of_cities_in_New_Zealand

Example 7

rm(list = ls())

# load packages
library(treemap)
library(readr) # for read_csv

# read data in
the_url <- "https://raw.githubusercontent.com/LarryZhang2016/Data/master/NZ_cities.csv"
NZ_cities <- read_csv(the_url, skip =1)

# make a tree map
treemap(dtf = NZ_cities,
        index=c("City_name"),
        vSize="Population",
        vColor="Population_density",
        palette="Spectral",
        type="value",
        border.col=c("grey70", "grey90"),
        fontsize.title = 18,
        algorithm="pivotSize",
        title ="Treemap of the top 15 NZ's most populous cities",
        title.legend="Population density (people/km^2)")

10.7 Creating scatter plots

A scatter plot is very useful for exploring the relationship between two continuous variables. With the following example, we show how to create a scatter plot. We want to emphasize the details, that is,

  • label properly
  • mark the outliers
  • add in the regression line
  • refit data and add in the new regression line

Example 8

rm(list = ls())

# load packages
library(readr) # for read_csv
library(ggplot2)

# read data in
the_url <- "https://raw.githubusercontent.com/LarryZhang2016/Data/master/NZ_cities.csv"
NZ_cities <- read_csv(the_url, skip =1)

p1 <-
  ggplot(NZ_cities, aes(x = Area_in_km2, y = Population)) +
  geom_point() +
  scale_y_continuous(labels = scales::comma) +
  # NB: use the above to mark large numbers 
  labs(x = "Area (in km^2)", 
       title = paste0("Population vs. area for the \n",
                      "top 15 NZ's most populous cities")) +
  # NB: use paste0 to break a long line to two lines
  theme(plot.title = element_text(hjust = 0.5))

print(p1)

# Next, we want to label the points for 
# Auckland, Wellington, Christchurch, and Hamilton with their names and red
# Also, add the regression line in

# load packages
library(dplyr)
library(ggrepel) # for geom_text_repel()

# add two new columns to NZ_cities
biggest_cities <- c("Auckland", "Wellington", "Christchurch","Hamilton")
NZ_cities_1 <- 
  NZ_cities %>% 
  mutate(the_label = if_else(City_name %in% biggest_cities, City_name, ""),
         the_color = if_else(City_name %in% biggest_cities, "red", "black"))

p2 <-
  p1 +
  geom_text_repel(data = NZ_cities_1, aes(label = the_label)) +
  geom_point(color = NZ_cities_1$the_color) +
  # add in the regression line
  geom_smooth(method = "lm", se = FALSE) 

print(p2)

# Finally, refit the data after removing "Auckland", "Wellington", "Christchurch"
# add in the new regression line
NZ_cities_2 <- 
  NZ_cities %>%
  filter(!(City_name %in% biggest_cities[1:3])) %>% 
  select(City_name, Population, Area_in_km2)

## find the regression equtions
line_1 <- lm(Population ~ Area_in_km2, NZ_cities)
line_2 <- lm(Population ~ Area_in_km2, NZ_cities_2)
line_1_eq <- paste0("Line 1: ", "Population = ", 
                    round(line_1[[1]][1], 2), " + ",
                    round(line_1[[1]][2], 2), " * Area")
line_2_eq <- paste0("Line 2: ", "Population = ", 
                    round(line_2[[1]][1], 2), " + ",
                    round(line_2[[1]][2], 2), " * Area")

p3 <- 
  p1 +
  geom_text_repel(aes(label = NZ_cities_1$the_label)) +
  geom_point(color = NZ_cities_1$the_color) +
  # add in the regression line
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  geom_smooth(data = NZ_cities_2, method = "lm", se = FALSE, color = "purple") +
  annotate(geom = "text", x = 400, y = 1400000, 
           label=line_1_eq, color="blue") +
  annotate(geom = "text", x = 400, y = 1250000, 
           label=line_2_eq, color="purple")

print(p3)

10.8 Creating side-by-side box plots

Roughly speaking, a box plot shows the five-number summary—i.e. minimum, first quartile, second quartile, third quartile, and maximum—of data. Plotting several box plots together, we have the so-called side-by-side box plot, which is useful for comparison of data among groups.

In the following example, we will create a side-by-side box plot for random numbers drawn from the standard normal distribution, the t distribution with five degrees of freedom, the uniform distribution on \((-3, 3)\), and the double exponential distribution with the probability density \[ f(y)=\frac{1}{2}\lambda e ^{-\lambda |y|}\ \hbox{for}\ -\infty <y<+\infty, \] where \(\lambda=\sqrt{\frac{2}{\pi}}\).

For our purpose, we need this

Technical note: We can show that if \(X\sim \hbox{Exp}(\lambda)\), \(U\sim \hbox{Uniform}(0, 1)\), and \(X\) and \(U\) are independent, then \[ Y=\left\{ \begin{array}{rl} -X, & \hbox{if}\ U\le 0.5,\\ X, & \hbox{if}\ U > 0.5, \end{array} \right. \] has a double exponential distribution; that is, the probability density function of \(Y\) is \[ f(y) = \frac{\lambda}{2}e^{-\lambda |y|}\ \hbox{for}\ -\infty < y < +\infty. \]

Example 9

rm(list = ls())

# load packages
library(dplyr)
library(tidyr) # for gather()
library(ggplot2)

# create a fake data set
set.seed(1234567)
fk_data <-
  data.frame(Normal = rnorm(1000),
             t_df_5 = rt(1000, df = 5),
             Unif = runif(1000, -3, 3),
             Exp = rexp(1000, rate = sqrt(2/pi)),
             Unif_temp = runif(1000, 0, 1)) %>% 
  mutate(the_indi = if_else(Unif_temp <= 0.5, -1, 1)) %>% 
  mutate(Double_exp = Exp * the_indi) %>% 
  select(-Exp, -Unif_temp, -the_indi)  

# prepare data for plotting
plotting_df <-
  fk_data %>% 
  gather(key = distribution, value = rand_number, Normal:Double_exp)

# plot
p <- 
  plotting_df %>% 
  ggplot(aes(x = distribution, y = rand_number, group = distribution)) +
  geom_boxplot() +
  coord_flip() +
  scale_x_discrete(breaks = c("Double_exp", "Normal", "t_df_5", "Unif"), 
                   labels = c("Double Exponential", 
                              "Standard Normal", 
                              "t with df=5", 
                              "Uniform on (-3, 3)")) +
  # NB: use the above to change x-axis tick marks
  labs(x = "Distribution", y = "", title = "Side-by-side box plot") +
  theme(plot.title = element_text(hjust = 0.5))
  
print(p)

10.9 Creating grid plots

Grid plots allow us to show several (e.g. four) variables in one plot, and certainly they are useful. The key here is to use facet_grid().

Example 10

rm(list = ls())

# load packages
library(dplyr)
library(tidyr)
library(ggplot2)

# create a fake data set
## a helper function
set.seed(21072017)
create_year_data <- function(year = 2015, n = 20)
{temp_df <- data.frame(year = rep(year, n),
                       gender = sample(c("male", "female"), n, replace = TRUE),
                       stats_grade = rnorm(n, mean = 55, sd = 10),
                       math_grade = rnorm(n, mean = 60, sd = 10))
 return(temp_df)
}

data_2016 <- create_year_data(year = 2016, n = 20)
data_2015 <- create_year_data(year = 2015, n = 20)
data_2014 <- create_year_data(year = 2014, n = 20)

fk_data <- bind_rows(data_2016, data_2015, data_2014)

# prepare data for plotting
plotting_df <-
  fk_data %>% 
  group_by(year, gender) %>% 
  summarise(Stats = mean(stats_grade),
            Maths = mean(math_grade)) %>% 
  ungroup() %>% 
  # make a long table
  gather(key = subject, value = grade, -year, -gender) %>% 
  arrange(year)

# plot
the_title <- paste0("Averge maths and stats grades for\n",
                    "female and male students in 2014-2016")
p <- 
  plotting_df %>% 
  ggplot(aes(y = grade, color = gender)) +
  geom_segment(aes(x = gender, xend = gender, y = 0, yend = grade)) +
  geom_point(aes(x = gender, y = grade)) +
  coord_flip() +
  scale_x_discrete(limits = c("male", "female")) +
  facet_grid(year ~ subject) +
  labs(x = "", y = "Average Grade", 
       title = the_title) +
  theme(plot.title = element_text(hjust = 0.5),
        legend.title = element_blank(),
        panel.background = element_rect(fill =  "grey90")) 

print(p)

10.10 Creating a simple PCA plot

When we have an \(n\)-variate (\(n\ge 3\)) data set, where each column contains continuous type data, we often want to look at the cluster relationship among the \(m\) observations (or rows). For this purpose. we can make a PCA (Principal Component Analysis) plot. The fundamental idea here is that we map the \(n\)-dimension data to 2-dimension (PC1 and PC2) data and then make a scatter plot of the 2-dimension data.

Example 11

rm(list=ls())

# load packages
library(dplyr, quietly = TRUE)
library(ggplot2, quietly = TRUE)

# NB: We will use iris, which is a data set from R 
(head(iris))
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# do the mapping
## step 1: find the covariance
the_cov <-  cov(iris[, 1:4]) 

## step 2: find the eigen values and vectors
the_eigen <- eigen(the_cov)

## Remark: steps 1 and 2 together is equivalent to 
## PC <- prcomp(iris[, 1:4])

## step 3: mapping
map_2_PC1_PC2 <- 
  as.matrix(iris[,1:4]) %*% the_eigen[[2]][, 1:2]  %>% 
  as.data.frame()

# check how much variance are expressed by PC1 and PC2
(the_proportions = the_eigen[[1]][1:2]/sum(the_eigen[[1]]))
## [1] 0.92461872 0.05306648
# prepare dataframe for plotting
temp_df <- data.frame(Species = iris[, 5])
plotting_df <- 
  bind_cols(map_2_PC1_PC2, temp_df) %>%
  rename(PC1 = V1, PC2 = V2)

# plotting
ggplot(plotting_df, aes(x = PC1, y = PC2, color = Species)) +
  geom_point()

10.11 Creating time series plots

It is easy to create a time series plot. Here we pay attention to some “small” things.

  1. Make sure the time is labelled correctly on x-axis.
  2. If the values are for money and large, then we should show the $ sign and use “,” in the numbers for labeling y-axis.
  3. We often want to get the points connected to show that the points are related.

Example 12

rm(list = ls())

# load packages
library(ggplot2)
library(lubridate) # for dealing with data related to time

# create a fake data set
for_year <- 1997:2016
for_month <- rep(12, 20)
for_day <- rep(31, 20)
col_1 <- paste0(for_year, "-", for_month, "-", for_day)
col_2 <- rep(0, 20)
## simulated data from a random walk
set.seed(20170805)
epsilon <- rnorm(20, mean = 0, sd = 10000)
x0 <- 5*1e5
for(i in 1:20)
{col_2[i] <- x0 + epsilon[i]
 x0 <- col_2[i]
}
fk_data <-
  data.frame(EndOfYear = ymd(col_1),
             Value_in_dollar = round(col_2, 0))

# plot data
p <- 
  ggplot(fk_data, aes(x = year(EndOfYear), y = Value_in_dollar)) +
  # Note the use of function year()
  geom_point() +
  geom_line() +
  scale_y_continuous(labels = scales::dollar) +
  labs(x = "Year", y = "Market value", title = "Plot of a time series") +
  theme(plot.title = element_text(hjust = 0.5))

print(p)

10.12 Showing pop-up’s

For exploratory data analysis, we may want our plot to have such a feature, which is when we hover the mouse on the plot some information will pop up. In the following example, I will show how to do it with plotly::ggplotly() (thanks to Chris Hansen for pointing this function to me.) Of course there are other useful R packages available for showing pop-up’s, such as googleVis and highcharter, if having an interest the readers can explore them.

Example 13

rm(list = ls())

# load packages
library(readr) # for read_csv
library(ggplot2)
library(plotly)

# read data in
the_url <- "https://raw.githubusercontent.com/LarryZhang2016/Data/master/NZ_cities.csv"
NZ_cities <- read_csv(the_url, skip =1)

p1 <-
  ggplot(NZ_cities, aes(x = Area_in_km2, y = Population, fill = City_name)) +
  geom_point() +
  scale_y_continuous(labels = scales::comma) +
  # NB: use the above to mark large numbers 
  labs(x = "Area (in km^2)", y = "PopSize",
       title = paste0("Population vs. area for the ",
                      "top 15 NZ's most populous cities")) +
  # NB: use paste0 to break a long line to two lines
  theme(plot.title = element_text(hjust = 0.5), 
        legend.position="none")

ggplotly(p1) %>% config(displayModeBar = FALSE)

10.13 Putting plots in one panel

We create a few plots and want to put them together. It is handy to do so with gridExtra::grid.arrange(). (I thank Peter Ellis for pointing me to this function.)

Example 14

rm(list = ls())

# load packages
library(gridExtra)
library(ggplot2)

# a function for plotting probability density functions
plot_density <- function(func_name = dnorm, para = list(mean=0, sd=1),
                         domain = data.frame(x = c(-3, 3)),
                         title_lable = "PDF of N(0, 1)")
{p <- ggplot(domain, aes(x)) + 
  stat_function(fun = func_name, args = para, color = "red") +
  labs(x = "x", y = "f(x)", title = title_lable) +
  theme(plot.title = element_text(hjust = 0.5)) # make the title in center
 return(p)
}

# plot four probability density functions 
p1 <- plot_density()
p2 <- plot_density(func_name = dt, para = list(df=30),
                   title_lable = "PDF of t distribution with df=30")
p3 <- plot_density(func_name = dexp, para = list(rate = 1),
                   domain = data.frame(x = c(0, 10)),
                   title_lable = "PDF of Exp(1) distribution")
p4<- plot_density(func_name = dchisq, para = list(df=5),
                  domain = data.frame(x = c(0, 10)),
                  title_lable = "PDF of Chisq distribution with df=5")

# put the four plots together 
grid.arrange(p1, p2, p3, p4, newpage = TRUE, 
             layout_matrix = matrix(1:4, byrow = TRUE, 2, 2))