10 How to plot data

How to plot data? This is a big question, and here I can give a brief answer in the form of a two-step procedure. Step 1: Get the data ready. Step 2: Use the ggplot2 package (or another package, e.g. the treemap package, for a specific kind of plot). In the following 13 sections, I will use examples to illustrate the two-step procedure.

10.1 Creating basic bar charts

Essentially, a basic bar chart is a plot of a categorical variable on x-axis and a numerical variable on y-axis.

Example 1: a basic bar chart.

rm(list = ls())

# load packages
library(ggplot2)

# Simulate a basket of 100 fruits and count how often each kind occurs.
# NB: keep the name "the_fruits"; table() turns it into the name of the
# category column that the plot refers to below.
fruits <- c("apple", "orange", "banana")
the_fruits <- sample(fruits, size = 100, replace = TRUE)

fruit_counts <- table(the_fruits)
plotting_df <- as.data.frame.table(fruit_counts)

# Build the individual pieces of the chart first ...
## bars: "width" and "fill" override the default bar width and color
bar_layer <- geom_bar(width = 0.5, fill = "blue")
## numbers placed just above the bars to show their heights
count_labels <- geom_text(aes(x = the_fruits, y = Freq + 1, label = Freq))
## a centered title
centered_title <- theme(plot.title = element_text(hjust = 0.5))

# ... then put them together.
# NB: the bar heights come from "weight = Freq" rather than "y = Freq"
p <- ggplot(plotting_df, aes(x = the_fruits, weight = Freq)) +
  bar_layer +
  count_labels +
  labs(x = "", y = "Frequency",
       title = "A basic bar chart for a basket of fruits") +
  centered_title

print(p)

Example 2: still a basic bar chart but making the bars horizontal and based on percentage

rm(list = ls())

# load packages
library(ggplot2)
library(dplyr)

# prepare a dataframe for plotting
fruits <- c("apple", "orange", "banana", "pear", "plum", 
            "kiwi fruit", "peach", "mango", "lemon")
the_fruits <- sample(fruits, 1000, replace = TRUE)

# count each kind of fruit, then turn the counts into proportions
a_table <- table(the_fruits)
plotting_df <- 
  as.data.frame.table(a_table) %>% 
  mutate(proportion = Freq / sum(Freq))

## Create a vector to order the fruits in terms of proportion 
## NB: the_fruits is a factor in plotting_df; convert it to a character
## vector, which is the form scale_x_discrete(limits = ) is documented to take
for_sorting <- plotting_df %>%
  arrange(proportion)
fruits_order <- as.character(for_sorting$the_fruits)

# plotting
p <- ggplot(plotting_df, aes(x = the_fruits, weight = proportion)) +
  # NB: use "weight = proportion" instead of "y = proportion"
  geom_bar(width = 0.5, fill = "blue") +
  # NB: use "width" and "fill" to change the default bar width and color
  labs(x = "", y = "", 
       title = "A 'horizontal' bar chart for a basket of fruits") +
  coord_flip() +
  # NB: use "coord_flip" to flip coordinates
  scale_x_discrete(limits = fruits_order) +
  # NB: use the above to set the order of bars
  scale_y_continuous(limits = c(0, max(plotting_df$proportion) + 0.015)) +
  # NB: use the above to leave room for the labels beyond the longest bar
  geom_text(aes(x = the_fruits, y = proportion + 0.006, 
                label = scales::percent(proportion))) +
  # NB: use the above to put the percentages next to the ends of the bars
  theme(plot.title = element_text(hjust = 0.5), 
        axis.text.x = element_blank(),
        axis.ticks = element_blank()) 
  # NB: use theme to center the title, to remove axis text and ticks

print(p)

10.2 Creating side-by-side and stacked bar charts

Example 3

rm(list = ls())

# load packages
library(ggplot2)
library(dplyr)

#-------------------
# Aim: To plot numbers of all kinds of fruits in "local" and "imported" groups
#-------------------


# prepare a dataframe for plotting
fruits <- c("apple", "orange", "banana", "pear", "plum", 
            "kiwi fruit", "peach", "mango", "lemon")
origin <- c("local", "imported")

a_df <- data.frame(the_fruits = sample(fruits, 1000, replace = TRUE),
                   the_origin = sample(origin, 1000, replace = TRUE)) 

# count the fruits for every (origin, fruit) combination
plotting_df <-
  a_df %>%  
  group_by(the_origin, the_fruits) %>% 
  summarise(Freq = n()) %>% 
  # NB: summarise() drops only the last grouping variable, so ungroup()
  # explicitly; the steps below should work on a plain (ungrouped) table
  ungroup()

# theme settings shared by the two charts below
shared_theme <-
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        axis.text = element_text(size = 12),
        axis.title = element_text(size = 14), 
        plot.title = element_text(size = 14),
        legend.text = element_text(size = 9),
        panel.background = element_rect(fill = "grey90"))

# ------------ side-by-side bar chart --------------
## set the order of bars according to local fruits   
temp_df_1 <-
  plotting_df %>% 
  filter(the_origin == "local") %>% 
  arrange(Freq)
## NB: pass the order as a character vector, the documented form of "limits"
the_order <- as.character(temp_df_1$the_fruits)

## plot a side-by-side bar chart
p1 <- 
  plotting_df %>% 
  ggplot(aes(x = the_fruits, weight = Freq, fill = the_origin)) +
  geom_bar(position = "dodge", width = 0.75) +
  # NB: use position = "dodge" to put the bars side by side
  coord_flip() +
  scale_x_discrete(limits = the_order) +
  # NB: use the above to plot the bars in the chosen order
  labs(x = "", y = "Number of fruits in the 'basket'") +
  scale_fill_brewer(breaks = c("local", "imported"), palette = "Set1") +
  # NB: use the above to change the default order and color of legend
  shared_theme

print(p1)

# ------------ stacked bar chart --------------
## set the order according to totals
temp_df_2 <-
  a_df %>% 
  group_by(the_fruits) %>% 
  summarise(the_count = n()) %>% 
  arrange(the_count)
the_order_2 <- as.character(temp_df_2$the_fruits)

## plot a stacked bar chart
p2 <- 
  plotting_df %>% 
  ggplot(aes(x = the_fruits, y = Freq, group = the_origin, fill = the_origin)) +
  # NB: use "y = Freq" instead of "weight = Freq"
  geom_bar(stat = "identity", position = "stack", width = 0.75) +
  coord_flip() +
  scale_x_discrete(limits = the_order_2) +
  # NB: use the above to plot the bars in order
  labs(x = "", y = "Number of fruits in the 'basket'") +
  scale_fill_brewer(breaks = c("local", "imported"), palette = "Set1") +
  # NB: use the above to change the default order and color of legend
  shared_theme

print(p2)

10.3 Creating back-to-back bar charts

Example 4

rm(list = ls())

# load packages
library(dplyr)
library(ggplot2)

# create a fake data set
## unequal sampling probabilities for the ten job types
set.seed(123)
raw_weights <- abs(rnorm(10)) + 0.1
the_prob <- raw_weights / sum(raw_weights)

## draw the job types first and the genders second
job_type_draws <- sample(LETTERS[1:10], 1000, replace = TRUE, prob = the_prob)
gender_draws <- sample(c("Male", "Female"), 1000, replace = TRUE)
fk_data <- data.frame(job_type = job_type_draws, gender = gender_draws)

# prepare data for plotting
## count every (job type, gender) pair; the trick is to store the male
## counts as negative numbers so that their bars grow in the other direction
plotting_df <-
  fk_data %>% 
  group_by(job_type, gender) %>% 
  summarise(Freq = n()) %>% 
  mutate(Freq = if_else(gender == "Male", -Freq, Freq))

## order the job types by the female counts
female_counts <- arrange(filter(plotting_df, gender == "Female"), Freq)
the_order <- female_counts$job_type

# plot
## another trick: the ticks sit at signed positions but are labelled
## with their absolute values
count_ticks <- seq(-150, 150, 50)

p <- 
  ggplot(plotting_df,
         aes(x = job_type, y = Freq, group = gender, fill = gender)) +
  geom_bar(stat = "identity", width = 0.75) +
  coord_flip() +
  scale_x_discrete(limits = the_order) +
  scale_y_continuous(breaks = count_ticks, labels = abs(count_ticks)) +
  # change the default colors of bars
  # NOTE(review): the colors are unnamed, so which gender is drawn in red
  # may depend on the installed ggplot2 version -- confirm on your setup
  scale_fill_manual(values=c("red", "blue"),
                    name="",
                    breaks=c("Male", "Female"),
                    labels=c("Male", "Female")) +
  # to reverse the order of items in legend, one could add
  # guides(fill = guide_legend(reverse = TRUE))
  labs(x = "Job type", y = "Count", title = "Back-to-back bar chart") +
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        plot.title = element_text(hjust = 0.5),
        panel.background = element_rect(fill =  "grey90"))

print(p)

Remark: We can use

scale_x_discrete(limits = rev(the_order))

to replace

scale_x_discrete(limits = the_order)

and the resulting chart is also called a pyramid chart. If you google “population pyramid” you can find more examples of pyramid charts.

10.4 Creating Pareto charts

A Pareto chart basically is a bar chart (with the bars ordered) plus a frequency polygon (i.e. a line chart). It is useful for revealing something like the 80-20 rule—e.g. 80% of the accidents are due to 20% of the possible reasons. See https://en.wikipedia.org/wiki/Pareto_chart for more details. The following example shows how to make a Pareto chart. Please pay attention to how the layers are built up.

Example 5

rm(list = ls())

# load packages
library(dplyr)
library(ggplot2)

# create a fake data set
## six possible reasons (labelled consistently) and how likely each one is
reasons <- c("Reason A", "Reason B", "Reason C",
             "Reason D", "Reason E", "Reason F")
set_prob <- c(0.1, 0.2, 0.6, 0.05, 0.02, 0.03)

fk_data <-
  data.frame(accident_NO = 1:1000,
             reason = sample(reasons, 1000, replace = TRUE, prob = set_prob))

# prepare the data for plotting
## count the accidents per reason, sort from most to least frequent, and
## accumulate the relative frequencies in that order
plotting_df <-
  fk_data %>% 
  group_by(reason) %>% 
  summarise(freq = n()) %>% 
  arrange(desc(freq)) %>%
  mutate(relative_freq = freq / sum(freq),
         cumulative_freq = cumsum(relative_freq)) 
## get the order of bars (as a character vector, the documented form of
## the "limits" argument of a discrete scale)
the_order <- as.character(plotting_df$reason)

# plot
p <- 
  plotting_df %>% 
  ggplot(aes(x = reason, weight = relative_freq)) +
  # layer 1: the ordered bars
  geom_bar(width = 0.5, fill = "blue") +
  scale_x_discrete(limits = the_order) +
  # NB: spell the argument name "labels" out in full instead of relying on
  # partial matching of "label"
  scale_y_continuous(labels = scales::percent) +
  # layers 2 and 3: the cumulative relative frequencies as points and a line
  geom_point(aes(x = reason, y = cumulative_freq)) +
  geom_line(aes(x = reason, y = cumulative_freq, group = 1)) +
  # NB: Must use "group = 1" so that the points are joined into one line
  labs(x = "", y = "Relative frequency", 
       title = "A Pareto diagram for reasons of 1000 accidents") +
  theme(plot.title = element_text(hjust = 0.5))
  # NB: Use theme to center the title

print(p)

10.5 Creating lollipop charts

Notice that $\hbox{a lollipop} = \hbox{a segment} + \hbox{a point}$; thus it is natural to use geom_segment() and geom_point() to create lollipop charts.

Example 6

rm(list = ls())

library(ggplot2)
library(dplyr)

# create a fake data set
## 10000 letters drawn with unequal probabilities
set.seed(9072017)
letter_weights <- abs(rnorm(26))
the_prob <- letter_weights / sum(letter_weights)
letter_draws <- sample(LETTERS, 10000, replace = TRUE, prob = the_prob)
fk_data <- data.frame(x = letter_draws)

# prepare data for plotting
## share of each letter, sorted from the rarest to the most common
plotting_df <-
  fk_data %>% 
  group_by(x) %>% 
  summarise(Freq = n()) %>% 
  mutate(proportion = Freq / sum(Freq)) %>% 
  arrange(proportion)

the_order <- plotting_df[["x"]]

# plotting
## the sticks: each segment runs from 0 up to the proportion
## (its x position is inherited from the plot-level aesthetics)
sticks <- geom_segment(aes(xend = x, y = 0, yend = proportion))
## the candies: one point at the end of each stick
candies <- geom_point()

p <- 
  ggplot(plotting_df, aes(x = x, y = proportion)) +
  sticks +
  candies +
  scale_x_discrete(limits = the_order) +
  scale_y_continuous(labels = scales::percent) +
  labs(x = "Category", y = "Proportion", 
       title = "A lollipop chart") +
  # center the title
  theme(plot.title = element_text(hjust = 0.5))

print(p)

10.6 Creating treemaps

A treemap can show three variables by using labels, sizes of rectangles, and colors. Below is a treemap of NZ’s top 15 most populous cities based on the 2016 data. The original data comes from: https://en.wikipedia.org/wiki/List_of_cities_in_New_Zealand

Example 7

rm(list = ls())

# load packages
library(treemap)
library(readr) # for read_csv

# read data in (the first line of the file is skipped)
the_url <- "https://raw.githubusercontent.com/LarryZhang2016/Data/master/NZ_cities.csv"
NZ_cities <- read_csv(the_url, skip = 1)

# make a tree map:
#   labels          <- City_name
#   rectangle sizes <- Population
#   colors          <- Population_density
treemap(
  dtf = NZ_cities,
  index = "City_name",
  vSize = "Population",
  vColor = "Population_density",
  type = "value",
  palette = "Spectral",
  algorithm = "pivotSize",
  border.col = c("grey70", "grey90"),
  title = "Treemap of the top 15 NZ's most populous cities",
  fontsize.title = 18,
  title.legend = "Population density (people/km^2)"
)

10.7 Creating scatter plots

A scatter plot is very useful for exploring the relationship between two continuous variables. With the following example, we show how to create a scatter plot. We want to emphasize the details, that is,

label properly
mark the outliers
add in the regression line
refit data and add in the new regression line

Example 8

rm(list = ls())

# load packages
library(readr) # for read_csv
library(ggplot2)

# read data in
the_url <- "https://raw.githubusercontent.com/LarryZhang2016/Data/master/NZ_cities.csv"
NZ_cities <- read_csv(the_url, skip = 1)

# Step 1: the plain scatter plot with proper labels
p1 <-
  ggplot(NZ_cities, aes(x = Area_in_km2, y = Population)) +
  geom_point() +
  scale_y_continuous(labels = scales::comma) +
  # NB: use the above to mark large numbers 
  labs(x = "Area (in km^2)", 
       title = paste0("Population vs. area for the \n",
                      "top 15 NZ's most populous cities")) +
  # NB: use paste0 to break a long line to two lines
  theme(plot.title = element_text(hjust = 0.5))

print(p1)

# Step 2: label the points for 
# Auckland, Wellington, Christchurch, and Hamilton with their names and
# color them red. Also, add the regression line in

# load packages
library(dplyr)
library(ggrepel) # for geom_text_repel()

# add two new columns to NZ_cities
biggest_cities <- c("Auckland", "Wellington", "Christchurch","Hamilton")
NZ_cities_1 <- 
  NZ_cities %>% 
  mutate(the_label = if_else(City_name %in% biggest_cities, City_name, ""),
         the_color = if_else(City_name %in% biggest_cities, "red", "black"))

p2 <-
  p1 +
  geom_text_repel(data = NZ_cities_1, aes(label = the_label)) +
  # redraw the points on top of those of p1, now with their colors
  geom_point(color = NZ_cities_1$the_color) +
  # add in the regression line
  geom_smooth(method = "lm", se = FALSE) 

print(p2)

# Step 3: refit the data after removing "Auckland", "Wellington",
# "Christchurch" and add in the new regression line
NZ_cities_2 <- 
  NZ_cities %>%
  filter(!(City_name %in% biggest_cities[1:3])) %>% 
  select(City_name, Population, Area_in_km2)

## find the regression equations
line_1 <- lm(Population ~ Area_in_km2, NZ_cities)
line_2 <- lm(Population ~ Area_in_km2, NZ_cities_2)

## Turn a fitted simple regression into a label such as
## "Line 1: Population = <intercept> + <slope> * Area".
## line_name: text put in front of the equation; fit: the fitted lm object.
make_line_eq <- function(line_name, fit) {
  # coef() returns the intercept first and the slope second
  estimates <- round(coef(fit), 2)
  paste0(line_name, ": ", "Population = ",
         estimates[1], " + ",
         estimates[2], " * Area")
}

line_1_eq <- make_line_eq("Line 1", line_1)
line_2_eq <- make_line_eq("Line 2", line_2)

p3 <- 
  p1 +
  # NB: hand the data to the layer instead of using "$" inside aes()
  geom_text_repel(data = NZ_cities_1, aes(label = the_label)) +
  geom_point(color = NZ_cities_1$the_color) +
  # add in the regression line for all the cities
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  # add in the regression line for the reduced data
  geom_smooth(data = NZ_cities_2, method = "lm", se = FALSE, color = "purple") +
  # print the two equations in the colors of their lines
  annotate(geom = "text", x = 400, y = 1400000, 
           label = line_1_eq, color = "blue") +
  annotate(geom = "text", x = 400, y = 1250000, 
           label = line_2_eq, color = "purple")

print(p3)

10.8 Creating side-by-side box plots

Roughly speaking, a box plot shows the five-number summary—i.e. minimum, first quartile, second quartile, third quartile, and maximum—of data. Plotting several box plots together, we have the so-called side-by-side box plot, which is useful for comparison of data among groups.

In the following example, we will create a side-by-side box plot for random numbers drawn from the standard normal distribution, the t distribution with five degrees of freedom, the uniform distribution on $(-3, 3)$ , and the double exponential distribution with the probability density $f(y)=\frac{1}{2}\lambda e ^{-\lambda |y|}\ \hbox{for}\ -\infty <y<+\infty,$ where $\lambda=\sqrt{\frac{2}{\pi}}$ .

For our purpose, we need this

Technical note: We can show that if $X\sim \hbox{Exp}(\lambda)$ , $U\sim \hbox{Uniform}(0, 1)$ , and $X$ and $U$ are independent, then $Y=\left\{ \begin{array}{rl} -X, & \hbox{if}\ U\le 0.5,\\ X, & \hbox{if}\ U > 0.5, \end{array} \right.$ has a double exponential distribution; that is, the probability density function of $Y$ is $f(y) = \frac{\lambda}{2}e^{-\lambda |y|}\ \hbox{for}\ -\infty < y < +\infty.$

Example 9

rm(list = ls())

# load packages
library(dplyr)
library(tidyr) # for gather()
library(ggplot2)

# create a fake data set
## 1000 draws per distribution; the draws are made in this order:
## normal, t, uniform, exponential, and the uniform used for the sign
set.seed(1234567)
n_draws <- 1000
normal_draws <- rnorm(n_draws)
t_draws <- rt(n_draws, df = 5)
unif_draws <- runif(n_draws, -3, 3)
exp_draws <- rexp(n_draws, rate = sqrt(2/pi))
sign_draws <- runif(n_draws, 0, 1)

## double exponential = exponential with a sign attached:
## minus when the uniform draw is <= 0.5, plus otherwise
fk_data <-
  data.frame(Normal = normal_draws,
             t_df_5 = t_draws,
             Unif = unif_draws,
             Double_exp = exp_draws * if_else(sign_draws <= 0.5, -1, 1))

# prepare data for plotting
## make a long table: one row per (distribution, random number)
plotting_df <- gather(fk_data, key = distribution, value = rand_number,
                      Normal:Double_exp)

# plot
## readable tick labels for the four distributions
tick_labels <- c(Double_exp = "Double Exponential", 
                 Normal = "Standard Normal", 
                 t_df_5 = "t with df=5", 
                 Unif = "Uniform on (-3, 3)")

p <- 
  ggplot(plotting_df,
         aes(x = distribution, y = rand_number, group = distribution)) +
  geom_boxplot() +
  coord_flip() +
  # NB: breaks/labels replace the default x-axis tick marks
  scale_x_discrete(breaks = names(tick_labels), 
                   labels = unname(tick_labels)) +
  labs(x = "Distribution", y = "", title = "Side-by-side box plot") +
  theme(plot.title = element_text(hjust = 0.5))

print(p)

10.9 Creating grid plots

Grid plots allow us to show several (e.g. four) variables in one plot, and certainly they are useful. The key here is to use facet_grid().

Example 10

rm(list = ls())

# load packages
library(dplyr)
library(tidyr)
library(ggplot2)

# create a fake data set
set.seed(21072017)

## Helper: simulate the records of n students for one year.
##   year: the value stored in the "year" column of every row
##   n:    the number of rows (students) to generate
## Returns a data frame with the columns year, gender, stats_grade and
## math_grade.
create_year_data <- function(year = 2015, n = 20) {
  # draw in a fixed order: genders, then stats grades, then maths grades
  gender_draws <- sample(c("male", "female"), size = n, replace = TRUE)
  stats_draws <- rnorm(n, mean = 55, sd = 10)
  maths_draws <- rnorm(n, mean = 60, sd = 10)

  data.frame(year = rep(year, times = n),
             gender = gender_draws,
             stats_grade = stats_draws,
             math_grade = maths_draws)
}

# simulate three years of data and stack them into one table
data_2016 <- create_year_data(year = 2016, n = 20)
data_2015 <- create_year_data(year = 2015, n = 20)
data_2014 <- create_year_data(year = 2014, n = 20)

fk_data <- bind_rows(data_2016, data_2015, data_2014)

# prepare data for plotting
## average grade per year and gender, for each of the two subjects
plotting_df <-
  fk_data %>% 
  group_by(year, gender) %>% 
  summarise(Stats = mean(stats_grade),
            Maths = mean(math_grade)) %>% 
  ungroup() %>% 
  # make a long table
  gather(key = subject, value = grade, -year, -gender) %>% 
  arrange(year)

# plot
the_title <- paste0("Average maths and stats grades for\n",
                    "female and male students in 2014-2016")
p <- 
  plotting_df %>% 
  ggplot(aes(y = grade, color = gender)) +
  # one lollipop per gender: a segment from 0 to the average plus a point
  geom_segment(aes(x = gender, xend = gender, y = 0, yend = grade)) +
  geom_point(aes(x = gender, y = grade)) +
  coord_flip() +
  scale_x_discrete(limits = c("male", "female")) +
  # the key step: one panel per (year, subject) combination
  facet_grid(year ~ subject) +
  labs(x = "", y = "Average Grade", 
       title = the_title) +
  theme(plot.title = element_text(hjust = 0.5),
        legend.title = element_blank(),
        panel.background = element_rect(fill =  "grey90")) 

print(p)

10.10 Creating a simple PCA plot

When we have an $n$-variate ($n\ge 3$) data set, where each column contains continuous data, we often want to look at the cluster relationship among the $m$ observations (or rows). For this purpose, we can make a PCA (Principal Component Analysis) plot. The fundamental idea here is that we map the $n$-dimensional data to 2-dimensional (PC1 and PC2) data and then make a scatter plot of the 2-dimensional data.

Example 11

rm(list=ls())

# load packages
library(dplyr, quietly = TRUE)
library(ggplot2, quietly = TRUE)

# NB: We will use iris, which is a data set from R 
(head(iris))

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

# do the mapping
## step 1: find the covariance of the four measurement columns
the_cov <- cov(iris[, 1:4]) 

## step 2: find the eigen values and vectors
the_eigen <- eigen(the_cov)

## Remark: steps 1 and 2 together is equivalent to 
## PC <- prcomp(iris[, 1:4])
## NB: prcomp() centers the data by default, while step 3 below maps the
## uncentered data, so the scores of the two approaches can differ by a
## constant shift (and possibly by sign); the shape of the plot is the same

## step 3: mapping the data onto the first two eigen vectors
## (the parentheses make explicit that the product is computed first and
## only then piped into as.data.frame())
map_2_PC1_PC2 <- 
  (as.matrix(iris[, 1:4]) %*% the_eigen$vectors[, 1:2]) %>% 
  as.data.frame()

# check how much of the variance is expressed by PC1 and PC2
(the_proportions <- the_eigen$values[1:2] / sum(the_eigen$values))

## [1] 0.92461872 0.05306648

# prepare dataframe for plotting
## put the species next to the two new coordinates and name the latter
temp_df <- data.frame(Species = iris[, 5])
plotting_df <- 
  bind_cols(map_2_PC1_PC2, temp_df) %>%
  rename(PC1 = V1, PC2 = V2)

# plotting
ggplot(plotting_df, aes(x = PC1, y = PC2, color = Species)) +
  geom_point()

10.11 Creating time series plots

It is easy to create a time series plot. Here we pay attention to some “small” things.

Make sure the time is labelled correctly on x-axis.
If the values are for money and large, then we should show the $ sign and use “,” in the numbers for labeling y-axis.
We often want to get the points connected to show that the points are related.

Example 12

rm(list = ls())

# load packages
library(ggplot2)
library(lubridate) # for dealing with data related to time

# create a fake data set
## one observation at the end (31 December) of each year
for_year <- 1997:2016
n_years <- length(for_year)
col_1 <- paste0(for_year, "-12-31")

## simulated data from a random walk: each value is the previous value
## plus a random step, starting from x0
set.seed(20170805)
epsilon <- rnorm(n_years, mean = 0, sd = 10000)
x0 <- 5*1e5
## NB: cumsum() accumulates the steps on top of x0 in one vectorised call;
## the leading element (x0 itself) is not an observation, so drop it
col_2 <- cumsum(c(x0, epsilon))[-1]

fk_data <-
  data.frame(EndOfYear = ymd(col_1),
             Value_in_dollar = round(col_2, 0))

# plot data
p <- 
  ggplot(fk_data, aes(x = year(EndOfYear), y = Value_in_dollar)) +
  # Note the use of function year() to put the years on the x-axis
  geom_point() +
  # connect the points to show that they are related
  geom_line() +
  # show the $ sign and "," in the y-axis labels
  scale_y_continuous(labels = scales::dollar) +
  labs(x = "Year", y = "Market value", title = "Plot of a time series") +
  theme(plot.title = element_text(hjust = 0.5))

print(p)

10.12 Showing pop-up’s

For exploratory data analysis, we may want our plot to have the following feature: when we hover the mouse over the plot, some information pops up. In the following example, I will show how to do it with plotly::ggplotly() (thanks to Chris Hansen for pointing me to this function). Of course there are other useful R packages available for showing pop-ups, such as googleVis and highcharter; interested readers can explore them.

Example 13

rm(list = ls())

# load packages
library(readr) # for read_csv
library(ggplot2)
library(plotly)

# read data in (the first line of the file is skipped)
the_url <- "https://raw.githubusercontent.com/LarryZhang2016/Data/master/NZ_cities.csv"
NZ_cities <- read_csv(the_url, skip = 1)

# the title is long, so it is written on two source lines and glued
# together with paste0
the_title <- paste0("Population vs. area for the ",
                    "top 15 NZ's most populous cities")

# the static scatter plot; City_name is mapped to "fill" and the legend
# is switched off
p1 <-
  ggplot(NZ_cities, aes(x = Area_in_km2, y = Population, fill = City_name)) +
  geom_point() +
  # use commas to mark large numbers on the y-axis
  scale_y_continuous(labels = scales::comma) +
  labs(x = "Area (in km^2)", y = "PopSize", title = the_title) +
  theme(plot.title = element_text(hjust = 0.5), 
        legend.position="none")

# convert the plot with ggplotly() and turn the mode bar off
config(ggplotly(p1), displayModeBar = FALSE)

10.13 Putting plots in one panel

We create a few plots and want to put them together. It is handy to do so with gridExtra::grid.arrange(). (I thank Peter Ellis for pointing me to this function.)

Example 14

rm(list = ls())

# load packages
library(gridExtra)
library(ggplot2)

# Plot a probability density function as a red curve.
#   func_name:   the density function to draw (e.g. dnorm, dt, dexp)
#   para:        a list of arguments handed to func_name
#   domain:      a data frame whose column x holds the two ends of the
#                interval shown on the x-axis
#   title_lable: the title of the plot
# Returns the ggplot object.
plot_density <- function(func_name = dnorm, para = list(mean=0, sd=1),
                         domain = data.frame(x = c(-3, 3)),
                         title_lable = "PDF of N(0, 1)") {
  density_curve <- stat_function(fun = func_name, args = para, color = "red")
  # put the title in the center
  centered_title <- theme(plot.title = element_text(hjust = 0.5))

  ggplot(domain, aes(x)) +
    density_curve +
    labs(x = "x", y = "f(x)", title = title_lable) +
    centered_title
}

# plot four probability density functions 
## standard normal (all defaults)
p1 <- plot_density()
## t distribution with 30 degrees of freedom
p2 <- plot_density(
  func_name = dt,
  para = list(df=30),
  title_lable = "PDF of t distribution with df=30"
)
## exponential distribution with rate 1, shown on (0, 10)
p3 <- plot_density(
  func_name = dexp,
  para = list(rate = 1),
  domain = data.frame(x = c(0, 10)),
  title_lable = "PDF of Exp(1) distribution"
)
## chi-squared distribution with 5 degrees of freedom, shown on (0, 10)
p4 <- plot_density(
  func_name = dchisq,
  para = list(df=5),
  domain = data.frame(x = c(0, 10)),
  title_lable = "PDF of Chisq distribution with df=5"
)

# put the four plots together on a 2 x 2 grid, filled row by row
panel_layout <- matrix(1:4, nrow = 2, ncol = 2, byrow = TRUE)
grid.arrange(p1, p2, p3, p4, layout_matrix = panel_layout, newpage = TRUE)