library(ggplot2) # beautiful graphs

library(ggthemes) # nice themes for ggplot2

library(ggbeeswarm) # helpful 'beeswarm' geometry

library(cowplot) # arrange graphs

library(pander) # nice tables

How to Choose a Chart

[Share on Twitter]

Choosing the right chart to represent your data can be a daunting process. I believe that a starting point for this thinking is some basic statistical thinking about the type of variables that you have. At the broadest level, variables may be conceptualized as categorical variables, or continuous variables.

Once we have discerned the type of variable that we have, there are two follow-up questions we may ask before deciding upon a chart strategy:

A Few Notes

A Note About Graph Labels

Graphs should have clear titles and labels.

A Note About Software

The principles of graphing discussed in this document transcend any particular software package, and could be implemented in many different software packages, such as SPSS, SAS, Stata, or R.

The graphs in these particular examples use ggplot2, a graphing library in R. ggplot2 graph syntax can be formidably complex, with a somewhat steep learning curve. More information about ggplot2 can be found at https://ggplot2.tidyverse.org.

# Note that ggplot2 can be MUCH simpler
# than the examples in this document make it look.
#
# For example, this single line

  ggplot(mydata, aes(x = x)) + geom_histogram()

# will produce a perfectly serviceable histogram.
#
# Much of the complication of the code in this document is simply
# the result of formatting tweaks (titles, axis labels, colors, themes)
# to get the graphs EXACTLY the way I wanted them.
#
# Note also that, for layout purposes, I am assigning each ggplot call
# to an object, e.g.

p1 <- ggplot(mydata, aes(x = x)) + geom_histogram()

# so that I can later pass the objects to plot_grid to lay out the graphs
# side by side.

# In your own work, you do not need to do this, and it may be simpler
# to write the call directly (schematic placeholder, not runnable as written):

ggplot(...) + ...

A Note About Graph Colors

This document uses colors based upon official University of Michigan colors. Using colors that match the design scheme of your organization may be helpful.

# University of Michigan colors ----

# Name the individual colors first, so that each hex code is written
# exactly once and the palette below cannot drift out of sync with them.

michigan_blue <- "#00274c"

michigan_maize <- "#ffcb05"

tappan_red <- "#a4270b"

ross_school_orange <- "#e96300"

wave_field_green <- "#beb300"

taubman_teal <- "#21c1bc"

arboretum_blue <- "#2878ba"

ann_arbor_amethyst <- "#7207a5"

# Full palette, in the same order as the individual colors above.
# Deliberately left UNNAMED: scale_*_manual() matches a named `values`
# vector to data levels by name, whereas an unnamed vector is simply
# used in order.

michigan_colors <- c(michigan_blue,
                     michigan_maize,
                     tappan_red,
                     ross_school_orange,
                     wave_field_green,
                     taubman_teal,
                     arboretum_blue,
                     ann_arbor_amethyst)

A Simulated Data File of Continuous and Categorical Data

A few randomly selected observations…

  x y z u v w s q
592 54.97 98.27 61.71 Group A Group A Group A Group 3 84.97
572 133.3 162.9 101.6 Group A Group A Group B Group 3 163.3
334 200.1 139.8 123.2 Group A Group B Group B Group 3 230.1
109 83.65 116.3 105.8 Group B Group B Group A Group 1 93.65
9 100.5 91.25 59.03 Group A Group A Group A Group 1 110.5
903 131.1 104.4 119.9 Group B Group B Group A Group 3 161.1
805 104.6 97.75 77.49 Group B Group B Group A Group 4 144.6
132 91.89 75.88 118 Group B Group B Group A Group 3 121.9
525 100.7 116.4 106.2 Group A Group A Group A Group 2 120.7
863 284.7 257.8 139.6 Group B Group B Group B Group 4 324.7

One Thing At A Time           Two Things At A Time

Continuous           Continuous By Categorical

# Histogram of the continuous variable x: one overall panel, then one
# panel per level of w.
#
# `bins = 30` spells out the bin count geom_histogram() would otherwise
# fall back to on its own while printing a "pick better value" message.

my_histogram <- ggplot(mydata, aes(x = x)) +
  geom_histogram(bins = 30, fill = arboretum_blue) +
  ggtitle("histogram") +
  xlab("continuous") + ylab("count") +
  theme_minimal()

my_facet_histogram <- ggplot(mydata, aes(x = x)) +
  geom_histogram(bins = 30, fill = arboretum_blue) +
  facet_wrap(~w, nrow = 2) + # one stacked panel per group
  ggtitle("histogram by group") +
  xlab("continuous") + ylab("count") +
  theme_minimal() +
  theme(axis.text = element_text(size = 5)) # small font size for axis

# left: one thing at a time; right: the same thing by group

plot_grid(my_histogram, my_facet_histogram, ncol = 2)

# Kernel density of the continuous variable y, overall and split by w.

my_density <- ggplot(mydata, aes(y)) +
  geom_density(fill = michigan_maize) +
  labs(title = "density",
       x = "continuous",
       y = "density") +
  theme_minimal()

# Same density, drawn in one stacked panel per level of w.

my_facet_density <- ggplot(mydata, aes(y)) +
  geom_density(fill = michigan_maize) +
  facet_wrap(~w, nrow = 2) +
  labs(title = "density by group",
       x = "continuous",
       y = "density") +
  theme_minimal() +
  theme(axis.text = element_text(size = 5)) # shrink the axis text

# overall plot on the left, grouped plot on the right

plot_grid(my_density,
          my_facet_density,
          ncol = 2)

# Bar chart of the mean of q: a single overall bar, then one bar per
# level of s.
#
# Changes from the earlier version of this chunk:
#   * stat_summary(fun = ...) replaces the deprecated `fun.y` argument.
#   * guides(fill = "none") replaces the deprecated guides(fill = FALSE).

# x = 1 and fill = factor(1) are constants: they put a single bar at one
# position and give it a one-level fill that scale_fill_manual() can color.

my_m_barchart <- ggplot(mydata,
                        aes(x = 1,
                            y = q,
                            fill = factor(1))) +
  stat_summary(fun = mean, geom = "bar") + # bar height = mean(q)
  scale_fill_manual(values = c(arboretum_blue)) +
  ggtitle("barchart of mean") +
  guides(fill = "none") + # no legend for the constant fill
  xlab(" ") + ylab("mean of continuous") +
  theme_minimal() +
  theme(axis.text.x = element_blank(), # the x position carries no meaning
        axis.ticks.x = element_blank())

# One bar per level of s, each with its own fill color.

my_facet_m_barchart <- ggplot(mydata,
                              aes(x = factor(s),
                                  y = q,
                                  fill = s)) +
  stat_summary(fun = mean, geom = "bar") + # bar height = mean(q) within s
  scale_fill_manual(values = c(arboretum_blue,
                               taubman_teal,
                               michigan_blue,
                               michigan_maize)) +
  ggtitle("barchart of mean \nby group") +
  guides(fill = "none") + # the x axis already labels the groups
  xlab("categorical") + ylab("mean of continuous") +
  theme_minimal()

plot_grid(my_m_barchart,
          my_facet_m_barchart,
          ncol = 2)

# Horizontal bar chart of the mean of q: a single overall bar, then one
# bar per level of s. coord_flip() turns the vertical bars on their side,
# so after flipping the "x" aesthetic is drawn on the vertical axis.
#
# Changes from the earlier version of this chunk:
#   * stat_summary(fun = ...) replaces the deprecated `fun.y` argument.
#   * guides(fill = "none") replaces the deprecated guides(fill = FALSE).

my_horiz_m_barchart <- ggplot(mydata,
                              aes(x = 1,
                                  y = q,
                                  fill = factor(1))) +
  stat_summary(fun = mean,
               geom = "bar") +
  coord_flip() +
  scale_fill_manual(values = c(arboretum_blue)) +
  ggtitle("horizontal barchart of mean") +
  guides(fill = "none") + # no legend for the constant fill
  xlab(" ") + ylab("mean of continuous") +
  theme_minimal() +
  theme(axis.text.y = element_blank(), # the bar position carries no meaning
        axis.ticks.y = element_blank())

# NOTE(review): with both the fill legend and the y-axis text removed, the
# individual groups are not labeled in this plot - confirm this is intended.

my_facet_horiz_m_barchart <- ggplot(mydata,
                                    aes(x = factor(s),
                                        y = q,
                                        fill = s)) +
  stat_summary(fun = mean,
               geom = "bar") +
  coord_flip() +
  scale_fill_manual(values = c(arboretum_blue,
                               taubman_teal,
                               michigan_blue,
                               michigan_maize)) +
  ggtitle("horizontal barchart of mean \nby group") +
  guides(fill = "none") +
  xlab(" ") + ylab("mean of continuous") +
  theme_minimal() +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank())

plot_grid(my_horiz_m_barchart,
          my_facet_horiz_m_barchart)

# Horizontal dot chart of the mean of q: a single overall dot, then one
# dot per level of s.
#
# Changes from the earlier version of this chunk:
#   * The single-dot plot mapped the constant to `fill` while supplying the
#     color through scale_color_manual(), so the scale never applied to the
#     point. The constant is now mapped to `color`, and the legend that
#     this mapping would create is switched off.
#   * stat_summary(fun = ...) replaces the deprecated `fun.y` argument.
#   * The grouped plot no longer calls guides(fill = FALSE): that plot has
#     no fill aesthetic, so the call had nothing to act on.

my_horiz_m_dotchart <- ggplot(mydata,
                              aes(x = 1,
                                  y = q,
                                  color = factor(1))) +
  stat_summary(fun = mean,
               geom = "point", size = 5) +
  coord_flip() +
  scale_color_manual(values = c(arboretum_blue)) +
  ggtitle("horizontal dotchart of mean") +
  guides(color = "none") + # no legend for the constant color
  xlab(" ") + ylab("mean of continuous") +
  theme_minimal() +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank())

# One dot per level of s; the color legend (titled "group") is kept.

my_facet_horiz_m_dotchart <- ggplot(mydata,
                                    aes(x = factor(s),
                                        y = q,
                                        color = s)) +
  stat_summary(fun = mean,
               geom = "point",
               size = 5) +
  coord_flip() +
  scale_color_manual(name = "group",
                     values = c(arboretum_blue,
                                taubman_teal,
                                michigan_blue,
                                michigan_maize)) +
  ggtitle("horizontal dotchart of mean \nby group") +
  xlab(" ") +
  ylab("mean of continuous") +
  theme_minimal() +
  theme(axis.title.y = element_blank(),
        axis.ticks = element_blank())

plot_grid(my_horiz_m_dotchart,
          my_facet_horiz_m_dotchart)

# Line chart of the mean of q across the levels of s.
#
# Change from the earlier version of this chunk:
#   * stat_summary(fun = ...) replaces the deprecated `fun.y` argument.

# "One thing at a time" version: y = mean(q) is computed over the whole
# column, so every row carries the same overall mean and the summarised
# line is flat. group = 1 joins the discrete x positions into one line.

my_m_linechart <- ggplot(mydata,
                         aes(x = factor(s),
                             y = mean(q),
                             group = 1)) +
  stat_summary(fun = mean,
               geom = "line",
               size = 2,
               color = arboretum_blue) +
  geom_blank() + # draws nothing
  ggtitle("linechart of mean") +
  xlab(" ") + ylab("mean of continuous") +
  theme_minimal() +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank())

# "By group" version: y = q, so the line connects the mean of q within
# each level of s.

my_facet_m_linechart <- ggplot(mydata,
                               aes(x = factor(s),
                                   y = q,
                                   group = 1)) +
  stat_summary(fun = mean,
               geom = "line",
               size = 2,
               color = arboretum_blue) +
  ggtitle("linechart of mean \nby group") +
  xlab(" ") + ylab("mean of continuous") +
  theme_minimal()

plot_grid(my_m_linechart, my_facet_m_linechart)

# Violin plot of the continuous variable y, overall and split by w.
# x = 1 is a constant position, so its axis text and ticks are hidden.

my_violin <- ggplot(mydata, aes(x = 1, y = y)) +
  geom_violin(fill = ross_school_orange) +
  labs(title = "violin plot",
       x = " ",
       y = "continuous") +
  theme_minimal() +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank())

# Same violin, one panel per level of w, panels side by side.

my_facet_violin <- ggplot(mydata, aes(x = 1, y = y)) +
  geom_violin(fill = ross_school_orange) +
  facet_wrap(~w, ncol = 2) +
  labs(title = "violin plot \nby group",
       x = "categorical",
       y = "continuous") +
  theme_minimal() +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank())

plot_grid(my_violin,
          my_facet_violin,
          ncol = 2)

# Boxplot of the continuous variable y, overall and by level of w.
#
# Change from the earlier version of this chunk:
#   * scale_x_discrete(limits = ...) is now spelled out in full; the
#     earlier `limit =` only worked through partial argument matching.

# The single box is placed at the constant x = 2 on an axis whose limits
# are 0, 1, 2 - presumably to control where the box sits in the panel;
# TODO confirm the intended layout.

my_boxplot <- ggplot(mydata,
                     aes(x = 2,
                         y = y)) +
  geom_boxplot(colour = tappan_red) +
  scale_x_discrete(limits = c(0, 1, 2)) +
  ggtitle("boxplot") +
  xlab(" ") +
  ylab("continuous") +
  theme_minimal() +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank())

# One box per level of w, drawn at half the default width.

my_conditional_boxplot <- ggplot(mydata,
                                 aes(x = w,
                                     y = y)) +
  geom_boxplot(colour = tappan_red, width = .5) +
  ggtitle("boxplot \nby group") +
  xlab("categorical") + ylab("continuous") +
  theme_minimal()

plot_grid(my_boxplot, my_conditional_boxplot, ncol = 2)

# Beeswarm plot of the continuous variable y, overall and split by w.
# The data run along the x axis; y = 1 is a constant, so the y-axis text
# is hidden.
#
# NOTE(review): `groupOnX` appears to be deprecated in recent ggbeeswarm
# releases - confirm against the installed version before removing it.

my_beeswarm <- ggplot(mydata, aes(x = y, y = 1)) +
  geom_beeswarm(groupOnX = FALSE,
                colour = ann_arbor_amethyst) +
  labs(title = "beeswarm plot",
       x = "continuous",
       y = "") +
  theme_minimal() +
  theme(axis.text = element_text(size = 5),
        axis.text.y = element_blank())

# Same swarm, one stacked panel per level of w.

my_facet_beeswarm <- ggplot(mydata, aes(x = y, y = 1)) +
  geom_beeswarm(groupOnX = FALSE,
                colour = ann_arbor_amethyst) +
  facet_wrap(~w, nrow = 2) +
  labs(title = "beeswarm plot \nby group",
       x = "continuous",
       y = "") +
  theme_minimal() +
  theme(axis.text.x = element_text(size = 5),
        axis.text.y = element_blank())

plot_grid(my_beeswarm,
          my_facet_beeswarm)

# Dot plot of the continuous variable y, overall and split by w.
# The grouped version uses a narrower binwidth (1.5 instead of 3.0).

my_dotplot <- ggplot(mydata, aes(y)) +
  geom_dotplot(binwidth = 3.0,
               colour = wave_field_green) +
  labs(title = "dotplot",
       x = "continuous",
       y = "density") +
  theme_minimal()

# Same dot plot, one stacked panel per level of w.

my_facet_dotplot <- ggplot(mydata, aes(y)) +
  geom_dotplot(binwidth = 1.5,
               colour = wave_field_green) +
  facet_wrap(~w, nrow = 2) +
  labs(title = "dotplot \nby group",
       x = "continuous",
       y = "density") +
  theme_minimal() +
  theme(axis.text = element_text(size = 5)) # shrink the axis text

plot_grid(my_dotplot, my_facet_dotplot, ncol = 2)

One Thing At A Time           Two Things At A Time

Categorical           Categorical By Categorical

# Bar chart of counts for the categorical variable s, overall and split
# into one panel per level of u.
#
# Change from the earlier version of this chunk:
#   * guides(fill = "none") replaces the deprecated guides(fill = FALSE).

my_barchart <- ggplot(mydata,
                      aes(s,
                          fill = s)) +
  geom_bar(width = 1.0) + # bars touch: each fills its whole slot
  scale_fill_manual(values = c(arboretum_blue,
                               taubman_teal,
                               michigan_blue,
                               michigan_maize)) +
  ggtitle("bar chart") +
  guides(fill = "none") + # the x axis already labels the groups
  xlab("categorical") +
  ylab("count") +
  theme_minimal()

my_facet_barchart <- ggplot(mydata,
                            aes(s,
                                fill = s)) +
  geom_bar(width = 1.0) +
  facet_wrap(~u, ncol = 2) +
  scale_fill_manual(values = c(arboretum_blue,
                               taubman_teal,
                               michigan_blue,
                               michigan_maize)) +
  ggtitle("bar chart \nby group") +
  guides(fill = "none") +
  xlab("categorical") + ylab("count") +
  theme_minimal() +
  theme(axis.text.x = element_text(size = rel(.6))) # shrink labels to fit

plot_grid(my_barchart, my_facet_barchart, ncol = 2)

# Horizontal bar chart of counts for the categorical variable s, overall
# and split into one panel per level of u. coord_flip() turns the bars on
# their side.
#
# Change from the earlier version of this chunk:
#   * guides(fill = "none") replaces the deprecated guides(fill = FALSE).

my_horiz_barchat <- ggplot(mydata, aes(s, fill = s)) +
  geom_bar(width = 1.0) +
  coord_flip() +
  scale_fill_manual(values = c(arboretum_blue,
                               taubman_teal,
                               michigan_blue,
                               michigan_maize)) +
  ggtitle("horizontal bar chart") +
  guides(fill = "none") + # the axis already labels the groups
  xlab("categorical") + ylab("count") +
  theme_minimal()

my_horiz_facet_barchart <- ggplot(mydata, aes(s, fill = s)) +
  geom_bar(width = 1.0) +
  facet_wrap(~u, ncol = 1) + # panels stacked vertically
  coord_flip() +
  scale_fill_manual(values = c(arboretum_blue,
                               taubman_teal,
                               michigan_blue,
                               michigan_maize)) +
  ggtitle("horizontal bar chart \nby group") +
  guides(fill = "none") +
  xlab("categorical") + ylab("count") +
  theme_minimal() +
  theme(axis.text.y = element_text(size = rel(.5))) # shrink labels to fit

plot_grid(my_horiz_barchat, my_horiz_facet_barchart, ncol = 2)

# Pie chart of the categorical variable v, overall and split into one
# panel per level of u. A pie is a single stacked bar (x = factor(1))
# drawn in polar coordinates, with the angle taken from y.
#
# Change from the earlier version of this chunk:
#   * guides(fill = "none") replaces the deprecated guides(fill = FALSE).

my_pie <- ggplot(mydata, aes(x = factor(1), fill = v)) +
  geom_bar(width = 1) +
  coord_polar(theta = "y") +
  scale_fill_manual(values = michigan_colors) +
  ggtitle("pie chart") +
  guides(fill = "none") +
  xlab(" ") + ylab("categorical") +
  theme_minimal() +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank())

# position = "fill" rescales each panel's bar to proportions, so every
# pie is a complete circle.

my_facet_pie <- ggplot(mydata, aes(x = factor(1), fill = v)) +
  geom_bar(width = 1, position = "fill") +
  coord_polar(theta = "y") +
  facet_wrap(~u, ncol = 2) +
  scale_fill_manual(values = michigan_colors) +
  ggtitle("pie chart \nby group") +
  guides(fill = "none") +
  xlab(" ") + ylab("categorical") +
  theme_minimal() +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank())

plot_grid(my_pie, my_facet_pie, ncol = 2)

# Doughnut chart of the categorical variable v, overall and split into
# one panel per level of u. Same construction as a pie (a single stacked
# bar in polar coordinates), but with a bar width of .5 instead of 1.
#
# Change from the earlier version of this chunk:
#   * guides(fill = "none") replaces the deprecated guides(fill = FALSE).

my_doughnut <- ggplot(mydata, aes(x = factor(1), fill = v)) +
  geom_bar(width = .5) +
  coord_polar(theta = "y") +
  scale_fill_manual(values = michigan_colors) +
  ggtitle("doughnut chart") +
  guides(fill = "none") +
  xlab(" ") + ylab("categorical") +
  theme_minimal() +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank())

# position = "fill" rescales each panel's bar to proportions, so every
# ring is a complete circle.

my_facet_doughnut <- ggplot(mydata, aes(x = factor(1), fill = v)) +
  geom_bar(width = .5, position = "fill") +
  coord_polar(theta = "y") +
  facet_wrap(~u, ncol = 2) +
  scale_fill_manual(values = michigan_colors) +
  ggtitle("doughnut chart \nby group") +
  guides(fill = "none") +
  xlab(" ") + ylab("categorical") +
  theme_minimal() +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank())

plot_grid(my_doughnut, my_facet_doughnut, ncol = 2)

Continuous by Continuous

# Scatterplot of y against x, without and with a linear fit line.
# Axis text and ticks are hidden in both plots.
#
# Change from the earlier version of this chunk:
#   * geom_smooth() now states formula = y ~ x explicitly. This is the
#     formula it would otherwise choose for `lm` on its own while printing
#     an informational message.

my_scatterplot <- ggplot(mydata, aes(x = x, y = y)) +
  geom_point(colour = ann_arbor_amethyst) +
  ggtitle("scatterplot") +
  xlab("continuous") + ylab("continuous") +
  theme_minimal() +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank())

# Same points, with a linear regression line drawn on top.

my_scatterplot_smoother <- ggplot(mydata, aes(x = x, y = y)) +
  geom_point(colour = ann_arbor_amethyst) +
  geom_smooth(method = lm, formula = y ~ x,
              color = michigan_maize, size = 2) +
  ggtitle("scatterplot with fit line") +
  xlab("continuous") + ylab("continuous") +
  theme_minimal() +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank())

plot_grid(my_scatterplot, my_scatterplot_smoother)

# Two more views of y against x: a hexagon-binned plot, and a scatterplot
# with a smoother and its confidence band. Axis text and ticks are hidden.

# Hexagons are shaded from teal (low) to blue (high); the legend is shrunk.

my_hexagon <- ggplot(mydata, aes(x, y)) +
  geom_hex() +
  scale_fill_gradient(low = taubman_teal,
                      high = arboretum_blue) +
  labs(title = "hexagon plot",
       x = "continuous",
       y = "continuous") +
  theme_minimal() +
  theme(legend.text = element_text(size = 4),
        legend.key.size = unit(.25, "cm"),
        axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank())

# Smoothing method is left at geom_smooth()'s default; se = TRUE draws the
# confidence band.

my_smoother <- ggplot(mydata, aes(x, y)) +
  geom_point(colour = ann_arbor_amethyst) +
  geom_smooth(se = TRUE,
              color = michigan_maize,
              size = 2) +
  labs(title = "scatterplot with smoother",
       x = "continuous",
       y = "continuous") +
  theme_minimal() +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank())

plot_grid(my_hexagon,
          my_smoother)

# Two more views of y against x: an area plot and a filled 2-d density
# contour plot. Axis text and ticks are hidden in both.
#
# Change from the earlier version of this chunk:
#   * aes(fill = after_stat(level)) replaces the deprecated `..level..`
#     notation for referring to a variable computed by the stat.

my_area <- ggplot(mydata, aes(x = x, y = y)) +
  geom_area(position = "stack", fill = ross_school_orange) +
  ggtitle("area plot") +
  xlab("continuous") + ylab("continuous") +
  theme_minimal() +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank())

# Contour polygons are filled by their density level, shaded from teal
# (low) to blue (high).

my_contour <- ggplot(mydata, aes(x = x, y = y)) +
  stat_density_2d(aes(fill = after_stat(level)), geom = "polygon") +
  ggtitle("contour plot") +
  xlab("continuous") +
  ylab("continuous") +
  theme_minimal() +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank()) +
  scale_fill_gradient(low = taubman_teal,
                      high = arboretum_blue)

plot_grid(my_area, my_contour)

Graphics made with the ggplot2 graphing library created by Hadley Wickham.

Available online at https://agroganweb.wordpress.com/data-visualization-dataviz/

How to Choose a Chart by Andrew Grogan-Kaylor is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License. You are welcome to download and use this handout in your own classes, or work, as long as the handout remains properly attributed.

Last updated: July 04 2018 at 07:35