# 第 28 章 ggplot2之从图层到几何形状

## 28.1 图层的五大元素

ggplot2中每个图层都要有的五大元素：

1. 数据data
2. 美学映射mapping
3. 几何形状geom
4. 统计变换stat
5. 位置调整position

## 28.2 加载宏包

library(tidyverse)
library(palmerpenguins)
penguins <- penguins %>% drop_na()

## 28.3 stat_identity()

penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
stat = "identity",
geom = "point",
params = list(na.rm = FALSE),
position = "identity"
)
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
stat_identity(
geom = "point"
)
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
geom_point()

## 28.4 stat_count()

Computed variables

• count: number of points in bin
• prop: groupwise proportion

默认几何形状

• geom_bar()

适用几何形状

• geom_point() / geom_bar()
penguins %>%
ggplot(aes(x = species)) +
layer(
stat = "count",
geom = "bar",
mapping = aes(y = after_stat(count)),
position = "identity"
)
penguins %>%
ggplot(aes(x = species)) +
layer(
stat = "count",
geom = "point",
mapping = aes(y = after_stat(count)),
position = "identity"
)

penguins %>%
ggplot(aes(x = species)) +
layer(
stat = "count",
geom = "bar",
mapping = aes(y = stage(start = NULL, after_stat = count)),
position = "identity"
)
penguins %>%
ggplot(aes(x = species, y = after_stat(count))) +
stat_count(
geom = "bar"
)
penguins %>%
ggplot(aes(x = species, y = after_stat(count))) +
geom_bar(
stat = "count"
)
penguins %>%
ggplot(aes(x = species, y = after_stat(count))) +
stat_count(
geom = "point"
)
penguins %>%
ggplot(aes(x = species, y = after_stat(count))) +
geom_point(
stat = "count"
)

## 28.5 stat_bin()

Computed variables

• count: number of points in bin
• density: density of points in bin, scaled to integrate to 1
• ncount: count, scaled to maximum of 1
• ndensity: density, scaled to maximum of 1

默认几何形状

• geom_bar()

适用几何形状

• geom_bar() / geom_histogram() / geom_freqpoly
penguins %>%
ggplot(aes(x = bill_length_mm)) +
layer(
stat = "bin",
geom = "bar",
mapping = aes(y = after_stat(count)),
position = "identity"
)
## stat_bin() using bins = 30. Pick better value with binwidth.
penguins %>%
ggplot(aes(x = bill_length_mm)) +
layer(
stat = "bin",
geom = "point",
mapping = aes(x = stage(start = bill_length_mm, after_stat = x),
y = after_stat(count)
),
position = "identity"
)
## stat_bin() using bins = 30. Pick better value with binwidth.
penguins %>%
ggplot(aes(x = bill_length_mm, y = after_stat(count))) +
stat_bin(
geom = "point"
)
## stat_bin() using bins = 30. Pick better value with binwidth.
penguins %>%
ggplot(aes(x = bill_length_mm, y = after_stat(count))) +
geom_bar(
stat = "bin"
)
## stat_bin() using bins = 30. Pick better value with binwidth.

geom_histogram 本质实际上是 geom_bar，都依赖stat_bin

penguins %>%
ggplot(aes(x = bill_length_mm)) +
layer(
stat = "bin",
geom = "bar",
mapping = aes(y = after_stat(count)),
position = 'identity'
) 
## stat_bin() using bins = 30. Pick better value with binwidth.
penguins %>%
ggplot(aes(x = bill_length_mm)) +
layer(
stat = "bin",
geom = "bar",
mapping = aes(y = after_stat(ncount)),
position = 'identity'
) 
## stat_bin() using bins = 30. Pick better value with binwidth.
penguins %>%
ggplot(aes(x = bill_length_mm)) +
stat_bin(
mapping = aes(y = after_stat(count)),
geom = "bar",
position = 'identity'
) 
## stat_bin() using bins = 30. Pick better value with binwidth.
penguins %>%
ggplot(aes(x = bill_length_mm)) +
geom_histogram(
mapping = aes(y = after_stat(count)),
stat = "bin",
position = 'identity'
) 
## stat_bin() using bins = 30. Pick better value with binwidth.

penguins %>%
ggplot(aes(x = bill_length_mm, fill = sex)) +
layer(
mapping = aes(y = after_stat(density)),
geom = "bar",
stat = "bin",
position = 'dodge'
) +
facet_wrap(vars(species)) 
## stat_bin() using bins = 30. Pick better value with binwidth.
penguins %>%
ggplot(aes(x = bill_length_mm, fill = sex)) +
layer(
mapping = aes(y = stage(NULL, after_stat = density)),
geom = "bar",
stat = "bin",
position = 'dodge'
) +
facet_wrap(vars(species)) 
## stat_bin() using bins = 30. Pick better value with binwidth.
penguins %>%
ggplot(aes(x = bill_length_mm, fill = sex)) +
stat_bin(
mapping = aes(y = after_stat(density)),
geom = "bar",
position = 'dodge'
) +
facet_wrap(vars(species)) 
## stat_bin() using bins = 30. Pick better value with binwidth.
penguins %>%
ggplot(aes(x = bill_length_mm, fill = sex)) +
geom_histogram(
aes(y = after_stat(density)),
position = 'dodge'
) +
facet_wrap(vars(species)) 
## stat_bin() using bins = 30. Pick better value with binwidth.

## 28.6 stat_density()

x(连续)核密度估计，可以看作是直方图的平滑版本

kernel = c("gaussian", "epanechnikov", "rectangular",
"triangular", "biweight",   "cosine",
"optcosine")

Computed variables

• density: density estimate
• count: density * number of points - useful for stacked density plots
• scaled: density estimate, scaled to maximum of 1
• ndensity: alias for scaled, to mirror the syntax of stat_bin()

默认几何形状

• geom_area()

适用几何形状

• geom_area()/ geom_line()/ geom_point()/ geom_density()
penguins %>%
ggplot(aes(x = bill_length_mm)) +
layer(
stat = "density",
geom = "area",
params = list(kernel = "gaussian"),
position = "identity"
)
penguins %>%
ggplot(aes(x = bill_length_mm)) +
layer(
stat = "density",
geom = "line",
params = list(kernel = "gaussian"),
position = "identity"
)
penguins %>%
ggplot(aes(x = bill_length_mm)) +
layer(
stat = "density",
geom = "point",
params = list(kernel = "gaussian"),
position = "identity"
)
penguins %>%
ggplot(aes(x = bill_length_mm)) +
stat_density(
geom = "point",
kernel = "gaussian"
)

## 28.7 stat_boxplot()

• Aesthetics

• x or y; lower; upper; middle; ymin ; ymax
• Computed variables

• width: width of boxplot
• ymin: lower whisker = smallest observation greater than or equal to lower hinge - 1.5 * IQR
• lower: lower hinge, 25% quantile
• notchlower: lower edge of notch = median - 1.58 * IQR / sqrt(n)
• middle: median, 50% quantile
• notchupper: upper edge of notch = median + 1.58 * IQR / sqrt(n)
• upper: upper hinge, 75% quantile
• ymax: upper whisker = largest observation less than or equal to upper hinge + 1.5 * IQR

默认几何形状

• geom_boxplot()

适用几何形状

• geom_boxplot() / geom_point()
penguins %>%
ggplot(aes(x = species, y = bill_length_mm))+
layer(
stat = "boxplot",
geom = "boxplot",
position = "identity"
)
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
stat_boxplot(
geom = "boxplot"
)
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
geom_boxplot()

penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
layer(
stat = "boxplot",
geom = "boxplot",
mapping = aes(color = after_stat(middle)),
position = "identity"
)
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
layer(
stat = "boxplot",
geom = "point",
mapping = aes(y = after_stat(width)),
position = "identity"
)
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
layer(
stat = "boxplot",
geom = "point",
mapping = aes(y = stage(bill_length_mm, after_stat = notchupper)),
position = "identity"
)
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
layer(
stat = "boxplot",
geom = "point",
mapping = aes(y = stage(bill_length_mm, after_stat = ymax)),
position = "identity"
) 
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
stat_boxplot()
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
layer(
stat = "boxplot",
geom = "point",
mapping = aes(y = stage(bill_length_mm, after_stat = middle)),
params = list(color = "red", size = 5),
position = "identity"
) 
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
geom_boxplot(
aes(colour = species,
fill = after_scale(alpha(colour, 0.4)))
)

## 28.8 stat_ydensity()

Computed variables

• density: density estimate
• scaled: density estimate, scaled to maximum of 1
• count: density * number of points - probably useless for violin plots
• violinwidth: density scaled for the violin plot, according to area, counts or to a constant maximum width
• n: number of points
• width: width of violin bounding box

默认几何形状

• geom_violin()

适用几何形状

• geom_violin() / geom_point()
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
geom_point() +
layer(
geom     = "violin",
stat     = "ydensity",
position = "identity"
)
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
geom_point() +
layer(
geom     = "point",
stat     = "ydensity",
position = "identity"
)

## 28.9 stat_bindot()

Computed variables

• x: center of each bin, if binaxis is “x”
• y: center of each bin, if binaxis is “x”
• binwidth: max width of each bin if method is “dotdensity”;width of each bin if method is “histodot”
• count: number of points in bin
• ncount: count, scaled to maximum of 1
• density: density of points in bin, scaled to integrate to 1, if method is “histodot”
• ndensity: density, scaled to maximum of 1, if method is “histodot”

默认几何形状

• geom_dotplot()

适用几何形状

• geom_dotplot()
penguins %>%
ggplot(aes(x = bill_length_mm)) +
layer(
stat = "bindot",
geom = "dotplot",
mapping = aes(y = stage(start = NULL, after_stat = count)),
params = list(binwidth = 1, dotsize = 0.5),
position = position_nudge(-0.025)
)
penguins %>%
ggplot(aes(x = bill_length_mm)) +
layer(
stat = "bindot",
geom = "point",
mapping = aes(y = stage(start = NULL, after_stat = count)),
params = list(binwidth = 1),
position = "identity"
)
penguins %>%
ggplot(aes(x = bill_length_mm)) +
geom_dotplot(
binwidth = 1,
dotsize = 0.5)
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
geom_dotplot(
binaxis = "y",
stackdir = "down",
dotsize = 0.4,
position = position_nudge(-0.025)
)
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## binwidth.

## 28.10 stat_sum()

Computed variables

• n : number of observations at position
• prop : percent of points in that panel at that position

默认几何形状

• geom_point()

适用几何形状

• geom_point() / geom_count() / geom_bar()
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
stat = "sum",
geom = "point",
mapping = aes(size = after_stat(n)),
position = "identity"
)
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
stat_sum(
geom = "point"
)

## 28.11 stat_smooth()

Computed variables

• y: predicted value
• ymin: lower pointwise confidence interval around the mean
• ymax: upper pointwise confidence interval around the mean
• se: standard error

默认几何形状

• geom_smooth()

适用几何形状

• geom_smooth() / geom_line() / geom_point()
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
geom = "smooth",
stat = "smooth",
params = list(se = TRUE),
position = "identity"
)
## geom_smooth() using method = 'loess' and formula = 'y ~ x'
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
stat_smooth(
geom = "smooth",
se = TRUE
)
## geom_smooth() using method = 'loess' and formula = 'y ~ x'
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
geom_smooth(
se = TRUE
)
## geom_smooth() using method = 'loess' and formula = 'y ~ x'

penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
geom = "point",
stat = "smooth",
mapping = aes(size = after_stat(ymax), color = after_stat(ymin)),
position = "identity"
)
## geom_smooth() using method = 'loess' and formula = 'y ~ x'
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
geom = "point",
stat = "smooth",
mapping = aes(color = after_stat(ymin)),
position = "identity"
)
## geom_smooth() using method = 'loess' and formula = 'y ~ x'
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
geom = "point",
stat = "smooth",
mapping = aes(color = stage(NULL, after_stat = ymin)),
position = "identity"
)
## geom_smooth() using method = 'loess' and formula = 'y ~ x'
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
geom = "line",
stat = "smooth",
mapping = aes(color = after_stat(ymin)),
position = "identity"
)
## geom_smooth() using method = 'loess' and formula = 'y ~ x'
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
geom = "pointrange",
stat = "smooth",
mapping = aes(color = after_stat(se)),
position = "identity"
)
## geom_smooth() using method = 'loess' and formula = 'y ~ x'
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
stat = "smooth",
mapping = aes(color = after_stat(y)),
geom = "point",
params = list(method  = "lm", formula = y ~ splines::ns(x, 2)),
position = "identity"
)

## 28.12 stat_bin_2d()

Computed variables

• count: number of points in bin
• density: density of points in bin, scaled to integrate to 1
• ncount: count, scaled to maximum of 1
• ndensity: density, scaled to maximum of 1

默认几何形状

• geom_tile()

适用几何形状

• geom_tile() / geom_point()/ geom_bin2d()
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
geom = "tile",
stat = "bin_2d",
position = "identity"
)
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
geom = "point",
stat = "bin_2d",
position = "identity"
)
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
stat_bin_2d(
geom = "point"
)
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
geom_point(
stat = "bin_2d"
)

penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
geom = "point",
stat = "bin_2d",
mapping = aes(size = after_stat(count)),
position = "identity"
)
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
geom = "tile",
stat = "bin_2d",
mapping = aes(fill = after_stat(count)),
position = "identity"
)

## 28.13 stat_bin_hex()

stat_bin2d()的六边形版本

Computed variables

• count: number of points in bin
• density: density of points in bin, scaled to integrate to 1
• ncount: count, scaled to maximum of 1
• ndensity: density, scaled to maximum of 1

默认几何形状

• geom_hex()

适用几何形状

• geom_hex()
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
geom = "hex",
stat = "binhex",
position = "identity"
)
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
stat_bin_hex(
geom = "hex"
)
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
geom_hex(
stat = "binhex"
)

penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
geom = "text",
stat = "binhex",
mapping = aes(label = stage(NULL, after_stat = count)),
position = "identity"
)

## 28.14 stat_density_2d()

• 不计算等高线 (contour = FALSE)
• count: number of points in bin
• density: density of points in bin, scaled to integrate to 1
• ncount: count, scaled to maximum of 1
• ndensity: density, scaled to maximum of 1
• 计算等高线 (contour = TRUE)
• contour lines, for stat_contour() 等高线
• contour bands, for stat_contour_filled() 等高带
• Contours line types by contour_var = (density, ndensity, and count)

适用几何形状

• geom_density_2d() / geom_raster() / goem_tile() / geom_path() / geom_point() / geom_polygon()

### 28.14.1 先看看有等高线的情形

penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
stat =  "density_2d",
geom =  "path",
params = list(contour = TRUE),
position = "identity"
)
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
stat_density_2d(
contour = TRUE
)
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
geom_density_2d()
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
geom_path(
stat = "density_2d",
contour = TRUE
)

penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
stat =  "density_2d",
geom =  "point",
params = list(contour = TRUE),
position = "identity"
)
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
stat = "density_2d",
geom = "polygon",
mapping = aes(fill = after_stat(level)),
params = list(contour = TRUE),
position = "identity"
)

### 28.14.2 看看无等高线的情形

penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
stat = "density_2d",
geom = "raster",
mapping = aes(fill = after_stat(density)),
params = list(contour = FALSE),
position = "identity"
)
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
stat = "density_2d",
geom = "tile",
mapping = aes(fill = after_stat(count)),
params = list(contour = FALSE),
position = "identity"
)
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
stat_density_2d(
geom = "tile",
mapping = aes(fill = after_stat(density)),
contour = FALSE
)
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
geom_tile(
stat = "density_2d",
mapping = aes(fill = after_stat(density)),
contour = FALSE
)

penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
stat = "density_2d",
geom = "point",
mapping = aes(size = after_stat(count)),
params = list(n = 20, contour = FALSE),
position = "identity"
)

## 28.15 stat_ellipse()

Computed variables

• x
• y

默认几何形状

• geom_path()

适用几何形状

• geom_path() /geom_polygon()
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
geom_point() +
layer(
stat = "ellipse",
geom = "path",
params = list(type = "norm", linetype = 2),
position = "identity"
)
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
geom_point() +
stat_ellipse(
geom = "path",
type = "norm",
linetype = 2
)
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm, color = species)) +
geom_point() +
geom_path(
stat = "ellipse",
type = "norm",
linetype = 2
)

penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
geom_point() +
layer(
stat = "ellipse",
geom = "path",
mapping = aes(color = after_stat(y)),
params = list(type = "norm"),
position = "identity"
)

## 28.16 stat_summary

说明

• stat_summary() operates on unique x or y;
• stat_summary_bin() operates on binned x or y.

Summary functions

• fun.data : Complete summary function. Should take numeric vector as input and return data frame as output

• fun.min : min summary function (should take numeric vector and return single number)

• fun : main summary function (should take numeric vector and return single number)

• fun.max : max summary function (should take numeric vector and return single number)

适用几何形状

• geom_errorbar() / geom_pointrange() /geom_linerange() / geom_crossbar() /geom_point()
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
layer(
stat = "summary",
params = list(fun.data = "mean_cl_normal"),
geom = "errorbar",
position = "identity"
)
penguins %>%
ggplot(aes(x = species, y = bill_depth_mm)) +
stat_summary(
fun.data = mean_cl_normal,
geom = "errorbar"
)
penguins %>%
ggplot(aes(x = sex, y = bill_length_mm)) +
layer(
stat     = "summary",
geom     = "point",
mapping  = aes(size = after_stat(ymin)),
position = "identity"
)
## No summary function supplied, defaulting to mean_se()
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
geom_point() +
layer(
geom     = "point",
stat     = "summary",
params   = list(fun = "mean", color = "red", size = 5),
position = "identity"
)
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
layer(
geom = "point",
stat = "summary",
params = list(fun = median),
mapping = aes(y = stage(start = bill_length_mm, after_stat = y)),
position = "identity"
)
penguins %>%
ggplot(aes(x = sex, y = bill_length_mm)) +
geom_point() +
layer(
geom = "pointrange",
stat = "summary",
params = list(fun.data = ~mean_se(., mult = 5), color = "red", size = 2),
position = "identity"
)
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
geom_point() +
stat_summary(
geom  = "point",
fun   = "mean",
color = "red",
size  = 5
)
penguins %>%
ggplot(aes( x = body_mass_g, y = species)) +
geom_jitter() +
stat_summary(
fun = mean,
geom = "point",
size = 5,
color = "red",
alpha = 1
)
penguins %>%
ggplot(aes(x = sex, y = bill_length_mm)) +
geom_point() +
stat_summary(
fun.data = ~mean_se(., mult = 5),
color = "red",
geom = "pointrange",
size = 2
)
penguins %>%
ggplot(aes(x = sex, y = bill_length_mm)) +
geom_point() +
geom_pointrange(
stat = "summary",
fun.data = ~mean_se(., mult = 5),
color = "red",
size = 2
)
penguins %>%
ggplot(aes(x = sex, y = bill_length_mm)) +
geom_point() +
stat_summary(
fun.data = mean_cl_boot,
color = "red",
geom = "pointrange"
)
penguins %>%
ggplot(aes(x = sex, y = bill_length_mm)) +
geom_point() +
stat_summary(
fun = mean,
fun.min = min,
fun.max = max,
geom = "pointrange",
color = "red",
size = 5
)
penguins %>%
ggplot(aes(x = sex, y = bill_length_mm)) +
geom_point() +
stat_summary(
fun.data = ~mean_se(., mult = 5),
color = "red",
geom = "pointrange"
)
penguins %>%
ggplot(aes(x = species, y = bill_length_mm, group = sex)) +
geom_point() +
stat_summary(
fun.data = ~mean_se(., mult = 2),
color = "red",
geom = "pointrange"
)
penguins %>%
ggplot(aes(x = species, y = bill_length_mm, group = sex)) +
stat_summary(fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "pointrange") +
stat_summary(fun = mean,
geom = "line") +
facet_wrap(~ sex)

### 28.16.1 自定义函数

my_count <- function(x){
tibble(
y = length(x),
)
}

penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
stat_summary(
geom = "bar",
fun.data = my_count
)
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
geom_bar(
stat = "summary",
fun.data = my_count,
)
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
layer(
geom = "bar",
stat = "summary",
params = list(fun.data = my_count),
position = "identity"
)

### 28.16.2 添加文本

penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
geom_point() +
stat_summary(
geom = "point",
fun = "mean",
color = "red",
size = 5
) +
stat_summary(
aes(label = after_stat(y)),
geom = "text",
fun.data = "mean_se",
color = "red",
size = 5
)
n_fun <- function(x) {
data.frame(y = 62,
label = length(x),
color = ifelse(length(x) > 100, "red", "blue")
)
}

penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
geom_boxplot() +
geom_jitter() +
stat_summary(
fun.data = n_fun,
geom = "text"
)
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
geom_point() +
stat_summary(
geom = "pointrange",
fun.data = "mean_cl_boot",
color = "red"
)
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
geom_point() +
stat_summary(
geom = "pointrange",
fun.data = ~ mean_se(., mult = 5),
color = "red",
size = 1
) +
stat_summary(
fun = "mean",
geom = "text",
mapping = aes(y = stage(bill_length_mm, after_stat = 30),
label = round(after_stat(y), 2)),
color = "blue",
size = 5
) +
stat_summary(
fun = "length",
geom = "text",
mapping = aes(y = stage(bill_length_mm, after_stat = 62),
label = after_stat(y)
),
color = "black",
size = 5
)

### 28.16.3 更多

calc_median_and_fill <- function(x, threshold = 40) {
tibble(
y = median(x),
fill = if_else(y < threshold, "red", "gray50")
)
}

penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
stat_summary(
fun.data = calc_median_and_fill,
geom = "bar"
)
calc_median_and_color <- function(x, threshold = 40) {
tibble(
y = median(x),
color = if_else(y < threshold, "red", "gray50")
)
}

penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
stat_summary(
fun.data = calc_median_and_color,
geom = "point",
size = 5
)
penguins %>%
ggplot(aes(species, bill_depth_mm)) +
stat_summary(
fun.data = function(x) {

scaled_size <- length(x)/nrow(penguins)

mean_se(x) %>%
mutate(size = scaled_size)
}
)
penguins %>%
ggplot(aes(species, bill_depth_mm)) +
geom_point(position = position_jitter(width = .2), alpha = .3) +
stat_summary(fun = mean,
na.rm = TRUE,
geom = "point",
color = "dodgerblue",
size = 4,
shape = "diamond") +
stat_summary(fun.data = mean_cl_normal,
na.rm = TRUE,
geom = "errorbar",
width = .2,
color = "dodgerblue") +
stat_summary(fun = mean,
na.rm = TRUE,
aes(group = 1),
geom = "line",
color = "dodgerblue",
size = .75) 
## Warning: Using size aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use linewidth instead.
## This warning is displayed once every 8 hours.
## Call lifecycle::last_lifecycle_warnings() to see where this warning was
## generated.
penguins %>%
ggplot(aes(species, bill_depth_mm, group = sex, color = sex)) +
geom_point(
position = position_jitterdodge(
jitter.width = .2,
dodge.width = .7
),
alpha = .1
) +
stat_summary(
fun = mean,
na.rm = TRUE,
geom = "point",
shape = "diamond",
size = 4,
color = "black",
position = position_dodge(width = .7)
) +
stat_summary(
fun.data = mean_cl_normal,
na.rm = TRUE,
geom = "errorbar",
width = .2,
color = "black",
position = position_dodge(width = .7)
) +
scale_color_brewer(palette = "Set1")
penguins %>%
ggplot(aes(species, bill_depth_mm, group = sex, color = sex)) +
geom_point(
position = position_jitterdodge(
jitter.width = .2,
dodge.width = .7
),
alpha = .1
) +
stat_summary(
fun = mean,
na.rm = TRUE,
geom = "point",
shape = "diamond",
size = 4,
color = "black",
position = position_dodge(width = .7)
) +
stat_summary(
fun.data = mean_cl_normal,
na.rm = TRUE,
geom = "errorbar",
width = .2,
color = "black",
position = position_dodge(width = .7)
) +
scale_color_brewer(palette = "Set1") +
facet_wrap(~sex)

## 28.17 stat_summary_bin

penguins %>%
ggplot(aes(x = bill_depth_mm, y = bill_length_mm)) +
layer(
stat = "summary_bin",
geom = "bar",
params = list(fun = mean, color = "red", orientation = 'x'),
position = "identity"
)
penguins %>%
ggplot(aes(x = bill_depth_mm, y = bill_length_mm)) +
stat_summary_bin(
fun = mean,
color = "red",
geom = "bar",
orientation = 'x'     # bin on x axis, summary mean on y
)
penguins %>%
ggplot(aes(x = bill_depth_mm, y = bill_length_mm)) +
stat_summary_bin(
fun = mean,
color = "red",
geom = "bar",
orientation = 'y'
)
penguins %>%
ggplot(aes(x = bill_depth_mm, y = bill_length_mm)) +
geom_bar(
stat = "summary_bin",
fun = mean,
color = "red"
)
penguins %>%
ggplot(aes(x = bill_depth_mm, y = bill_length_mm)) +
stat_summary_bin(
fun = mean,
color = "red",
geom = "bar",
orientation = 'y'  # bin on y axis, summary mean on x
)

## 28.18 stat_function()

Computed variables

• x: x values along a grid
• y: value of the function evaluated at corresponding x

默认几何形状

• geom_line()

适用几何形状

• geom_line() / geom_point() /geom_function()
tibble(x = runif(n = 100, min = -5, max = 5)) %>%
ggplot() +
layer(
stat = "function",
geom = "point",
params = list(fun = dnorm, args = list(mean = 0, sd = 0.5)),
position = "identity"
) +
xlim(-2, 2)
tibble(x = runif(n = 100, min = -5, max = 5)) %>%
ggplot() +
layer(
stat = "function",
geom = "point",
params = list(fun =  ~ 0.5*exp(-abs(.x))),
position = "identity"
) +
xlim(-2, 2)

## 28.19 stat_spoke()

penguins %>%
mutate(angle = flipper_length_mm / (2*pi) ) %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
stat = "identity",
geom = "spoke",
mapping = aes(angle = angle),
position = "identity"
)
penguins %>%
mutate(angle = flipper_length_mm / (2*pi) ) %>%

ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
geom_spoke(
mapping = aes(angle = angle),
)

## 28.20 stat_quantile()

quantreg::rq(bill_depth_mm ~ bill_length_mm,
data = penguins,
tau = c(0.25, 0.5, 0.75)
)

Computed variables

• quantile: quantile of distribution

默认几何形状

• geom_quantile()

适用几何形状

• geom_line() / geom_point() / geom_quantile()
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
stat = "quantile",
geom = "quantile",
params = list(quantiles = c(0.25, 0.5, 0.75)),
position = "identity"
)
## Smoothing formula not specified. Using: y ~ x
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
stat = "quantile",
geom = "point",
mapping = aes(color = after_stat(quantile)),
params = list(quantiles = c(0.25, 0.5, 0.75)),
position = "identity"
)
## Smoothing formula not specified. Using: y ~ x

## 28.21 stat_summary_2d()

• 看参数stat_summary() 是对每一个x统计汇总summary，有多少个唯一的x, 就有多少个value.

• 而stat_summary_2d() 有 bin的参数，它是对落在(x，y)构成的具有一定binwidth的长方形区域内的z统计汇总. 有多少个长方形，就有多少个value.

Aesthetics

• x: horizontal position
• y: vertical position
• z: value passed to the summary function

Computed variables

• x, y : Location
• value : Value of summary statistic.

默认几何形状

• geom_tile() for stat_summary_2d()
• geom_hex() for stat_summary_hex()
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm, z = body_mass_g)) +
layer(
stat = "summary_2d",
geom = "tile",
params = list(fun = ~ sum(.x^2)),
position = "identity"
) 
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm, z = body_mass_g)) +
stat_summary_2d(
geom = "point",
fun = ~ sum(.x^2),  # summary statistic for z
mapping = aes(size = after_stat(value))
) 

### 28.21.1 测试

penguins %>%
distinct(bill_length_mm, bill_depth_mm)
## # A tibble: 329 × 2
##    bill_length_mm bill_depth_mm
##             <dbl>         <dbl>
##  1           39.1          18.7
##  2           39.5          17.4
##  3           40.3          18
##  4           36.7          19.3
##  5           39.3          20.6
##  6           38.9          17.8
##  7           39.2          19.6
##  8           41.1          17.6
##  9           38.6          21.2
## 10           34.6          21.1
## # ℹ 319 more rows

sum是一个点一个位置

penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
geom = "text",
stat = "sum",
mapping = aes(label = after_stat(n), color = as.factor(after_stat(n)) ),
params = list(size = 4),
position = "identity"
)

bin_2d是一个bin一个统计

penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
geom = "text",
stat = "bin_2d",
mapping = aes(label = stage(NULL, after_stat = count)),
position = "identity"
)

stat_summary_2d也是一个bin一个位置

n_fun <- function(z) {
length(z)
}

penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm, z = body_mass_g)) +
stat_summary_2d(
fun = n_fun,
geom = "text",
mapping = aes(label = after_stat(value))
)

## 28.22 stat_summary_hex()

penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm, z = body_mass_g)) +
layer(
stat = "summary_hex",
geom = "tile",
params = list(fun = ~ sum(.x^2), binwidth = c(0.5, 0.2)),
position = "identity"
) 
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm, z = body_mass_g)) +
stat_summary_hex(
geom = "tile",
fun = ~ sum(.x^2),         # summary statistic for z
binwidth = c(0.5, 0.2)     # Numeric vector giving bin width in both vertical and horizontal directions
) 

## 28.23 stat_contour() and stat_contour_filled()

Computed variables

• level: Height of contour. For contour lines, this is numeric vector that represents bin boundaries. For contour bands, this is an ordered factor that represents bin ranges.
• level_low: level_high, level_mid (contour bands only) Lower and upper bin boundaries for each band, as well the mid point between the boundaries.
• nlevel: Height of contour, scaled to maximum of 1.
• piece: Contour piece (an integer).

默认几何形状

• geom_contour() / geom_contour_filled()

适用几何形状

• geom_contour() / geom_contour_filled()
penguins %>%
mutate(
flipper_length_mm = flipper_length_mm %/% 10,
body_mass_g = body_mass_g %/% 10
) %>%
ggplot(aes(x = flipper_length_mm, y = body_mass_g, z = bill_length_mm)) +
layer(
stat = "contour",
geom = "path",
mapping = aes(colour = after_stat(level)),
position = "identity"
)
penguins %>%
mutate(
flipper_length_mm = flipper_length_mm %/% 10,
body_mass_g = body_mass_g %/% 10
) %>%
ggplot(aes(x = flipper_length_mm, y = body_mass_g, z = bill_length_mm)) +
stat_contour(
geom = "path",
mapping = aes(colour = after_stat(level))
)
penguins %>%
mutate(
flipper_length_mm = flipper_length_mm %/% 10,
body_mass_g = body_mass_g %/% 10
) %>%
ggplot(aes(x = flipper_length_mm, y = body_mass_g, z = bill_length_mm)) +
geom_contour(
aes(colour = after_stat(level))
)

## 28.24 课后作业

• 写成对应的stat_***() 版本和geom_***()版本
library(tidyverse)
library(palmerpenguins)

penguins <- penguins %>% drop_na()

ggplot() +
layer(
data       = penguins,
mapping    = aes(x = species, y = bill_length_mm, color = fct_rev(sex)),
stat       = "summary",
params     = list(fun = "mean"),
geom       = "point",
position   = position_dodge(width = 0.5)
)
• 写出对应的stat_***() 版本和layer()版本
penguins %>%
ggplot(aes(species, island)) +
geom_count(aes(size = after_stat(n)), show.legend = FALSE) 
• 上题用layer写，但要求不用stat = "sum", 而用stat = "summary"