第 28 章 ggplot2之从图层到几何形状
用ggplot2,大多是从几何形状出发,总有“只见树木不见森林”的感觉。我尝试从图层结构出发,去思考ggplot2绘图原理。欢迎大家批评指正。
28.1 图层的五大元素
ggplot2中每个图层都要有的五大元素:
- 数据data
- 美学映射mapping
- 几何形状geom
- 统计变换stat
- 位置调整position
数据映射后,需要指定一种数据统计变换的方式,统计计算数据(不进行统计变换可以理解为是等值变换),最后通过某种几何形状geom来对其进行可视化的展现。
我们现在按照 layer() -> stat_*() -> geom_*() 这个思路来理解各种图形。
一般情况下,统计变换会生成新的数据列,在ggplot2里称之为 Computed variables(计算变量)。如果想把这些新变量映射到图形属性,就需要使用 after_stat() 或者 stage() 函数,具体见下面的案例。
28.3 stat_identity()
就是什么也不干,即等值变换。
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
stat = "identity",
geom = "point",
params = list(na.rm = FALSE),
position = "identity"
)
data:image/s3,"s3://crabby-images/cd916/cd916f537035cd9b119ba7b79151f198dfbf3631" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
stat_identity(
geom = "point"
)
data:image/s3,"s3://crabby-images/bea6c/bea6ce57469da5300222fc584cd7220e134d915d" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
geom_point()
data:image/s3,"s3://crabby-images/f30a5/f30a5ef7f809fc81457395e3039472beaee40996" alt=""
28.4 stat_count()
统计 落在x(离散)位置上,点的个数
Computed variables
- count: number of points in bin
- prop: groupwise proportion
默认几何形状
- geom_bar()
适用几何形状
- geom_point() / geom_bar()
penguins %>%
ggplot(aes(x = species)) +
layer(
stat = "count",
geom = "bar",
mapping = aes(y = after_stat(count)),
position = "identity"
)
data:image/s3,"s3://crabby-images/bdd43/bdd43c8c01e3ddf65e030a405543af23e299220e" alt=""
penguins %>%
ggplot(aes(x = species)) +
layer(
stat = "count",
geom = "point",
mapping = aes(y = after_stat(count)),
position = "identity"
)
data:image/s3,"s3://crabby-images/7d4c1/7d4c16f1e22822164fe8ea5bfbe04f35f8d0f0ef" alt=""
这里 aes(y = after_stat(count)) 可以看作是 aes(y = stage(start = NULL, after_stat = count)) 的简写
penguins %>%
ggplot(aes(x = species)) +
layer(
stat = "count",
geom = "bar",
mapping = aes(y = stage(start = NULL, after_stat = count)),
position = "identity"
)
data:image/s3,"s3://crabby-images/f613f/f613f7b3974043cca9836eab6c991579df7dd1b9" alt=""
penguins %>%
ggplot(aes(x = species, y = after_stat(count))) +
stat_count(
geom = "bar"
)
data:image/s3,"s3://crabby-images/5fd3e/5fd3e19d145e3e13b4e3f63b0a3b1214e32b7fdd" alt=""
penguins %>%
ggplot(aes(x = species, y = after_stat(count))) +
geom_bar(
stat = "count"
)
data:image/s3,"s3://crabby-images/ae2a4/ae2a4f54bd631d6c74966ff515f9f738e3b4504a" alt=""
penguins %>%
ggplot(aes(x = species, y = after_stat(count))) +
stat_count(
geom = "point"
)
data:image/s3,"s3://crabby-images/23939/239391eab1116a8ce4fd1458949ec032f5f2cf83" alt=""
penguins %>%
ggplot(aes(x = species, y = after_stat(count))) +
geom_point(
stat = "count"
)
data:image/s3,"s3://crabby-images/4f8bf/4f8bfd13ed37b23e79cbc014078b9b4d255abacd" alt=""
28.5 stat_bin()
统计 落在x(连续)区间上,点的个数
Computed variables
- count: number of points in bin
- density: density of points in bin, scaled to integrate to 1
- ncount: count, scaled to maximum of 1
- ndensity: density, scaled to maximum of 1
默认几何形状
- geom_bar()
适用几何形状
- geom_bar() / geom_histogram() / geom_freqpoly()
penguins %>%
ggplot(aes(x = bill_length_mm)) +
layer(
stat = "bin",
geom = "bar",
mapping = aes(y = after_stat(count)),
position = "identity"
)
data:image/s3,"s3://crabby-images/294d4/294d45cc640aac7ae47d52a384af3fa54a62456f" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm)) +
layer(
stat = "bin",
geom = "point",
mapping = aes(x = stage(start = bill_length_mm, after_stat = x),
y = after_stat(count)
),
position = "identity"
)
data:image/s3,"s3://crabby-images/a0364/a03642127bfb887b1186f63de503812cf8b97fcf" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm, y = after_stat(count))) +
stat_bin(
geom = "point"
)
data:image/s3,"s3://crabby-images/c7edd/c7edd13c0b5568fe66eeda6f29cfa3fa51cf9639" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm, y = after_stat(count))) +
geom_bar(
stat = "bin"
)
data:image/s3,"s3://crabby-images/8cfdc/8cfdc093a863d37ea92cb9551bcee5afebdd1bb1" alt=""
geom_histogram() 本质上就是 bar 这种几何形状配上 stat_bin():geom_bar() 默认使用 stat_count(),而 geom_histogram() 默认使用 stat_bin()
penguins %>%
ggplot(aes(x = bill_length_mm)) +
layer(
stat = "bin",
geom = "bar",
mapping = aes(y = after_stat(count)),
position = 'identity'
)
data:image/s3,"s3://crabby-images/d6998/d69987b92854ca52e589c451ad6e582f48bed996" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm)) +
layer(
stat = "bin",
geom = "bar",
mapping = aes(y = after_stat(ncount)),
position = 'identity'
)
data:image/s3,"s3://crabby-images/0eb1f/0eb1fd836326fd71c14697a16d3da3e73175b20a" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm)) +
stat_bin(
mapping = aes(y = after_stat(count)),
geom = "bar",
position = 'identity'
)
data:image/s3,"s3://crabby-images/3799a/3799a9110d2538e359da3d7e4f0a720294578c40" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm)) +
geom_histogram(
mapping = aes(y = after_stat(count)),
stat = "bin",
position = 'identity'
)
data:image/s3,"s3://crabby-images/5e524/5e52492d5489a624d005e2b1f9ded1869a677afe" alt=""
复杂点的geom_histogram()
penguins %>%
ggplot(aes(x = bill_length_mm, fill = sex)) +
layer(
mapping = aes(y = after_stat(density)),
geom = "bar",
stat = "bin",
position = 'dodge'
) +
facet_wrap(vars(species))
data:image/s3,"s3://crabby-images/e2be1/e2be1394098f37024ab6036bdf3625caded23ebf" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm, fill = sex)) +
layer(
mapping = aes(y = stage(NULL, after_stat = density)),
geom = "bar",
stat = "bin",
position = 'dodge'
) +
facet_wrap(vars(species))
data:image/s3,"s3://crabby-images/9832d/9832dcea30266f8c6a4be558dacccd27447f23b7" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm, fill = sex)) +
stat_bin(
mapping = aes(y = after_stat(density)),
geom = "bar",
position = 'dodge'
) +
facet_wrap(vars(species))
data:image/s3,"s3://crabby-images/60fc0/60fc074c72e497af608d70bed532e0f5b91b6694" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm, fill = sex)) +
geom_histogram(
aes(y = after_stat(density)),
position = 'dodge'
) +
facet_wrap(vars(species))
data:image/s3,"s3://crabby-images/7e555/7e555084fe789e972546429c41de8a066856091c" alt=""
28.6 stat_density()
x(连续)核密度估计,可以看作是直方图的平滑版本
kernel = c("gaussian", "epanechnikov", "rectangular",
"triangular", "biweight", "cosine",
"optcosine")
Computed variables
- density: density estimate
- count: density * number of points - useful for stacked density plots
- scaled: density estimate, scaled to maximum of 1
- ndensity: alias for scaled, to mirror the syntax of stat_bin()
默认几何形状
- geom_area()
适用几何形状
- geom_area()/ geom_line()/ geom_point()/ geom_density()
penguins %>%
ggplot(aes(x = bill_length_mm)) +
layer(
stat = "density",
geom = "area",
params = list(kernel = "gaussian"),
position = "identity"
)
data:image/s3,"s3://crabby-images/38d3f/38d3f240f27f9eb708504fa104ed4a41066d2a8c" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm)) +
layer(
stat = "density",
geom = "line",
params = list(kernel = "gaussian"),
position = "identity"
)
data:image/s3,"s3://crabby-images/ecd28/ecd281e71b54a2a07f8f3f1b5107e8034499da82" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm)) +
layer(
stat = "density",
geom = "point",
params = list(kernel = "gaussian"),
position = "identity"
)
data:image/s3,"s3://crabby-images/365b8/365b8f19e6b9368d5a5771b8cf491fe0aab82188" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm)) +
stat_density(
geom = "point",
kernel = "gaussian"
)
data:image/s3,"s3://crabby-images/735a5/735a59cb573dadc8d6dda23b41e1722aa31d2723" alt=""
28.7 stat_boxplot()
计算连续变量的五个统计值 (the median, two hinges and two whiskers), 以及outlier
Aesthetics
- x or y; lower; upper; middle; ymin; ymax
Computed variables
- width: width of boxplot
- ymin: lower whisker = smallest observation greater than or equal to lower hinge - 1.5 * IQR
- lower: lower hinge, 25% quantile
- notchlower: lower edge of notch = median - 1.58 * IQR / sqrt(n)
- middle: median, 50% quantile
- notchupper: upper edge of notch = median + 1.58 * IQR / sqrt(n)
- upper: upper hinge, 75% quantile
- ymax: upper whisker = largest observation less than or equal to upper hinge + 1.5 * IQR
默认几何形状
- geom_boxplot()
适用几何形状
- geom_boxplot() / geom_point()
penguins %>%
ggplot(aes(x = species, y = bill_length_mm))+
layer(
stat = "boxplot",
geom = "boxplot",
position = "identity"
)
data:image/s3,"s3://crabby-images/e4967/e496723a447819ee2dafb1209cc20bc8e999883a" alt=""
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
stat_boxplot(
geom = "boxplot"
)
data:image/s3,"s3://crabby-images/e280d/e280db4d6abdf69defd7a087c04066d73d7bc4f3" alt=""
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
geom_boxplot()
data:image/s3,"s3://crabby-images/1d9bc/1d9bc42ccbcc4de400fde35881e2592aceb56944" alt=""
可以根据 Computed variables
画出更多的几何形状
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
layer(
stat = "boxplot",
geom = "boxplot",
mapping = aes(color = after_stat(middle)),
position = "identity"
)
data:image/s3,"s3://crabby-images/ceb9b/ceb9b0ae6c48235a66c77a5b89ebe98accaa4be0" alt=""
# Draw the computed box `width` of every species as a point.
# `y` has to stay mapped to bill_length_mm while stat_boxplot runs: a bare
# `y = after_stat(width)` replaces the inherited mapping, so the stat would
# receive no y values at all. stage() keeps the start mapping for the stat
# and switches to the computed variable afterwards, matching the sibling
# examples that plot `notchupper`, `ymax` and `middle`.
penguins %>%
  ggplot(aes(x = species, y = bill_length_mm)) +
  layer(
    stat = "boxplot",
    geom = "point",
    mapping = aes(y = stage(bill_length_mm, after_stat = width)),
    position = "identity"
  )
data:image/s3,"s3://crabby-images/6b75a/6b75a683f859acdc5d31c766726c8351b988b4b1" alt=""
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
layer(
stat = "boxplot",
geom = "point",
mapping = aes(y = stage(bill_length_mm, after_stat = notchupper)),
position = "identity"
)
data:image/s3,"s3://crabby-images/294a8/294a87822939fd857417d5ff507590ba88a91c01" alt=""
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
layer(
stat = "boxplot",
geom = "point",
mapping = aes(y = stage(bill_length_mm, after_stat = ymax)),
position = "identity"
)
data:image/s3,"s3://crabby-images/b2426/b2426d302f3deb3907a40db0e071e830e34477f2" alt=""
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
stat_boxplot()
data:image/s3,"s3://crabby-images/34504/345046e08298c0090d38bca30c32cf379fdb4a4f" alt=""
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
layer(
stat = "boxplot",
geom = "point",
mapping = aes(y = stage(bill_length_mm, after_stat = middle)),
params = list(color = "red", size = 5),
position = "identity"
)
data:image/s3,"s3://crabby-images/8b870/8b870ccb1b2ee07e276fb048c720fc8e724a0316" alt=""
# Boxplot per species where the fill is derived from the outline colour.
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
geom_boxplot(
# colour is mapped from species; fill is evaluated after scaling, so it
# reuses the already-scaled `colour` value with alpha() applied at 0.4.
aes(colour = species,
fill = after_scale(alpha(colour, 0.4)))
)
data:image/s3,"s3://crabby-images/d8db6/d8db6b33e85172d75cf9a4f2e79eb86e12440d22" alt=""
28.8 stat_ydensity()
可以看作是箱线图的密度图呈现
Computed variables
- density: density estimate
- scaled: density estimate, scaled to maximum of 1
- count: density * number of points - probably useless for violin plots
- violinwidth: density scaled for the violin plot, according to area, counts or to a constant maximum width
- n: number of points
- width: width of violin bounding box
默认几何形状
- geom_violin()
适用几何形状
- geom_violin() / geom_point()
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
geom_point() +
layer(
geom = "violin",
stat = "ydensity",
position = "identity"
)
data:image/s3,"s3://crabby-images/3a962/3a9623f1b32a8f5ee2bbb7a8e770000ded50c435" alt=""
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
geom_point() +
layer(
geom = "point",
stat = "ydensity",
position = "identity"
)
data:image/s3,"s3://crabby-images/ec358/ec3588b700759e661738ce30ffaf99f934fcce32" alt=""
28.9 stat_bindot()
圆点图,是直方图的另外一种形式
Computed variables
- x: center of each bin, if binaxis is “x”
- y: center of each bin, if binaxis is “y”
- binwidth: max width of each bin if method is “dotdensity”;width of each bin if method is “histodot”
- count: number of points in bin
- ncount: count, scaled to maximum of 1
- density: density of points in bin, scaled to integrate to 1, if method is “histodot”
- ndensity: density, scaled to maximum of 1, if method is “histodot”
默认几何形状
- geom_dotplot()
适用几何形状
- geom_dotplot()
penguins %>%
ggplot(aes(x = bill_length_mm)) +
layer(
stat = "bindot",
geom = "dotplot",
mapping = aes(y = stage(start = NULL, after_stat = count)),
params = list(binwidth = 1, dotsize = 0.5),
position = position_nudge(-0.025)
)
data:image/s3,"s3://crabby-images/50f72/50f7252ef97d9918598ebdf24649f3e97ebf8436" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm)) +
layer(
stat = "bindot",
geom = "point",
mapping = aes(y = stage(start = NULL, after_stat = count)),
params = list(binwidth = 1),
position = "identity"
)
data:image/s3,"s3://crabby-images/ce77c/ce77ccf2c19e89e8a76917be8a3e07a566c88b40" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm)) +
geom_dotplot(
binwidth = 1,
dotsize = 0.5)
data:image/s3,"s3://crabby-images/b2a49/b2a49a1f212ebe152547bb44b3662aabcd42e99f" alt=""
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
geom_dotplot(
binaxis = "y",
stackdir = "down",
dotsize = 0.4,
position = position_nudge(-0.025)
)
data:image/s3,"s3://crabby-images/8aef0/8aef03901d41c9c9f647ca1c7de66d51f002a95a" alt=""
28.10 stat_sum()
统计落在x(离散或者连续), y(离散或者连续)位置上,点的个数
Computed variables
- n : number of observations at position
- prop : percent of points in that panel at that position
默认几何形状
- geom_point()
适用几何形状
- geom_point() / geom_count() / geom_bar()
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
stat = "sum",
geom = "point",
mapping = aes(size = after_stat(n)),
position = "identity"
)
data:image/s3,"s3://crabby-images/31760/317601a1174ca5a752d4ae705a6aa18480c33df7" alt=""
data:image/s3,"s3://crabby-images/d258e/d258e862f90ae7a4f311621cfb0c43cdf22111dc" alt=""
28.11 stat_smooth()
根据x,y数据和拟合公式,计算每个点位置的拟合值以及标准误
Computed variables
- y: predicted value
- ymin: lower pointwise confidence interval around the mean
- ymax: upper pointwise confidence interval around the mean
- se: standard error
默认几何形状
- geom_smooth()
适用几何形状
- geom_smooth() / geom_line() / geom_point()
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
geom = "smooth",
stat = "smooth",
params = list(se = TRUE),
position = "identity"
)
data:image/s3,"s3://crabby-images/d22d0/d22d082be98c3b52f7d936245404d2242834275f" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
stat_smooth(
geom = "smooth",
se = TRUE
)
data:image/s3,"s3://crabby-images/9fc06/9fc06e586c7a5012fb0077d434d03dfffdd2e54d" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
geom_smooth(
se = TRUE
)
data:image/s3,"s3://crabby-images/4b3b9/4b3b90a904d4b4e5ac5ed5ed2c1c10f9e2cecc2e" alt=""
统计转换后,可以根据 Computed variables
画出更多的几何形状
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
geom = "point",
stat = "smooth",
mapping = aes(size = after_stat(ymax), color = after_stat(ymin)),
position = "identity"
)
data:image/s3,"s3://crabby-images/88427/88427e975eaf92fdacdc75c07824364bf1e26c34" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
geom = "point",
stat = "smooth",
mapping = aes(color = after_stat(ymin)),
position = "identity"
)
data:image/s3,"s3://crabby-images/3fbf0/3fbf002f14678e5eea83d91443149daecebb9f50" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
geom = "point",
stat = "smooth",
mapping = aes(color = stage(NULL, after_stat = ymin)),
position = "identity"
)
data:image/s3,"s3://crabby-images/49a58/49a58edf321ce7f6081bbc2823f18d638391dc33" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
geom = "line",
stat = "smooth",
mapping = aes(color = after_stat(ymin)),
position = "identity"
)
data:image/s3,"s3://crabby-images/c4fb6/c4fb612cc28d864163f8c96cfa91df6035705a73" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
geom = "pointrange",
stat = "smooth",
mapping = aes(color = after_stat(se)),
position = "identity"
)
data:image/s3,"s3://crabby-images/7588c/7588ccd517bc5b156be27755f04ba978ba3ff524" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
stat = "smooth",
mapping = aes(color = after_stat(y)),
geom = "point",
params = list(method = "lm", formula = y ~ splines::ns(x, 2)),
position = "identity"
)
data:image/s3,"s3://crabby-images/d6d76/d6d76123a69597662303e7b4b91a61aae4a3fb27" alt=""
28.12 stat_bin_2d()
统计 落在x和y(长方形)区域上,点的个数
Computed variables
- count: number of points in bin
- density: density of points in bin, scaled to integrate to 1
- ncount: count, scaled to maximum of 1
- ndensity: density, scaled to maximum of 1
默认几何形状
- geom_tile()
适用几何形状
- geom_tile() / geom_point()/ geom_bin2d()
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
geom = "tile",
stat = "bin_2d",
position = "identity"
)
data:image/s3,"s3://crabby-images/b8fb4/b8fb43be64de672522a66aab46bc05239e28d458" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
geom = "point",
stat = "bin_2d",
position = "identity"
)
data:image/s3,"s3://crabby-images/86b45/86b45cd1034fa0bd63ce25be39d49812085195ab" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
stat_bin_2d(
geom = "point"
)
data:image/s3,"s3://crabby-images/2857b/2857b2eca040be38bddfb893757c10550188eeb4" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
geom_point(
stat = "bin_2d"
)
data:image/s3,"s3://crabby-images/87c66/87c66ee9d9685fa5d3074312d0600966d4fba82d" alt=""
可以根据 Computed variables
画出更多的几何形状
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
geom = "point",
stat = "bin_2d",
mapping = aes(size = after_stat(count)),
position = "identity"
)
data:image/s3,"s3://crabby-images/46237/462373fc97eeef0f95b0ef013054bc56678d2eea" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
geom = "tile",
stat = "bin_2d",
mapping = aes(fill = after_stat(count)),
position = "identity"
)
data:image/s3,"s3://crabby-images/9473e/9473e6c5e217be4070d051f63f83c361aa3b0d15" alt=""
28.13 stat_bin_hex()
stat_bin2d()的六边形版本
Computed variables
- count: number of points in bin
- density: density of points in bin, scaled to integrate to 1
- ncount: count, scaled to maximum of 1
- ndensity: density, scaled to maximum of 1
默认几何形状
- geom_hex()
适用几何形状
- geom_hex()
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
geom = "hex",
stat = "binhex",
position = "identity"
)
data:image/s3,"s3://crabby-images/222cb/222cba838fc645de18fa028c0a2e88efa12b890d" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
stat_bin_hex(
geom = "hex"
)
data:image/s3,"s3://crabby-images/ed9c4/ed9c46501fdf058b95bcde283794b6bd527e65e1" alt=""
data:image/s3,"s3://crabby-images/e09fd/e09fdceb1862e822745d56de5dc6c2066c800029" alt=""
可以根据 Computed variables
画出更多的几何形状
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
geom = "text",
stat = "binhex",
mapping = aes(label = stage(NULL, after_stat = count)),
position = "identity"
)
data:image/s3,"s3://crabby-images/734df/734df7a7c711ac4a1c743c9020a8a4ebaa094a70" alt=""
28.14 stat_density_2d()
二维核密度估计,二维版本的stat_density()
- 不计算等高线 (contour = FALSE)
  - density: the density estimate
  - ndensity: density estimate, scaled to a maximum of 1
  - count: density estimate * number of observations in group
  - n: number of observations in each group
- 计算等高线 (contour = TRUE)
  - contour lines, for stat_contour() 等高线
  - contour bands, for stat_contour_filled() 等高带
  - 通过 contour_var 指定用哪个变量计算等高线,可取 density、ndensity 和 count
适用几何形状
- geom_density_2d() / geom_raster() / geom_tile() / geom_path() / geom_point() / geom_polygon()
28.14.1 先看看有等高线的情形
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
stat = "density_2d",
geom = "path",
params = list(contour = TRUE),
position = "identity"
)
data:image/s3,"s3://crabby-images/aa114/aa114c7312a77997bc4cb59e214de2918d121c4a" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
stat_density_2d(
contour = TRUE
)
data:image/s3,"s3://crabby-images/f7d92/f7d92db147b62e8df673a6ba14460c235198026a" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
geom_density_2d()
data:image/s3,"s3://crabby-images/3e2ce/3e2ce7f3c888d7e9c6f427f5dd350265fa5da0cc" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
geom_path(
stat = "density_2d",
contour = TRUE
)
data:image/s3,"s3://crabby-images/64170/64170311f1422ddbf2bffd8e4304cfa8c57703a1" alt=""
可以根据 Computed variables
画出更多的几何形状
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
stat = "density_2d",
geom = "point",
params = list(contour = TRUE),
position = "identity"
)
data:image/s3,"s3://crabby-images/29f5c/29f5c7f6de554fd6ac76a4b047c1fb26ed061f58" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
stat = "density_2d",
geom = "polygon",
mapping = aes(fill = after_stat(level)),
params = list(contour = TRUE),
position = "identity"
)
data:image/s3,"s3://crabby-images/16668/166683484409f65b9be75d8af97880b12bb974aa" alt=""
28.14.2 看看无等高线的情形
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
stat = "density_2d",
geom = "raster",
mapping = aes(fill = after_stat(density)),
params = list(contour = FALSE),
position = "identity"
)
data:image/s3,"s3://crabby-images/8b9fd/8b9fdbdc5361c06e4304d5c4343d4794feae9bbe" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
stat = "density_2d",
geom = "tile",
mapping = aes(fill = after_stat(count)),
params = list(contour = FALSE),
position = "identity"
)
data:image/s3,"s3://crabby-images/e4273/e4273751c0f4502648c9d68e69e9bec51f191b67" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
stat_density_2d(
geom = "tile",
mapping = aes(fill = after_stat(density)),
contour = FALSE
)
data:image/s3,"s3://crabby-images/28328/28328795fc1332d94a269367d4fbb66b27005a56" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
geom_tile(
stat = "density_2d",
mapping = aes(fill = after_stat(density)),
contour = FALSE
)
data:image/s3,"s3://crabby-images/59c1e/59c1e5893ef9ab1d23b2d2ae0fde74eae8e71317" alt=""
可以根据 Computed variables
画出更多的几何形状
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
layer(
stat = "density_2d",
geom = "point",
mapping = aes(size = after_stat(count)),
params = list(n = 20, contour = FALSE),
position = "identity"
)
data:image/s3,"s3://crabby-images/cea5b/cea5bc6fc46b9d11825809447563f717f259c0a5" alt=""
28.15 stat_ellipse()
假定数据服从多元 t 分布(type = "t",默认)或多元正态分布(type = "norm"),计算绘制置信椭圆所需的坐标
Computed variables
- x
- y
默认几何形状
- geom_path()
适用几何形状
- geom_path() /geom_polygon()
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
geom_point() +
layer(
stat = "ellipse",
geom = "path",
params = list(type = "norm", linetype = 2),
position = "identity"
)
data:image/s3,"s3://crabby-images/48a6b/48a6bdc98130fa4ec54f26398c21a8e5b2d8b4fc" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
geom_point() +
stat_ellipse(
geom = "path",
type = "norm",
linetype = 2
)
data:image/s3,"s3://crabby-images/52211/52211ccb90d8bda100cd3484e3284babedd24193" alt=""
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm, color = species)) +
geom_point() +
geom_path(
stat = "ellipse",
type = "norm",
linetype = 2
)
data:image/s3,"s3://crabby-images/0086f/0086fe1bdc7b2d0b9abf0ea304d9bc94d9f4eb6a" alt=""
可以根据 Computed variables
画出更多的几何形状
penguins %>%
ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
geom_point() +
layer(
stat = "ellipse",
geom = "path",
mapping = aes(color = after_stat(y)),
params = list(type = "norm"),
position = "identity"
)
data:image/s3,"s3://crabby-images/3646c/3646c13421ba1131dcfb7f63a1437733cf381db5" alt=""
28.16 stat_summary
每一个x位置上, summary on y
说明
- stat_summary() operates on unique x or y;
- stat_summary_bin() operates on binned x or y.
Summary functions
fun.data : Complete summary function. Should take numeric vector as input and return data frame as output
fun.min : min summary function (should take numeric vector and return single number)
fun : main summary function (should take numeric vector and return single number)
fun.max : max summary function (should take numeric vector and return single number)
适用几何形状
- geom_errorbar() / geom_pointrange() /geom_linerange() / geom_crossbar() /geom_point()
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
layer(
stat = "summary",
params = list(fun.data = "mean_cl_normal"),
geom = "errorbar",
position = "identity"
)
data:image/s3,"s3://crabby-images/c42f4/c42f4bc8dbbad31445b71ae7e57c7f2de935f233" alt=""
penguins %>%
ggplot(aes(x = species, y = bill_depth_mm)) +
stat_summary(
fun.data = mean_cl_normal,
geom = "errorbar"
)
data:image/s3,"s3://crabby-images/14156/141561aeb4e9ce28f3c1a99ef9f52facf8d82b61" alt=""
penguins %>%
ggplot(aes(x = sex, y = bill_length_mm)) +
layer(
stat = "summary",
geom = "point",
mapping = aes(size = after_stat(ymin)),
position = "identity"
)
data:image/s3,"s3://crabby-images/187e9/187e9e8327935fc8a09e7fb068069d25c2d3a71c" alt=""
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
geom_point() +
layer(
geom = "point",
stat = "summary",
params = list(fun = "mean", color = "red", size = 5),
position = "identity"
)
data:image/s3,"s3://crabby-images/10606/106065c2530c2c0424df0f06efe8403234c4037a" alt=""
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
layer(
geom = "point",
stat = "summary",
params = list(fun = median),
mapping = aes(y = stage(start = bill_length_mm, after_stat = y)),
position = "identity"
)
data:image/s3,"s3://crabby-images/3ee9b/3ee9b391e3cf16d3b7eea65df21a0f84bd559fc6" alt=""
penguins %>%
ggplot(aes(x = sex, y = bill_length_mm)) +
geom_point() +
layer(
geom = "pointrange",
stat = "summary",
params = list(fun.data = ~mean_se(., mult = 5), color = "red", size = 2),
position = "identity"
)
data:image/s3,"s3://crabby-images/72748/72748cd4f998fb084bb69159818038a1a2687448" alt=""
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
geom_point() +
stat_summary(
geom = "point",
fun = "mean",
color = "red",
size = 5
)
data:image/s3,"s3://crabby-images/e8406/e840610017f07dc77b4739a8c793383eb8b1f362" alt=""
penguins %>%
ggplot(aes( x = body_mass_g, y = species)) +
geom_jitter() +
stat_summary(
fun = mean,
geom = "point",
size = 5,
color = "red",
alpha = 1
)
data:image/s3,"s3://crabby-images/829fb/829fb7d6ec905e3d0d5d166bc374ca3378669416" alt=""
penguins %>%
ggplot(aes(x = sex, y = bill_length_mm)) +
geom_point() +
stat_summary(
fun.data = ~mean_se(., mult = 5),
color = "red",
geom = "pointrange",
size = 2
)
data:image/s3,"s3://crabby-images/fc1d1/fc1d197ab7c4df029d43d63eedf9f2c2377f44cf" alt=""
penguins %>%
ggplot(aes(x = sex, y = bill_length_mm)) +
geom_point() +
geom_pointrange(
stat = "summary",
fun.data = ~mean_se(., mult = 5),
color = "red",
size = 2
)
data:image/s3,"s3://crabby-images/27bd6/27bd6a58abd26634bb6da13e72d24a4833bf5948" alt=""
penguins %>%
ggplot(aes(x = sex, y = bill_length_mm)) +
geom_point() +
stat_summary(
fun.data = mean_cl_boot,
color = "red",
geom = "pointrange"
)
data:image/s3,"s3://crabby-images/61806/61806e5a60e966a3c4a51571575a21720b58c072" alt=""
penguins %>%
ggplot(aes(x = sex, y = bill_length_mm)) +
geom_point() +
stat_summary(
fun = mean,
fun.min = min,
fun.max = max,
geom = "pointrange",
color = "red",
size = 5
)
data:image/s3,"s3://crabby-images/a9103/a91030639aa63d3442488895e45c63f90818e751" alt=""
penguins %>%
ggplot(aes(x = sex, y = bill_length_mm)) +
geom_point() +
stat_summary(
fun.data = ~mean_se(., mult = 5),
color = "red",
geom = "pointrange"
)
data:image/s3,"s3://crabby-images/b2b9d/b2b9de8de3245dd21d9a68ecb2694f689a921994" alt=""
penguins %>%
ggplot(aes(x = species, y = bill_length_mm, group = sex)) +
geom_point() +
stat_summary(
fun.data = ~mean_se(., mult = 2),
color = "red",
geom = "pointrange"
)
data:image/s3,"s3://crabby-images/18ab6/18ab61df1e015f8f1a6a20fc20afcccf5a8ac370" alt=""
penguins %>%
ggplot(aes(x = species, y = bill_length_mm, group = sex)) +
stat_summary(fun = mean,
fun.min = function(x) mean(x) - sd(x),
fun.max = function(x) mean(x) + sd(x),
geom = "pointrange") +
stat_summary(fun = mean,
geom = "line") +
facet_wrap(~ sex)
data:image/s3,"s3://crabby-images/8ca98/8ca98602fa90c08f51b0d05c8bedbb3978c0c470" alt=""
28.16.1 自定义函数
# Summary function for stat_summary(fun.data = ...).
# Takes the values of one group and returns a one-row tibble whose `y`
# column holds how many values the group contains.
my_count <- function(x) {
  n_obs <- length(x)
  tibble(y = n_obs)
}
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
stat_summary(
geom = "bar",
fun.data = my_count
)
data:image/s3,"s3://crabby-images/3ac78/3ac7826b90463c53b8b7b2e950730dc05a4a8ff8" alt=""
# Bar chart of the per-species observation count, written from the geom
# side: geom_bar() with stat = "summary" and my_count() as the summary.
# No trailing comma after the last argument: it would pass an empty
# argument into `...`, which errors when `...` is collected with list().
penguins %>%
  ggplot(aes(x = species, y = bill_length_mm)) +
  geom_bar(
    stat = "summary",
    fun.data = my_count
  )
data:image/s3,"s3://crabby-images/5fc5c/5fc5c1e5caa5d6717296d297508de2d6539b13d3" alt=""
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
layer(
geom = "bar",
stat = "summary",
params = list(fun.data = my_count),
position = "identity"
)
data:image/s3,"s3://crabby-images/b5438/b54384d0e8a845aabfb8697adb92a34b453250e1" alt=""
28.16.2 添加文本
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
geom_point() +
stat_summary(
geom = "point",
fun = "mean",
color = "red",
size = 5
) +
stat_summary(
aes(label = after_stat(y)),
geom = "text",
fun.data = "mean_se",
color = "red",
size = 5
)
data:image/s3,"s3://crabby-images/13de6/13de61a668618b2146104ed5e1bdfd48a307737c" alt=""
#' Build a one-row label describing the size of a group.
#'
#' Meant to be passed as `fun.data` to `stat_summary(geom = "text")`.
#'
#' @param x Values of one group; only the number of values is used.
#' @param y_pos Vertical position at which the label is drawn. Defaults to
#'   62, the constant used originally.
#' @param threshold Group size above which the label is coloured "red"
#'   instead of "blue". Defaults to 100.
#' @return A one-row data.frame with columns `y`, `label` and `color`.
n_fun <- function(x, y_pos = 62, threshold = 100) {
  n_obs <- length(x)
  # The condition is a single value, so plain if/else replaces ifelse().
  label_color <- if (n_obs > threshold) "red" else "blue"
  data.frame(
    y = y_pos,
    label = n_obs,
    color = label_color
  )
}
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
geom_boxplot() +
geom_jitter() +
stat_summary(
fun.data = n_fun,
geom = "text"
)
data:image/s3,"s3://crabby-images/9a1c6/9a1c6597fc722b03cea086ba9d04fe54e73ea907" alt=""
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
geom_point() +
stat_summary(
geom = "pointrange",
fun.data = "mean_cl_boot",
color = "red"
)
data:image/s3,"s3://crabby-images/41551/41551f764151ddaa1377ed56a358524258bb6a3c" alt=""
# Raw points per species, overlaid with a summary pointrange and two rows
# of text labels computed by stat_summary().
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
geom_point() +
# Layer 1: pointrange from mean_se() called with mult = 5.
stat_summary(
geom = "pointrange",
fun.data = ~ mean_se(., mult = 5),
color = "red",
size = 1
) +
# Layer 2: the group mean as text. stage() feeds bill_length_mm to the stat,
# then pins the label's y position to the constant 30; the label itself is
# the computed y (the mean) rounded to 2 digits.
stat_summary(
fun = "mean",
geom = "text",
mapping = aes(y = stage(bill_length_mm, after_stat = 30),
label = round(after_stat(y), 2)),
color = "blue",
size = 5
) +
# Layer 3: the group size as text. fun = "length" makes the computed y the
# number of values per group; the label is drawn at the constant y = 62.
stat_summary(
fun = "length",
geom = "text",
mapping = aes(y = stage(bill_length_mm, after_stat = 62),
label = after_stat(y)
),
color = "black",
size = 5
)
data:image/s3,"s3://crabby-images/cf580/cf580afb367e44c3e10c92763267a3ca85319153" alt=""
28.16.3 更多
# Summary function for stat_summary(fun.data = ...).
# Returns a one-row tibble: `y` is the median of `x`, and `fill` is "red"
# when that median lies below `threshold`, otherwise "gray50".
calc_median_and_fill <- function(x, threshold = 40) {
  med <- median(x)
  bar_fill <- if_else(med < threshold, "red", "gray50")
  tibble(y = med, fill = bar_fill)
}
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
stat_summary(
fun.data = calc_median_and_fill,
geom = "bar"
)
data:image/s3,"s3://crabby-images/7bb47/7bb47d6305a9d453b68ab67270a6c175a53550c8" alt=""
# Summary function for stat_summary(fun.data = ...).
# Returns a one-row tibble: `y` is the median of `x`, and `color` is "red"
# when that median lies below `threshold`, otherwise "gray50".
calc_median_and_color <- function(x, threshold = 40) {
  med <- median(x)
  point_color <- if_else(med < threshold, "red", "gray50")
  tibble(y = med, color = point_color)
}
penguins %>%
ggplot(aes(x = species, y = bill_length_mm)) +
stat_summary(
fun.data = calc_median_and_color,
geom = "point",
size = 5
)
data:image/s3,"s3://crabby-images/6b754/6b7545308793ec5b584bf625d2e2ca2a5713a88f" alt=""
# Summarise bill depth per species and size each summary mark by the share
# of rows that group contributes.
penguins %>%
ggplot(aes(species, bill_depth_mm)) +
# No geom is given, so stat_summary()'s default geom is used.
stat_summary(
fun.data = function(x) {
# Number of values in this group divided by the row count of the full
# penguins data frame (read from the enclosing environment).
scaled_size <- length(x)/nrow(penguins)
# mean_se() supplies the summary columns; `size` is appended to its result
# so the summary row also carries the scaled size.
mean_se(x) %>%
mutate(size = scaled_size)
}
)
data:image/s3,"s3://crabby-images/4e0bd/4e0bdecf0b95227a5d7631e94b7458b3e42ad0ac" alt=""
penguins %>%
ggplot(aes(species, bill_depth_mm)) +
geom_point(position = position_jitter(width = .2), alpha = .3) +
stat_summary(fun = mean,
na.rm = TRUE,
geom = "point",
color = "dodgerblue",
size = 4,
shape = "diamond") +
stat_summary(fun.data = mean_cl_normal,
na.rm = TRUE,
geom = "errorbar",
width = .2,
color = "dodgerblue") +
stat_summary(fun = mean,
na.rm = TRUE,
aes(group = 1),
geom = "line",
color = "dodgerblue",
size = .75)
data:image/s3,"s3://crabby-images/73338/7333895fbd30a0a57b0005268897297fbd3e6395" alt=""
# Raw points jitter-dodged by sex, with the per-group mean (diamond) and a
# mean_cl_normal error bar dodged by the same width (0.7).
ggplot(
  data = penguins,
  mapping = aes(x = species, y = bill_depth_mm, group = sex, color = sex)
) +
  geom_point(
    alpha = 0.1,
    position = position_jitterdodge(jitter.width = 0.2, dodge.width = 0.7)
  ) +
  stat_summary(
    geom = "point",
    fun = mean,
    na.rm = TRUE,
    position = position_dodge(width = 0.7),
    color = "black",
    shape = "diamond",
    size = 4
  ) +
  stat_summary(
    geom = "errorbar",
    fun.data = mean_cl_normal,
    na.rm = TRUE,
    position = position_dodge(width = 0.7),
    color = "black",
    width = 0.2
  ) +
  scale_color_brewer(palette = "Set1")
data:image/s3,"s3://crabby-images/05a65/05a657db8fd38602da9066b1171f0ebce1be6124" alt=""
# Same layers as the dodged mean/error-bar plot, split into one panel per sex.
ggplot(
  data = penguins,
  mapping = aes(x = species, y = bill_depth_mm, group = sex, color = sex)
) +
  geom_point(
    alpha = 0.1,
    position = position_jitterdodge(jitter.width = 0.2, dodge.width = 0.7)
  ) +
  stat_summary(
    geom = "point",
    fun = mean,
    na.rm = TRUE,
    position = position_dodge(width = 0.7),
    color = "black",
    shape = "diamond",
    size = 4
  ) +
  stat_summary(
    geom = "errorbar",
    fun.data = mean_cl_normal,
    na.rm = TRUE,
    position = position_dodge(width = 0.7),
    color = "black",
    width = 0.2
  ) +
  scale_color_brewer(palette = "Set1") +
  facet_wrap(~sex)
data:image/s3,"s3://crabby-images/280f2/280f20b377e8fbecf9c88f507af8bb34c465da27" alt=""
28.17 stat_summary_bin
对落入每个x区间(bin)内的y值,用指定的函数进行统计汇总(也可以通过orientation调整方向,即对落入每个y区间内的x值进行统计汇总)
# layer() form of stat_summary_bin: bars drawn from the "summary_bin" stat
# with fun = mean and orientation "x".
ggplot(data = penguins, mapping = aes(x = bill_depth_mm, y = bill_length_mm)) +
  layer(
    geom = "bar",
    stat = "summary_bin",
    position = "identity",
    params = list(orientation = "x", fun = mean, color = "red")
  )
data:image/s3,"s3://crabby-images/78a45/78a45103c0d9f018022b158747709b059f71bfc7" alt=""
# stat_*() form of the same plot.
ggplot(data = penguins, mapping = aes(x = bill_depth_mm, y = bill_length_mm)) +
  stat_summary_bin(
    geom = "bar",
    orientation = "x", # bin on x axis, summary mean on y
    fun = mean,
    color = "red"
  )
data:image/s3,"s3://crabby-images/dc889/dc889d422071ebcf8bbcb6d9da38ef9e3c1040f6" alt=""
# Same stat with the orientation switched to "y".
ggplot(data = penguins, mapping = aes(x = bill_depth_mm, y = bill_length_mm)) +
  stat_summary_bin(
    geom = "bar",
    orientation = "y",
    fun = mean,
    color = "red"
  )
data:image/s3,"s3://crabby-images/48a53/48a535ce79562669fdf617b9316810c8b3c17c2c" alt=""
# geom_*() form: geom_bar() driven by the "summary_bin" stat.
ggplot(data = penguins, mapping = aes(x = bill_depth_mm, y = bill_length_mm)) +
  geom_bar(
    color = "red",
    fun = mean,
    stat = "summary_bin"
  )
data:image/s3,"s3://crabby-images/e34bb/e34bbaedcc68b5d0fb2abc638688a19845007bd2" alt=""
# stat_*() form with the orientation set to "y".
ggplot(data = penguins, mapping = aes(x = bill_depth_mm, y = bill_length_mm)) +
  stat_summary_bin(
    geom = "bar",
    orientation = "y", # bin on y axis, summary mean on x
    fun = mean,
    color = "red"
  )
data:image/s3,"s3://crabby-images/7bdd9/7bdd99decd9806c309b1ae80141bce428a99111a" alt=""
28.18 stat_function()
函数曲线
Computed variables
- x: x values along a grid
- y: value of the function evaluated at corresponding x
默认几何形状
- geom_line()
适用几何形状
- geom_line() / geom_point() /geom_function()
# Draw dnorm(mean = 0, sd = 0.5) as points through the "function" stat,
# with the x axis limited to [-2, 2].
ggplot(data = tibble(x = runif(n = 100, min = -5, max = 5))) +
  layer(
    geom = "point",
    stat = "function",
    position = "identity",
    params = list(fun = dnorm, args = list(mean = 0, sd = 0.5))
  ) +
  xlim(-2, 2)
data:image/s3,"s3://crabby-images/e2862/e28624c9cb59f98ec9b89a8dfcd2dc28abf1306c" alt=""
# Same layer, with the function given as a formula: 0.5 * exp(-|x|).
ggplot(data = tibble(x = runif(n = 100, min = -5, max = 5))) +
  layer(
    geom = "point",
    stat = "function",
    position = "identity",
    params = list(fun = ~ 0.5 * exp(-abs(.x)))
  ) +
  xlim(-2, 2)
data:image/s3,"s3://crabby-images/779ff/779fffe79ba9cc01d5a9fff04386c857dc1d22ac" alt=""
28.19 stat_spoke()
将角度和半径转换为xend和yend,可以看作是geom_segment()的另外一种形式
# layer() form: spokes of fixed radius 0.5 whose angle is derived from
# flipper_length_mm.
penguins %>%
  mutate(angle = flipper_length_mm / (2 * pi)) %>%
  ggplot(mapping = aes(x = bill_length_mm, y = bill_depth_mm)) +
  layer(
    geom = "spoke",
    stat = "identity",
    position = "identity",
    mapping = aes(angle = angle),
    params = list(radius = 0.5)
  )
data:image/s3,"s3://crabby-images/78e25/78e2528e8f2e9538c85edec39619a1c82d9e0478" alt=""
# geom_*() form of the spoke plot.
penguins %>%
  mutate(angle = flipper_length_mm / (2 * pi)) %>%
  ggplot(mapping = aes(x = bill_length_mm, y = bill_depth_mm)) +
  geom_spoke(
    radius = 0.5,
    mapping = aes(angle = angle)
  )
data:image/s3,"s3://crabby-images/26267/262673bb4f236601cf6b61c9a5dea86e967fb62c" alt=""
28.20 stat_quantile()
分位数回归
Computed variables
- quantile: quantile of distribution
默认几何形状
- geom_quantile()
适用几何形状
- geom_line() / geom_point() / geom_quantile()
# Quantile regression lines for the 0.25, 0.5 and 0.75 quantiles.
ggplot(data = penguins, mapping = aes(x = bill_length_mm, y = bill_depth_mm)) +
  layer(
    geom = "quantile",
    stat = "quantile",
    position = "identity",
    params = list(quantiles = c(0.25, 0.5, 0.75))
  )
data:image/s3,"s3://crabby-images/3436e/3436e11565f10f3b48a99523a7a309c25543947a" alt=""
# Same stat drawn as points, coloured by the computed `quantile` variable.
ggplot(data = penguins, mapping = aes(x = bill_length_mm, y = bill_depth_mm)) +
  layer(
    geom = "point",
    stat = "quantile",
    position = "identity",
    mapping = aes(color = after_stat(quantile)),
    params = list(quantiles = c(0.25, 0.5, 0.75))
  )
data:image/s3,"s3://crabby-images/d7088/d70883586815999968a7e52fc989500695c908da" alt=""
28.21 stat_summary_2d()
落在x和y(长方形)区域上, summary on z
文档说stat_summary_2d() is a 2d variation of stat_summary().
个人觉得不完全准确
看参数,stat_summary() 是对每一个 x 统计汇总(summary),有多少个唯一的 x,就有多少个 value。而 stat_summary_2d() 有 bin 的参数,它是对落在 (x, y) 构成的、具有一定 binwidth 的长方形区域内的 z 统计汇总,有多少个长方形,就有多少个 value。
因此,文档的说法对于离散变量是正确的,但对于连续变量不准确。
Aesthetics
- x: horizontal position
- y: vertical position
- z: value passed to the summary function
Computed variables
- x, y: Location
- value: Value of summary statistic.
默认几何形状
- geom_tile() for stat_summary_2d()
- geom_hex() for stat_summary_hex()
# layer() form: tiles from the "summary_2d" stat, summarising z with the
# sum of squares.
ggplot(
  data = penguins,
  mapping = aes(x = bill_length_mm, y = bill_depth_mm, z = body_mass_g)
) +
  layer(
    geom = "tile",
    stat = "summary_2d",
    position = "identity",
    params = list(fun = ~ sum(.x^2))
  )
data:image/s3,"s3://crabby-images/370b5/370b564b21505daa69333698b26e72907edf8f14" alt=""
# stat_*() form drawn as points, with the computed `value` mapped to size.
ggplot(
  data = penguins,
  mapping = aes(x = bill_length_mm, y = bill_depth_mm, z = body_mass_g)
) +
  stat_summary_2d(
    mapping = aes(size = after_stat(value)),
    geom = "point",
    fun = ~ sum(.x^2) # summary statistic for z
  )
data:image/s3,"s3://crabby-images/4dfef/4dfefc1e1dbb4762b4261b2f9522f1559dab7105" alt=""
28.21.1 测试
## # A tibble: 329 × 2
## bill_length_mm bill_depth_mm
## <dbl> <dbl>
## 1 39.1 18.7
## 2 39.5 17.4
## 3 40.3 18
## 4 36.7 19.3
## 5 39.3 20.6
## 6 38.9 17.8
## 7 39.2 19.6
## 8 41.1 17.6
## 9 38.6 21.2
## 10 34.6 21.1
## # ℹ 319 more rows
说明有4个重叠的点。
sum
是一个点一个位置
# Label each position with the computed `n` from the "sum" stat, and colour
# the label by that same `n`.
ggplot(data = penguins, mapping = aes(x = bill_length_mm, y = bill_depth_mm)) +
  layer(
    stat = "sum",
    geom = "text",
    position = "identity",
    params = list(size = 4),
    mapping = aes(
      label = after_stat(n),
      color = as.factor(after_stat(n))
    )
  )
data:image/s3,"s3://crabby-images/bee9c/bee9ce3b59fbdf3bc104456f75c23931ce3e9801" alt=""
bin_2d
是一个bin一个统计
# Label each bin with the computed `count` from the "bin_2d" stat.
ggplot(data = penguins, mapping = aes(x = bill_length_mm, y = bill_depth_mm)) +
  layer(
    stat = "bin_2d",
    geom = "text",
    position = "identity",
    mapping = aes(label = stage(start = NULL, after_stat = count))
  )
data:image/s3,"s3://crabby-images/a5452/a54529ceeb8894262922783bfbf9627dd73b693d" alt=""
stat_summary_2d
也是一个bin一个位置
# Return the number of elements in `z`.
n_fun <- function(z) length(z)
# Label each bin with the value n_fun() computes from its z values.
ggplot(
  data = penguins,
  mapping = aes(x = bill_length_mm, y = bill_depth_mm, z = body_mass_g)
) +
  stat_summary_2d(
    mapping = aes(label = after_stat(value)),
    geom = "text",
    fun = n_fun
  )
data:image/s3,"s3://crabby-images/70f65/70f653e18bf767b220b15d9d0c2dc9d6b66b4f64" alt=""
28.22 stat_summary_hex()
落在x和y(六边形)区域上, summary on z
# layer() form: the "summary_hex" stat drawn with tiles, summarising z with
# the sum of squares.
ggplot(
  data = penguins,
  mapping = aes(x = bill_length_mm, y = bill_depth_mm, z = body_mass_g)
) +
  layer(
    geom = "tile",
    stat = "summary_hex",
    position = "identity",
    params = list(binwidth = c(0.5, 0.2), fun = ~ sum(.x^2))
  )
data:image/s3,"s3://crabby-images/2d55a/2d55a81f100a355268db24977bf51a8bf89da190" alt=""
# stat_*() form of the same plot.
ggplot(
  data = penguins,
  mapping = aes(x = bill_length_mm, y = bill_depth_mm, z = body_mass_g)
) +
  stat_summary_hex(
    geom = "tile",
    # Numeric vector giving bin width in both vertical and horizontal directions
    binwidth = c(0.5, 0.2),
    fun = ~ sum(.x^2) # summary statistic for z
  )
data:image/s3,"s3://crabby-images/16f95/16f95f52326a81c61313f3a5a1e0f33f57145fb9" alt=""
28.23 stat_contour() and stat_contour_filled()
等高线、等高面,需要提供x,y,z映射
Computed variables
- level: Height of contour. For contour lines, this is a numeric vector that represents bin boundaries. For contour bands, this is an ordered factor that represents bin ranges.
- level_low, level_high, level_mid: (contour bands only) Lower and upper bin boundaries for each band, as well as the mid point between the boundaries.
- nlevel: Height of contour, scaled to maximum of 1.
- piece: Contour piece (an integer).
默认几何形状
- geom_contour() / geom_contour_filled()
适用几何形状
- geom_contour() / geom_contour_filled()
# layer() form: contour paths of bill_length_mm over integer-divided
# flipper length and body mass, coloured by the computed `level`.
penguins %>%
  mutate(
    body_mass_g = body_mass_g %/% 10,
    flipper_length_mm = flipper_length_mm %/% 10
  ) %>%
  ggplot(
    mapping = aes(x = flipper_length_mm, y = body_mass_g, z = bill_length_mm)
  ) +
  layer(
    geom = "path",
    stat = "contour",
    position = "identity",
    mapping = aes(colour = after_stat(level))
  )
data:image/s3,"s3://crabby-images/d4e29/d4e29c5116c141ef0f35348f63ed2f703c009a0b" alt=""
# stat_*() form of the contour plot.
penguins %>%
  mutate(
    body_mass_g = body_mass_g %/% 10,
    flipper_length_mm = flipper_length_mm %/% 10
  ) %>%
  ggplot(
    mapping = aes(x = flipper_length_mm, y = body_mass_g, z = bill_length_mm)
  ) +
  stat_contour(
    mapping = aes(colour = after_stat(level)),
    geom = "path"
  )
data:image/s3,"s3://crabby-images/ed57b/ed57ba3d41945060151317e0fb473619b7ad3c14" alt=""
# geom_*() form of the contour plot.
penguins %>%
  mutate(
    body_mass_g = body_mass_g %/% 10,
    flipper_length_mm = flipper_length_mm %/% 10
  ) %>%
  ggplot(
    mapping = aes(x = flipper_length_mm, y = body_mass_g, z = bill_length_mm)
  ) +
  geom_contour(mapping = aes(colour = after_stat(level)))
data:image/s3,"s3://crabby-images/e09a6/e09a6f4dcf73f755c9bae021ad66216a4b450e4f" alt=""
28.24 课后作业
- 写出对应的 stat_***() 版本和 geom_***() 版本
library(tidyverse)
library(palmerpenguins)
# Drop rows that contain missing values.
penguins <- drop_na(penguins)

# Mean bill length per species, one point per sex (factor order reversed),
# dodged by 0.5.
ggplot() +
  layer(
    data = penguins,
    geom = "point",
    stat = "summary",
    position = position_dodge(width = 0.5),
    params = list(fun = "mean"),
    mapping = aes(x = species, y = bill_length_mm, color = fct_rev(sex))
  )
data:image/s3,"s3://crabby-images/b5533/b55336080db2ad1360ae7746a2538ef9bdfb9127" alt=""
- 写出对应的
stat_***()
版本和layer()
版本
# Point size shows the computed `n` for each species/island pair.
ggplot(data = penguins, mapping = aes(x = species, y = island)) +
  geom_count(
    mapping = aes(size = after_stat(n)),
    show.legend = FALSE
  )
data:image/s3,"s3://crabby-images/2ff55/2ff55056261e2b3804a7cb9d8fc9d42714094cb2" alt=""
- 上题用layer()写,但要求不用 stat = "sum",而用 stat = "summary"