第16章 ggplot2示例
16.1 柱状图示例
我们以ggplot2包自带的mpg数据集为例进行作图演示。mpg数据集包含了美国环境保护署对38种车型的观察数据,包括11个变量和234条记录。变量说明如下:
变量名称 | 变量说明 | 变量名称 | 变量说明 | 变量名称 | 变量说明 |
---|---|---|---|---|---|
manufacturer | 制造商 | model | 汽车型号 | displ | 发动机排量 |
year | 生产年份 | cyl | 气缸数量 | trans | 变速器类型 |
drv | 驱动类型 | cty | 每加仑城市道路英里数 | hwy | 每加仑高速公路英里数 |
fl | 汽油类型 | class | 汽车类型 | | |
# 加载包
library(tidyverse) # 作图包
library(cowplot) # 排版包
# Load the mpg dataset.
data(mpg)
# Turn cyl, drv and fl into factors: the x variable has to be categorical
# before the order of the cyl bars can be rearranged.
mpg <- mutate(mpg, across(c(cyl, drv, fl), as_factor))
# Plain bar chart: one bar per cyl value, height = number of rows.
p1 <- mpg %>%
  ggplot(aes(x = cyl)) +
  geom_bar() +
  labs(title = "气缸数量柱状图", x = "气缸数量", y = "计数")
# Bar chart of cyl with the bars split side by side according to drv.
p2 <- mpg %>%
  ggplot(aes(x = cyl, fill = drv)) +
  geom_bar(position = "dodge2") + # same as position_dodge2() with defaults
  labs(title = "不同驱动水平的柱状图", x = "气缸数量", y = "计数")
# Same dodged bar chart, with the cyl categories placed in a custom order
# along the x axis.
p3 <- mpg %>%
  ggplot(aes(x = cyl, fill = drv)) +
  geom_bar(position = "dodge2") + # same as position_dodge2() with defaults
  scale_x_discrete(limits = c("5", "4", "6", "8")) +
  labs(title = "气缸数量顺序调整的柱状图", x = "气缸数量", y = "计数")
# Bar chart with the count written above every bar.
# geom_bar() is mostly used to count a single variable; geom_col() draws
# heights supplied by a second variable, so the counts are tabulated first.
p4 <- mpg %>%
  count(cyl, drv, name = "count") %>%
  ggplot(aes(x = cyl, y = count)) +
  geom_col(
    mapping = aes(fill = drv),
    # Bars default to a width of 0.9; without preserve = "single" a bar
    # takes the width of the whole group.
    position = position_dodge2(preserve = "single")
  ) +
  geom_text(
    mapping = aes(label = count), # the text to print
    # Width and position must agree with the settings used for geom_col.
    position = position_dodge2(width = 0.9, preserve = "single"),
    vjust = -0.2, # vertical adjustment
    hjust = 0.5 # horizontal adjustment
  ) +
  labs(title = "插入计数文本", x = "气缸数量", y = "计数")

# Arrange the four bar charts in a grid with two rows.
plot_grid(p1, p2, p3, p4, nrow = 2)
16.2 散点图示例
我们继续以ggplot2包自带的mpg数据集为例进行作图演示。
# Plain scatter plot of cty against hwy.
p1 <- mpg %>%
  ggplot(aes(x = cty, y = hwy)) +
  geom_point() +
  labs(title = "单一散点图")
# Scatter plot of cty against hwy, points coloured by drv.
p2 <- mpg %>%
  ggplot(aes(x = cty, y = hwy, color = drv)) +
  geom_point() +
  labs(title = "不同drv水平的散点图")
# Scatter plot with a customised legend placed inside the panel.
p3 <- ggplot(data = mpg, aes(x = cty, y = hwy, color = drv)) +
  geom_point() +
  scale_color_discrete(
    name = "drive", # legend title
    breaks = c("f", "r", "4"), # legend entries
    labels = c("front wheel drive", "rear wheel drive", "4 wheel drive") # entry labels
  ) +
  theme(
    # "top", "bottom", ... put the legend outside the panel. For a legend
    # inside the panel, ggplot2 >= 3.5.0 expects "inside" here and the
    # coordinates in legend.position.inside (a numeric legend.position is
    # deprecated).
    legend.position = "inside",
    legend.position.inside = c(0.1, 0.95),
    legend.justification = c(0, 1) # anchor point of the legend box
  ) +
  labs(title = "图例调整的散点图")
# Scatter plot with one linear trend line per drv level.
p4 <- ggplot(data = mpg, aes(x = cty, y = hwy, color = factor(drv))) +
  geom_point() +
  # Pass a real formula object instead of the string "y~x".
  geom_smooth(method = "lm", formula = y ~ x) +
  scale_color_discrete(
    name = "drive", # legend title
    breaks = c("f", "r", "4"), # legend entries
    labels = c("front wheel drive", "rear wheel drive", "4 wheel drive") # entry labels
  ) +
  theme(
    # ggplot2 >= 3.5.0: a numeric legend.position is deprecated; request an
    # inside legend and give the coordinates separately.
    legend.position = "inside",
    legend.position.inside = c(0.1, 0.95)
  ) +
  labs(title = "增加趋势线的散点图")

# Arrange the four scatter plots in a grid with two rows.
plot_grid(p1, p2, p3, p4, nrow = 2)
16.3 多个数据框作图
当我们需要将多个数据框做成一张图时,可以在各个`geom_*()`函数中分别使用`data = <DATA>`声明需要作图的数据框。
# Build two data frames with the same day index (1 to 6) and different bmi
# series.
bmi_df1 <- data.frame(
  day = seq_len(6),
  bmi = c(23, 24, 25, 24.5, 25, 25.5)
)
bmi_df2 <- data.frame(
  day = seq_len(6),
  bmi = c(24, 24, 23, 23.5, 22, 23.5)
)
# Draw one line per data frame in a single plot; each geom_line() receives
# its own data argument.
ggplot() +
  geom_line(
    data = bmi_df1, # data frame for this layer
    # Mapping color to a constant string creates a legend entry for the layer.
    mapping = aes(x = day, y = bmi, color = "数据框1"),
    linewidth = 1.5 # line thickness (`size` for lines is deprecated since ggplot2 3.4.0)
  ) +
  geom_line(
    data = bmi_df2, # data frame for this layer
    mapping = aes(x = day, y = bmi, color = "数据框2"),
    linewidth = 1.5 # line thickness
  ) +
  scale_color_manual(
    name = "数据框", # legend title
    breaks = c("数据框1", "数据框2"), # legend entries
    values = c("#80c97f", "#a68dc8") # colour of each entry
  ) +
  scale_x_continuous(breaks = 1:6) + # x axis ticks
  scale_y_continuous(breaks = seq(21.5, 26, 0.5)) + # y axis ticks
  theme_classic() +
  theme(
    # ggplot2 >= 3.5.0: a numeric legend.position is deprecated; request an
    # inside legend and give the coordinates separately.
    legend.position = "inside",
    legend.position.inside = c(0.1, 0.95),
    legend.title = element_blank(), # hide the legend title
    legend.background = element_rect(color = "black") # black border around the legend
  )
16.4 双向柱状图
# Load the mpg dataset.
data(mpg)

# Mean hwy per class for 1999. The mean is negated so that these bars are
# drawn in the opposite direction.
# summarise() already returns only class and mean_hwy, so no select() is
# needed afterwards.
hwy_df1 <- mpg %>%
  filter(year == 1999) %>%
  group_by(class) %>%
  summarise(mean_hwy = -mean(hwy))

# Mean hwy per class for 2008 (kept positive).
hwy_df2 <- mpg %>%
  filter(year == 2008) %>%
  group_by(class) %>%
  summarise(mean_hwy = mean(hwy))
# Diverging (two-sided) bar chart: 1999 values were negated so they extend to
# one side, 2008 values to the other; coord_flip() makes the bars horizontal.
ggplot() +
  geom_col(
    data = hwy_df1, # 1999 data
    mapping = aes(x = class, y = mean_hwy, fill = "Year 1999")
  ) +
  geom_text(
    data = hwy_df1,
    mapping = aes(
      x = class,
      y = mean_hwy,
      # Print the absolute value with one decimal place.
      label = sprintf("%0.1f", round(abs(mean_hwy), digits = 1))
    ),
    # NOTE(review): geom_col() here is not dodged, so this dodge setting is
    # presumably redundant — kept unchanged; confirm before removing.
    position = position_dodge2(width = 0.9, preserve = "single"),
    vjust = 0.4, # vertical adjustment
    hjust = -0.1 # horizontal adjustment
  ) +
  geom_col(
    data = hwy_df2, # 2008 data
    mapping = aes(class, mean_hwy, fill = "Year 2008")
  ) +
  geom_text(
    data = hwy_df2,
    mapping = aes(
      x = class,
      y = mean_hwy,
      # Print the value with one decimal place.
      label = sprintf("%0.1f", round(mean_hwy, digits = 1))
    ),
    # NOTE(review): see the note on the first geom_text() layer.
    position = position_dodge2(width = 0.9, preserve = "single"),
    vjust = 0.4, # vertical adjustment
    hjust = 1.1 # horizontal adjustment
  ) +
  scale_fill_manual(
    name = "Year", # legend title
    breaks = c("Year 1999", "Year 2008"), # legend entries
    values = c("#80c97f", "#a68dc8") # colour of each entry
  ) +
  scale_y_continuous(
    breaks = seq(-30, 30, 5), # tick positions
    limits = c(-30, 30), # axis range
    labels = abs(seq(-30, 30, 5)) # show magnitudes, hiding the sign trick
  ) +
  labs(
    # hwy is highway miles per gallon, not a speed.
    title = "Average highway miles per gallon of vehicle classes by year",
    x = "Vehicle classes",
    y = "Average highway miles per gallon"
  ) +
  coord_flip() + # swap the axes
  theme(
    legend.position = "right", # legend to the right of the panel
    legend.justification = "top", # anchor the legend at the top
    legend.title = element_blank(), # hide the legend title
    panel.grid = element_blank(), # remove grid lines
    panel.background = element_blank(), # remove panel background
    plot.title = element_text(hjust = 0.5) # centre the title
  )
16.5 双y轴图
# Load the mpg dataset.
data(mpg)

# Dual y-axis plot: bars show the number of rows per cyl and year (left axis),
# points and lines show the median cty per cyl and year (right axis).
mpg %>%
  mutate(year = as.factor(year)) %>%
  group_by(cyl, year) %>%
  summarise(
    count = n(),
    median_cty = median(cty),
    .groups = "drop" # return an ungrouped result and silence the grouping message
  ) %>%
  ggplot() +
  geom_col(
    aes(x = cyl, y = count, fill = year),
    position = position_dodge(preserve = "single") # keep bars at single-bar width
  ) +
  # One fill scale serves both the bars and the points (both map fill = year).
  # No layer maps the colour aesthetic, so no colour scale is needed.
  scale_fill_manual(
    values = c("#80c97f", "#a68dc8")
  ) +
  geom_point(
    aes(
      x = cyl,
      y = median_cty * 2, # rescaled so both series share the left axis
      fill = year
    ),
    shape = 21, # point shape 21: filled circle with a border
    size = 4, # point size
    color = "red", # border colour
    stroke = 1 # border width
  ) +
  geom_line(
    aes(
      x = cyl,
      y = median_cty * 2, # same rescaling as the points
      group = year # one line per year
    ),
    color = "black"
  ) +
  scale_y_continuous(
    name = "Count", # left axis title
    expand = c(0, 0), # no padding around the y range
    limits = c(0, 60), # left axis range
    breaks = seq(0, 60, 10), # left axis ticks
    sec.axis = sec_axis(
      transform = ~ . / 2, # undo the *2 rescaling for the right axis labels
      name = "Median city miles per gallon", # cty is city miles per gallon, not a speed
      breaks = seq(0, 30, 5) # right axis ticks
    )
  ) +
  theme(
    axis.line.y.right = element_line(color = "red"), # right axis line
    axis.text.y.right = element_text(color = "red"), # right axis tick labels
    axis.ticks.y.right = element_line(color = "red"), # right axis tick marks
    legend.position = "right", # legend to the right of the panel
    legend.justification = "top" # anchor the legend at the top
  )
16.6 棒棒糖图与哑铃图
在展示生存分析数据时,我们有时会用到由一根细线和一个点组成的棒棒糖图。如果将线的两端用点标记出来,则能组成哑铃图。
# Fix the random seed so the simulated data are reproducible.
set.seed(1)

# Data for the lollipop chart: five IDs, all in group 1.
df_1 <- data.frame(
  ID = letters[seq_len(5)],
  time_start = rnorm(n = 5, mean = 20, sd = 5), # start of observation
  time_obs = rnorm(n = 5, mean = 10, sd = 3), # observation window
  group = 1
)

# Second group: the same IDs with both times shifted by random amounts.
df_2 <- mutate(
  df_1,
  time_start = time_start + rnorm(n = 5, mean = 20, sd = 3),
  time_obs = time_obs + rnorm(n = 5, mean = 10, sd = 2),
  group = 2
)

# Stack both groups and make group categorical.
df <- rbind(df_1, df_2)
df[["group"]] <- as.factor(df[["group"]])
# Lollipop chart: a horizontal segment per ID with a dot at time_start.
p1 <- df_1 %>%
  ggplot(aes(x = time_start, y = ID)) +
  # x and y are inherited from the plot-level mapping; only the segment end
  # has to be given here.
  # NOTE(review): time_obs is generated as a window length, yet it is used as
  # an absolute end position — confirm that is intended.
  geom_segment(aes(xend = time_obs, yend = ID)) +
  geom_point(shape = 16, size = 4, color = "#2ca25f") + # point style
  labs(x = "观测时间")
# Dumbbell chart: for every ID, the time_start values of both groups are
# drawn as dots and joined by a line.
p2 <- ggplot(
  data = df,
  aes(
    x = time_start,
    y = reorder(ID, time_start), # order the IDs on the y axis by time_start
    color = group # colour the dots by group
  )
) +
  geom_line(aes(group = reorder(ID, time_start)), color = "black") + # join the dots of each ID
  geom_point(shape = 16, size = 4) + # point style
  labs(y = "ID", x = "观测时间") +
  scale_color_manual(values = c("#FC4E07", "#36BED9")) + # dot colours
  theme(
    legend.background = element_blank(), # no legend background
    # ggplot2 >= 3.5.0: a numeric legend.position is deprecated; request an
    # inside legend and give the coordinates separately.
    legend.position = "inside",
    legend.position.inside = c(0.85, 0.12)
  )

# Show the lollipop and dumbbell charts side by side.
plot_grid(p1, p2)