Chapter 5 R Data and information visualization

变量类型:分类变量、离散数值变量、连续数值变量

library(ggplot2)
library(ggridges)
library(RColorBrewer)
library(viridis)

5.1 一个分类变量:条形图、折线图

5.1.1 条形图

# Bar chart comparing the number of male and female students.
# count() tallies the rows per 性别 into a column named 数目; geom_col() then
# draws bars whose heights are those precomputed values (it is the idiomatic
# equivalent of geom_bar(stat = "identity")).
df.out %>%
  count(性别, name = "数目") %>%
  ggplot(aes(x = 性别, y = 数目)) +
  geom_col(colour = "black")

5.1.2 折线图

# Line chart of the per-gender student counts.
# group = 1 places every row in a single group so that geom_line() joins all
# the counts with one line; the points mark the individual counts.
ggplot(
  data = count(df.out, 性别, name = "数目"),
  mapping = aes(性别, 数目, group = 1)
) +
  geom_line() +
  geom_point()

5.2 两个数值变量:散点图

# Scatter plot of each student's Chinese score (语文, x axis) against their
# math score (数学, y axis).
ggplot(data = df.out, mapping = aes(语文, 数学)) +
  geom_point()

# Math scores plotted against Chinese scores treated as discrete levels.
# reorder(语文, 数学) converts 语文 into a factor whose levels are sorted by
# the mean 数学 score observed at each 语文 value (mean is reorder()'s default
# summary function), so the x axis is ordered by math performance.
# NOTE(review): if the x axis was meant to be ordered by the Chinese score
# itself, this ordering is not that -- confirm the intent.
ggplot(
  data = df.out,
  mapping = aes(x = reorder(语文, 数学), y = 数学)
) +
  geom_point()

# Chinese (语文) vs. math (数学) scores, drawn in one panel per class (班级).
# The x axis uses 语文 as a factor ordered by the mean 数学 score at each level.
df.out %>%
  ggplot(aes(x = reorder(语文, 数学), y = 数学)) +
  geom_point() +
  # scales = "free" gives every panel its own axis ranges; space = "free_x"
  # makes each panel's width proportional to the length of its x scale.
  # The argument is spelled `scales` in full rather than relying on partial
  # argument matching of `scale`.
  facet_grid(. ~ 班级, scales = "free", space = "free_x")

5.3 一个分类变量,一个数值变量:箱型图、小提琴图、山脊图

5.3.1 箱型图

# Box plot of the students' average score (平均值) within each class (班级).
ggplot(data = df.out, mapping = aes(班级, 平均值)) +
  geom_boxplot()

# Box plot of the students' average score (平均值) split by gender (性别),
# with one fill colour per gender.
# NOTE(review): the earlier comment described this as grouped by class and
# gender, but only 性别 is mapped here -- confirm whether 班级 should also
# appear (e.g. on the x axis or as facets).
ggplot(data = df.out, mapping = aes(性别, 平均值, fill = 性别)) +
  geom_boxplot()

# Box plot of average score (平均值) by gender (性别) with every student
# overlaid as a jittered point.
df.out %>%
  # Make 性别 an explicit factor so it is treated as a discrete variable.
  mutate(性别 = factor(性别)) %>%
  ggplot(aes(x = 性别, y = 平均值, fill = 性别)) +
  # Hide the box plot's own outlier markers: geom_jitter() below already draws
  # every observation, so leaving them on would show those students twice.
  geom_boxplot(outlier.shape = NA) +
  geom_jitter()

5.3.2 小提琴图

# Violin plot of the students' average score (平均值) for each gender (性别).
ggplot(data = df.out, mapping = aes(性别, 平均值)) +
  geom_violin()

# Violin plot of average score (平均值) by gender (性别), filled per gender,
# with the individual students overlaid as jittered points.
ggplot(data = df.out, mapping = aes(性别, 平均值, fill = 性别)) +
  geom_violin() +
  geom_jitter()

5.3.3 山脊图

# Ridgeline plot: one density curve of the average score (平均值) per class
# (班级), stacked along the y axis.
ggplot(data = df.out, mapping = aes(平均值, 班级)) +
  geom_density_ridges()
## Picking joint bandwidth of 2.41

# Ridgeline plot of Chinese scores (语文) per class (班级), with the area under
# each ridge coloured by the score value itself.
df.out %>%
  ggplot(aes(x = 语文, y = 班级)) +
  # after_stat(x) maps fill to the x position computed by the density stat;
  # it replaces the deprecated stat(x) notation.
  geom_density_ridges_gradient(aes(fill = after_stat(x))) +
  # Reversed 11-colour "Spectral" palette interpolated to 32 colours; the empty
  # name suppresses the legend title.
  scale_fill_gradientn(
    name = "",
    colours = colorRampPalette(rev(brewer.pal(11, "Spectral")))(32)
  )
## Picking joint bandwidth of 2.24

# Ridgeline plot of Chinese scores (语文) per class (班级), with the area under
# each ridge coloured by the estimated density at that score, i.e. by how
# common scores in that region are.
df.out %>%
  ggplot(aes(x = 语文, y = 班级)) +
  # after_stat(density) maps fill to the density computed by the stat; it
  # replaces the deprecated ..density.. notation.
  geom_density_ridges_gradient(aes(fill = after_stat(density))) +
  # Reversed 11-colour "Spectral" palette interpolated to 32 colours; the empty
  # name suppresses the legend title.
  scale_fill_gradientn(
    name = "",
    colours = colorRampPalette(rev(brewer.pal(11, "Spectral")))(32)
  )
## Picking joint bandwidth of 2.24