# 第 85 章 探索性数据分析-哺乳动物脑量与体重的关系

## 85.1 读取数据

library(tidyverse)

# NOTE(review): as written, `janitor::clean_names()` is called without a data
# frame, and nothing in the visible code assigns `data`. Presumably the step
# that reads the raw file, pipes it into `clean_names()` and stores the result
# as `data` was lost -- TODO restore it (the input file is not shown here).
janitor::clean_names()
# Print `data`; the output that follows shows a tibble with the columns
# species, body_weight_kg and brain_weight_g.
data
## # A tibble: 70 × 3
##   species         body_weight_kg brain_weight_g
##   <chr>                    <dbl>          <dbl>
## 1 Arctic fox                3.38           44.5
## 2 Owl monkey                0.48           15.5
## 3 Mountain beaver           1.35            8.1
## 4 Cow                     465             423
## 5 Grey wolf                36.3            19.5
## 6 Goat                     27.7           115
## # … with 64 more rows

## 85.2 数据变换

# Keep only the rows whose body and brain weights are both strictly positive,
# then replace every numeric column with its base-10 logarithm.
tb <- data %>%
  filter(body_weight_kg > 0 & brain_weight_g > 0) %>%
  mutate(across(where(is.numeric), log10))
tb
## # A tibble: 69 × 3
##   species         body_weight_kg brain_weight_g
##   <chr>                    <dbl>          <dbl>
## 1 Arctic fox               0.529          1.65
## 2 Owl monkey              -0.319          1.19
## 3 Mountain beaver          0.130          0.908
## 4 Cow                      2.67           2.63
## 5 Grey wolf                1.56           1.29
## 6 Goat                     1.44           2.06
## # … with 63 more rows

## 85.3 线性模型

# Ordinary least-squares fit of brain weight on body weight, using the
# columns of `tb` as they stand.
m <- lm(brain_weight_g ~ body_weight_kg, data = tb)

# Attach the residuals of `m` to `tb`; the plot below reads them from the
# `resid` column.
df <- modelr::add_residuals(data = tb, model = m)

## 85.4 可视化

library(colorspace)

# Scatter plot of brain weight against body weight with a least-squares
# trend line; each point is filled according to its model residual and
# labelled with the species name.
df %>%
  ggplot(aes(x = body_weight_kg, y = brain_weight_g)) +
  # Spell out the formula so geom_smooth() does not emit its
  # "using formula" message on every render.
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    se = TRUE,
    color = "gray60",
    fill = "gray70",
    linetype = "dashed"
  ) +
  geom_point(aes(fill = resid), color = "black", size = 4, shape = 21) +
  ggrepel::geom_text_repel(aes(label = species)) +
  # Both columns were already log10-transformed when `tb` was built, so no
  # scale_*_log10() is applied here; the axis labels state the transformed
  # units instead of reusing the raw column names.
  labs(
    x = "log10(body weight, kg)",
    y = "log10(brain weight, g)"
  ) +
  ggthemes::theme_economist() +
  theme(legend.position = "none") +
  scale_fill_continuous_diverging(palette = "Green-Orange")