# 第 62 章 贝叶斯推断

library(tidyverse)
library(tidybayes)
library(rstan)
library(brms)
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())

• 频率学派，是从数据出发
• 贝叶斯。先假定参数有一个分布，看到数据后，再重新分配可能性。

Statistical inference is the process of using observed data to infer properties of the statistical distributions that generated that data.

$\Pr(\text{parameters} | \text{data}).$

$\underbrace{\Pr(\text{parameters} | \text{data})}_{\text{posterior}} = \frac{\overbrace{\Pr(\text{data} | \text{parameters})}^{\text{likelihood}} \overbrace{\Pr(\text{parameters})}^{\text{prior}}}{\underbrace{\Pr(\text{data})}_{evidence}} .$

## 62.1 学生身高的分布？

d <- readr::read_rds(here::here('demo_data', "height_weight.rds"))
head(d)
##   sex height weight
## 1 boy  173.7  59.93
## 2 boy  170.9  60.03
## 3 boy  182.1  62.77
## 4 boy  176.2  55.54
## 5 boy  167.1  56.65
## 6 boy  183.1  60.61

d %>%
summarise(
across(height, list(mean = mean, median = median, max = max, min = min, sd = sd))
)
##   height_mean height_median height_max height_min
## 1       164.9         164.2      185.2      142.3
##   height_sd
## 1     7.306
d %>%
ggplot(aes(x = height)) +
geom_density()

## 62.2 推断

• 均值可能是160，162，170，172，…, 或者说这个均值在一个范围之内，在这个范围内，有些值的可能性大，有些值可能性较低。比如，认为这值游离在(150,180)范围，其中168左右的可能最大，两端的可能性最低。如果寻求用数学语言来描述，它符合正态分布的特征

• 方差也可以假设在(0, 50)范围内都有可能，而且每个位置上的概率都相等

library(patchwork)
p1 <-
ggplot(data = tibble(x = seq(from = 100, to = 230, by = .1)),
aes(x = x, y = dnorm(x, mean = 168, sd = 20))) +
geom_line() +
xlab("height_mean") +
ylab("density")

p2 <-
ggplot(data = tibble(x = seq(from = -10, to = 55, by = .1)),
aes(x = x, y =  dunif(x, min  = 0,   max = 50))) +
geom_line() +
xlab("height_sd") +
ylab("density")

p1 + p2

### 62.2.1 参数空间

d_grid <- crossing(
mu = seq(from = 150, to = 190, length.out = 1000),
sigma = seq(from = 4,   to = 9,   length.out = 1000)
)

d_grid
## # A tibble: 1,000,000 × 2
##      mu sigma
##   <dbl> <dbl>
## 1   150  4
## 2   150  4.01
## 3   150  4.01
## 4   150  4.02
## 5   150  4.02
## 6   150  4.03
## # … with 999,994 more rows

### 62.2.2 likelihood

grid_function <- function(mu, sigma) {
dnorm(d$height, mean = mu, sd = sigma, log = T) %>% sum() } d_grid %>% mutate(log_likelihood = map2_dbl(mu, sigma, grid_function))  ### 62.2.3 prior d_grid %>% mutate(prior_mu = dnorm(mu, mean = 178, sd = 20, log = T), prior_sigma = dunif(sigma, min = 0, max = 50, log = T))  ### 62.2.4 posterior d_grid <- d_grid %>% mutate(log_likelihood = map2_dbl(mu, sigma, grid_function)) %>% mutate(prior_mu = dnorm(mu, mean = 168, sd = 20, log = T), prior_sigma = dunif(sigma, min = 0, max = 50, log = T)) %>% mutate(product = log_likelihood + prior_mu + prior_sigma) %>% mutate(probability = exp(product - max(product))) head(d_grid) ## # A tibble: 6 × 7 ## mu sigma log_likelihood prior_mu prior_sigma ## <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 150 4 -2179. -4.32 -3.91 ## 2 150 4.01 -2175. -4.32 -3.91 ## 3 150 4.01 -2171. -4.32 -3.91 ## 4 150 4.02 -2167. -4.32 -3.91 ## 5 150 4.02 -2163. -4.32 -3.91 ## 6 150 4.03 -2159. -4.32 -3.91 ## # … with 2 more variables: product <dbl>, ## # probability <dbl> d_grid %>% ggplot(aes(x = mu, y = sigma, z = probability)) + geom_contour() + labs( x = expression(mu), y = expression(sigma) ) + coord_cartesian( xlim = range(d_grid$mu),
ylim = range(d_grid\$sigma)
) +
theme(panel.grid = element_blank())
d_grid %>%
ggplot(aes(x = mu, y = sigma)) +
geom_raster(
aes(fill = probability),
interpolate = T
) +
scale_fill_viridis_c(option = "A") +
labs(
x = expression(mu),
y = expression(sigma)
) +
theme(panel.grid = element_blank())

### 62.2.5 sampling from posterior

d_grid_samples <-
d_grid %>%
sample_n(size = 1e4, replace = T, weight = probability)
d_grid_samples %>%
ggplot(aes(x = mu, y = sigma)) +
geom_point(size = .9, alpha = 1/15) +
scale_fill_viridis_c() +
labs(x = expression(mu[samples]),
y = expression(sigma[samples])) +
theme(panel.grid = element_blank())
d_grid_samples %>%
select(mu, sigma) %>%
pivot_longer(
cols = everything(),
names_to = "key",
values_to = "value"
) %>%
ggplot(aes(x = value)) +
geom_density(fill = "grey33", size = 0) +
scale_y_continuous(NULL, breaks = NULL) +
xlab(NULL) +
theme(panel.grid = element_blank()) +
facet_wrap(~key, scales = "free")

### 62.2.6 最高密度区间

library(tidybayes)

d_grid_samples %>%
select(mu, sigma) %>%
pivot_longer(
cols = everything(),
names_to = "key",
values_to = "value"
) %>%
group_by(key) %>%
mode_hdi(value)
## # A tibble: 2 × 7
##   key    value .lower .upper .width .point .interval
##   <chr>  <dbl>  <dbl>  <dbl>  <dbl> <chr>  <chr>
## 1 mu    165.   164.   166.     0.95 mode   hdi
## 2 sigma   7.38   6.60   8.04   0.95 mode   hdi