5 模型可视化

代码提供：胡慧怡何舒欣李清仪于芊

5.1 PartI

library(modelsummary)
url <- 'https://vincentarelbundock.github.io/Rdatasets/csv/HistData/Guerry.csv'
dat <- read.csv(url) 
dat$Small <- dat$Pop1831 > median(dat$Pop1831)
#数据的快速概述
datasummary_skim(dat)

	Unique (#)	Mean	SD	Min	Median	Max
X	86	43.5	25.0	1.0	43.5	86.0
dept	86	46.9	30.4	1.0	45.5	200.0
Crime_pers	85	19754.4	7504.7	2199.0	18748.5	37014.0
Crime_prop	86	7843.1	3051.4	1368.0	7595.0	20235.0
Literacy	50	39.3	17.4	12.0	38.0	74.0
Donations	85	7075.5	5834.6	1246.0	5020.0	37015.0
Infants	86	19049.9	8820.2	2660.0	17141.5	62486.0
Suicides	86	36522.6	31312.5	3460.0	26743.5	163241.0
Wealth	86	43.5	25.0	1.0	43.5	86.0
Commerce	84	42.8	25.0	1.0	42.5	86.0
Clergy	85	43.4	25.0	1.0	43.5	86.0
Crime_parents	86	43.5	25.0	1.0	43.5	86.0
Infanticide	81	43.5	24.9	1.0	43.5	86.0
Donation_clergy	86	43.5	25.0	1.0	43.5	86.0
Lottery	86	43.5	25.0	1.0	43.5	86.0
Desertion	86	43.5	25.0	1.0	43.5	86.0
Instruction	82	43.1	24.8	1.0	41.5	86.0
Prostitutes	63	141.9	521.0	0.0	33.0	4744.0
Distance	86	208.0	109.3	0.0	200.6	539.2
Area	84	6147.0	1398.2	762.0	6070.5	10000.0
Pop1831	86	378.6	148.8	129.1	346.2	989.9

#平衡表(又名“表1”)，各子组的平均值不同:
datasummary_balance(~Small, dat)

## Warning in sanitize_datasummary_balance_data(formula, data): These variables
## were omitted because they include more than 50 levels: Department.

## Warning in datasummary_balance(~Small, dat): Please install the `estimatr`
## package or set `dinm=FALSE` to suppress this warning.

		FALSE (N=43)		TRUE (N=43)
		Mean	Std. Dev.	Mean	Std. Dev.
X		41.4	27.4	45.6	22.3
dept		46.0	36.6	47.7	23.0
Crime_pers		18040.6	7638.4	21468.2	7044.3
Crime_prop		8422.5	3406.7	7263.7	2559.3
Literacy		37.9	19.1	40.6	15.6
Donations		7258.5	6194.1	6892.6	5519.0
Infants		20790.2	9363.5	17309.6	7973.0
Suicides		42565.4	37074.1	30479.8	23130.9
Wealth		51.0	23.9	36.0	23.9
Commerce		42.7	24.6	43.0	25.7
Clergy		39.1	26.7	47.7	22.7
Crime_parents		54.2	25.2	32.8	19.9
Infanticide		37.9	25.7	49.1	23.1
Donation_clergy		52.3	24.0	34.7	22.9
Lottery		54.8	23.0	32.2	21.7
Desertion		41.7	25.9	45.3	24.2
Instruction		46.7	26.7	39.6	22.5
Prostitutes		52.8	93.1	230.9	724.1
Distance		228.7	116.7	187.2	98.4
Area		5989.0	1142.8	6305.0	1612.4
Pop1831		272.4	53.4	484.8	137.3
		N	Pct.	N	Pct.
Region	C	13	30.2	4	9.3
	E	9	20.9	8	18.6
	N	4	9.3	13	30.2
	S	12	27.9	5	11.6
	W	4	9.3	13	30.2
	NA	1	2.3	0	0.0
MainCity	1:Sm	10	23.3	0	0.0
	2:Med	33	76.7	33	76.7
	3:Lg	0	0.0	10	23.3

#相关表
datasummary_correlation(dat)

	X	dept	Crime_pers	Crime_prop	Literacy	Donations	Infants	Suicides	Wealth	Commerce	Clergy	Crime_parents	Infanticide	Donation_clergy	Lottery	Desertion	Instruction	Prostitutes	Distance	Area	Pop1831
X	1	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.
dept	.92	1	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.
Crime_pers	−.12	−.20	1	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.
Crime_prop	−.28	−.29	.27	1	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.
Literacy	.09	.10	−.04	−.37	1	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.
Donations	.06	.27	−.04	−.13	−.13	1	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.
Infants	−.07	−.03	−.04	.27	−.41	.17	1	.	.	.	.	.	.	.	.	.	.	.	.	.	.
Suicides	−.18	−.15	−.13	.52	−.37	−.03	.29	1	.	.	.	.	.	.	.	.	.	.	.	.	.
Wealth	−.08	−.08	−.12	.46	−.28	.08	.34	.42	1	.	.	.	.	.	.	.	.	.	.	.	.
Commerce	−.03	.05	.05	.41	−.58	.30	.39	.48	.48	1	.	.	.	.	.	.	.	.	.	.	.
Clergy	.15	.06	.26	−.07	−.17	.09	−.06	−.32	−.11	−.12	1	.	.	.	.	.	.	.	.	.	.
Crime_parents	−.16	−.07	−.20	.36	−.20	−.02	.06	.35	.22	.18	−.18	1	.	.	.	.	.	.	.	.	.
Infanticide	.01	−.07	.27	−.13	.32	−.15	−.24	−.08	−.22	−.28	−.01	−.09	1	.	.	.	.	.	.	.	.
Donation_clergy	−.08	.00	−.18	.30	−.38	.25	.10	.19	.34	.18	.30	.29	−.23	1	.	.	.	.	.	.	.
Lottery	−.21	−.11	.00	.43	−.36	.15	.42	.49	.48	.45	−.28	.28	−.35	.36	1	.	.	.	.	.	.
Desertion	.10	.03	.33	−.26	.40	−.04	.00	−.47	−.23	−.36	.25	−.39	.11	−.41	−.30	1	.	.	.	.	.
Instruction	−.10	−.11	.05	.39	−.98	.14	.43	.36	.31	.59	.21	.21	−.32	.40	.37	−.37	1	.	.	.	.
Prostitutes	.17	.14	−.05	−.33	.30	−.07	−.28	−.21	−.32	−.27	.20	−.03	.16	−.04	−.28	.04	−.26	1	.	.	.
Distance	−.10	.04	−.51	.25	−.28	.08	.23	.41	.40	.38	−.31	.26	−.16	.28	.28	−.44	.24	−.37	1	.	.
Area	−.18	−.08	.22	.09	−.23	.18	.16	.00	.06	.18	.08	−.20	−.23	.02	.23	.04	.20	−.41	.06	1	.
Pop1831	.17	.09	.27	−.26	.09	.00	−.23	−.17	−.31	−.05	.29	−.40	.34	−.22	−.47	.11	−.11	.48	−.37	−.01	1

#两个变量和两个统计数据，嵌套在子组中
datasummary(Literacy + Commerce ~ Small * (mean + sd), dat)

	FALSE		TRUE
	mean	sd	mean	sd
Literacy	37.88	19.08	40.63	15.57
Commerce	42.65	24.59	42.95	25.75

#估计一个线性模型并显示结果
mod <- lm(Donations ~ Crime_prop, data = dat)
modelsummary(mod)

	Model 1
(Intercept)	9065.287
	(1738.926)
Crime_prop	−0.254
	(0.207)
Num.Obs.	86
R2	0.018
R2 Adj.	0.006
AIC	1739.0
BIC	1746.4
Log.Lik.	−866.516
F	1.505

#估计5个回归模型，并排显示结果，并将它们保存到Microsoft Word文档中
models <- list(
  "OLS 1"     = lm(Donations ~ Literacy + Clergy, data = dat),
  "Poisson 1" = glm(Donations ~ Literacy + Commerce, family = poisson, data = dat),
  "OLS 2"     = lm(Crime_pers ~ Literacy + Clergy, data = dat),
  "Poisson 2" = glm(Crime_pers ~ Literacy + Commerce, family = poisson, data = dat),
  "OLS 3"     = lm(Crime_prop ~ Literacy + Clergy, data = dat)
)
modelsummary(models, output = "table.docx")

5.2 PartII

url <- 'https://vincentarelbundock.github.io/Rdatasets/csv/palmerpenguins/penguins.csv'
dat <- read.csv(url)

# rescale mm -> cm
dat$bill_length_cm <- dat$bill_length_mm / 10
dat$flipper_length_cm <- dat$flipper_length_mm / 10

mod <- lm(bill_length_cm ~ flipper_length_cm + species, data = dat)
library(modelsummary)

modelplot(mod)

modelplot(mod, coef_omit = 'Interc')

cm <- c('speciesChinstrap' = 'Chinstrap',
        'speciesGentoo' = 'Gentoo', 
        'flipper_length_cm' = 'Flipper length (cm)')

modelplot(mod, coef_map = cm)

models <- list(
  "Small model" = lm(bill_length_cm ~ flipper_length_cm, data = dat),
  "Medium model" = lm(bill_length_cm ~ flipper_length_cm + body_mass_g, data = dat),
  "Large model" = lm(bill_length_cm ~ flipper_length_cm + body_mass_g + species, data = dat))

modelsummary(models, statistic = 'conf.int')

	Small model	Medium model	Large model
(Intercept)	−0.726	−0.344	0.984
	[−1.356, −0.097]	[−1.245, 0.557]	[0.215, 1.752]
flipper_length_cm	0.255	0.222	0.095
	[0.224, 0.286]	[0.158, 0.285]	[0.048, 0.142]
body_mass_g		0.000	0.000
		[0.000, 0.000]	[0.000, 0.000]
speciesChinstrap			0.939
			[0.867, 1.011]
speciesGentoo			0.207
			[0.088, 0.326]
Num.Obs.	342	342	342
R2	0.431	0.433	0.817
R2 Adj.	0.429	0.430	0.815
AIC	369.0	369.6	−12.6
BIC	380.5	385.0	10.4
Log.Lik.	−181.499	−180.813	12.313
F	257.092	129.365	375.333

modelplot(models, coef_omit = 'Interc')

modelplot(models, facet = TRUE)

library(wesanderson)
library(ggplot2)

modelplot(models) +
  labs(x = 'Coefficients', 
       y = 'Term names',
       title = 'Linear regression models of "Bill Length (cm)"',
       caption = "Data source: Gorman, Williams & Fraser (2014), packaged for R by @apreshill and @allison_horst") +
  scale_color_manual(values = wes_palette('Darjeeling1'))

modelplot(mod, size = 1, fatten = .7, color = 'darkgreen', linetype = 'dotted') + 
  theme_classic()

library(ggplot2)
library(modelsummary)

models <- list(
  lm(vs ~ carb + mpg + cyl, data = mtcars),
  lm(disp ~ carb + mpg + cyl, data = mtcars),
  lm(hp ~ carb + mpg + cyl, data = mtcars))

models <- dvnames(models)
modelplot(models, draw = FALSE)

##           term model     estimate    std.error      conf.low    conf.high
## 1  (Intercept)    vs   2.41742511   0.67622094    1.03224931   3.80260091
## 5  (Intercept)  disp 112.57276339 114.86315481 -122.71374324 347.85927003
## 9  (Intercept)    hp -10.56116383  68.75946117 -151.40853516 130.28620751
## 2         carb    vs  -0.06945116   0.03943402   -0.15022810   0.01132577
## 6         carb  disp -12.30144724   6.69827859  -26.02224894   1.41935446
## 10        carb    hp  17.75593287   4.00972816    9.54237706  25.96948867
## 3          mpg    vs  -0.01513960   0.01716410   -0.05029868   0.02001947
## 7          mpg  disp  -7.14964651   2.91550156  -13.12178072  -1.17751230
## 11         mpg    hp  -1.00486469   1.74527956   -4.57990780   2.57017842
## 4          cyl    vs  -0.23926135   0.05687969   -0.35577411  -0.12274859
## 8          cyl  disp  47.90105842   9.66160634   28.11015499  67.69196184
## 12         cyl    hp  20.60581208   5.78363747    8.75856779  32.45305638

modelplot(models, color = "black") + facet_grid(~model)

modelplot(mod, conf_level = .99)

modelplot(mod, conf_level = NULL)

library(ggplot2)

b <- list(geom_vline(xintercept = 0, color = 'orange'),
          annotate("rect", alpha = .1,
                   xmin = -.5, xmax = .5, 
                   ymin = -Inf, ymax = Inf),
          geom_point(aes(y = term, x = estimate), alpha = .3, 
                     size = 10, color = 'red', shape = 'square'))

modelplot(mod, background = b)

modelplot(models, draw = FALSE)

##           term model     estimate    std.error      conf.low    conf.high
## 1  (Intercept)    vs   2.41742511   0.67622094    1.03224931   3.80260091
## 5  (Intercept)  disp 112.57276339 114.86315481 -122.71374324 347.85927003
## 9  (Intercept)    hp -10.56116383  68.75946117 -151.40853516 130.28620751
## 2         carb    vs  -0.06945116   0.03943402   -0.15022810   0.01132577
## 6         carb  disp -12.30144724   6.69827859  -26.02224894   1.41935446
## 10        carb    hp  17.75593287   4.00972816    9.54237706  25.96948867
## 3          mpg    vs  -0.01513960   0.01716410   -0.05029868   0.02001947
## 7          mpg  disp  -7.14964651   2.91550156  -13.12178072  -1.17751230
## 11         mpg    hp  -1.00486469   1.74527956   -4.57990780   2.57017842
## 4          cyl    vs  -0.23926135   0.05687969   -0.35577411  -0.12274859
## 8          cyl  disp  47.90105842   9.66160634   28.11015499  67.69196184
## 12         cyl    hp  20.60581208   5.78363747    8.75856779  32.45305638