PartI
library (modelsummary)
url <- 'https://vincentarelbundock.github.io/Rdatasets/csv/HistData/Guerry.csv'
dat <- read.csv (url)
dat$ Small <- dat$ Pop1831 > median (dat$ Pop1831)
#数据的快速概述
datasummary_skim (dat)
Unique (#)
Missing (%)
Mean
SD
Min
Median
Max
X
86
0
43.5
25.0
1.0
43.5
86.0
dept
86
0
46.9
30.4
1.0
45.5
200.0
Crime_pers
85
0
19754.4
7504.7
2199.0
18748.5
37014.0
Crime_prop
86
0
7843.1
3051.4
1368.0
7595.0
20235.0
Literacy
50
0
39.3
17.4
12.0
38.0
74.0
Donations
85
0
7075.5
5834.6
1246.0
5020.0
37015.0
Infants
86
0
19049.9
8820.2
2660.0
17141.5
62486.0
Suicides
86
0
36522.6
31312.5
3460.0
26743.5
163241.0
Wealth
86
0
43.5
25.0
1.0
43.5
86.0
Commerce
84
0
42.8
25.0
1.0
42.5
86.0
Clergy
85
0
43.4
25.0
1.0
43.5
86.0
Crime_parents
86
0
43.5
25.0
1.0
43.5
86.0
Infanticide
81
0
43.5
24.9
1.0
43.5
86.0
Donation_clergy
86
0
43.5
25.0
1.0
43.5
86.0
Lottery
86
0
43.5
25.0
1.0
43.5
86.0
Desertion
86
0
43.5
25.0
1.0
43.5
86.0
Instruction
82
0
43.1
24.8
1.0
41.5
86.0
Prostitutes
63
0
141.9
521.0
0.0
33.0
4744.0
Distance
86
0
208.0
109.3
0.0
200.6
539.2
Area
84
0
6147.0
1398.2
762.0
6070.5
10000.0
Pop1831
86
0
378.6
148.8
129.1
346.2
989.9
#平衡表(又名“表1”),各子组的平均值不同:
datasummary_balance (~ Small, dat)
## Warning in sanitize_datasummary_balance_data(formula, data): These variables
## were omitted because they include more than 50 levels: Department.
## Warning in datasummary_balance(~Small, dat): Please install the `estimatr`
## package or set `dinm=FALSE` to suppress this warning.
FALSE (N=43)
TRUE (N=43)
Mean
Std. Dev.
Mean
Std. Dev.
X
41.4
27.4
45.6
22.3
dept
46.0
36.6
47.7
23.0
Crime_pers
18040.6
7638.4
21468.2
7044.3
Crime_prop
8422.5
3406.7
7263.7
2559.3
Literacy
37.9
19.1
40.6
15.6
Donations
7258.5
6194.1
6892.6
5519.0
Infants
20790.2
9363.5
17309.6
7973.0
Suicides
42565.4
37074.1
30479.8
23130.9
Wealth
51.0
23.9
36.0
23.9
Commerce
42.7
24.6
43.0
25.7
Clergy
39.1
26.7
47.7
22.7
Crime_parents
54.2
25.2
32.8
19.9
Infanticide
37.9
25.7
49.1
23.1
Donation_clergy
52.3
24.0
34.7
22.9
Lottery
54.8
23.0
32.2
21.7
Desertion
41.7
25.9
45.3
24.2
Instruction
46.7
26.7
39.6
22.5
Prostitutes
52.8
93.1
230.9
724.1
Distance
228.7
116.7
187.2
98.4
Area
5989.0
1142.8
6305.0
1612.4
Pop1831
272.4
53.4
484.8
137.3
N
Pct.
N
Pct.
Region
C
13
30.2
4
9.3
E
9
20.9
8
18.6
N
4
9.3
13
30.2
S
12
27.9
5
11.6
W
4
9.3
13
30.2
NA
1
2.3
0
0.0
MainCity
1:Sm
10
23.3
0
0.0
2:Med
33
76.7
33
76.7
3:Lg
0
0.0
10
23.3
#相关表
datasummary_correlation (dat)
X
dept
Crime_pers
Crime_prop
Literacy
Donations
Infants
Suicides
Wealth
Commerce
Clergy
Crime_parents
Infanticide
Donation_clergy
Lottery
Desertion
Instruction
Prostitutes
Distance
Area
Pop1831
X
1
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
dept
.92
1
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
Crime_pers
−.12
−.20
1
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
Crime_prop
−.28
−.29
.27
1
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
Literacy
.09
.10
−.04
−.37
1
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
Donations
.06
.27
−.04
−.13
−.13
1
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
Infants
−.07
−.03
−.04
.27
−.41
.17
1
.
.
.
.
.
.
.
.
.
.
.
.
.
.
Suicides
−.18
−.15
−.13
.52
−.37
−.03
.29
1
.
.
.
.
.
.
.
.
.
.
.
.
.
Wealth
−.08
−.08
−.12
.46
−.28
.08
.34
.42
1
.
.
.
.
.
.
.
.
.
.
.
.
Commerce
−.03
.05
.05
.41
−.58
.30
.39
.48
.48
1
.
.
.
.
.
.
.
.
.
.
.
Clergy
.15
.06
.26
−.07
−.17
.09
−.06
−.32
−.11
−.12
1
.
.
.
.
.
.
.
.
.
.
Crime_parents
−.16
−.07
−.20
.36
−.20
−.02
.06
.35
.22
.18
−.18
1
.
.
.
.
.
.
.
.
.
Infanticide
.01
−.07
.27
−.13
.32
−.15
−.24
−.08
−.22
−.28
−.01
−.09
1
.
.
.
.
.
.
.
.
Donation_clergy
−.08
.00
−.18
.30
−.38
.25
.10
.19
.34
.18
.30
.29
−.23
1
.
.
.
.
.
.
.
Lottery
−.21
−.11
.00
.43
−.36
.15
.42
.49
.48
.45
−.28
.28
−.35
.36
1
.
.
.
.
.
.
Desertion
.10
.03
.33
−.26
.40
−.04
.00
−.47
−.23
−.36
.25
−.39
.11
−.41
−.30
1
.
.
.
.
.
Instruction
−.10
−.11
.05
.39
−.98
.14
.43
.36
.31
.59
.21
.21
−.32
.40
.37
−.37
1
.
.
.
.
Prostitutes
.17
.14
−.05
−.33
.30
−.07
−.28
−.21
−.32
−.27
.20
−.03
.16
−.04
−.28
.04
−.26
1
.
.
.
Distance
−.10
.04
−.51
.25
−.28
.08
.23
.41
.40
.38
−.31
.26
−.16
.28
.28
−.44
.24
−.37
1
.
.
Area
−.18
−.08
.22
.09
−.23
.18
.16
.00
.06
.18
.08
−.20
−.23
.02
.23
.04
.20
−.41
.06
1
.
Pop1831
.17
.09
.27
−.26
.09
.00
−.23
−.17
−.31
−.05
.29
−.40
.34
−.22
−.47
.11
−.11
.48
−.37
−.01
1
#两个变量和两个统计数据,嵌套在子组中
datasummary (Literacy + Commerce ~ Small * (mean + sd), dat)
FALSE
TRUE
mean
sd
mean
sd
Literacy
37.88
19.08
40.63
15.57
Commerce
42.65
24.59
42.95
25.75
#估计一个线性模型并显示结果
mod <- lm (Donations ~ Crime_prop, data = dat)
modelsummary (mod)
Model 1
(Intercept)
9065.287
(1738.926)
Crime_prop
−0.254
(0.207)
Num.Obs.
86
R2
0.018
R2 Adj.
0.006
AIC
1739.0
BIC
1746.4
Log.Lik.
−866.516
F
1.505
#估计5个回归模型,并排显示结果,并将它们保存到Microsoft Word文档中
models <- list (
"OLS 1" = lm (Donations ~ Literacy + Clergy, data = dat),
"Poisson 1" = glm (Donations ~ Literacy + Commerce, family = poisson, data = dat),
"OLS 2" = lm (Crime_pers ~ Literacy + Clergy, data = dat),
"Poisson 2" = glm (Crime_pers ~ Literacy + Commerce, family = poisson, data = dat),
"OLS 3" = lm (Crime_prop ~ Literacy + Clergy, data = dat)
)
modelsummary (models, output = "table.docx" )
PartII
url <- 'https://vincentarelbundock.github.io/Rdatasets/csv/palmerpenguins/penguins.csv'
dat <- read.csv (url)
# rescale mm -> cm
dat$ bill_length_cm <- dat$ bill_length_mm / 10
dat$ flipper_length_cm <- dat$ flipper_length_mm / 10
mod <- lm (bill_length_cm ~ flipper_length_cm + species, data = dat)
library (modelsummary)
modelplot (mod)
modelplot (mod, coef_omit = 'Interc' )
cm <- c ('speciesChinstrap' = 'Chinstrap' ,
'speciesGentoo' = 'Gentoo' ,
'flipper_length_cm' = 'Flipper length (cm)' )
modelplot (mod, coef_map = cm)
models <- list (
"Small model" = lm (bill_length_cm ~ flipper_length_cm, data = dat),
"Medium model" = lm (bill_length_cm ~ flipper_length_cm + body_mass_g, data = dat),
"Large model" = lm (bill_length_cm ~ flipper_length_cm + body_mass_g + species, data = dat))
modelsummary (models, statistic = 'conf.int' )
Small model
Medium model
Large model
(Intercept)
−0.726
−0.344
0.984
[−1.356, −0.097]
[−1.245, 0.557]
[0.215, 1.752]
flipper_length_cm
0.255
0.222
0.095
[0.224, 0.286]
[0.158, 0.285]
[0.048, 0.142]
body_mass_g
0.000
0.000
[0.000, 0.000]
[0.000, 0.000]
speciesChinstrap
0.939
[0.867, 1.011]
speciesGentoo
0.207
[0.088, 0.326]
Num.Obs.
342
342
342
R2
0.431
0.433
0.817
R2 Adj.
0.429
0.430
0.815
AIC
369.0
369.6
−12.6
BIC
380.5
385.0
10.4
Log.Lik.
−181.499
−180.813
12.313
F
257.092
129.365
375.333
modelplot (models, coef_omit = 'Interc' )
modelplot (models, facet = TRUE )
library (wesanderson)
library (ggplot2)
modelplot (models) +
labs (x = 'Coefficients' ,
y = 'Term names' ,
title = 'Linear regression models of "Bill Length (cm)"' ,
caption = "Data source: Gorman, Williams & Fraser (2014), packaged for R by @apreshill and @allison_horst" ) +
scale_color_manual (values = wes_palette ('Darjeeling1' ))
modelplot (mod, size = 1 , fatten = .7 , color = 'darkgreen' , linetype = 'dotted' ) +
theme_classic ()
library (ggplot2)
library (modelsummary)
models <- list (
lm (vs ~ carb + mpg + cyl, data = mtcars),
lm (disp ~ carb + mpg + cyl, data = mtcars),
lm (hp ~ carb + mpg + cyl, data = mtcars))
models <- dvnames (models)
modelplot (models, draw = FALSE )
## term model estimate std.error conf.low conf.high
## 1 (Intercept) vs 2.41742511 0.67622094 1.03224931 3.80260091
## 5 (Intercept) disp 112.57276339 114.86315481 -122.71374324 347.85927003
## 9 (Intercept) hp -10.56116383 68.75946117 -151.40853516 130.28620751
## 2 carb vs -0.06945116 0.03943402 -0.15022810 0.01132577
## 6 carb disp -12.30144724 6.69827859 -26.02224894 1.41935446
## 10 carb hp 17.75593287 4.00972816 9.54237706 25.96948867
## 3 mpg vs -0.01513960 0.01716410 -0.05029868 0.02001947
## 7 mpg disp -7.14964651 2.91550156 -13.12178072 -1.17751230
## 11 mpg hp -1.00486469 1.74527956 -4.57990780 2.57017842
## 4 cyl vs -0.23926135 0.05687969 -0.35577411 -0.12274859
## 8 cyl disp 47.90105842 9.66160634 28.11015499 67.69196184
## 12 cyl hp 20.60581208 5.78363747 8.75856779 32.45305638
modelplot (models, color = "black" ) + facet_grid (~ model)
modelplot (mod, conf_level = .99 )
modelplot (mod, conf_level = NULL )
library (ggplot2)
b <- list (geom_vline (xintercept = 0 , color = 'orange' ),
annotate ("rect" , alpha = .1 ,
xmin = - .5 , xmax = .5 ,
ymin = - Inf , ymax = Inf ),
geom_point (aes (y = term, x = estimate), alpha = .3 ,
size = 10 , color = 'red' , shape = 'square' ))
modelplot (mod, background = b)
modelplot (models, draw = FALSE )
## term model estimate std.error conf.low conf.high
## 1 (Intercept) vs 2.41742511 0.67622094 1.03224931 3.80260091
## 5 (Intercept) disp 112.57276339 114.86315481 -122.71374324 347.85927003
## 9 (Intercept) hp -10.56116383 68.75946117 -151.40853516 130.28620751
## 2 carb vs -0.06945116 0.03943402 -0.15022810 0.01132577
## 6 carb disp -12.30144724 6.69827859 -26.02224894 1.41935446
## 10 carb hp 17.75593287 4.00972816 9.54237706 25.96948867
## 3 mpg vs -0.01513960 0.01716410 -0.05029868 0.02001947
## 7 mpg disp -7.14964651 2.91550156 -13.12178072 -1.17751230
## 11 mpg hp -1.00486469 1.74527956 -4.57990780 2.57017842
## 4 cyl vs -0.23926135 0.05687969 -0.35577411 -0.12274859
## 8 cyl disp 47.90105842 9.66160634 28.11015499 67.69196184
## 12 cyl hp 20.60581208 5.78363747 8.75856779 32.45305638