2 表格
代码提供: 林丹 陈雪兰 李雨彤 杨荣丽 鹏佳琪
2.1 gtsummary包
例1
library(tidyverse)
library(haven)
library(sjmisc)
##
## Attaching package: 'sjmisc'
## The following object is masked from 'package:purrr':
##
## is_empty
## The following object is masked from 'package:tidyr':
##
## replace_na
## The following object is masked from 'package:tibble':
##
## add_case
library(sjlabelled)
##
## Attaching package: 'sjlabelled'
## The following objects are masked from 'package:haven':
##
## as_factor, read_sas, read_spss, read_stata, write_sas, zap_labels
## The following object is masked from 'package:forcats':
##
## as_factor
## The following object is masked from 'package:dplyr':
##
## as_label
## The following object is masked from 'package:ggplot2':
##
## as_label
# Example 1: summarise the internet-use items of the CSS 2019 survey together
# with a derived age group.
css2019 <- read_dta("D://02_work/2020sRBook/css2019.dta")

# Age in the survey year (a1_1_a is presumably the birth year -- confirm
# against the codebook).
css2019 <- css2019 %>%
  mutate(age = 2019 - a1_1_a)

# Bin age into three groups. There is no catch-all branch, so rows with a
# missing age get NA and are reported as "Unknown" by tbl_summary().
css2019 <- css2019 %>%
  mutate(
    agegroup = case_when(
      age <= 40 ~ "40岁以下",
      age > 40 & age <= 60 ~ "40-60",
      age > 60 ~ "60以上"
    )
  )

# Keep the age group plus the seven d4b1_* items under readable names.
css1 <- css2019 %>%
  select(
    agegroup,
    上网浏览时政信息 = d4b1_1,
    网上娱乐 = d4b1_2,
    上网聊天交友 = d4b1_3,
    网上商务或者工作 = d4b1_4,
    网上学习教育 = d4b1_5,
    网上购物与生活服务 = d4b1_6,
    网上投资理财 = d4b1_7
  )

# Fix the level order so the table rows run from youngest to oldest.
# factor() accepts the character column directly; no as.factor() step needed.
css1$agegroup <- factor(
  css1$agegroup,
  levels = c("40岁以下", "40-60", "60以上")
)

# NOTE(review): the d4b1_* columns are still "haven_labelled" here, so
# tbl_summary() prints their numeric codes and emits the conversion message
# shown below.
library(gtsummary)
## #StandWithUkraine
tbl_summary(css1)
## ℹ Column(s) 上网浏览时政信息, 网上娱乐, 上网聊天交友, 网上商务或者工作, 网上学习教育, 网上购物与生活服务, and 网上投资理财 are class "haven_labelled". This is an intermediate datastructure not meant for analysis. Convert columns with `haven::as_factor()`, `labelled::to_factor()`, `labelled::unlabelled()`, and `unclass()`. "haven_labelled" value labels are ignored when columns are not converted. Failure to convert may have unintended consequences or result in error.
## • https://haven.tidyverse.org/articles/semantics.html
## • https://larmarange.github.io/labelled/articles/intro_labelled.html#unlabelled
Characteristic | N = 10,283¹ |
---|---|
agegroup | |
40岁以下 | 3,506 (34%) |
40-60 | 4,676 (45%) |
60以上 | 2,099 (20%) |
Unknown | 2 |
1.浏览时政信息(比如:看党政新闻) | |
1 | 3,028 (45%) |
2 | 1,142 (17%) |
3 | 927 (14%) |
4 | 333 (4.9%) |
5 | 315 (4.7%) |
6 | 993 (15%) |
Unknown | 3,545 |
2.娱乐、休闲(比如:玩网络游戏/听音乐/看视频/读小说) | |
1 | 3,030 (45%) |
2 | 1,529 (23%) |
3 | 791 (12%) |
4 | 278 (4.1%) |
5 | 206 (3.1%) |
6 | 904 (13%) |
Unknown | 3,545 |
3.聊天交友(比如:微信等交友活动) | |
1 | 3,663 (54%) |
2 | 1,306 (19%) |
3 | 755 (11%) |
4 | 265 (3.9%) |
5 | 176 (2.6%) |
6 | 573 (8.5%) |
Unknown | 3,545 |
4.商务或者工作 | |
1 | 1,716 (25%) |
2 | 723 (11%) |
3 | 338 (5.0%) |
4 | 230 (3.4%) |
5 | 263 (3.9%) |
6 | 3,468 (51%) |
Unknown | 3,545 |
5.学习教育 | |
1 | 1,297 (19%) |
2 | 1,149 (17%) |
3 | 692 (10%) |
4 | 408 (6.1%) |
5 | 405 (6.0%) |
6 | 2,787 (41%) |
Unknown | 3,545 |
6.网上购物/生活服务(比如:网购、外卖等) | |
1 | 440 (6.5%) |
2 | 903 (13%) |
3 | 834 (12%) |
4 | 1,536 (23%) |
5 | 938 (14%) |
6 | 2,087 (31%) |
Unknown | 3,545 |
7.投资理财 | |
1 | 144 (2.1%) |
2 | 93 (1.4%) |
3 | 109 (1.6%) |
4 | 158 (2.3%) |
5 | 488 (7.2%) |
6 | 5,746 (85%) |
Unknown | 3,545 |
¹ n (%) |
例2
# Example 2: build a small CGSS 2017 extract (gender plus three items) and
# summarise it with tbl_summary().
cgss2017 <- read_dta("D://02_work/2020sRBook/cgss2017.dta")

cgssx <- cgss2017 %>%
  select(
    gender = a2,
    situation = a15,
    affect = a16,
    sad = a17
  )

# Codes 1/2 become the labelled factor levels 男/女.
cgssx$gender <- factor(
  cgssx$gender,
  levels = c(1, 2),
  labels = c("男", "女")
)
table(cgssx$gender)
##
## 男 女
## 5935 6647

# Collapse and reverse sad: 1-2 -> 3, 3-4 -> 2, 5 -> 1; 98 and 99 -> NA.
# as.numeric() first strips the haven labels so recode() sees plain numbers.
# c(98,99)=NA is the documented set form; the earlier "98;99=NA" split into a
# bare "98" term with no target. The recorded counts below are unaffected.
cgssx$sad <- as.numeric(cgssx$sad)
cgssx$sad <- car::recode(
  cgssx$sad,
  "1:2='3';3:4='2';'5'='1';c(98,99)=NA"
)
table(cgssx$sad)
##
## 1 2 3
## 3537 7708 1308

cgssx$sad <- factor(
  cgssx$sad,
  levels = c(1, 2, 3),
  labels = c("从无", "很少", "较常")
)
table(cgssx$sad)
##
## 从无 很少 较常
## 3537 7708 1308

# Drop unused factor levels, then turn the remaining labelled items into plain
# numerics.
# NOTE(review): unlike sad, situation and affect keep their 98/99 codes, which
# therefore appear as ordinary values in the tables that follow.
cgssx <- droplevels(cgssx)
cgssx$situation <- as.numeric(cgssx$situation)
cgssx$affect <- as.numeric(cgssx$affect)
tbl_summary(cgssx)
Characteristic | N = 12,582¹ |
---|---|
gender | |
男 | 5,935 (47%) |
女 | 6,647 (53%) |
situation | |
1 | 593 (4.7%) |
2 | 2,014 (16%) |
3 | 3,261 (26%) |
4 | 4,409 (35%) |
5 | 2,300 (18%) |
98 | 3 (<0.1%) |
99 | 2 (<0.1%) |
affect | |
1 | 500 (4.0%) |
2 | 1,300 (10%) |
3 | 2,129 (17%) |
4 | 3,793 (30%) |
5 | 4,840 (38%) |
98 | 14 (0.1%) |
99 | 6 (<0.1%) |
sad | |
从无 | 3,537 (28%) |
很少 | 7,708 (61%) |
较常 | 1,308 (10%) |
Unknown | 29 |
¹ n (%) |
2.2 table1包
例1
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
# table1 example 1: build a CGSS 2017 extract with demographics and income,
# then derive age and a three-level education factor.
cgssx1 <- cgss2017 %>%
  select(
    gender = a2,
    birth = a31,
    edu = a7a,
    income = a8a,
    salary = a8b,
    gpg = a35
  ) %>%
  rename(fairplay = gpg)

# NOTE(review): age is computed from the labelled birth column and seems to
# keep its variable label -- the age row in the table1 output is headed with
# the birth-date question text.
cgssx1$age <- 2017 - cgssx1$birth

cgssx1$education <- as.numeric(cgssx1$edu)
# Collapse education codes: 9-13 -> 3, 5-8 -> 2, 4 -> 1, 14 -> NA.
# NOTE(review): "1;4='1'" is split at ";" into the terms "1" and "4='1'", so
# only code 4 is mapped to 1; "1:4='1'" was presumably intended. The recorded
# output (educ: 1549 missing, "pri" only 27.9% although the median of edu is 4)
# matches the spec as written, so the string is left unchanged here -- confirm
# and re-run before correcting it.
cgssx1$educ <- car::recode(
  cgssx1$education,
  "9:13='3';5:8='2';1;4='1';'14'=NA"
)
table(cgssx1$educ)
##
## 1 2 3
## 3511 2326 5196

cgssx1$educ <- factor(
  cgssx1$educ,
  levels = c(1, 2, 3),
  labels = c("pri", "middle", "high")
)
table(cgssx1$educ)
##
## pri middle high
## 3511 2326 5196

cgssx1$gender <- factor(
  cgssx1$gender,
  levels = c(1, 2),
  labels = c("男", "女")
)
table(cgssx1$gender)
##
## 男 女
## 5935 6647

# Drop unused factor levels and turn the labelled money columns into plain
# numerics so table1() reports mean/median for them.
cgssx1 <- droplevels(cgssx1)
cgssx1$salary <- as.numeric(cgssx1$salary)
cgssx1$income <- as.numeric(cgssx1$income)
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
# Descriptive table for the whole sample (single "Overall" column).
table1(~ gender + edu + income + age + educ, data = cgssx1)
 | Overall (N=12582) |
---|---|
gender | |
男 | 5935 (47.2%) |
女 | 6647 (52.8%) |
您目前的最高教育程度是: | |
Mean (SD) | 5.18 (3.29) |
Median [Min, Max] | 4.00 [1.00, 14.0] |
income | |
Mean (SD) | 580000 (2270000) |
Median [Min, Max] | 24000 [0, 10000000] |
Missing | 2 (0.0%) |
[年] 您的出生日期是什么? | |
Mean (SD) | 51.0 (16.9) |
Median [Min, Max] | 52.0 [18.0, 103] |
educ | |
pri | 3511 (27.9%) |
middle | 2326 (18.5%) |
high | 5196 (41.3%) |
Missing | 1549 (12.3%) |
# Same variables split by gender: the term after "|" defines the columns.
table1(~ edu + income + educ | gender, data = cgssx1)
 | 男 (N=5935) | 女 (N=6647) | Overall (N=12582) |
---|---|---|---|
您目前的最高教育程度是: | |||
Mean (SD) | 5.51 (3.21) | 4.88 (3.33) | 5.18 (3.29) |
Median [Min, Max] | 4.00 [1.00, 14.0] | 4.00 [1.00, 14.0] | 4.00 [1.00, 14.0] |
income | |||
Mean (SD) | 524000 (2130000) | 630000 (2380000) | 580000 (2270000) |
Median [Min, Max] | 30000 [0, 10000000] | 20000 [0, 10000000] | 24000 [0, 10000000] |
Missing | 1 (0.0%) | 1 (0.0%) | 2 (0.0%) |
educ | |||
pri | 1818 (30.6%) | 1693 (25.5%) | 3511 (27.9%) |
middle | 1202 (20.3%) | 1124 (16.9%) | 2326 (18.5%) |
high | 2524 (42.5%) | 2672 (40.2%) | 5196 (41.3%) |
Missing | 391 (6.6%) | 1158 (17.4%) | 1549 (12.3%) |
# The two calls below fail with an error saying that the stratification
# variable must not contain missing values, so they are commented out for now.
# (The original note wondered whether the current R version had stopped
# allowing this.) -- 2023-04-23, 黄国政
# The table1 documentation at
# "https://www.rdocumentation.org/packages/table1/versions/1.4.3/topics/table1"
# states: "Stratification variables may not contain missing values". The
# stratification variables are the ones after "|"; at most two are allowed, and
# the second is nested within the first.
# NOTE(review): educ has a "Missing" row in the tables above, which is
# consistent with this error; the NA rows would presumably have to be dropped
# before stratifying by educ.
#table1(~ income|gender*educ, data=cgssx1)
#table1(~ income|gender*educ, data=cgssx1, overall=FALSE)
例2
# Descriptive table of the example-2 extract for the whole sample.
table1(~ gender + situation + affect + sad, data = cgssx)
 | Overall (N=12582) |
---|---|
gender | |
男 | 5935 (47.2%) |
女 | 6647 (52.8%) |
situation | |
Mean (SD) | 3.50 (2.19) |
Median [Min, Max] | 4.00 [1.00, 99.0] |
affect | |
Mean (SD) | 4.04 (3.93) |
Median [Min, Max] | 4.00 [1.00, 99.0] |
sad | |
从无 | 3537 (28.1%) |
很少 | 7708 (61.3%) |
较常 | 1308 (10.4%) |
Missing | 29 (0.2%) |
# Same items split by gender: the term after "|" defines the columns.
table1(~ situation + affect + sad | gender, data = cgssx)
 | 男 (N=5935) | 女 (N=6647) | Overall (N=12582) |
---|---|---|---|
situation | |||
Mean (SD) | 3.60 (2.69) | 3.41 (1.61) | 3.50 (2.19) |
Median [Min, Max] | 4.00 [1.00, 99.0] | 4.00 [1.00, 99.0] | 4.00 [1.00, 99.0] |
affect | |||
Mean (SD) | 4.07 (3.43) | 4.01 (4.33) | 4.04 (3.93) |
Median [Min, Max] | 4.00 [1.00, 99.0] | 4.00 [1.00, 99.0] | 4.00 [1.00, 99.0] |
sad | |||
从无 | 1797 (30.3%) | 1740 (26.2%) | 3537 (28.1%) |
很少 | 3585 (60.4%) | 4123 (62.0%) | 7708 (61.3%) |
较常 | 542 (9.1%) | 766 (11.5%) | 1308 (10.4%) |
Missing | 11 (0.2%) | 18 (0.3%) | 29 (0.2%) |
# Columns are gender crossed with affect. affect was converted with
# as.numeric(), so table1 warns that column terms are expected to be factors;
# its 98/99 codes also become columns of their own.
table1(~ situation | gender * affect, data = cgssx)
## Warning in table1.formula(~situation | gender * affect, data = cgssx): Terms to
## the right of '|' in formula 'x' define table columns and are expected to be
## factors with meaningful labels.
## Warning in .table1.internal(x = x, labels = labels, groupspan = groupspan, :
## Table has 21 columns. Are you sure this is what you want?
 | 男: 1 (N=235) | 男: 2 (N=544) | 男: 3 (N=909) | 男: 4 (N=1775) | 男: 5 (N=2465) | 男: 98 (N=5) | 男: 99 (N=2) | 女: 1 (N=265) | 女: 2 (N=756) | 女: 3 (N=1220) | 女: 4 (N=2018) | 女: 5 (N=2375) | 女: 98 (N=9) | 女: 99 (N=4) | Overall: 1 (N=500) | Overall: 2 (N=1300) | Overall: 3 (N=2129) | Overall: 4 (N=3793) | Overall: 5 (N=4840) | Overall: 98 (N=14) | Overall: 99 (N=6) |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
situation | |||||||||||||||||||||
Mean (SD) | 2.12 (6.33) | 2.16 (0.794) | 2.99 (3.27) | 3.80 (0.803) | 4.14 (2.82) | 3.40 (1.14) | 3.00 (0) | 1.67 (0.827) | 2.12 (0.786) | 2.86 (0.854) | 3.73 (0.826) | 4.03 (2.14) | 3.11 (1.36) | 4.00 (0.816) | 1.88 (4.39) | 2.14 (0.789) | 2.92 (2.23) | 3.76 (0.816) | 4.08 (2.51) | 3.21 (1.25) | 3.67 (0.816) |
Median [Min, Max] | 2.00 [1.00, 98.0] | 2.00 [1.00, 5.00] | 3.00 [1.00, 98.0] | 4.00 [1.00, 5.00] | 4.00 [1.00, 99.0] | 3.00 [2.00, 5.00] | 3.00 [3.00, 3.00] | 1.00 [1.00, 5.00] | 2.00 [1.00, 5.00] | 3.00 [1.00, 5.00] | 4.00 [1.00, 5.00] | 4.00 [1.00, 99.0] | 3.00 [1.00, 5.00] | 4.00 [3.00, 5.00] | 2.00 [1.00, 98.0] | 2.00 [1.00, 5.00] | 3.00 [1.00, 98.0] | 4.00 [1.00, 5.00] | 4.00 [1.00, 99.0] | 3.00 [1.00, 5.00] | 3.50 [3.00, 5.00] |
# Columns are gender crossed with situation. situation was converted with
# as.numeric(), so table1 warns that column terms are expected to be factors;
# its 98/99 codes also become columns of their own.
table1(~ sad | gender * situation, data = cgssx)
## Warning in table1.formula(~sad | gender * situation, data = cgssx): Terms to
## the right of '|' in formula 'x' define table columns and are expected to be
## factors with meaningful labels.
## Warning in .table1.internal(x = x, labels = labels, groupspan = groupspan, :
## Table has 20 columns. Are you sure this is what you want?
 | 男: 1 (N=251) | 男: 2 (N=861) | 男: 3 (N=1483) | 男: 4 (N=2144) | 男: 5 (N=1192) | 男: 98 (N=3) | 男: 99 (N=1) | 女: 1 (N=342) | 女: 2 (N=1153) | 女: 3 (N=1778) | 女: 4 (N=2265) | 女: 5 (N=1108) | 女: 99 (N=1) | Overall: 1 (N=593) | Overall: 2 (N=2014) | Overall: 3 (N=3261) | Overall: 4 (N=4409) | Overall: 5 (N=2300) | Overall: 98 (N=3) | Overall: 99 (N=2) |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
sad | ||||||||||||||||||||
从无 | 23 (9.2%) | 129 (15.0%) | 375 (25.3%) | 714 (33.3%) | 555 (46.6%) | 1 (33.3%) | 0 (0%) | 26 (7.6%) | 143 (12.4%) | 418 (23.5%) | 680 (30.0%) | 472 (42.6%) | 1 (100%) | 49 (8.3%) | 272 (13.5%) | 793 (24.3%) | 1394 (31.6%) | 1027 (44.7%) | 1 (33.3%) | 1 (50.0%) |
很少 | 121 (48.2%) | 557 (64.7%) | 964 (65.0%) | 1342 (62.6%) | 600 (50.3%) | 0 (0%) | 1 (100%) | 146 (42.7%) | 729 (63.2%) | 1156 (65.0%) | 1490 (65.8%) | 602 (54.3%) | 0 (0%) | 267 (45.0%) | 1286 (63.9%) | 2120 (65.0%) | 2832 (64.2%) | 1202 (52.3%) | 0 (0%) | 1 (50.0%) |
较常 | 105 (41.8%) | 173 (20.1%) | 140 (9.4%) | 86 (4.0%) | 37 (3.1%) | 1 (33.3%) | 0 (0%) | 167 (48.8%) | 277 (24.0%) | 196 (11.0%) | 95 (4.2%) | 31 (2.8%) | 0 (0%) | 272 (45.9%) | 450 (22.3%) | 336 (10.3%) | 181 (4.1%) | 68 (3.0%) | 1 (33.3%) | 0 (0%) |
Missing | 2 (0.8%) | 2 (0.2%) | 4 (0.3%) | 2 (0.1%) | 0 (0%) | 1 (33.3%) | 0 (0%) | 3 (0.9%) | 4 (0.3%) | 8 (0.5%) | 0 (0%) | 3 (0.3%) | 0 (0%) | 5 (0.8%) | 6 (0.3%) | 12 (0.4%) | 2 (0.0%) | 3 (0.1%) | 1 (33.3%) | 0 (0%) |