2 表格

代码提供: 林丹 陈雪兰 李雨彤 杨荣丽 鹏佳琪

2.1 gtsummary包

例1

library(tidyverse)
library(haven)
library(sjmisc)
## 
## Attaching package: 'sjmisc'
## The following object is masked from 'package:purrr':
## 
##     is_empty
## The following object is masked from 'package:tidyr':
## 
##     replace_na
## The following object is masked from 'package:tibble':
## 
##     add_case
library(sjlabelled)
## 
## Attaching package: 'sjlabelled'
## The following objects are masked from 'package:haven':
## 
##     as_factor, read_sas, read_spss, read_stata, write_sas, zap_labels
## The following object is masked from 'package:forcats':
## 
##     as_factor
## The following object is masked from 'package:dplyr':
## 
##     as_label
## The following object is masked from 'package:ggplot2':
## 
##     as_label
# Read the CSS 2019 survey file, derive `age` as 2019 minus `a1_1_a`
# (presumably the birth year -- confirm against the codebook) and bucket it
# into three age bands. case_when() takes the first matching branch, so the
# second branch only sees ages above 40; a missing age matches no branch and
# stays NA.
css2019 <- read_dta("D://02_work/2020sRBook/css2019.dta") %>%
  mutate(age = 2019 - a1_1_a) %>%
  mutate(
    agegroup = case_when(
      age <= 40 ~ "40岁以下",
      age <= 60 ~ "40-60",
      age > 60 ~ "60以上"
    )
  )

# Keep the age band plus the seven d4b1_* items under descriptive column
# names, and fix the display order of the age bands.
css1 <- css2019 %>%
  select(
    agegroup,
    上网浏览时政信息 = d4b1_1,
    网上娱乐 = d4b1_2,
    上网聊天交友 = d4b1_3,
    网上商务或者工作 = d4b1_4,
    网上学习教育 = d4b1_5,
    网上购物与生活服务 = d4b1_6,
    网上投资理财 = d4b1_7
  ) %>%
  mutate(
    agegroup = factor(agegroup, levels = c("40岁以下", "40-60", "60以上"))
  )
library(gtsummary)
## #StandWithUkraine
# Summarise every column of css1 with gtsummary's defaults.
# NOTE(review): the d4b1_* columns are still "haven_labelled", so their value
# labels are ignored and the raw codes are tabulated. Consider converting them
# first, e.g. with haven::as_factor() (namespaced, because sjlabelled masks
# as_factor).
css1 %>% tbl_summary()
## ℹ Column(s) 上网浏览时政信息, 网上娱乐, 上网聊天交友, 网上商务或者工作, 网上学习教育, 网上购物与生活服务, and 网上投资理财 are class "haven_labelled". This is an intermediate datastructure not meant for analysis. Convert columns with `haven::as_factor()`, `labelled::to_factor()`, `labelled::unlabelled()`, and `unclass()`. "haven_labelled" value labels are ignored when columns are not converted. Failure to convert may have unintended consequences or result in error.
## • https://haven.tidyverse.org/articles/semantics.html
## • https://larmarange.github.io/labelled/articles/intro_labelled.html#unlabelled
Characteristic N = 10,283¹
agegroup
    40岁以下 3,506 (34%)
    40-60 4,676 (45%)
    60以上 2,099 (20%)
    Unknown 2
1.浏览时政信息(比如:看党政新闻)
    1 3,028 (45%)
    2 1,142 (17%)
    3 927 (14%)
    4 333 (4.9%)
    5 315 (4.7%)
    6 993 (15%)
    Unknown 3,545
2.娱乐、休闲(比如:玩网络游戏/听音乐/看视频/读小说)
    1 3,030 (45%)
    2 1,529 (23%)
    3 791 (12%)
    4 278 (4.1%)
    5 206 (3.1%)
    6 904 (13%)
    Unknown 3,545
3.聊天交友(比如:微信等交友活动)
    1 3,663 (54%)
    2 1,306 (19%)
    3 755 (11%)
    4 265 (3.9%)
    5 176 (2.6%)
    6 573 (8.5%)
    Unknown 3,545
4.商务或者工作
    1 1,716 (25%)
    2 723 (11%)
    3 338 (5.0%)
    4 230 (3.4%)
    5 263 (3.9%)
    6 3,468 (51%)
    Unknown 3,545
5.学习教育
    1 1,297 (19%)
    2 1,149 (17%)
    3 692 (10%)
    4 408 (6.1%)
    5 405 (6.0%)
    6 2,787 (41%)
    Unknown 3,545
6.网上购物/生活服务(比如:网购、外卖等)
    1 440 (6.5%)
    2 903 (13%)
    3 834 (12%)
    4 1,536 (23%)
    5 938 (14%)
    6 2,087 (31%)
    Unknown 3,545
7.投资理财
    1 144 (2.1%)
    2 93 (1.4%)
    3 109 (1.6%)
    4 158 (2.3%)
    5 488 (7.2%)
    6 5,746 (85%)
    Unknown 3,545
¹ n (%)

例2

# Read the CGSS 2017 file and keep four items under short names; gender codes
# 1 and 2 become a labelled factor. The table() call prints the counts as a
# quick check of the coding.
cgss2017 <- read_dta("D://02_work/2020sRBook/cgss2017.dta")
cgssx <- cgss2017 %>%
  select(gender = a2, situation = a15, affect = a16, sad = a17) %>%
  mutate(gender = factor(gender, levels = c(1, 2), labels = c("男", "女")))
table(cgssx$gender)
## 
##   男   女 
## 5935 6647
# Reverse-code `sad` into three bands (1-2 -> 3, 3-4 -> 2, 5 -> 1) and turn
# the codes 98 and 99 into NA.
# The previous spec ended in "98;99=NA". ";" separates recode terms, so the
# bare "98" was a term without an "=target" part; it appears to have produced
# NA only as a side effect of how the missing target is evaluated. Writing
# c(98,99)=NA states the intent explicitly. Targets are given as plain
# numbers because the result is used as a numeric code.
cgssx$sad <- as.numeric(cgssx$sad)
cgssx$sad <-
  car::recode(cgssx$sad, "1:2=3;3:4=2;5=1;c(98,99)=NA")
table(cgssx$sad)
## 
##    1    2    3 
## 3537 7708 1308
# Attach the three labels to the recoded `sad` values (1, 2, 3) and print the
# counts again to confirm the mapping.
cgssx <- cgssx %>%
  mutate(sad = factor(sad, levels = 1:3, labels = c("从无", "很少", "较常")))
table(cgssx$sad)
## 
## 从无 很少 较常 
## 3537 7708 1308
# Drop unused factor levels, strip the labelled class from the two remaining
# items by converting them to plain numerics, then summarise all columns.
# NOTE(review): unlike `sad`, the codes 98 and 99 are not recoded to NA for
# `situation` and `affect`, so they are counted as ordinary values.
cgssx <- cgssx %>%
  droplevels() %>%
  mutate(across(c(situation, affect), as.numeric))
tbl_summary(cgssx)
Characteristic N = 12,582¹
gender
    男 5,935 (47%)
    女 6,647 (53%)
situation
    1 593 (4.7%)
    2 2,014 (16%)
    3 3,261 (26%)
    4 4,409 (35%)
    5 2,300 (18%)
    98 3 (<0.1%)
    99 2 (<0.1%)
affect
    1 500 (4.0%)
    2 1,300 (10%)
    3 2,129 (17%)
    4 3,793 (30%)
    5 4,840 (38%)
    98 14 (0.1%)
    99 6 (<0.1%)
sad
    从无 3,537 (28%)
    很少 7,708 (61%)
    较常 1,308 (10%)
    Unknown 29
¹ n (%)

2.2 table1包

例1

library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
# Build the data frame for the table1 examples from CGSS 2017: keep six items
# under short names, derive `age` as 2017 minus `birth`, and keep a plain
# numeric copy of the education code in `education`.
cgssx1 <- cgss2017 %>%
  select(
    gender = a2,
    birth = a31,
    edu = a7a,
    income = a8a,
    salary = a8b,
    fairplay = a35
  ) %>%
  mutate(
    age = 2017 - birth,
    education = as.numeric(edu)
  )

# Collapse the education code into three bands: 1-4 -> 1, 5-8 -> 2,
# 9-13 -> 3, and 14 -> NA.
# BUG FIX: the previous spec contained "1;4='1'". ";" separates recode terms,
# so this was read as a bare "1" term plus "4='1'", not as the range 1:4
# (presumably what was intended). As a result codes 2 and 3 were never
# recoded and were counted in bands 2 and 3, and code 1 did not land in
# band 1.
# NOTE(review): frequency tables rendered before this fix were produced with
# the old spec and need to be regenerated.
cgssx1$educ <-
  car::recode(cgssx1$education, "1:4=1;5:8=2;9:13=3;14=NA")
table(cgssx1$educ)
## 
##    1    2    3 
## 3511 2326 5196
# Label the three education bands and print the counts to confirm the mapping.
cgssx1 <- cgssx1 %>%
  mutate(educ = factor(educ, levels = 1:3, labels = c("pri", "middle", "high")))
table(cgssx1$educ)
## 
##    pri middle   high 
##   3511   2326   5196
# Turn gender codes 1 and 2 into a labelled factor and print the counts.
cgssx1 <- cgssx1 %>%
  mutate(gender = factor(gender, levels = c(1, 2), labels = c("男", "女")))
table(cgssx1$gender)
## 
##   男   女 
## 5935 6647
# Drop unused factor levels and convert the two money columns to plain
# numerics.
# NOTE(review): `income` reaches 10000000 in the summary tables, which may
# indicate special codes still present in the data -- verify against the
# codebook before interpreting the mean.
cgssx1 <- cgssx1 %>%
  droplevels() %>%
  mutate(across(c(salary, income), as.numeric))
library(table1)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
# Overall descriptive table for cgssx1 (no stratification). `edu` is the raw
# education code and `educ` is the three-band factor built from it.
table1(~ gender + edu + income + age + educ, data = cgssx1)
Overall
(N=12582)
gender
男 5935 (47.2%)
女 6647 (52.8%)
您目前的最高教育程度是:
Mean (SD) 5.18 (3.29)
Median [Min, Max] 4.00 [1.00, 14.0]
income
Mean (SD) 580000 (2270000)
Median [Min, Max] 24000 [0, 10000000]
Missing 2 (0.0%)
[年]  您的出生日期是什么?
Mean (SD) 51.0 (16.9)
Median [Min, Max] 52.0 [18.0, 103]
educ
pri 3511 (27.9%)
middle 2326 (18.5%)
high 5196 (41.3%)
Missing 1549 (12.3%)
# Same kind of table, with one column per gender (the term after `|`) plus an
# overall column.
table1(~ edu + income + educ | gender, data = cgssx1)

男
(N=5935)
女
(N=6647)
Overall
(N=12582)
您目前的最高教育程度是:
Mean (SD) 5.51 (3.21) 4.88 (3.33) 5.18 (3.29)
Median [Min, Max] 4.00 [1.00, 14.0] 4.00 [1.00, 14.0] 4.00 [1.00, 14.0]
income
Mean (SD) 524000 (2130000) 630000 (2380000) 580000 (2270000)
Median [Min, Max] 30000 [0, 10000000] 20000 [0, 10000000] 24000 [0, 10000000]
Missing 1 (0.0%) 1 (0.0%) 2 (0.0%)
educ
pri 1818 (30.6%) 1693 (25.5%) 3511 (27.9%)
middle 1202 (20.3%) 1124 (16.9%) 2326 (18.5%)
high 2524 (42.5%) 2672 (40.2%) 5196 (41.3%)
Missing 391 (6.6%) 1158 (17.4%) 1549 (12.3%)
# The two calls below fail with an error saying that the stratification
# variables must not contain missing values, so they are commented out for
# now.  -- 2023.4.23 黄国政
# The table1 documentation
# ("https://www.rdocumentation.org/packages/table1/versions/1.4.3/topics/table1")
# states: "Stratification variables may not contain missing values". The
# stratification variables are the ones to the right of `|`; at most two are
# allowed, and the second is nested within the first.
# NOTE(review): this is probably not an R-version issue. `educ` contains
# missing values (see the "Missing" row for educ in the table1 output above),
# which is what this check rejects. Dropping the rows with a missing `educ`
# before calling table1 should make these calls run -- to be confirmed.
#table1(~ income|gender*educ, data=cgssx1)
#table1(~ income|gender*educ, data=cgssx1, overall=FALSE)

例2

# Overall descriptive table for cgssx (no stratification). `situation` and
# `affect` are numeric here, so they are summarised as continuous variables.
table1(~ gender + situation + affect + sad, data = cgssx)
Overall
(N=12582)
gender
男 5935 (47.2%)
女 6647 (52.8%)
situation
Mean (SD) 3.50 (2.19)
Median [Min, Max] 4.00 [1.00, 99.0]
affect
Mean (SD) 4.04 (3.93)
Median [Min, Max] 4.00 [1.00, 99.0]
sad
从无 3537 (28.1%)
很少 7708 (61.3%)
较常 1308 (10.4%)
Missing 29 (0.2%)
# The same variables with one column per gender plus an overall column.
table1(~ situation + affect + sad | gender, data = cgssx)

男
(N=5935)
女
(N=6647)
Overall
(N=12582)
situation
Mean (SD) 3.60 (2.69) 3.41 (1.61) 3.50 (2.19)
Median [Min, Max] 4.00 [1.00, 99.0] 4.00 [1.00, 99.0] 4.00 [1.00, 99.0]
affect
Mean (SD) 4.07 (3.43) 4.01 (4.33) 4.04 (3.93)
Median [Min, Max] 4.00 [1.00, 99.0] 4.00 [1.00, 99.0] 4.00 [1.00, 99.0]
sad
从无 1797 (30.3%) 1740 (26.2%) 3537 (28.1%)
很少 3585 (60.4%) 4123 (62.0%) 7708 (61.3%)
较常 542 (9.1%) 766 (11.5%) 1308 (10.4%)
Missing 11 (0.2%) 18 (0.3%) 29 (0.2%)
# Two stratification variables: columns are defined by gender and `affect`
# together. `affect` is numeric rather than a labelled factor, so table1
# warns about it, and each distinct code (including 98 and 99) becomes its
# own column.
table1(~ situation | gender * affect, data = cgssx)
## Warning in table1.formula(~situation | gender * affect, data = cgssx): Terms to
## the right of '|' in formula 'x' define table columns and are expected to be
## factors with meaningful labels.
## Warning in .table1.internal(x = x, labels = labels, groupspan = groupspan, :
## Table has 21 columns. Are you sure this is what you want?
Overall
1
(N=235)
2
(N=544)
3
(N=909)
4
(N=1775)
5
(N=2465)
98
(N=5)
99
(N=2)
1
(N=265)
2
(N=756)
3
(N=1220)
4
(N=2018)
5
(N=2375)
98
(N=9)
99
(N=4)
1
(N=500)
2
(N=1300)
3
(N=2129)
4
(N=3793)
5
(N=4840)
98
(N=14)
99
(N=6)
situation
Mean (SD) 2.12 (6.33) 2.16 (0.794) 2.99 (3.27) 3.80 (0.803) 4.14 (2.82) 3.40 (1.14) 3.00 (0) 1.67 (0.827) 2.12 (0.786) 2.86 (0.854) 3.73 (0.826) 4.03 (2.14) 3.11 (1.36) 4.00 (0.816) 1.88 (4.39) 2.14 (0.789) 2.92 (2.23) 3.76 (0.816) 4.08 (2.51) 3.21 (1.25) 3.67 (0.816)
Median [Min, Max] 2.00 [1.00, 98.0] 2.00 [1.00, 5.00] 3.00 [1.00, 98.0] 4.00 [1.00, 5.00] 4.00 [1.00, 99.0] 3.00 [2.00, 5.00] 3.00 [3.00, 3.00] 1.00 [1.00, 5.00] 2.00 [1.00, 5.00] 3.00 [1.00, 5.00] 4.00 [1.00, 5.00] 4.00 [1.00, 99.0] 3.00 [1.00, 5.00] 4.00 [3.00, 5.00] 2.00 [1.00, 98.0] 2.00 [1.00, 5.00] 3.00 [1.00, 98.0] 4.00 [1.00, 5.00] 4.00 [1.00, 99.0] 3.00 [1.00, 5.00] 3.50 [3.00, 5.00]
# Distribution of `sad` with columns defined by gender and `situation`
# together. `situation` is numeric rather than a labelled factor, so table1
# issues the same warnings as for the previous call.
table1(~ sad | gender * situation, data = cgssx)
## Warning in table1.formula(~sad | gender * situation, data = cgssx): Terms to
## the right of '|' in formula 'x' define table columns and are expected to be
## factors with meaningful labels.
## Warning in .table1.internal(x = x, labels = labels, groupspan = groupspan, :
## Table has 20 columns. Are you sure this is what you want?
Overall
1
(N=251)
2
(N=861)
3
(N=1483)
4
(N=2144)
5
(N=1192)
98
(N=3)
99
(N=1)
1
(N=342)
2
(N=1153)
3
(N=1778)
4
(N=2265)
5
(N=1108)
99
(N=1)
1
(N=593)
2
(N=2014)
3
(N=3261)
4
(N=4409)
5
(N=2300)
98
(N=3)
99
(N=2)
sad
从无 23 (9.2%) 129 (15.0%) 375 (25.3%) 714 (33.3%) 555 (46.6%) 1 (33.3%) 0 (0%) 26 (7.6%) 143 (12.4%) 418 (23.5%) 680 (30.0%) 472 (42.6%) 1 (100%) 49 (8.3%) 272 (13.5%) 793 (24.3%) 1394 (31.6%) 1027 (44.7%) 1 (33.3%) 1 (50.0%)
很少 121 (48.2%) 557 (64.7%) 964 (65.0%) 1342 (62.6%) 600 (50.3%) 0 (0%) 1 (100%) 146 (42.7%) 729 (63.2%) 1156 (65.0%) 1490 (65.8%) 602 (54.3%) 0 (0%) 267 (45.0%) 1286 (63.9%) 2120 (65.0%) 2832 (64.2%) 1202 (52.3%) 0 (0%) 1 (50.0%)
较常 105 (41.8%) 173 (20.1%) 140 (9.4%) 86 (4.0%) 37 (3.1%) 1 (33.3%) 0 (0%) 167 (48.8%) 277 (24.0%) 196 (11.0%) 95 (4.2%) 31 (2.8%) 0 (0%) 272 (45.9%) 450 (22.3%) 336 (10.3%) 181 (4.1%) 68 (3.0%) 1 (33.3%) 0 (0%)
Missing 2 (0.8%) 2 (0.2%) 4 (0.3%) 2 (0.1%) 0 (0%) 1 (33.3%) 0 (0%) 3 (0.9%) 4 (0.3%) 8 (0.5%) 0 (0%) 3 (0.3%) 0 (0%) 5 (0.8%) 6 (0.3%) 12 (0.4%) 2 (0.0%) 3 (0.1%) 1 (33.3%) 0 (0%)