A g-computation [Validation Overall]
A.1 Parametric g-formula
Based on Hernan and Robins, 2020 Chapter 13
Zou’s modified Poisson regression code
A.2 NEW FUNCTIONS
# Helpers g-computation
# standardization =============================================================
<- function(data, indices, treatment, outcome, formula1) {
standardization
<- enquo(treatment)
treatment1 <- enquo(outcome)
outcome1
# create a dataset with 3 copies of each subject
# 1st copy: equal to original one`
<- data[indices, ]
d <- d %>% mutate(interv = -1,
d seqno = 1:n()) # for gee estimation
# 2nd copy: treatment set to 0, outcome to missing
<- d %>% mutate(interv = 0,
d0 !!treatment1 := first(levels(droplevels(!!treatment1))),
!!outcome1 := NA)
# 3rd copy: treatment set to 1, outcome to missing
<- d %>% mutate(interv = 1,
d1 !!treatment1 := last(levels(droplevels(!!treatment1))),
!!outcome1 := NA)
<- rbind(d, d0, d1) # combining datasets
d.onesample
if (class(unlist(select(data, !!outcome1)))=="factor") {
<- d.onesample %>%
d.onesample mutate(!!outcome1 := as.numeric(!!outcome1)-1)
}
# linear model to estimate mean outcome conditional on treatment and confounders
# parameters are estimated using original observations only (interv= -1)
# parameter estimates are used to predict mean outcome for observations with set
# treatment (interv=0 and interv=1)
# fit <- glm(formula = formula1 ,data = d.onesample, family = "binomial")
# d.onesample$predicted_meanY <- predict(fit, d.onesample, type="response")
# Zou, 2004
require(geepack)
<- geeglm(formula = formula1,
fit data = d.onesample,
family = poisson(link = "log"),
id = seqno,
corstr = "exchangeable")
$predicted_meanY <- predict(fit, d.onesample, type="response")
d.onesample
# estimate mean outcome in each of the groups interv=-1, interv=0, and interv=1
<- mean(d.onesample$predicted_meanY[d.onesample$interv == -1], na.rm=T)
M.ob <- mean(d.onesample$predicted_meanY[d.onesample$interv == 0], na.rm=T)
M.t0 <- mean(d.onesample$predicted_meanY[d.onesample$interv == 1], na.rm=T)
M.t1 <- M.t1 - M.t0
M.t10
return(c(M.ob, M.t0, M.t1, M.t10))
}
# plot_eff =============================================================
<- function(dat1, dat2, dat3, coefs, legend.title = "Total Effect", model.names = c("Overall", "Rural", "Periurban"), size = 1, base_size = 13, xlims = c(-.3,1.3)) {
plot_eff library(ggsci)
$coeftable %>% as_tibble(rownames = "term") %>% mutate(model = model.names[1]) %>%
dat1rbind(dat2$coeftable %>% as_tibble(rownames = "term") %>% mutate(model = model.names[2])) %>%
rbind(dat3$coeftable %>% as_tibble(rownames = "term") %>% mutate(model = model.names[3])) %>%
clean_names() %>%
filter(term %in% coefs) %>%
ggplot(aes(y = model, x = est, col = model)) +
geom_pointrange(aes(xmin = x2_5_percent, xmax = x97_5_percent), size=size) +
geom_vline(xintercept = 0, linetype = "dashed") +
scale_color_npg(limits = model.names) +
scale_y_discrete(limits = rev(model.names)) +
scale_x_continuous(limits = xlims) +
theme_bw(base_size = base_size) +
guides(color = guide_legend(title.position = "top",
title.hjust = 0)) +
labs(y = "", x = "Estimate", col = legend.title) +
theme(legend.position = "top")
}
# standardization_sim =============================================================
<- function(data, indices, treatment, outcome, formula1, p_out) {
standardization_sim
<- enquo(treatment)
treatment1 <- enquo(outcome)
outcome1
# create a dataset with 3 copies of each subject
# 1st copy: equal to original one`
<- data[indices, ]
d <- d %>% mutate(interv = -1,
d seqno = 1:n()) # for gee estimation
# 2nd copy: treatment set to 0, outcome to missing
<- d %>% mutate(interv = 0,
d0 !!treatment1 := first(levels(droplevels(!!treatment1))),
!!outcome1 := NA)
# 3rd copy: treatment set to 1, outcome to missing
<- d %>% mutate(interv = 1,
d1 !!treatment1 := last(levels(droplevels(!!treatment1))),
!!outcome1 := NA)
# 4th copy: treatment set to simulated prop, outcome to missing
<- d %>% mutate(interv = 2,
d2 !!treatment1 := rbinom(n=nrow(d), size=1, prob=p_out),
!!treatment1 := ifelse(!!treatment1 == 1,
"Outside", "Inside"),
!!outcome1 := NA)
<- rbind(d, d0, d1, d2) # combining datasets
d.onesample
if (class(unlist(select(data, !!outcome1)))=="factor") {
<- d.onesample %>%
d.onesample mutate(!!outcome1 := as.numeric(!!outcome1)-1)
}
# linear model to estimate mean outcome conditional on treatment and confounders
# parameters are estimated using original observations only (interv= -1)
# parameter estimates are used to predict mean outcome for observations with set
# treatment (interv=0 and interv=1)
# fit <- glm(formula = formula1 ,data = d.onesample, family = "binomial")
# d.onesample$predicted_meanY <- predict(fit, d.onesample, type="response")
# Zou, 2004
require(geepack)
<- geeglm(formula = formula1,
fit data = d.onesample,
family = poisson(link = "log"),
id = seqno,
corstr = "exchangeable")
$predicted_meanY <- predict(fit, d.onesample, type="response")
d.onesample
# estimate mean outcome in each of the groups interv=-1, interv=0, and interv=1
<- mean(d.onesample$predicted_meanY[d.onesample$interv == -1], na.rm=T)
M.ob <- mean(d.onesample$predicted_meanY[d.onesample$interv == 0], na.rm=T)
M.t0 <- mean(d.onesample$predicted_meanY[d.onesample$interv == 1], na.rm=T)
M.t1 <- mean(d.onesample$predicted_meanY[d.onesample$interv == 2], na.rm=T)
M.ts <- M.t1 - M.t0
M.t10 <- M.t1 - M.ts
M.t1s <- M.ts - M.t0
M.ts0 <- M.ts - M.ob
M.tsob
return(c(M.ob, M.t0, M.t1, M.t10, M.ts, M.t1s, M.ts0, M.tsob))
}
# tab_sim =============================================================
<- function(results) {
tab_sim <- c(sd(results$t[, 1]),
se sd(results$t[, 2]),
sd(results$t[, 3]),
sd(results$t[, 4]),
sd(results$t[, 5]),
sd(results$t[, 6]),
sd(results$t[, 7]),
sd(results$t[, 8]))
<- results$t0
mean <- mean - qnorm(0.975) * se
ll <- mean + qnorm(0.975) * se
ul
# OBS = observed, NE = No Exposed, FE = Exposed, SE = Simulated Exposure
<-data.frame(cbind(type=c("OBS",
bootstrap "NE",
"FE",
"FE - NE",
"SE",
"FE - SE",
"SE - NE",
"SE - OBS"),
%>%
mean, se, ll, ul)) mutate_at(.vars = c(2:5), ~as.numeric(as.character(.)))
return(bootstrap)
}
# standardization_bin =============================================================
<- function(data, indices, treatment, outcome, formula1, var_sim, cat_sim) {
standardization_bin
<- enquo(treatment)
treatment1 <- enquo(outcome)
outcome1 <- enquo(var_sim)
var_sim1
# create a dataset with 3 copies of each subject
# 1st copy: equal to original one`
<- data[indices, ]
d <- d %>% mutate(interv = -1,
d seqno = 1:n(), # for gee estimation
cat = !!var_sim1)
# # 2nd copy: treatment set to 0, outcome to missing
# d0 <- d %>% mutate(interv = 0,
# !!treatment1 := first(levels(droplevels(!!treatment1))),
# !!outcome1 := NA)
# # 3rd copy: treatment set to 1, outcome to missing
# d1 <- d %>% mutate(interv = 1,
# !!treatment1 := last(levels(droplevels(!!treatment1))),
# !!outcome1 := NA)
# 4th copy: treatment set to 0 for cat_sim, outcome to missing
<- d %>% mutate(interv = 2,
d2 !!treatment1 := as.character(!!treatment1),
!!treatment1 := ifelse(cat == cat_sim,
"Inside", !!treatment1),
!!treatment1 := as.factor(!!treatment1),
!!outcome1 := NA)
# 5th copy: treatment set to 1 for cat_sim, outcome to missing
<- d %>% mutate(interv = 3,
d3 !!treatment1 := as.character(!!treatment1),
!!treatment1 := ifelse(cat != cat_sim,
"Outside", !!treatment1),
!!treatment1 := as.factor(!!treatment1),
!!outcome1 := NA)
<- rbind(d, d2, d3) # combining datasets
d.onesample
if (class(unlist(select(data, !!outcome1)))=="factor") {
<- d.onesample %>%
d.onesample mutate(!!outcome1 := as.numeric(!!outcome1)-1)
}
# linear model to estimate mean outcome conditional on treatment and confounders
# parameters are estimated using original observations only (interv= -1)
# parameter estimates are used to predict mean outcome for observations with set
# treatment (interv=0 and interv=1)
# fit <- glm(formula = formula1 ,data = d.onesample, family = "binomial")
# d.onesample$predicted_meanY <- predict(fit, d.onesample, type="response")
# Zou, 2004
require(geepack)
<- geeglm(formula = formula1,
fit data = d.onesample,
family = poisson(link = "log"),
id = seqno,
corstr = "exchangeable")
$predicted_meanY <- predict(fit, d.onesample, type="response")
d.onesample
# estimate mean outcome in each of the groups interv=-1, interv=0, and interv=1
<- mean(d.onesample$predicted_meanY[d.onesample$interv == -1], na.rm=T)
M.ob <- mean(d.onesample$predicted_meanY[d.onesample$interv == 2], na.rm=T)
M.tc0 <- mean(d.onesample$predicted_meanY[d.onesample$interv == 3], na.rm=T)
M.tc1 <- M.tc1 - M.tc0
M.tc10 <- M.tc1 - M.ob
M.tc1ob <- M.tc0 - M.ob
M.tc0ob
return(c(M.ob, M.tc0, M.tc1, M.tc10, M.tc1ob, M.tc0ob))
}
# tab_bin =============================================================
<- function(results, cat_sim) {
tab_bin <- c(sd(results$t[, 1]),
se sd(results$t[, 2]),
sd(results$t[, 3]),
sd(results$t[, 4]),
sd(results$t[, 5]),
sd(results$t[, 6]))
<- results$t0
mean <- mean - qnorm(0.975) * se
ll <- mean + qnorm(0.975) * se
ul
# OBS = observed, NE = No Exposed, FE = Exposed
<-data.frame(cbind(type=c("OBS",
bootstrap "NE",
"FE",
"FE - NE",
"FE - OBS",
"NE - OBS"),
%>%
mean, se, ll, ul)) mutate_at(.vars = c(2:5), ~as.numeric(as.character(.)))
return(bootstrap)
}
A.2.1 ESTIMATION
<- SEROPOSITIVE ~ work_out + edad + nm_sex
f
library(boot)
# bootstrap
<- boot(data = dat,
results statistic = standardization,
treatment = work_out,
outcome = SEROPOSITIVE,
formula1 = f, #parameters from function
R = 5)
# generating confidence intervals
<- c(sd(results$t[, 1]),
se sd(results$t[, 2]),
sd(results$t[, 3]),
sd(results$t[, 4]))
<- results$t0
mean <- mean - qnorm(0.975) * se
ll <- mean + qnorm(0.975) * se
ul
<-data.frame(cbind(type=c("Observed",
bootstrap "No Treatment",
"Treatment",
"Treatment - No Treatment"),
%>%
mean, se, ll, ul)) mutate_at(.vars = c(2:5), ~as.numeric(as.character(.)))
kable(bootstrap, digits = 5)
type | mean | se | ll | ul |
---|---|---|---|---|
Observed | 0.48771 | 0.01295 | 0.46233 | 0.51309 |
No Treatment | 0.41185 | 0.01626 | 0.37998 | 0.44371 |
Treatment | 0.62928 | 0.01428 | 0.60130 | 0.65727 |
Treatment - No Treatment | 0.21744 | 0.01519 | 0.18766 | 0.24722 |
A.3 Benchmark [RISCA package]
To compute Marginal effects of the treatment (ATE) based on RISCA package
A.3.1 Using logistic regression as the Q-model
library(RISCA)
<- dat %>%
dat_gf ::select(travel_1m, work_out, edad, SEROPOSITIVE, nm_sex) %>%
dplyrmutate(id = seq(1:n()),
time = 0,
travel_1m = as.integer(as.numeric(travel_1m)-1),
work_out = as.numeric(work_out)-1,
SEROPOSITIVE = as.numeric(SEROPOSITIVE)-1,
edad = as.numeric(edad)) %>%
filter(complete.cases(.)) %>%
as.data.frame()
<- glm(formula = f, data=dat_gf,
glm.multi family = binomial(link=logit))
<- gc.logistic(glm.obj=glm.multi, data=dat_gf,
gc.ate1 group = "work_out", effect="ATE",
var.method="bootstrap", iterations=1000,
n.cluster=1)
<-data.frame(cbind(type=c("No Treatment",
gc.ate1.dat "Treatment",
"Treatment - No Treatment"),
bind_rows(gc.ate1$p0, gc.ate1$p1,
$delta)))
gc.ate1
kable(gc.ate1.dat, digits = 5)
type | estimate | ci.lower | ci.upper | std.error | |
---|---|---|---|---|---|
2.5%…1 | No Treatment | 0.42545 | 0.39665 | 0.45410 | NA |
2.5%…2 | Treatment | 0.65798 | 0.60958 | 0.70779 | NA |
2.5%…3 | Treatment - No Treatment | 0.23252 | 0.17215 | 0.28979 | 0.03071 |