A g-computation [Validation Overall]

A.1 Parametric g-formula

Based on Hernan and Robins, 2020 Chapter 13

Zou’s modified Poisson regression code

A.2 NEW FUNCTION

# function to calculate difference in means
standardization <- function(data, indices, treatment, outcome, formula1) {
  
  treatment1 <- enquo(treatment)
  outcome1 <- enquo(outcome)
  
  # create a dataset with 3 copies of each subject
  d <- data[indices, ] # 1st copy: equal to original one`
  d <- d %>% mutate(interv=-1,
                    seqno = 1:n()) # for gee estimation
  d0 <- d # 2nd copy: treatment set to 0, outcome to missing
  d0 <- d0 %>% mutate(interv=0,
                      !!treatment1 := first(levels(droplevels(!!treatment1))),
                      !!outcome1 := NA)
  d1 <- d # 3rd copy: treatment set to 1, outcome to missing
  d1 <- d1 %>% mutate(interv=1,
                      !!treatment1 := last(levels(droplevels(!!treatment1))),
                      !!outcome1 := NA)
  d.onesample <- rbind(d, d0, d1) # combining datasets
  
  # linear model to estimate mean outcome conditional on treatment and confounders
  # parameters are estimated using original observations only (interv= -1)
  # parameter estimates are used to predict mean outcome for observations with set
  # treatment (interv=0 and interv=1)
  fit <- glm(formula = formula1 ,data = d.onesample, family = "binomial")
  d.onesample$predicted_meanY <- predict(fit, d.onesample, type="response")
  
    # Zou, 2004
    # require(geepack)
    # fit <- geeglm(formula = formula1,
    #               data    = d.onesample,
    #               family  = poisson(link = "log"),
    #               id      = seqno,
    #               corstr  = "exchangeable")
    # 
    # d.onesample$predicted_meanY <- predict(fit, d.onesample, type="response")
  
  # estimate mean outcome in each of the groups interv=-1, interv=0, and interv=1
  return(c(
    mean(d.onesample$predicted_meanY[d.onesample$interv == -1], na.rm=T),
    mean(d.onesample$predicted_meanY[d.onesample$interv == 0], na.rm=T),
    mean(d.onesample$predicted_meanY[d.onesample$interv == 1], na.rm=T),
    mean(d.onesample$predicted_meanY[d.onesample$interv == 1], na.rm=T) -
      mean(d.onesample$predicted_meanY[d.onesample$interv == 0], na.rm=T)
  ))
}

A.2.1 ESTIMATION

f <- SEROPOSITIVE ~ work_out1 + edad + sex_male

library(boot)

# bootstrap
results <- boot(data = dat,
                statistic = standardization, 
                treatment = work_out1, outcome = SEROPOSITIVE, formula1 = f, #parameters from function
                R = 5)

# generating confidence intervals
se <- c(sd(results$t[, 1]),
        sd(results$t[, 2]),
        sd(results$t[, 3]),
        sd(results$t[, 4]))
mean <- results$t0
ll <- mean - qnorm(0.975) * se
ul <- mean + qnorm(0.975) * se

bootstrap <-data.frame(cbind(type=c("Observed", "No Treatment", "Treatment", "Treatment - No Treatment"),
                             mean, se, ll, ul)) %>%
  mutate_at(.vars = c(2:5), ~as.numeric(as.character(.)))

kable(bootstrap, digits = 5)
type mean se ll ul
Observed 0.48771 0.01375 0.46076 0.51466
No Treatment 0.41885 0.01957 0.38049 0.45721
Treatment 0.69605 0.02694 0.64325 0.74885
Treatment - No Treatment 0.27720 0.03356 0.21141 0.34298

A.3 RISCA package

To compute Marginal effects of the treatment (ATE) based on RISCA package

A.3.1 Using logistic regression as the Q-model

library(RISCA)

dat_gf <- dat %>%
  dplyr::select(viaje_ult_mes, work_out1, edad, SEROPOSITIVE, sex_male) %>%
  mutate(id = seq(1:n()),
         time = 0,
         viaje_ult_mes = as.integer(as.numeric(viaje_ult_mes)-1),
         work_out1 = as.numeric(work_out1)-1, 
         SEROPOSITIVE = as.integer(SEROPOSITIVE),
         edad = as.numeric(edad)) %>%
  filter(complete.cases(.)) %>%
  as.data.frame()

glm.multi <- glm(formula = f, data=dat_gf, family = binomial(link=logit))

gc.ate1 <- gc.logistic(glm.obj=glm.multi, data=dat_gf, group = "work_out1", effect="ATE",
                       var.method="bootstrap", iterations=1000, n.cluster=1)

gc.ate1.dat <-data.frame(cbind(type=c("No Treatment", "Treatment", "Treatment - No Treatment"),
                             bind_rows(gc.ate1$p0, gc.ate1$p1, gc.ate1$delta)))

kable(gc.ate1.dat, digits = 5)
type estimate ci.lower ci.upper std.error
2.5%…1 No Treatment 0.41906 0.38969 0.44671 NA
2.5%…2 Treatment 0.69606 0.64620 0.74445 NA
2.5%…3 Treatment - No Treatment 0.27701 0.21722 0.33818 0.03015