## 4.2 Confounding and causal order

### 4.2.1 Accounting for confounding and epiphenomenal association.

Here we load a couple necessary packages, load the data, and take a peek at them.

library(tidyverse)

glimpse(estress)
## Observations: 262
## Variables: 7
## $tenure <dbl> 1.67, 0.58, 0.58, 2.00, 5.00, 9.00, 0.00, 2.50, 0.50, 0.58, 9.00, 1.92, 2.00, 1.42, 0.92... ##$ estress  <dbl> 6.0, 5.0, 5.5, 3.0, 4.5, 6.0, 5.5, 3.0, 5.5, 6.0, 5.5, 4.0, 3.0, 2.5, 3.5, 6.0, 4.0, 6.0...
## $affect <dbl> 2.60, 1.00, 2.40, 1.16, 1.00, 1.50, 1.00, 1.16, 1.33, 3.00, 3.00, 2.00, 1.83, 1.16, 1.16... ##$ withdraw <dbl> 3.00, 1.00, 3.66, 4.66, 4.33, 3.00, 1.00, 1.00, 2.00, 4.00, 4.33, 1.00, 5.00, 1.66, 4.00...
## $sex <int> 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1... ##$ age      <int> 51, 45, 42, 50, 48, 48, 51, 47, 40, 43, 57, 36, 33, 29, 33, 48, 40, 45, 37, 42, 54, 57, ...
## $ese <dbl> 5.33, 6.05, 5.26, 4.35, 4.86, 5.05, 3.66, 6.13, 5.26, 4.00, 2.53, 6.60, 5.20, 5.66, 5.66... The lowerCor() function from the psych package makes it easy to estimate the lower triangle of a correlation matrix. psych::lowerCor(estress, digits = 3) ## tenure estrss affect wthdrw sex age ese ## tenure 1.000 ## estress 0.068 1.000 ## affect -0.065 0.340 1.000 ## withdraw -0.035 0.064 0.417 1.000 ## sex -0.003 0.133 0.046 0.050 1.000 ## age 0.266 0.066 -0.018 -0.035 0.083 1.000 ## ese -0.060 -0.158 -0.246 -0.243 0.028 -0.083 1.000 Let’s open brms. library(brms) Recall that if you want the correlations with Bayesian estimation and those sweet Bayesian credible intervals, you set up an intercept-only multivariate model. model1 <- brm(data = estress, family = gaussian, cbind(ese, estress, affect, withdraw) ~ 1, chains = 4, cores = 4) The summary: print(model1, digits = 3) ## Family: MV(gaussian, gaussian, gaussian, gaussian) ## Links: mu = identity; sigma = identity ## mu = identity; sigma = identity ## mu = identity; sigma = identity ## mu = identity; sigma = identity ## Formula: ese ~ 1 ## estress ~ 1 ## affect ~ 1 ## withdraw ~ 1 ## Data: estress (Number of observations: 262) ## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1; ## total post-warmup samples = 4000 ## ## Population-Level Effects: ## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat ## ese_Intercept 5.607 0.061 5.485 5.725 4000 1.001 ## estress_Intercept 4.620 0.092 4.440 4.796 4000 1.000 ## affect_Intercept 1.598 0.045 1.508 1.687 4000 1.001 ## withdraw_Intercept 2.322 0.078 2.174 2.476 4000 0.999 ## ## Family Specific Parameters: ## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat ## sigma_ese 0.954 0.043 0.873 1.044 4000 1.000 ## sigma_estress 1.437 0.064 1.317 1.568 4000 0.999 ## sigma_affect 0.730 0.032 0.668 0.799 4000 0.999 ## sigma_withdraw 1.257 0.055 1.151 1.370 4000 0.999 ## ## Residual Correlations: ## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat ## rescor(ese,estress) -0.154 0.061 -0.272 -0.033 4000 1.000 ## rescor(ese,affect) -0.239 0.059 -0.352 -0.120 4000 0.999 ## rescor(estress,affect) 0.335 0.055 0.228 0.439 4000 1.000 ## rescor(ese,withdraw) -0.237 0.058 -0.347 -0.121 4000 1.000 ## rescor(estress,withdraw) 0.061 0.061 -0.059 0.181 4000 0.999 ## rescor(affect,withdraw) 0.410 0.052 0.306 0.509 4000 1.000 ## ## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample ## is a crude measure of effective sample size, and Rhat is the potential ## scale reduction factor on split chains (at convergence, Rhat = 1). Since we have posteriors for the correlations, why not plot them? Here we use the theme_black() from brms and a color scheme from the viridis package. library(viridis) posterior_samples(model1) %>% select(rescor__ese__estress, rescor__ese__affect, rescor__estress__withdraw) %>% gather() %>% ggplot(aes(x = value, fill = key)) + geom_density(alpha = .85, color = "transparent") + scale_fill_viridis(discrete = T, option = "D", direction = -1, labels = c(expression(paste(rho["ese, affect"])), expression(paste(rho["ese, estress"])), expression(paste(rho["estress, withdraw"]))), guide = guide_legend(label.hjust = 0, label.theme = element_text(size = 15, angle = 0, color = "white"), title.theme = element_blank())) + coord_cartesian(xlim = c(-1, 1)) + labs(title = "Our correlation density plot", x = NULL) + theme_black() + theme(panel.grid = element_blank(), axis.text.y = element_text(hjust = 0), axis.ticks.y = element_blank()) Before we fit our mediation model with brm(), we’ll specify the sub-models, here. y_model <- bf(withdraw ~ 1 + estress + affect + ese + sex + tenure) m_model <- bf(affect ~ 1 + estress + ese + sex + tenure) With y_model and m_model defined, we’re ready to fit. model2 <- brm(data = estress, family = gaussian, y_model + m_model + set_rescor(FALSE), chains = 4, cores = 4) Here’s the summary: print(model2, digits = 3) ## Family: MV(gaussian, gaussian) ## Links: mu = identity; sigma = identity ## mu = identity; sigma = identity ## Formula: withdraw ~ 1 + estress + affect + ese + sex + tenure ## affect ~ 1 + estress + ese + sex + tenure ## Data: estress (Number of observations: 262) ## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1; ## total post-warmup samples = 4000 ## ## Population-Level Effects: ## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat ## withdraw_Intercept 2.751 0.554 1.663 3.823 4000 0.999 ## affect_Intercept 1.784 0.308 1.189 2.375 4000 0.999 ## withdraw_estress -0.093 0.052 -0.194 0.012 4000 1.000 ## withdraw_affect 0.708 0.105 0.508 0.912 4000 1.000 ## withdraw_ese -0.213 0.077 -0.366 -0.064 4000 0.999 ## withdraw_sex 0.126 0.143 -0.153 0.402 4000 0.999 ## withdraw_tenure -0.002 0.011 -0.024 0.019 4000 1.000 ## affect_estress 0.160 0.030 0.102 0.219 4000 0.999 ## affect_ese -0.155 0.045 -0.243 -0.069 4000 0.999 ## affect_sex 0.016 0.088 -0.159 0.190 4000 0.999 ## affect_tenure -0.011 0.006 -0.023 0.001 4000 0.999 ## ## Family Specific Parameters: ## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat ## sigma_withdraw 1.127 0.049 1.036 1.229 4000 1.000 ## sigma_affect 0.671 0.031 0.614 0.735 4000 1.000 ## ## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample ## is a crude measure of effective sample size, and Rhat is the potential ## scale reduction factor on split chains (at convergence, Rhat = 1). In the printout, notice how first within intercepts and then with covariates and sigma, the coefficients are presented as for withdraw first and then affect. Also notice how the coefficients for the covariates are presented in the same order for each criterions. Hopefully that’ll make it easier to sift through the printout. Happily, our coefficients are quite similar to those in Table 4.1. Here are the $$R^2$$ values. bayes_R2(model2) %>% round(digits = 3) ## Estimate Est.Error Q2.5 Q97.5 ## R2_withdraw 0.213 0.039 0.138 0.287 ## R2_affect 0.171 0.038 0.099 0.245 These are also in the same ballpark, but a little higher. Why not glance at their densities? bayes_R2(model2, summary = F) %>% as_tibble() %>% gather() %>% ggplot(aes(x = value, fill = key)) + geom_density(color = "transparent", alpha = .85) + scale_fill_manual(values = c(viridis_pal(option = "A")(7)[c(7, 3)]), labels = c("affect", "withdraw"), guide = guide_legend(title.theme = element_blank())) + scale_y_continuous(NULL, breaks = NULL) + coord_cartesian(xlim = 0:1) + labs(title = expression(paste("The ", italic("R")^{2}, " distributions for model2")), x = NULL) + theme_black() + theme(panel.grid = element_blank()) Here we retrieve the posterior samples, compute the indirect effect, and summarize the indirect effect with quantile(). post <- posterior_samples(model2) %>% mutate(ab = b_affect_estress*b_withdraw_affect) quantile(post$ab, probs = c(.5, .025, .975)) %>%
round(digits = 3)
##   50%  2.5% 97.5%
## 0.112 0.064 0.171

The results are similar to those in the text (p. 127). Here’s what it looks like.

post %>%

ggplot(aes(x = ab)) +
geom_density(color = "transparent",
fill = viridis_pal(option = "A")(7)[5]) +
geom_vline(xintercept = quantile(post\$ab, probs = c(.5, .025, .975)),
color = "black", linetype = c(1, 3, 3)) +
scale_y_continuous(NULL, breaks = NULL) +
xlab(expression(italic("ab"))) +
theme_black() +
theme(panel.grid = element_blank())

Once again, those sweet Bayesian credible intervals get the job done.

Here’s a way to get both the direct effect, $$c'$$ (i.e., b_withdraw_estress), and the total effect, $$c$$ (i.e., $$c'$$ + $$ab$$) of estress on withdraw.

post %>%
mutate(c = b_withdraw_estress + ab) %>%
rename(c_prime = b_withdraw_estress) %>%
select(c_prime, c) %>%
gather() %>%
group_by(key) %>%
summarize(mean = mean(value),
ll = quantile(value, probs = .025),
ul = quantile(value, probs = .975)) %>%
mutate_if(is_double, round, digits = 3)
## # A tibble: 2 x 4
##   key       mean     ll    ul
##   <chr>    <dbl>  <dbl> <dbl>
## 1 c        0.02  -0.086 0.13
## 2 c_prime -0.093 -0.194 0.012

Both appear pretty small. Which leads us to the next section…