## 39.3 Neutral Controls

### 39.3.1 Good Predictive Controls

Good for precision

# cleans workspace
rm(list = ls())

# DAG

## specify edges
model <- dagitty("dag{x->y; z->y}")

## coordinates for plotting
coordinates(model) <-  list(
x = c(x=1, z=2, y=2),
y = c(x=1, z=2, y=1))

## ggplot
ggdag(model) + theme_dag()

Controlling for $$Z$$ does not help or hurt identification, but it can increase precision (i.e., reducing SE)

n <- 1e4
z <- rnorm(n)
x <- rnorm(n)
y <- x + 2 * z + rnorm(n)

jtools::export_summs(lm(y ~ x), lm(y ~ x + z))
Table 38.13:
Model 1Model 2
(Intercept)0.01    0.01
(0.02)   (0.01)
x1.00 ***1.01 ***
(0.02)   (0.01)
z       2.00 ***
(0.01)
N10000       10000
R20.17    0.83
*** p < 0.001; ** p < 0.01; * p < 0.05.

Similar coefficients, but smaller SE when controlling for $$Z$$

Another variation is

# cleans workspace
rm(list = ls())

# DAG

## specify edges
model <- dagitty("dag{x->y; x->m; z->m; m->y}")

## coordinates for plotting
coordinates(model) <-  list(
x = c(x=1, z=2, m=2, y=3),
y = c(x=1, z=2, m=1, y=1))

## ggplot
ggdag(model) + theme_dag()

n <- 1e4
z <- rnorm(n)
x <- rnorm(n)
m <- 2 * z + rnorm(n)
y <- x + 2 * m + rnorm(n)

jtools::export_summs(lm(y ~ x), lm(y ~ x + z))
Table 38.14:
Model 1Model 2
(Intercept)-0.00    -0.00
(0.05)   (0.02)
x0.97 ***0.99 ***
(0.05)   (0.02)
z       4.02 ***
(0.02)
N10000       10000
R20.04    0.77
*** p < 0.001; ** p < 0.01; * p < 0.05.

Controlling for $$Z$$ can reduce SE

### 39.3.2 Good Selection Bias

# cleans workspace
rm(list = ls())

# DAG

## specify edges
model <- dagitty("dag{x->y; x->z; z->w; u->w;u->y}")

# set u as latent
latents(model) <- "u"

## coordinates for plotting
coordinates(model) <-  list(
x = c(x=1, z=2, w=3, u=3, y=5),
y = c(x=3, z=2, w=1, u=4, y=3))

## ggplot
ggdag(model) + theme_dag()

2. Controlling for Z can increase SE
3. Controlling for Z while having on W can help identify X
n <- 1e4
x <- rnorm(n)
u <- rnorm(n)
z <- x + rnorm(n)
w <- z + u + rnorm(n)
y <- x - 2*u + rnorm(n)

jtools::export_summs(lm(y ~ x), lm(y ~ x + w), lm(y ~ x + z + w))
Table 38.15:
Model 1Model 2Model 3
(Intercept)0.01    0.01    0.03
(0.02)   (0.02)   (0.02)
x0.99 ***1.65 ***0.99 ***
(0.02)   (0.02)   (0.02)
w       -0.67 ***-1.01 ***
(0.01)   (0.01)
z              1.02 ***
(0.02)
N10000       10000       10000
R20.16    0.39    0.50
*** p < 0.001; ** p < 0.01; * p < 0.05.

# cleans workspace
rm(list = ls())

# DAG

## specify edges
model <- dagitty("dag{x->y; z->x}")

## coordinates for plotting
coordinates(model) <-  list(
x = c(x=1, z=1, y=2),
y = c(x=1, z=2, y=1))

## ggplot
ggdag(model) + theme_dag()

n <- 1e4
z <- rnorm(n)
x <- 2 * z + rnorm(n)
y <- x + 2 * rnorm(n)

jtools::export_summs(lm(y ~ x), lm(y ~ x + z))
Table 38.16:
Model 1Model 2
(Intercept)-0.02    -0.02
(0.02)   (0.02)
x0.99 ***1.00 ***
(0.01)   (0.02)
z       -0.00
(0.04)
N10000       10000
R20.55    0.55
*** p < 0.001; ** p < 0.01; * p < 0.05.

Similar coefficients, but greater SE when controlling for $$Z$$

Another variation is

rm(list = ls())

# DAG

## specify edges
model <- dagitty("dag{x->y; x->z}")

## coordinates for plotting
coordinates(model) <-  list(
x = c(x=1, z=1, y=2),
y = c(x=1, z=2, y=1))

## ggplot
ggdag(model) + theme_dag()

set.seed(1)
n <- 1e4
x <- rnorm(n)
z <- 2 * x + rnorm(n)
y <- x + 2 * rnorm(n)

jtools::export_summs(lm(y ~ x), lm(y ~ x + z))
Table 38.17:
Model 1Model 2
(Intercept)0.02    0.02
(0.02)   (0.02)
x1.00 ***0.99 ***
(0.02)   (0.05)
z       0.00
(0.02)
N10000       10000
R20.20    0.20
*** p < 0.001; ** p < 0.01; * p < 0.05.

Worse SE when controlling for $$Z$$ ($$0.02 < 0.05$$)

# cleans workspace
rm(list = ls())

# DAG

## specify edges
model <- dagitty("dag{x->y; x->z}")

## coordinates for plotting
coordinates(model) <-  list(
x = c(x=1, z=2, y=2),
y = c(x=1, z=2, y=1))

## ggplot
ggdag(model) + theme_dag()

Not all post-treatment variables are bad.

Controlling for $$Z$$ is neutral, but it might hurt the precision of the causal effect.