17.1 emmeans package
Data set is from UCLA seminar where gender
and prog
are categorical
dat <- readRDS("data/exercise.rds") %>%
mutate(prog = factor(prog, labels = c("jog", "swim", "read"))) %>%
mutate(gender = factor(gender, labels = c("male", "female")))
17.1.1 Continuous by continuous
contcont <- lm(loss~hours*effort,data=dat)
summary(contcont)
#>
#> Call:
#> lm(formula = loss ~ hours * effort, data = dat)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -29.52 -10.60 -1.78 11.13 34.51
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 7.79864 11.60362 0.672 0.5017
#> hours -9.37568 5.66392 -1.655 0.0982 .
#> effort -0.08028 0.38465 -0.209 0.8347
#> hours:effort 0.39335 0.18750 2.098 0.0362 *
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 13.56 on 896 degrees of freedom
#> Multiple R-squared: 0.07818, Adjusted R-squared: 0.07509
#> F-statistic: 25.33 on 3 and 896 DF, p-value: 9.826e-16
Simple slopes for a continuous by continuous model
Spotlight analysis (Aiken and West 2005): usually pick 3 values of moderating variable:
Mean Moderating Variable + \(\sigma \times\) (Moderating variable)
Mean Moderating Variable
Mean Moderating Variable - \(\sigma \times\) (Moderating variable)
effar <- round(mean(dat$effort) + sd(dat$effort), 1)
effr <- round(mean(dat$effort), 1)
effbr <- round(mean(dat$effort) - sd(dat$effort), 1)
# specify list of points
mylist <- list(effort = c(effbr, effr, effar))
# get the estimates
emtrends(contcont, ~ effort, var = "hours", at = mylist)
#> effort hours.trend SE df lower.CL upper.CL
#> 24.5 0.261 1.352 896 -2.392 2.91
#> 29.7 2.307 0.915 896 0.511 4.10
#> 34.8 4.313 1.308 896 1.745 6.88
#>
#> Confidence level used: 0.95
# plot
mylist <- list(hours = seq(0, 4, by = 0.4),
effort = c(effbr, effr, effar))
emmip(contcont, effort ~ hours, at = mylist, CIs = TRUE)
# statistical test for slope difference
emtrends(
contcont,
pairwise ~ effort,
var = "hours",
at = mylist,
adjust = "none"
)
#> $emtrends
#> effort hours.trend SE df lower.CL upper.CL
#> 24.5 0.261 1.352 896 -2.392 2.91
#> 29.7 2.307 0.915 896 0.511 4.10
#> 34.8 4.313 1.308 896 1.745 6.88
#>
#> Results are averaged over the levels of: hours
#> Confidence level used: 0.95
#>
#> $contrasts
#> contrast estimate SE df t.ratio p.value
#> effort24.5 - effort29.7 -2.05 0.975 896 -2.098 0.0362
#> effort24.5 - effort34.8 -4.05 1.931 896 -2.098 0.0362
#> effort29.7 - effort34.8 -2.01 0.956 896 -2.098 0.0362
#>
#> Results are averaged over the levels of: hours
The 3 p-values are the same as the interaction term.
For publication, we use
library(ggplot2)
# data
mylist <- list(hours = seq(0, 4, by = 0.4),
effort = c(effbr, effr, effar))
contcontdat <-
emmip(contcont,
effort ~ hours,
at = mylist,
CIs = TRUE,
plotit = FALSE)
contcontdat$feffort <- factor(contcontdat$effort)
levels(contcontdat$feffort) <- c("low", "med", "high")
# plot
p <-
ggplot(data = contcontdat,
aes(x = hours, y = yvar, color = feffort)) +
geom_line()
p1 <-
p +
geom_ribbon(aes(ymax = UCL, ymin = LCL, fill = feffort),
alpha = 0.4)
p1 + labs(x = "Hours",
y = "Weight Loss",
color = "Effort",
fill = "Effort")
17.1.2 Continuous by categorical
# use Female as basline
dat$gender <- relevel(dat$gender, ref = "female")
contcat <- lm(loss ~ hours * gender, data = dat)
summary(contcat)
#>
#> Call:
#> lm(formula = loss ~ hours * gender, data = dat)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -27.118 -11.350 -1.963 10.001 42.376
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 3.335 2.731 1.221 0.222
#> hours 3.315 1.332 2.489 0.013 *
#> gendermale 3.571 3.915 0.912 0.362
#> hours:gendermale -1.724 1.898 -0.908 0.364
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 14.06 on 896 degrees of freedom
#> Multiple R-squared: 0.008433, Adjusted R-squared: 0.005113
#> F-statistic: 2.54 on 3 and 896 DF, p-value: 0.05523
Get simple slopes by each level of the categorical moderator
emtrends(contcat, ~ gender, var = "hours")
#> gender hours.trend SE df lower.CL upper.CL
#> female 3.32 1.33 896 0.702 5.93
#> male 1.59 1.35 896 -1.063 4.25
#>
#> Confidence level used: 0.95
# test difference in slopes
emtrends(contcat, pairwise ~ gender, var = "hours")
#> $emtrends
#> gender hours.trend SE df lower.CL upper.CL
#> female 3.32 1.33 896 0.702 5.93
#> male 1.59 1.35 896 -1.063 4.25
#>
#> Confidence level used: 0.95
#>
#> $contrasts
#> contrast estimate SE df t.ratio p.value
#> female - male 1.72 1.9 896 0.908 0.3639
# which is the same as the interaction term
17.1.3 Categorical by categorical
# relevel baseline
dat$prog <- relevel(dat$prog, ref = "read")
dat$gender <- relevel(dat$gender, ref = "female")
catcat <- lm(loss ~ gender * prog, data = dat)
summary(catcat)
#>
#> Call:
#> lm(formula = loss ~ gender * prog, data = dat)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -19.1723 -4.1894 -0.0994 3.7506 27.6939
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -3.6201 0.5322 -6.802 1.89e-11 ***
#> gendermale -0.3355 0.7527 -0.446 0.656
#> progjog 7.9088 0.7527 10.507 < 2e-16 ***
#> progswim 32.7378 0.7527 43.494 < 2e-16 ***
#> gendermale:progjog 7.8188 1.0645 7.345 4.63e-13 ***
#> gendermale:progswim -6.2599 1.0645 -5.881 5.77e-09 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 6.519 on 894 degrees of freedom
#> Multiple R-squared: 0.7875, Adjusted R-squared: 0.7863
#> F-statistic: 662.5 on 5 and 894 DF, p-value: < 2.2e-16
Simple effects
emcatcat <- emmeans(catcat, ~ gender*prog)
# differences in predicted values
contrast(emcatcat,
"revpairwise",
by = "prog",
adjust = "bonferroni")
#> prog = read:
#> contrast estimate SE df t.ratio p.value
#> male - female -0.335 0.753 894 -0.446 0.6559
#>
#> prog = jog:
#> contrast estimate SE df t.ratio p.value
#> male - female 7.483 0.753 894 9.942 <.0001
#>
#> prog = swim:
#> contrast estimate SE df t.ratio p.value
#> male - female -6.595 0.753 894 -8.762 <.0001
Plot
Bar graph
catcatdat <- emmip(catcat,
gender ~ prog,
CIs = TRUE,
plotit = FALSE)
p <-
ggplot(data = catcatdat,
aes(x = prog, y = yvar, fill = gender)) +
geom_bar(stat = "identity", position = "dodge")
p1 <-
p + geom_errorbar(
position = position_dodge(.9),
width = .25,
aes(ymax = UCL, ymin = LCL),
alpha = 0.3
)
p1 + labs(x = "Program", y = "Weight Loss", fill = "Gender")
References
Aiken, Leona S, and Stephen G West. 2005. “Interaction Effects.” Encyclopedia of Statistics in Behavioral Science.