8.8 Application

R Packages for mixed models

  • nlme

    • has nested structure

    • flexible for complex design

    • not user-friendly

  • lme4

  • Others

    • Bayesian setting: MCMCglmm, brms

    • For genetics: ASReml

8.8.1 Example 1 (Pulps)

Model:

\[ y_{ij} = \mu + \alpha_i + \epsilon_{ij} \]

where

  • \(i = 1,..,a\) groups for random effect \(\alpha_i\)
  • \(j = 1,...,n\) individuals in each group
  • \(\alpha_i \sim N(0, \sigma^2_\alpha)\) is random effects
  • \(\epsilon_{ij} \sim N(0, \sigma^2_\epsilon)\) is random effects
  • Imply compound symmetry model where the intraclass correlation coefficient is: \(\rho = \frac{\sigma^2_\alpha}{\sigma^2_\alpha + \sigma^2_\epsilon}\)
  • If factor \(a\) does not explain much variation, low correlation within the levels: \(\sigma^2_\alpha \to 0\) then \(\rho \to 0\)
  • If factor \(a\) explain much variation, high correlation within the levels \(\sigma^2_\alpha \to \infty\) hence, \(\rho \to 1\)
data(pulp, package = "faraway")
plot(
    y    = pulp$bright,
    x    = pulp$operator,
    xlab = "Operator",
    ylab = "Brightness"
)

pulp %>% dplyr::group_by(operator) %>% 
    dplyr::summarise(average = mean(bright))
#> # A tibble: 4 × 2
#>   operator average
#>   <fct>      <dbl>
#> 1 a           60.2
#> 2 b           60.1
#> 3 c           60.6
#> 4 d           60.7

lmer application

library(lme4)
mixed_model <-
    lmer(
        # pipe (i..e, | ) denotes random-effect terms
        formula = bright ~ 1 + (1 |operator), 
         data = pulp)
summary(mixed_model)
#> Linear mixed model fit by REML ['lmerMod']
#> Formula: bright ~ 1 + (1 | operator)
#>    Data: pulp
#> 
#> REML criterion at convergence: 18.6
#> 
#> Scaled residuals: 
#>     Min      1Q  Median      3Q     Max 
#> -1.4666 -0.7595 -0.1244  0.6281  1.6012 
#> 
#> Random effects:
#>  Groups   Name        Variance Std.Dev.
#>  operator (Intercept) 0.06808  0.2609  
#>  Residual             0.10625  0.3260  
#> Number of obs: 20, groups:  operator, 4
#> 
#> Fixed effects:
#>             Estimate Std. Error t value
#> (Intercept)  60.4000     0.1494   404.2
coef(mixed_model)
#> $operator
#>   (Intercept)
#> a    60.27806
#> b    60.14088
#> c    60.56767
#> d    60.61340
#> 
#> attr(,"class")
#> [1] "coef.mer"
fixef(mixed_model)   # fixed effects
#> (Intercept) 
#>        60.4
confint(mixed_model) # confidence interval
#>                 2.5 %     97.5 %
#> .sig01       0.000000  0.6178987
#> .sigma       0.238912  0.4821845
#> (Intercept) 60.071299 60.7287012
ranef(mixed_model)   # random effects
#> $operator
#>   (Intercept)
#> a  -0.1219403
#> b  -0.2591231
#> c   0.1676679
#> d   0.2133955
#> 
#> with conditional variances for "operator"
VarCorr(mixed_model) # random effects standard deviation
#>  Groups   Name        Std.Dev.
#>  operator (Intercept) 0.26093 
#>  Residual             0.32596
re_dat = as.data.frame(VarCorr(mixed_model))

# rho based on the above formula
rho = re_dat[1, 'vcov'] / (re_dat[1, 'vcov'] + re_dat[2, 'vcov'])
rho
#> [1] 0.3905354

To Satterthwaite approximation for the denominator df, we use lmerTest

library(lmerTest)
summary(lmerTest::lmer(bright ~ 1 + (1 | operator), pulp))$coefficients
#>             Estimate Std. Error df  t value     Pr(>|t|)
#> (Intercept)     60.4  0.1494434  3 404.1664 3.340265e-08
confint(mixed_model)[3, ]
#>   2.5 %  97.5 % 
#> 60.0713 60.7287

In this example, we can see that the confidence interval computed by confint in lmer package is very close is confint in lmerTest model.

MCMglmm application

under the Bayesian framework

library(MCMCglmm)
mixed_model_bayes <-
    MCMCglmm(
        bright ~ 1,
        random =  ~ operator,
        data = pulp,
        verbose = FALSE
    )
summary(mixed_model_bayes)$solutions
#>             post.mean l-95% CI u-95% CI eff.samp pMCMC
#> (Intercept)  60.40449  60.2055 60.66595     1000 0.001

this method offers the confidence interval slightly more positive than lmer and lmerTest

8.8.1.1 Prediction

# random effects prediction (BLUPs)
ranef(mixed_model)$operator
#>   (Intercept)
#> a  -0.1219403
#> b  -0.2591231
#> c   0.1676679
#> d   0.2133955

# prediction for each categories
fixef(mixed_model) + ranef(mixed_model)$operator 
#>   (Intercept)
#> a    60.27806
#> b    60.14088
#> c    60.56767
#> d    60.61340

# equivalent to the above method
predict(mixed_model, newdata = data.frame(operator = c('a', 'b', 'c', 'd'))) 
#>        1        2        3        4 
#> 60.27806 60.14088 60.56767 60.61340

use bootMer() to get bootstrap-based confidence intervals for predictions.

Another example using GLMM in the context of blocking

Penicillin data

data(penicillin, package = "faraway")
summary(penicillin)
#>  treat    blend       yield   
#>  A:5   Blend1:4   Min.   :77  
#>  B:5   Blend2:4   1st Qu.:81  
#>  C:5   Blend3:4   Median :87  
#>  D:5   Blend4:4   Mean   :86  
#>        Blend5:4   3rd Qu.:89  
#>                   Max.   :97
library(ggplot2)
ggplot(penicillin, aes(
    y     = yield,
    x     = treat,
    shape = blend,
    color = blend
)) + 
    # treatment = fixed effect
    # blend = random effects
    geom_point(size = 3) +
    xlab("Treatment")


library(lmerTest) # for p-values
mixed_model <- lmerTest::lmer(yield ~ treat + (1 | blend),
                              data = penicillin)
summary(mixed_model)
#> Linear mixed model fit by REML. t-tests use Satterthwaite's method [
#> lmerModLmerTest]
#> Formula: yield ~ treat + (1 | blend)
#>    Data: penicillin
#> 
#> REML criterion at convergence: 103.8
#> 
#> Scaled residuals: 
#>     Min      1Q  Median      3Q     Max 
#> -1.4152 -0.5017 -0.1644  0.6830  1.2836 
#> 
#> Random effects:
#>  Groups   Name        Variance Std.Dev.
#>  blend    (Intercept) 11.79    3.434   
#>  Residual             18.83    4.340   
#> Number of obs: 20, groups:  blend, 5
#> 
#> Fixed effects:
#>             Estimate Std. Error     df t value Pr(>|t|)    
#> (Intercept)   84.000      2.475 11.075  33.941 1.51e-12 ***
#> treatB         1.000      2.745 12.000   0.364   0.7219    
#> treatC         5.000      2.745 12.000   1.822   0.0935 .  
#> treatD         2.000      2.745 12.000   0.729   0.4802    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Correlation of Fixed Effects:
#>        (Intr) treatB treatC
#> treatB -0.555              
#> treatC -0.555  0.500       
#> treatD -0.555  0.500  0.500

#The BLUPs for the each blend
ranef(mixed_model)$blend
#>        (Intercept)
#> Blend1   4.2878788
#> Blend2  -2.1439394
#> Blend3  -0.7146465
#> Blend4   1.4292929
#> Blend5  -2.8585859

Examine treatment effect

anova(mixed_model) # p-value based on lmerTest
#> Type III Analysis of Variance Table with Satterthwaite's method
#>       Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
#> treat     70  23.333     3    12  1.2389 0.3387

Since the p-value is greater than 0.05, we can’t reject the null hypothesis that there is no treatment effect.

library(pbkrtest)
# REML is not appropriate for testing fixed effects, it should be ML
full_model <-
    lmer(yield ~ treat + (1 | blend), 
         penicillin, 
         REML = FALSE) 
null_model <- lmer(yield ~ 1 + (1 | blend), 
                   penicillin, 
                   REML = FALSE)

# use  Kenward-Roger approximation for df
KRmodcomp(full_model, null_model) 
#> large : yield ~ treat + (1 | blend)
#> small : yield ~ 1 + (1 | blend)
#>          stat     ndf     ddf F.scaling p.value
#> Ftest  1.2389  3.0000 12.0000         1  0.3387

Since the p-value is greater than 0.05, and consistent with our previous observation, we conclude that we can’t reject the null hypothesis that there is no treatment effect.

8.8.2 Example 2 (Rats)

rats <- read.csv(
    "images/rats.dat",
    header = F,
    sep = ' ',
    col.names = c('Treatment', 'rat', 'age', 'y')
)

# log transformed age
rats$t <- log(1 + (rats$age - 45) / 10) 

We are interested in whether treatment effect induces changes over time.

rat_model <-
    # treatment = fixed effect, rat = random effects
    lmerTest::lmer(y ~ t:Treatment + (1 | rat), 
                   data = rats) 
summary(rat_model)
#> Linear mixed model fit by REML. t-tests use Satterthwaite's method [
#> lmerModLmerTest]
#> Formula: y ~ t:Treatment + (1 | rat)
#>    Data: rats
#> 
#> REML criterion at convergence: 932.4
#> 
#> Scaled residuals: 
#>      Min       1Q   Median       3Q      Max 
#> -2.25574 -0.65898 -0.01163  0.58356  2.88309 
#> 
#> Random effects:
#>  Groups   Name        Variance Std.Dev.
#>  rat      (Intercept) 3.565    1.888   
#>  Residual             1.445    1.202   
#> Number of obs: 252, groups:  rat, 50
#> 
#> Fixed effects:
#>                Estimate Std. Error       df t value Pr(>|t|)    
#> (Intercept)     68.6074     0.3312  89.0275  207.13   <2e-16 ***
#> t:Treatmentcon   7.3138     0.2808 247.2762   26.05   <2e-16 ***
#> t:Treatmenthig   6.8711     0.2276 247.7097   30.19   <2e-16 ***
#> t:Treatmentlow   7.5069     0.2252 247.5196   33.34   <2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Correlation of Fixed Effects:
#>             (Intr) t:Trtmntc t:Trtmnth
#> t:Tretmntcn -0.327                    
#> t:Tretmnthg -0.340  0.111             
#> t:Tretmntlw -0.351  0.115     0.119
anova(rat_model)
#> Type III Analysis of Variance Table with Satterthwaite's method
#>             Sum Sq Mean Sq NumDF  DenDF F value    Pr(>F)    
#> t:Treatment 3181.9  1060.6     3 223.21  734.11 < 2.2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Since the p-value is significant, we can be confident concluding that there is a treatment effect

8.8.3 Example 3 (Agridat)

library(agridat)
library(latticeExtra)
dat <- harris.wateruse
# Compare to Schabenberger & Pierce, fig 7.23
useOuterStrips(
    xyplot(
        water ~ day | species * age,
        dat,
        as.table = TRUE,
        group = tree,
        type = c('p', 'smooth'),
        main = "harris.wateruse 2 species, 2 ages (10 trees each)"
    )
)

Remove outliers

dat <- subset(dat, day!=268)

Plot between age and species

xyplot(
    water ~ day | tree,
    dat,
    subset   = age == "A2" & species == "S2",
    as.table = TRUE,
    type     = c('p', 'smooth'),
    ylab     = "Water use profiles of individual trees",
    main     = "harris.wateruse (Age 2, Species 2)"
)

# Rescale day for nicer output, 
# and convergence issues, add quadratic term
dat <- transform(dat, ti = day / 100)
dat <- transform(dat, ti2 = ti * ti)
# Start with a subgroup: age 2, species 2
d22 <- droplevels(subset(dat, age == "A2" & species == "S2"))

lme function from nlme package

library(nlme)

## We use pdDiag() to get uncorrelated random effects
m1n <- lme(
    water ~ 1 + ti + ti2,
    #intercept, time and time-squared = fixed effects
    data = d22,
    na.action = na.omit,
    random = list(tree = pdDiag(~ 1 + ti + ti2)) 
    # random intercept, time 
    # and time squared per tree = random effects
)
ranef(m1n)
#>     (Intercept)            ti           ti2
#> T04   0.1985796  1.609864e-09  4.990101e-10
#> T05   0.3492827  2.487690e-10 -4.845287e-11
#> T19  -0.1978989 -7.681202e-10 -1.961453e-10
#> T23   0.4519003 -3.270426e-10 -2.413583e-10
#> T38  -0.6457494 -1.608770e-09 -3.298010e-10
#> T40   0.3739432  3.264705e-10 -2.543109e-11
#> T49   0.8620648  9.021831e-10 -5.402247e-12
#> T53  -0.5655049 -8.279040e-10 -4.579291e-11
#> T67  -0.4394623 -3.485113e-10  2.147434e-11
#> T71  -0.3871552  7.930610e-10  3.718993e-10
fixef(m1n)
#> (Intercept)          ti         ti2 
#>  -10.798799   12.346704   -2.838503
summary(m1n)
#> Linear mixed-effects model fit by REML
#>   Data: d22 
#>        AIC     BIC    logLik
#>   276.5142 300.761 -131.2571
#> 
#> Random effects:
#>  Formula: ~1 + ti + ti2 | tree
#>  Structure: Diagonal
#>         (Intercept)           ti          ti2  Residual
#> StdDev:   0.5187869 1.438333e-05 3.864019e-06 0.3836614
#> 
#> Fixed effects:  water ~ 1 + ti + ti2 
#>                  Value Std.Error  DF   t-value p-value
#> (Intercept) -10.798799 0.8814666 227 -12.25094       0
#> ti           12.346704 0.7827112 227  15.77428       0
#> ti2          -2.838503 0.1720614 227 -16.49704       0
#>  Correlation: 
#>     (Intr) ti    
#> ti  -0.979       
#> ti2  0.970 -0.997
#> 
#> Standardized Within-Group Residuals:
#>         Min          Q1         Med          Q3         Max 
#> -3.07588246 -0.58531056  0.01210209  0.65402695  3.88777402 
#> 
#> Number of Observations: 239
#> Number of Groups: 10

lmer function from lme4 package

m1lmer <-
    lmer(water ~ 1 + ti + ti2 + (ti + ti2 ||
                                     tree),
         data = d22,
         na.action = na.omit)
ranef(m1lmer)
#> $tree
#>     (Intercept) ti ti2
#> T04   0.1985796  0   0
#> T05   0.3492827  0   0
#> T19  -0.1978989  0   0
#> T23   0.4519003  0   0
#> T38  -0.6457494  0   0
#> T40   0.3739432  0   0
#> T49   0.8620648  0   0
#> T53  -0.5655049  0   0
#> T67  -0.4394623  0   0
#> T71  -0.3871552  0   0
#> 
#> with conditional variances for "tree"

Notes:

  • || double pipes= uncorrelated random effects

  • To remove the intercept term:

    • (0+ti|tree)

    • (ti-1|tree)

fixef(m1lmer)
#> (Intercept)          ti         ti2 
#>  -10.798799   12.346704   -2.838503
m1l <-
    lmer(water ~ 1 + ti + ti2 
         + (1 | tree) + (0 + ti | tree) 
         + (0 + ti2 | tree), data = d22)
ranef(m1l)
#> $tree
#>     (Intercept) ti ti2
#> T04   0.1985796  0   0
#> T05   0.3492827  0   0
#> T19  -0.1978989  0   0
#> T23   0.4519003  0   0
#> T38  -0.6457494  0   0
#> T40   0.3739432  0   0
#> T49   0.8620648  0   0
#> T53  -0.5655049  0   0
#> T67  -0.4394623  0   0
#> T71  -0.3871552  0   0
#> 
#> with conditional variances for "tree"
fixef(m1l)
#> (Intercept)          ti         ti2 
#>  -10.798799   12.346704   -2.838503

To include structured covariance terms, we can use the following way

m2n <- lme(
    water ~ 1 + ti + ti2,
    data = d22,
    random = ~ 1 | tree,
    cor = corExp(form =  ~ day | tree),
    na.action = na.omit
)
ranef(m2n)
#>     (Intercept)
#> T04   0.1929971
#> T05   0.3424631
#> T19  -0.1988495
#> T23   0.4538660
#> T38  -0.6413664
#> T40   0.3769378
#> T49   0.8410043
#> T53  -0.5528236
#> T67  -0.4452930
#> T71  -0.3689358
fixef(m2n)
#> (Intercept)          ti         ti2 
#>  -11.223310   12.712094   -2.913682
summary(m2n)
#> Linear mixed-effects model fit by REML
#>   Data: d22 
#>        AIC      BIC   logLik
#>   263.3081 284.0911 -125.654
#> 
#> Random effects:
#>  Formula: ~1 | tree
#>         (Intercept)  Residual
#> StdDev:   0.5154042 0.3925777
#> 
#> Correlation Structure: Exponential spatial correlation
#>  Formula: ~day | tree 
#>  Parameter estimate(s):
#>    range 
#> 3.794624 
#> Fixed effects:  water ~ 1 + ti + ti2 
#>                  Value Std.Error  DF   t-value p-value
#> (Intercept) -11.223310 1.0988725 227 -10.21348       0
#> ti           12.712094 0.9794235 227  12.97916       0
#> ti2          -2.913682 0.2148551 227 -13.56115       0
#>  Correlation: 
#>     (Intr) ti    
#> ti  -0.985       
#> ti2  0.976 -0.997
#> 
#> Standardized Within-Group Residuals:
#>         Min          Q1         Med          Q3         Max 
#> -3.04861039 -0.55703950  0.00278101  0.62558762  3.80676991 
#> 
#> Number of Observations: 239
#> Number of Groups: 10