GLMM FAQ

linear mixed models

## search the names of all packages available from the CRAN mirror for ones
##  that look like (generalized) linear mixed model packages: the pattern is
##  "l", an optional single character, "m", then "m" or "e", then any one
##  character other than "t"; value=TRUE returns the matching names
grep("l.?m[me][^t]",rownames(available.packages(repos = 'https://cran.us.r-project.org')),value=TRUE)

 [1] "blmeco"         "buildmer"       "cellVolumeDist"
 [4] "climextRemes"   "elementR"       "glmertree"     
 [7] "glmmboot"       "glmmEP"         "glmmfields"    
[10] "glmmLasso"      "glmmML"         "glmmSeq"       
[13] "glmmsr"         "glmmTMB"        "lamme"         
[16] "lme4"           "lmec"           "lmeInfo"       
[19] "lmem.qtler"     "lmeNB"          "lmeNBBayes"    
[22] "lmeresampler"   "lmerTest"       "lmeSplines"    
[25] "lmeVarComp"     "lmmot"          "lmmpar"        
[28] "lrmest"         "lsmeans"        "mailmerge"     
[31] "mlmm.gwas"      "mlmmm"          "mvglmmRank"    
[34] "nlmeODE"        "nlmeU"          "palmerpenguins"
[37] "tlmec"          "vagalumeR"     
library(lme4)
library(equatiomatic)
## linear mixed model for sleepstudy: fixed effect of Days plus a random
##  intercept and random Days slope for each Subject
fm1 <- lmer(Reaction ~ Days + (Days|Subject), sleepstudy)
## presumably renders the fitted model as an equation -- see equatiomatic docs
equatiomatic::extract_eq(fm1)

## Overdispersion check based on the sum of squared Pearson residuals.
##  model: a fitted model that supports df.residual() and
##         residuals(type="pearson")
##  returns a named numeric vector:
##    chisq = sum of squared Pearson residuals
##    ratio = chisq divided by the residual degrees of freedom
##    rdf   = residual degrees of freedom
##    p     = upper-tail chi-squared p-value of chisq on rdf df
overdisp_fun <- function(model) {
    pearson_resid <- residuals(model, type = "pearson")
    resid_df <- df.residual(model)
    chisq_stat <- sum(pearson_resid^2)
    c(chisq = chisq_stat,
      ratio = chisq_stat / resid_df,
      rdf = resid_df,
      p = pchisq(chisq_stat, df = resid_df, lower.tail = FALSE))
}

library(lme4)
library(glmmTMB)

## fix the random-number seed so the simulated data are reproducible
set.seed(101)  
## 1000 observations: a uniform covariate x and a 10-level grouping factor f
d <- data.frame(x=runif(1000),
                f=factor(sample(1:10,size=1000,replace=TRUE)))
## simulate a Poisson response from the formula ~x+(1|f) with parameters
##  theta=1 and beta=c(0,2); [[1]] keeps the first element of the result and
##  suppressMessages() hides any messages emitted along the way
suppressMessages(d$y <- simulate(~x+(1|f), family=poisson,
                          newdata=d,
                          newparams=list(theta=1,beta=c(0,2)))[[1]])
## fit the matching Poisson GLMM and compute its overdispersion statistics
m1 <- glmer(y~x+(1|f),data=d,family=poisson)
overdisp_fun(m1)

       chisq        ratio          rdf            p 
1035.9966325    1.0391140  997.0000000    0.1902294 
## same model formula and data fitted with glmmTMB, then the same
##  overdispersion check for comparison
m2 <- glmmTMB(y~x+(1|f),data=d,family="poisson")
overdisp_fun(m2)

       chisq        ratio          rdf            p 
1035.9961394    1.0391135  997.0000000    0.1902323 
## extract summary table; you may also be able to do this via
##  broom::tidy or broom.mixed::tidy
## Quasi-likelihood adjustment of a coefficient table.
##  model: fitted model (only used to compute the default ctab and phi)
##  ctab:  coefficient table with columns "Estimate", "Std. Error",
##         "z value" and "Pr(>|z|)"
##  phi:   overdispersion factor; standard errors are multiplied by sqrt(phi)
##  returns a data frame with the standard errors inflated and the z
##  statistics and two-sided normal p-values recomputed from them
quasi_table <- function(model,ctab=coef(summary(model)),
                           phi=overdisp_fun(model)["ratio"]) {
    adjusted <- as.data.frame(ctab)
    inflated_se <- adjusted[["Std. Error"]] * sqrt(phi)
    z_stat <- adjusted[["Estimate"]] / inflated_se
    adjusted[["Std. Error"]] <- inflated_se
    adjusted[["z value"]] <- z_stat
    adjusted[["Pr(>|z|)"]] <- 2 * pnorm(abs(z_stat), lower.tail = FALSE)
    adjusted
}
printCoefmat(quasi_table(m1),digits=3)

            Estimate Std. Error z value Pr(>|z|)    
(Intercept)   0.2277     0.2700    0.84      0.4    
x             2.0640     0.0528   39.11   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## to use this with glmmTMB, we need to separate out the
##  conditional component of the summary
printCoefmat(quasi_table(m2,
                         ctab=coef(summary(m2))[["cond"]]),
             digits=3)

            Estimate Std. Error z value Pr(>|z|)    
(Intercept)   0.2277     0.2700    0.84      0.4    
x             2.0640     0.0528   39.09   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(broom.mixed)
library(dplyr)
## Tidy-table version of the quasi-likelihood adjustment.
##  model:      fitted model understood by tidy(effects="fixed")
##  phi:        overdispersion factor; standard errors are scaled by sqrt(phi)
##  conf.level: NOTE(review): accepted but not used anywhere in the body
##  returns the fixed-effects table with std.error inflated and statistic
##  and p.value (two-sided, normal) recomputed from the inflated errors
tidy_quasi <- function(model, phi=overdisp_fun(model)["ratio"],
                       conf.level=0.95) {
    fixed_terms <- tidy(model, effects="fixed")
    rescaled <- mutate(fixed_terms,
                       std.error=std.error*sqrt(phi),
                       statistic=estimate/std.error,
                       p.value=2*pnorm(abs(statistic), lower.tail=FALSE))
    rescaled
}
tidy_quasi(m1)

# A tibble: 2 x 6
  effect term        estimate std.error statistic p.value
  <chr>  <chr>          <dbl>     <dbl>     <dbl>   <dbl>
1 fixed  (Intercept)    0.228    0.270      0.843   0.399
2 fixed  x              2.06     0.0528    39.1     0    
tidy_quasi(m2)

# A tibble: 2 x 7
  effect component term        estimate std.error statistic p.value
  <chr>  <chr>     <chr>          <dbl>     <dbl>     <dbl>   <dbl>
1 fixed  cond      (Intercept)    0.228    0.270      0.843   0.399
2 fixed  cond      x              2.06     0.0528    39.1     0    
library(sos)
findFn("corStruct")

Separation: TRUE 
Existence of maximum likelihood estimates
(Intercept)      height 
        Inf         Inf 
0: finite value, Inf: infinity, -Inf: -infinity
modelfit.all <- lme4::allFit(model)
ss <- summary(modelfit.all)

## random-effects parameter vector of the fitted model
theta <- getME(model,"theta")
## diagonal elements can be picked out because they are the ones fitted
##  with a lower bound of zero ...
diag.element <- getME(model,"lower")==0
## TRUE if any of those elements is below the 1e-5 tolerance used here,
##  i.e. numerically at the zero boundary
any(theta[diag.element]<1e-5)

library(blme)
## NOTE(review): y, group and V are not defined in this snippet.  Presumably
##  resid.prior = point(1.0) pins the residual scale at 1 and cov.prior = NULL
##  switches off the covariance prior -- confirm against the blme docs
blmer(formula = y ~ 1 + (1 | group), weights = V,
      resid.prior = point(1.0), cov.prior = NULL)
## nlme variant: presumably control=list(sigma=...) fixes the residual
##  standard deviation at the given (here near-zero) value -- see ?lmeControl
nlme::lme(Reaction~Days,random=~1|Subject,
          data=lme4::sleepstudy,
          control=list(sigma=1e-8))

library(nlme)
## Denominator degrees of freedom that lme() assigns to each fixed-effect
##  term when the model is fitted to the Orthodont data.
##  formula: fixed-effects formula
##  random:  random-effects formula
##  returns a vector of the "denDF" column of anova(), named by term
lmeDF <- function(formula=distance~age,random=~1|Subject) {
    fit <- lme(formula,random,data=Orthodont)
    aov_tab <- anova(fit)
    den_df <- aov_tab[,"denDF"]
    names(den_df) <- rownames(aov_tab)
    den_df
}
lmeDF()

(Intercept)         age 
         80          80 
lmeDF(random=~age|Subject) ## wrong!

(Intercept)         age 
         80          80 
## Locate the user's home directory: taken from USERPROFILE on Windows and
##  from HOME elsewhere.  A plain if/else is used because the test is a single
##  value; ifelse() is meant for vectors and would also carry the "sysname"
##  name from Sys.info() over onto the result.
home <- if (Sys.info()[["sysname"]] == "Windows") {
  Sys.getenv("USERPROFILE")
} else {
  Sys.getenv("HOME")
}
## replace backslashes with forward slashes so the path has one form everywhere
home <- gsub("\\\\", "/", home)

## machine-specific locations of the course data and of the website sources
data_dir <- file.path(
  home,
  "Google Drive (basil.okola@student.uhasselt.be)",
  "MSc. Stats Hasselt",
  "y1 sem2",
  "Multivariate and hierarchical data",
  "sample size calculation"
)
site_dir <- file.path(home, "Distill websites", "_posts")
site_dir2 <- file.path(home, "Distill websites")
## load the calcDenDF() helper that ships with this post
source(file.path(site_dir,"2021-04-28-glmm-faq", "calcDenDF.R"))
calcDenDF(~age,"Subject",nlme::Orthodont)

(Intercept)         age 
         80          80 
calcDenDF(~age,data=nlme::Orthodont,random=~1|Subject)

(Intercept)         age 
         80          80 
calcDenDF(~age,data=nlme::Orthodont,random=~age|Subject) ## off by 1

(Intercept)         age 
         81          25 
library(lme4)
## m2: random intercept and random Days slope entered as separate terms,
##  fitted with REML=FALSE so the fits can be compared by likelihood
m2 <- lmer(Reaction~Days+(1|Subject)+(0+Days|Subject),sleepstudy,REML=FALSE)
## m1: same model without the random-slope term
m1 <- update(m2,.~Days+(1|Subject))
## m0: no random effects at all (ordinary linear model)
m0 <- lm(Reaction~Days,sleepstudy)
anova(m2,m1,m0) ## two sequential tests

Data: sleepstudy
Models:
m0: Reaction ~ Days
m1: Reaction ~ Days + (1 | Subject)
m2: Reaction ~ Days + (1 | Subject) + (0 + Days | Subject)
   npar    AIC    BIC  logLik deviance   Chisq Df Pr(>Chisq)    
m0    3 1906.3 1915.9 -950.15   1900.3                          
m1    4 1802.1 1814.8 -897.04   1794.1 106.214  1  < 2.2e-16 ***
m2    5 1762.0 1778.0 -876.00   1752.0  42.075  1  8.782e-11 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(RLRsim)
## compare m0 and m1
exactLRT(m1,m0)

    simulated finite sample distribution of LRT. (p-value based
    on 10000 simulated values)

data:  
LRT = 106.21, p-value < 2.2e-16
## compare m1 and m2
## refit the full model with REML=TRUE for the restricted LRT
mA <- update(m2,REML=TRUE)
## null model: full model minus the random Days slope (the term under test)
m0B <- update(mA, . ~ . - (0 + Days|Subject))
## model containing only the random effect under test (random intercept removed)
m.slope  <- update(mA, . ~ . - (1|Subject))
exactRLRT(m0=m0B,m=m.slope,mA=mA)

    simulated finite sample distribution of RLRT.

    (p-value based on 10000 simulated values)

data:  
RLRT = 42.796, p-value < 2.2e-16
(pb <- pbkrtest::PBmodcomp(m2,m1,seed=101))

Bootstrap test; time: 53.98 sec; samples: 1000; extremes: 0;
Requested samples: 1000 Used samples: 500 Extremes: 0
large : Reaction ~ Days + (1 | Subject) + (0 + Days | Subject)
Reaction ~ Days + (1 | Subject)
         stat df   p.value    
LRT    42.075  1 8.782e-11 ***
PBtest 42.075     0.001996 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(nlme) 
## Linear mixed model for orthodontic distance: age-by-sex fixed effects with
##  a random intercept and random age slope for each Subject.
fm1 <- lme(distance ~ age*Sex, random = ~ 1 + age | Subject,
           data = Orthodont) 
plot(Orthodont,asp="fill") ## plot responses by individual

## Prediction grid.  expand.grid() turns a character vector into a factor
##  whose levels follow *order of appearance*, which need not match
##  levels(Orthodont$Sex); with mismatched levels model.matrix() below would
##  build its dummy column for the wrong sex.  Supplying a factor that already
##  carries the data's own levels keeps the design matrix aligned with the
##  fitted coefficients whatever order the values are listed in.
newdat <- expand.grid(age=c(8,10,12,14),
                      Sex=factor(c("Female","Male"),
                                 levels=levels(Orthodont$Sex)))
## population-level (level = 0) predictions, i.e. fixed effects only
newdat$pred <- predict(fm1, newdat, level = 0)

## [-2] drops response from formula
Designmat <- model.matrix(formula(fm1)[-2], newdat)
## variance of each prediction from fixed-effect uncertainty: diag(X V X')
predvar <- diag(Designmat %*% vcov(fm1) %*% t(Designmat)) 
newdat$SE <- sqrt(predvar) 
## SE2 additionally includes the residual variance
newdat$SE2 <- sqrt(predvar+fm1$sigma^2)

library(ggplot2) 
## dodge the two sexes horizontally so their intervals do not overlap
pd <- position_dodge(width=0.4) 
## base plot: predicted value against age, coloured by Sex
g0 <- ggplot(newdat,aes(x=age,y=pred,colour=Sex))+ 
   geom_point(position=pd)
cmult <- 2  ## could use 1.96 instead
## confidence intervals (pred +/- cmult*SE)
g0 + geom_linerange(aes(ymin=pred-cmult*SE,ymax=pred+cmult*SE), position=pd)

## prediction intervals 
g0 + geom_linerange(aes(ymin=pred-cmult*SE2,ymax=pred+cmult*SE2), position=pd) 

library(lme4)
library(ggplot2)
## use the MEMSS copy of the Orthodont data for this example
data("Orthodont",package="MEMSS")
## age-by-sex fixed effects with a random intercept and age slope per Subject
fm1 <- lmer(
  formula = distance ~ age*Sex + (age|Subject)
  , data = Orthodont
)
## prediction grid; a placeholder distance column is included because the
##  model.matrix() call below uses the full model terms, response included
newdat <- expand.grid(
  age=c(8,10,12,14)
  , Sex=c("Female","Male")
  , distance = 0
)
## re.form=NA: predictions from the fixed effects only
newdat$distance <- predict(fm1,newdat,re.form=NA)
mm <- model.matrix(terms(fm1),newdat)
## or newdat$distance <- mm %*% fixef(fm1)
## variance of each prediction from fixed-effect uncertainty: diag(X V X')
pvar1 <- diag(mm %*% tcrossprod(vcov(fm1),mm))
## add the first element of the Subject random-effect covariance matrix
tvar1 <- pvar1+VarCorr(fm1)$Subject[1]  ## must be adapted for more complex models
cmult <- 2 ## could use 1.96
## plo/phi: limits from fixed-effect uncertainty only;
## tlo/thi: limits that also include the random-effect variance term
newdat <- data.frame(
  newdat
  , plo = newdat$distance-cmult*sqrt(pvar1)
  , phi = newdat$distance+cmult*sqrt(pvar1)
  , tlo = newdat$distance-cmult*sqrt(tvar1)
  , thi = newdat$distance+cmult*sqrt(tvar1)
)
#plot confidence
g0 <- ggplot(newdat, aes(x=age, y=distance, colour=Sex))+geom_point()
g0 + geom_pointrange(aes(ymin = plo, ymax = phi))+
    labs(title="CI based on fixed-effects uncertainty ONLY")

#plot prediction
g0 + geom_pointrange(aes(ymin = tlo, ymax = thi))+
    labs(title="CI based on FE uncertainty + RE variance")

rm("Orthodont") ## clean up

library(glmmTMB)
data(Orthodont,package="nlme")
## age-by-sex fixed effects with a random intercept and age slope per Subject
fm2 <- glmmTMB(distance ~ age*Sex + (age | Subject),
                data = Orthodont,
                family="gaussian")

## make prediction data frame
## Sex must carry the same factor levels as the fitted data: expand.grid()
##  would otherwise order the levels by appearance, model.matrix() would then
##  code the dummy variable for the other sex, and the matrix products below
##  would silently attach each sex's predictions and SEs to the wrong rows.
newdat <- expand.grid(age=c(8,10,12,14),
                      Sex=factor(c("Female","Male"),
                                 levels=levels(Orthodont$Sex)))
## design matrix (fixed effects)
mm <- model.matrix(delete.response(terms(fm2)),newdat)
## linear predictor (for GLMMs, back-transform this with the
##  inverse link function, e.g. plogis() for binomial, beta;
##  exp() for Poisson, negative binomial)
newdat$distance <- drop(mm %*% fixef(fm2)[["cond"]])
## variance of each prediction from fixed-effect uncertainty: diag(X V X')
predvar <- diag(mm %*% vcov(fm2)[["cond"]] %*% t(mm))
newdat$SE <- sqrt(predvar) 
## SE2 additionally includes the residual variance
newdat$SE2 <- sqrt(predvar+sigma(fm2)^2)

library(ggplot2);  theme_set(theme_bw())
## dodge the two sexes horizontally so their intervals do not overlap
pd <- position_dodge(width=0.4)
## raw data: point size shows how many observations share a location.
##  after_stat(n) replaces the deprecated ..n.. notation for referring to a
##  variable computed by the stat.
g0 <- ggplot(Orthodont,aes(x=age,y=distance,colour=Sex))+
    stat_sum(alpha=0.2,aes(size=after_stat(n)))+
    scale_size_continuous(breaks=1:4,range=c(2,5))
## overlay the predicted values as lines and triangles
g1 <- g0+geom_line(data=newdat,position=pd)+
    geom_point(data=newdat,shape=17,size=3,position=pd)
## confidence intervals
g2 <- g1 + geom_linerange(data=newdat,
                          aes(ymin=distance-2*SE,ymax=distance+2*SE),
                          lwd=2, position=pd)
## prediction intervals 
g2 + geom_linerange(data=newdat,
                    aes(ymin=distance-2*SE2,ymax=distance+2*SE2), position=pd)

library(lme4)
## random intercept and Days slope per Subject
fm1 <- lmer(Reaction ~ Days + (Days|Subject), sleepstudy)
## extract the random effects together with their conditional variances
cV <- ranef(fm1, condVar = TRUE)   

## the variances are stored in the "postVar" attribute of the first (here
##  only) grouping factor's table; it is indexed below as a 3-D array
ranvar <- attr(cV[[1]], "postVar")

## conditional standard deviations for the first slice of the array
sqrt(diag(ranvar[,,1]))

[1] 12.070857  2.304839
## size of the third dimension of ranvar (one slice per group)
ng <- dim(ranvar)[3]
## size of the second dimension (one row/column per random-effect term)
np <- dim(ranvar)[2]
## Pull out the diagonal element ranvar[p, p, g] for every term p and every
##  group g.  Each row of the index matrix is (p, p, g), with p cycling
##  fastest, so the third column has to run over *all* groups:
##  rep(seq_len(ng), each=np).  (Using rep(ng, each=np) would index only the
##  last group and fill every row of mm with that group's variances.)
diag.index <- cbind(rep(seq_len(np),ng),
                    rep(seq_len(np),ng),
                    rep(seq_len(ng),each=np))
## one row per group, one column per term
mm <- matrix(ranvar[diag.index],
       byrow=TRUE,
       nrow=ng)

## variance of the fixed intercept plus the conditional variance of each
##  group's random intercept
vcov(fm1)[1,1]+mm[,1]

 [1] 192.2807 192.2807 192.2807 192.2807 192.2807 192.2807 192.2807
 [8] 192.2807 192.2807 192.2807 192.2807 192.2807 192.2807 192.2807
[15] 192.2807 192.2807 192.2807 192.2807
library(sos); findFn("{power analysis} mixed simulation")

## Crude R-squared for a fitted model: the R-squared of a linear regression
##  of the observed response on the model's fitted values.
##  m: fitted model supporting model.frame() and fitted()
##  returns a single number
r2.corr.mer <- function(m) {
   observed <- model.response(model.frame(m))
   predicted <- fitted(m)
   summary(lm(observed ~ predicted))$r.squared
}

## one minus the ratio of residual variance to response variance
##  (m is a previously fitted model, not defined in this snippet)
1-var(residuals(m))/var(model.response(model.frame(m)))

## squared correlation between the observed response and the predictions
cor(model.response(model.frame(m)),predict(m,type="response"))^2

## n.b. have to set up a 3D warn array first ...
## Evaluate fun() for cell [k,i,j] while recording problems in warn instead
##  of stopping.  fun, nvec, tauvec, warn, NA_ans and the indices k, i, j
##  come from the surrounding code, which is not shown here.
withCallingHandlers(tryCatch(fun(n=nvec[j],tau=tauvec[i],...),
                ## error: store the message and return NA_ans in place of
                ##  the result
                error = function(e) {
                  warn[k,i,j] <<- paste("ERROR:",e$message)
              NA_ans}),
               ## warning: store the message, then muffle the warning so
               ##  that evaluation continues
               warning = function(w) {
                  warn[k,i,j] <<- w$message
                  invokeRestart("muffleWarning")
             })

sessionInfo()

R version 4.0.2 (2020-06-22)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 19042)

Matrix products: default

locale:
[1] LC_COLLATE=English_United States.1252 
[2] LC_CTYPE=English_United States.1252   
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C                          
[5] LC_TIME=English_United States.1252    

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods  
[7] base     

other attached packages:
 [1] ggplot2_3.3.3      RLRsim_3.1-6       nlme_3.1-148      
 [4] dplyr_1.0.3        broom.mixed_0.2.6  glmmTMB_1.0.2.1   
 [7] equatiomatic_0.2.0 lme4_1.1-26        Matrix_1.2-18     
[10] Cairo_1.5-12.2     pander_0.6.3       knitr_1.31        

loaded via a namespace (and not attached):
 [1] Rcpp_1.0.6        mvtnorm_1.1-1     lattice_0.20-41  
 [4] tidyr_1.1.2       zoo_1.8-8         assertthat_0.2.1 
 [7] digest_0.6.27     utf8_1.1.4        R6_2.5.0         
[10] plyr_1.8.6        backports_1.2.1   evaluate_0.14    
[13] coda_0.19-4       highr_0.8         pillar_1.5.1     
[16] rlang_0.4.10      multcomp_1.4-15   minqa_1.2.4      
[19] rstudioapi_0.13   nloptr_1.2.2.2    jquerylib_0.1.3  
[22] rmarkdown_2.7     labeling_0.4.2    splines_4.0.2    
[25] statmod_1.4.35    stringr_1.4.0     TMB_1.7.18       
[28] munsell_0.5.0     broom_0.7.5       compiler_4.0.2   
[31] xfun_0.20         pkgconfig_2.0.3   mgcv_1.8-31      
[34] htmltools_0.5.1.1 downlit_0.2.1     tidyselect_1.1.0 
[37] tibble_3.1.0      codetools_0.2-16  fansi_0.4.2      
[40] withr_2.4.0       crayon_1.3.4      MASS_7.3-53.1    
[43] grid_4.0.2        gtable_0.3.0      jsonlite_1.7.2   
[46] xtable_1.8-4      lifecycle_0.2.0   DBI_1.1.1        
[49] magrittr_2.0.1    scales_1.1.1      estimability_1.3 
[52] cli_2.2.0         stringi_1.5.3     farver_2.0.3     
[55] reshape2_1.4.4    bslib_0.2.4       ellipsis_0.3.1   
[58] generics_0.1.0    vctrs_0.3.6       boot_1.3-25      
[61] sandwich_3.0-0    distill_1.2       TH.data_1.0-10   
[64] tools_4.0.2       glue_1.4.2        purrr_0.3.4      
[67] emmeans_1.5.3     survival_3.1-12   yaml_2.2.1       
[70] colorspace_2.0-0  sass_0.3.1       
Bolker (2021, April 28). Basil Okola: GLMM FAQ. Retrieved from https://bokola214.netlify.app/posts/2021-04-28-glmm-faq/
@misc{bolker2021glmm,
  author = {Bolker, Ben},
  title = {Basil Okola: GLMM FAQ},
  url = {https://bokola214.netlify.app/posts/2021-04-28-glmm-faq/},
  year = {2021}
}

formula	meaning
`(1\|group)`	random group intercept
`(x\|group)` = `(1+x\|group)`	random slope of x within group with correlated intercept
`(0+x\|group)` = `(-1+x\|group)`	random slope of x within group: no variation in intercept
`(1\|group) + (0+x\|group)`	uncorrelated random intercept and random slope within group
`(1\|site/block)` = `(1\|site)+(1\|site:block)`	intercept varying among sites and among blocks within sites (nested random effects)
`site+(1\|site:block)`	fixed effect of sites plus random variation in intercept among blocks within sites
`(x\|site/block)` = `(x\|site)+(x\|site:block)` = `(1 + x\|site)+(1+x\|site:block)`	slope and intercept varying among sites and among blocks within sites
`(x1\|site)+(x2\|block)`	two different effects, varying at different levels
`x*site+(x\|site:block)`	fixed effect variation of slope and intercept varying among sites and random variation of slope and intercept among blocks within sites
`(1\|group1)+(1\|group2)`	intercept varying among crossed random effects (e.g. site, year)

equation	formula
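$\beta_0 + \beta_{1}X_{i} + e_{si}$	n/a (Not a mixed-effects model)
$(\beta_0 + b_{S,0s}) + \beta_{1}X_{i} + e_{si}$	`~ X + (1\|Subject)`
$(\beta_0 + b_{S,0s}) + (\beta_{1} + b_{S,1s})X_{i} + e_{si}$	`~ X + (1 + X\|Subject)`
$(\beta_0 + b_{S,0s} + b_{I,0i}) + (\beta_{1} + b_{S,1s})X_{i} + e_{si}$	`~ X + (1 + X\|Subject) + (1\|Item)`
As above, but $b_{S,0s}$, $b_{S,1s}$ independent	`~ X + (1\|Subject) + (0 + X\| Subject) + (1\|Item)`
$(\beta_0 + b_{S,0s} + b_{I,0i}) + \beta_{1}X_{i} + e_{si}$	`~ X + (1\|Subject) + (1\|Item)`
$(\beta_0 + b_{I,0i}) + (\beta_{1} + b_{S,1s})X_{i} + e_{si}$	`~ X + (0 + X\|Subject) + (1\|Item)`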

Method	Advantages	Disadvantages	Packages
Penalized quasi-likelihood	Flexible, widely implemented	Likelihood inference may be inappropriate; biased for large variance or small means	PROC GLIMMIX (SAS), GLMM (GenStat), glmmPQL (R:MASS), ASREML-R
Laplace approximation	More accurate than PQL	Slower and less flexible than PQL	glmer (R:lme4,lme4a), glmm.admb (R:glmmADMB), INLA, glmmTMB, AD Model Builder, HLM
Gauss-Hermite quadrature	More accurate than Laplace	Slower than Laplace; limited to 2-3 random effects	PROC NLMIXED (SAS), glmer (R:lme4, lme4a), glmmML (R:glmmML), xtlogit (Stata)
Markov chain Monte Carlo	Highly flexible, arbitrary number of random effects; accurate	Slow, technically challenging, Bayesian framework	MCMCglmm (R:MCMCglmm), rstanarm (R), brms (R), MCMCpack (R), WinBUGS/OpenBUGS (R interface: BRugs/R2WinBUGS), JAGS (R interface: rjags/R2jags), AD Model Builder (R interface: R2admb), glmm.admb (post hoc MCMC after Laplace fit) (R:glmmADMB)

GLMM FAQ

Author

Affiliation

Published

Citation

Introduction

Other sources of help

References

linear mixed models

web/open

books (dead-tree/closed)

Model definition

Model specification

Should I treat factor xxx as fixed or random?

Nested or crossed?

(When) can I include a predictor as both fixed and random?

Model extensions

Overdispersion

Testing for overdispersion/computing overdispersion factor

Fitting models with overdispersion?

Underdispersion

Gamma GLMMs

Beta GLMMs

Zero-inflation

Count data

Continuous data

Probability density of x zero or infinite

Probability density of x positive and finite

Tests for zero-inflation

Spatial and temporal correlation models, heteroscedasticity (“R-side” models)

Penalization/handling complete separation

Non-Gaussian random effects

Estimation

What methods are available to fit (estimate) GLMMs?

Troubleshooting

Convergence warnings

Singular models: random effect variances estimated as zero, or correlations estimated as +/- 1

Setting residual variances to a fixed value (zero or other)

Other problems/lme4 error messages

REML for GLMMs

Model diagnostics

Inference and confidence intervals

Testing hypotheses

What are the p-values listed by summary(glmerfit) etc.? Are they reliable?

Methods for testing single parameters

Tests of effects (i.e. testing that several parameters are simultaneously zero)

Is the likelihood ratio test reliable for mixed models?

Why doesn’t lme4 display denominator degrees of freedom/p values? What other options do I have?

Df alternatives:

Testing significance of random effects

Standard errors of variance estimates

P-values: MCMC and parametric bootstrap

Markov chain Monte Carlo sampling:

Status of mcmcsamp

Parametric bootstrap

Predictions and/or confidence (or prediction) intervals on predictions

lme

lme4

glmmTMB

Confidence intervals on conditional means/BLUPs/random effects

lme4

Power analysis

Model selection and averaging

Can I use AIC for mixed models? How do I count the number of degrees of freedom for a random effect?

Model summaries (goodness-of-fit, decomposition of variance, etc.)

How do I compute a coefficient of determination (R2), or an analogue, for (G)LMMs?

Problem

Simple/crude solutions

Sophisticated solutions

Variable importance

Do I have to specify the levels of fixed effects in lmer?

Miscellaneous/procedural

Pronunciation of lmer/glmer/etc.

Storing information

Mixed modeling packages

Which R packages (functions) fit GLMMs?

Should I use aov(), nlme, or lme4, or some other package?

linear and nonlinear mixed models

GLMMs

Additive and generalized-additive mixed models

Probability density of $x$ zero or infinite

Probability density of $x$ positive and finite

Other problems/`lme4` error messages

What are the p-values listed by `summary(glmerfit)` etc.? Are they reliable?

Why doesn’t `lme4` display denominator degrees of freedom/p values? What other options do I have?

How do I compute a coefficient of determination ($R^2$), or an analogue, for (G)LMMs?

Pronunciation of `lmer`/`glmer`/etc.

Should I use `aov()`, `nlme`, or `lme4`, or some other package?