20 GLMM, Binary Outcome: Muscatine Obesity

20.1 PREPARATION

20.1.1 Load Packages

install.packages("devtools")
devtools::install_github("sarbearschwartz/apaSupp") # updated 11/11/2025

Load/activate these packages

library(tidyverse)    
library(flextable)
library(apaSupp)      # Not on CRAN, on GitHub (see above)
library(psych)      
library(geepack)  
library(lme4)         
library(lmerTest)
library(corrplot) 
library(emmeans)
library(interactions)
library(performance)      
library(patchwork)  

20.2 Data Prep

Data on Obesity from the Muscatine Coronary Risk Factor Study.

Source:

Table 10 (page 96) in Woolson and Clarke (1984). With permission of Blackwell Publishing.

Reference:

Woolson, R.F. and Clarke, W.R. (1984). Analysis of categorical incompletel longitudinal data. Journal of the Royal Statistical Society, Series A, 147, 87-99.

Description:

The Muscatine Coronary Risk Factor Study (MCRFS) was a longitudinal study of coronary risk factors in school children in Muscatine, Iowa (Woolson and Clarke 1984; Ekholm and Skinner 1998). Five cohorts of children were measured for height and weight in 1977, 1979, and 1981. Relative weight was calculated as the ratio of a child’s observed weight to the median weight for their age-sex-height group. Children with a relative weight greater than 110% of the median weight for their respective stratum were classified as obese. The analysis of this study involves binary data (1 = obese, 0 = not obese) collected at successive time points.

This data was also using in an article title “Missing data methods in longitudinal studies: a review” (https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3016756/).

Variable List:

  • Indicators

    • id Child’s unique identification number
    • occas Occasion number: 1, 2, 3
  • Outcome or dependent variable

    • obesity Obesity Status, 0 = no, 1 = yes
  • Main predictor or independent variable of interest

    • gender 0 = Male, 1 = Female
    • baseage Baseline Age, mid-point of age-cohort
    • currage Current Age, mid-point of age-cohort

20.2.1 Import

df_raw <- read.table("data/Muscatine.txt", header=TRUE)
str(df_raw)
'data.frame':   14568 obs. of  6 variables:
 $ id     : int  1 1 1 2 2 2 3 3 3 4 ...
 $ gender : int  0 0 0 0 0 0 0 0 0 0 ...
 $ baseage: int  6 6 6 6 6 6 6 6 6 6 ...
 $ currage: int  6 8 10 6 8 10 6 8 10 6 ...
 $ occas  : int  1 2 3 1 2 3 1 2 3 1 ...
 $ obesity: chr  "1" "1" "1" "1" ...
psych::headTail(df_raw, top = 10)
        id gender baseage currage occas obesity
1        1      0       6       6     1       1
2        1      0       6       8     2       1
3        1      0       6      10     3       1
4        2      0       6       6     1       1
5        2      0       6       8     2       1
6        2      0       6      10     3       1
7        3      0       6       6     1       1
8        3      0       6       8     2       1
9        3      0       6      10     3       1
10       4      0       6       6     1       1
...    ...    ...     ...     ...   ...    <NA>
14565 4855      1      14      18     3       0
14566 4856      1      14      14     1       .
14567 4856      1      14      16     2       .
14568 4856      1      14      18     3       0

20.2.2 Restrict to 350ID’s of children with complete data for Class Demonstration

Dealing with missing-ness and its implications are beyond the score of this class. Instead we are going to restrict our class analysis to a subset of 350 children who have complete data

I am using the set.seed() function so that I can replicate the restults later.

complete_ids <- df_raw %>% 
  dplyr::filter(obesity %in% c("0", "1")) %>%
  dplyr::group_by(id) %>% 
  dplyr::summarise(n = n()) %>% 
  dplyr::filter(n == 3) %>% 
  dplyr::pull(id)

set.seed(8892)  # needed?

use_ids <- complete_ids %>% sample(350)

head(use_ids)
[1] 3574  805 3458 3537  679  655

20.2.3 Long Format

df_long <- df_raw %>%  
  dplyr::filter(id %in% use_ids) %>% 
  dplyr::mutate(id       = factor(id)) %>% 
  dplyr::mutate(gender   = gender %>% 
                  factor(levels = 0:1, 
                         labels = c("Male", "Female"))) %>% 
  dplyr::mutate(age_base = baseage %>% factor) %>% 
  dplyr::mutate(age_curr = currage %>% factor) %>% 
  dplyr::mutate(ageGc = age_curr %>% 
                  as.character %>% 
                  as.numeric -12) %>% 
  dplyr::mutate(occation = occas   %>% factor) %>% 
  dplyr::mutate(obesity  = obesity %>% 
                  factor(levels = 0:1, 
                         labels = c("No", "Yes"))) %>% 
  dplyr::mutate(obesity_num = obesity %>% 
                  as.numeric - 1) %>% 
  dplyr::select(id, gender, 
                age_base, age_curr, ageGc,
                occation, 
                obesity, obesity_num)
str(df_long)
'data.frame':   1050 obs. of  8 variables:
 $ id         : Factor w/ 350 levels "1","5","10","16",..: 1 1 1 2 2 2 3 3 3 4 ...
 $ gender     : Factor w/ 2 levels "Male","Female": 1 1 1 1 1 1 1 1 1 1 ...
 $ age_base   : Factor w/ 5 levels "6","8","10","12",..: 1 1 1 1 1 1 2 2 2 2 ...
 $ age_curr   : Factor w/ 7 levels "6","8","10","12",..: 1 2 3 1 2 3 2 3 4 2 ...
 $ ageGc      : num  -6 -4 -2 -6 -4 -2 -4 -2 0 -4 ...
 $ occation   : Factor w/ 3 levels "1","2","3": 1 2 3 1 2 3 1 2 3 1 ...
 $ obesity    : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
 $ obesity_num: num  1 1 1 1 1 1 1 1 1 1 ...
psych::headTail(df_long, top = 10)
       id gender age_base age_curr ageGc occation obesity obesity_num
1       1   Male        6        6    -6        1     Yes           1
2       1   Male        6        8    -4        2     Yes           1
3       1   Male        6       10    -2        3     Yes           1
4       5   Male        6        6    -6        1     Yes           1
5       5   Male        6        8    -4        2     Yes           1
6       5   Male        6       10    -2        3     Yes           1
7      10   Male        8        8    -4        1     Yes           1
8      10   Male        8       10    -2        2     Yes           1
9      10   Male        8       12     0        3     Yes           1
10     16   Male        8        8    -4        1     Yes           1
...  <NA>   <NA>     <NA>     <NA>   ...     <NA>    <NA>         ...
1047 3582 Female       14       18     6        3      No           0
1048 3584 Female       14       14     2        1      No           0
1049 3584 Female       14       16     4        2      No           0
1050 3584 Female       14       18     6        3      No           0

20.2.4 Wide Format

df_wide <- df_long %>% 
  dplyr::select(-ageGc) %>% 
  tidyr::pivot_wider(names_from = occation,
                     names_sep = "_",
                     values_from = c(obesity, age_curr)) %>% 
  mutate_if(is.character, factor)%>% 
  group_by(id) %>% 
  mutate(num_miss = sum(is.na(c(obesity_1, 
                                obesity_2, 
                                obesity_3)))) %>% 
  ungroup() %>% 
  mutate(num_miss = as.factor(num_miss))
str(df_wide)
tibble [434 × 11] (S3: tbl_df/tbl/data.frame)
 $ id         : Factor w/ 350 levels "1","5","10","16",..: 1 2 3 4 5 6 7 8 9 10 ...
 $ gender     : Factor w/ 2 levels "Male","Female": 1 1 1 1 1 1 1 1 1 1 ...
 $ age_base   : Factor w/ 5 levels "6","8","10","12",..: 1 1 2 2 2 3 3 3 4 4 ...
 $ obesity_num: num [1:434] 1 1 1 1 1 1 1 1 1 1 ...
 $ obesity_1  : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
 $ obesity_2  : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
 $ obesity_3  : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
 $ age_curr_1 : Factor w/ 7 levels "6","8","10","12",..: 1 1 2 2 2 3 3 3 4 4 ...
 $ age_curr_2 : Factor w/ 7 levels "6","8","10","12",..: 2 2 3 3 3 4 4 4 5 5 ...
 $ age_curr_3 : Factor w/ 7 levels "6","8","10","12",..: 3 3 4 4 4 5 5 5 6 6 ...
 $ num_miss   : Factor w/ 2 levels "0","3": 1 1 1 1 1 1 1 1 1 1 ...
psych::headTail(df_wide, top = 10)
     id gender age_base obesity_num obesity_1 obesity_2 obesity_3 age_curr_1
1     1   Male        6           1       Yes       Yes       Yes          6
2     5   Male        6           1       Yes       Yes       Yes          6
3    10   Male        8           1       Yes       Yes       Yes          8
4    16   Male        8           1       Yes       Yes       Yes          8
5    21   Male        8           1       Yes       Yes       Yes          8
6    30   Male       10           1       Yes       Yes       Yes         10
7    44   Male       10           1       Yes       Yes       Yes         10
8    50   Male       10           1       Yes       Yes       Yes         10
9    60   Male       12           1       Yes       Yes       Yes         12
10   61   Male       12           1       Yes       Yes       Yes         12
11 <NA>   <NA>     <NA>         ...      <NA>      <NA>      <NA>       <NA>
12 3580 Female       14           0        No        No        No         14
13 3581 Female       14           0        No        No        No         14
14 3582 Female       14           0        No        No        No         14
15 3584 Female       14           0        No        No        No         14
   age_curr_2 age_curr_3 num_miss
1           8         10        0
2           8         10        0
3          10         12        0
4          10         12        0
5          10         12        0
6          12         14        0
7          12         14        0
8          12         14        0
9          14         16        0
10         14         16        0
11       <NA>       <NA>     <NA>
12         16         18        0
13         16         18        0
14         16         18        0
15         16         18        0

20.3 Exploratory Data Analysis

20.3.1 Summary Statistics

20.3.1.1 Demographics and Baseline

df_wide %>% 
  dplyr::select(gender,
                "Age Cohort" = age_base,  
                "Obesity Status" = obesity_1) %>% 
  apaSupp::tab1(split = "gender",
                caption = "Baseline Demographics",
                type = list("Obesity Status" = "categorical"))
Table 20.1
Baseline Demographics

Total
N = 434

Male
n = 203 (46.8%)

Female
n = 231 (53.2%)

p-value

Age Cohort

.676

6

49 (11.3%)

18 (8.9%)

31 (13.4%)

8

119 (27.4%)

58 (28.6%)

61 (26.4%)

10

110 (25.3%)

52 (25.6%)

58 (25.1%)

12

84 (19.4%)

41 (20.2%)

43 (18.6%)

14

72 (16.6%)

34 (16.7%)

38 (16.5%)

Obesity Status

.608

No

286 (81.7%)

138 (83.1%)

148 (80.4%)

Yes

64 (18.3%)

28 (16.9%)

36 (19.6%)

Unknown

84

37

47

Note. . .

* p < .05. ** p < .01. *** p < .001.

20.3.1.2 Status over Time

data_summary <- df_long %>% 
  dplyr::group_by(gender, age_curr) %>% 
  dplyr::mutate(obesityN = case_when(obesity == "Yes" ~ 1,
                                     obesity == "No"  ~ 0)) %>% 
  dplyr::filter(complete.cases(gender, age_curr, obesityN)) %>% 
  dplyr::summarise(n = n(),
                   prob_est = mean(obesityN),
                   prob_SD  = sd(obesityN),
                   prob_SE  = prob_SD/sqrt(n))

data_summary
# A tibble: 14 × 6
# Groups:   gender [2]
   gender age_curr     n prob_est prob_SD prob_SE
   <fct>  <fct>    <int>    <dbl>   <dbl>   <dbl>
 1 Male   6           17    0.118   0.332  0.0805
 2 Male   8           64    0.172   0.380  0.0475
 3 Male   10         105    0.143   0.352  0.0343
 4 Male   12         121    0.198   0.400  0.0364
 5 Male   14         102    0.225   0.420  0.0416
 6 Male   16          61    0.213   0.413  0.0529
 7 Male   18          28    0.143   0.356  0.0673
 8 Female 6           25    0.16    0.374  0.0748
 9 Female 8           69    0.203   0.405  0.0488
10 Female 10         120    0.275   0.448  0.0409
11 Female 12         129    0.256   0.438  0.0386
12 Female 14         115    0.243   0.431  0.0402
13 Female 16          64    0.281   0.453  0.0566
14 Female 18          30    0.267   0.450  0.0821

20.3.2 Visualize

20.3.2.1 By cohort and gender

df_long %>% 
  dplyr::group_by(gender, age_base, age_curr) %>% 
  dplyr::mutate(obesityN = case_when(obesity == "Yes" ~ 1,
                                     obesity == "No"  ~ 0)) %>% 
  dplyr::filter(complete.cases(gender, age_curr, obesityN)) %>% 
  dplyr::summarise(n = n(),
                   prob_est = mean(obesityN),
                   prob_SD  = sd(obesityN),
                   prob_SE  = prob_SD/sqrt(n)) %>% 
  ggplot(aes(x = age_curr,
             y = prob_est,
             group = age_base,
             color = age_base)) +
  geom_point() +
  geom_line() +
  theme_bw() + 
  labs(x = "Child's Age, years",
       y = "Proportion Obese") +
  facet_grid(. ~ gender)

20.3.2.2 BY only gender

data_summary %>% 
  ggplot(aes(x = age_curr,
             y = prob_est,
             group = gender)) +
  geom_ribbon(aes(ymin = prob_est - prob_SE,
                  ymax = prob_est + prob_SE,
                  fill = gender),
              alpha = .3) +
  geom_point(aes(color = gender,
                 shape = gender)) +
  geom_line(aes(linetype = gender,
                color = gender)) +
  theme_bw() + 
  scale_color_manual(values = c("dodger blue", "hot pink")) +
  scale_fill_manual(values = c("dodger blue", "hot pink")) +
  labs(x = "Child's Age, years",
       y = "Proportion Obese")

Smooth out the trends

data_summary %>% 
  ggplot(aes(x = age_curr,
             y = prob_est,
             group = gender,
             color = gender)) +
  geom_smooth(method = "lm",
              formula = y ~ poly(x, 2),
              se = FALSE) +
  theme_bw() + 
  scale_color_manual(values = c("dodger blue", "hot pink")) +
  scale_fill_manual(values = c("dodger blue", "hot pink")) +
  labs(x = "Child's Age, years",
       y = "Proportion Obese")

20.4 Analysis Goal

Does risk of obesity increase with age and are patterns of change similar for both sexes?

There are 5 age cohorts that were measured each for 3 years, baseage and currage are age midpoints of those cohort groups. Which to include, current age or occasion? Assume no cohort effects. If you do think this is an issue, include baseline age (age_base) and current age minus baseline age (time) in model.

df_long %>% 
  group_by(gender, age_base, occation) %>% 
  summarise(n       = n(),
            count   = sum(obesity == "Yes"),
            prop    = mean(obesity == "Yes"),
            se      = sd(obesity == "Yes")/sqrt(n)) %>%
  mutate(time = (occation %>% as.numeric) * 2 - 2) %>% 
  ggplot(aes(x = time,
             y = prop,
             fill = gender))  +
  geom_ribbon(aes(ymin = prop - se,
                  ymax = prop + se),
              alpha = 0.2) +
  geom_point(aes(color = gender)) +
  geom_line(aes(color = gender)) +
  theme_bw() +
  facet_wrap(~ age_base, labeller = label_both) +
  labs(title    = "Observed Obesity Rates, by Gender within Cohort",
       subtitle = "Subset of 350 children with complete data",
       x        = "Time, years from 1977",
       y        = "Proportion of Children Characterized as Obese") +
  scale_fill_manual(values = c("dodgerblue3", "red")) +
  scale_color_manual(values = c("dodgerblue3", "red")) +
  scale_x_continuous(breaks = seq(from = 0, to = 4, by = 2)) +
  theme(legend.position = c(1, 0),
        legend.justification = c(1, 0),
        legend.background = element_rect(color = "black"))

df_long %>% 
  group_by(gender, age_curr) %>% 
  summarise(n       = n(),
            count   = sum(obesity == "Yes"),
            prop    = mean(obesity == "Yes"),
            se      = sd(obesity == "Yes")/sqrt(n)) %>%
  ggplot(aes(x = age_curr %>% as.character %>% as.numeric,
             y = prop,
             group = gender,
             fill = gender))  +
  geom_ribbon(aes(ymin = prop - se,
                  ymax = prop + se),
              alpha = 0.2) +
  geom_point(aes(color = gender)) +
  geom_line(aes(color = gender)) +
  theme_bw() +
  geom_vline(xintercept = 12, 
             linetype   = "dashed", 
             size       = 1, 
             color      = "navyblue") +
  labs(title    = "Observed Obesity Rates, by Gender (collapsing cohorts)",
       subtitle = "Subset of 350 children with complete data",
       x        = "Age of Child, years",
       y        = "Proportion of Children Characterized as Obese") +
  scale_fill_manual(values = c("dodgerblue3", "red")) +
  scale_color_manual(values = c("dodgerblue3", "red")) +
  scale_x_continuous(breaks = seq(from = 6, to = 18, by = 2)) +
  theme(legend.position = c(0, 1),
        legend.justification = c(-0.05, 1.05),
        legend.background = element_rect(color = "black"))

20.5 GLM Analysis

20.5.1 Standard logistic regression

fit_glm_1 <- glm(obesity_num ~ gender*ageGc + gender*I(ageGc^2), 
                 data   = df_long,
                 family = binomial(link = "logit"))

fit_glm_2 <- glm(obesity_num ~ gender + ageGc + I(ageGc^2), 
                 data   = df_long,
                 family = binomial(link = "logit"))
apaSupp::tab_glms(list("Interaction" = fit_glm_1,
                       "Main Effects" = fit_glm_2),
                  caption = "GLM: Parameter EStimates")
Table 20.2
GLM: Parameter EStimates

Interaction

Main Effects

Variable

OR

95% CI

p

OR

95% CI

p

gender

Male

Female

1.43

[0.97, 2.12]

.069

1.48

[1.10, 2.00]

.010**

ageGc

1.05

[0.97, 1.14]

.240

1.04

[0.99, 1.10]

.126

I(ageGc^2)

0.99

[0.96, 1.01]

.345

0.99

[0.98, 1.01]

.242

gender * ageGc

Female * ageGc

0.99

[0.89, 1.09]

.787

gender * I(ageGc^2)

Female * I(ageGc^2)

1.00

[0.97, 1.04]

.788

Fit Metrics

AIC

1,105.9

1,102.0

BIC

1,135.6

1,121.8

pseudo-R²

Tjur

.009

.009

McFadden

.009

.009

Note. N = 1050. CI = confidence interval.Coefficient of determination estiamted with both Tjur and McFadden's psuedo R-squared

* p < .05. ** p < .01. *** p < .001.

plot_pred_glm <- fit_glm_2 %>% 
  emmeans::emmeans(~gender*ageGc,
                   at = list(ageGc = seq(from = -6, 
                                              to = 6, 
                                              by = 0.25)),
                   type = "response",
                   level = .685) %>% 
   data.frame() %>%
   mutate(age = ageGc + 12) %>% 
  dplyr::mutate(gender = forcats::fct_rev(gender)) %>% 
   ggplot(aes(x        = age,
              y        = prob,
              group    = gender,
              linetype = gender,
              fill     = gender)) +
  geom_ribbon(aes(ymin = asymp.LCL,
                  ymax = asymp.UCL),
              alpha = .3) +
   geom_line(aes(color = gender),
             size = 1.5) +
   theme_bw() +
   labs(title    = "Generalized Linear Model: Model #2",
        x        = "Child's Age, years",
        y        = "Predicted Probability of Obesity",
        linetype = "Gender",
        fill     = "Gender",
        color    = "Gender") +
   theme(legend.position = c(0, 1),
         legend.justification = c(-0.05, 1.05),
         legend.background = element_rect(color = "black"),
         legend.key.width = unit(2, "cm")) +
  scale_x_continuous(breaks = seq(from = 6, to = 18, by = 3)) 

plot_pred_glm 

20.6 GEE Analysis

ALWAYS: fix the scale parameter to 1 with binomial data!!!

20.6.1 Inculde Interactions

fit_gee_1ex <- geepack::geeglm(obesity_num ~ gender*ageGc + gender*I(ageGc^2), 
                        id          = id, 
                        data        = df_long,
                        family      = binomial(link = "logit"),
                        corstr      = 'exchangeable', 
                        scale.fix   = TRUE)

fit_gee_1un <- geepack::geeglm(obesity_num ~ gender*ageGc + gender*I(ageGc^2), 
                        id          = id, 
                        data        = df_long,
                        family      = binomial(link = "logit"),
                        corstr      = 'unstructured', 
                        scale.fix   = TRUE)
apaSupp::tab_gees(list("Exchangeable" = fit_gee_1ex,
                       "Unstructured" = fit_gee_1un),
                caption = "Gee Model Parameters: With Interactions")
Table 20.3
Gee Model Parameters: With Interactions

Exchangeable

Unstructured

OR

95% CI

p

OR

95% CI

p

gender

Male

Female

1.39

[0.87, 2.24]

.169

1.39

[0.86, 2.23]

.175

ageGc

1.07

[0.97, 1.17]

.165

1.06

[0.97, 1.16]

.184

I(ageGc^2)

0.99

[0.97, 1.01]

.285

0.99

[0.97, 1.01]

.293

gender * ageGc

Female * ageGc

1.02

[0.91, 1.14]

.789

1.02

[0.91, 1.14]

.771

gender * I(ageGc^2)

Female * I(ageGc^2)

1.01

[0.98, 1.03]

.560

1.01

[0.98, 1.03]

.607

Note.

* p < .05. ** p < .01. *** p < .001.

20.6.2 Drop the interactions

fit_gee_2ex <- geepack::geeglm(obesity_num ~ gender + ageGc + I(ageGc^2), 
                        id          = id, 
                        data        = df_long,
                        family      = binomial(link = "logit"),
                        corstr      = 'exchangeable', 
                        scale.fix   = TRUE)

fit_gee_2un <- geepack::geeglm(obesity_num ~ gender + ageGc + I(ageGc^2), 
                        id          = id, 
                        data        = df_long,
                        family      = binomial(link = "logit"),
                        corstr      = 'unstructured', 
                        scale.fix   = TRUE)
apaSupp::tab_gees(list("Exchangeable" = fit_gee_2ex,
                       "Unstructured" = fit_gee_2un),
                caption = "Gee Model Parameters: Without Interactions")
Table 20.4
Gee Model Parameters: Without Interactions

Exchangeable

Unstructured

OR

95% CI

p

OR

95% CI

p

gender

Male

Female

1.49

[0.97, 2.29]

.066

1.48

[0.96, 2.27]

.073

ageGc

1.08

[1.02, 1.14]

.009**

1.07

[1.02, 1.13]

.011*

I(ageGc^2)

0.99

[0.98, 1.01]

.332

0.99

[0.98, 1.01]

.315

Note.

* p < .05. ** p < .01. *** p < .001.

20.6.3 Select the “final” model.

interactions::interact_plot(model = fit_gee_2un,
                            pred = ageGc,
                            modx = gender)

plot_pred_gee <- fit_gee_2un %>% 
  emmeans::emmeans(~ gender*ageGc,
                   at = list(ageGc = seq(from = -6, to = 6, by = .1)),
                   type = "response",
                   level = .685) %>% 
  data.frame() %>% 
  mutate(gender = fct_rev(gender)) %>% 
  mutate(age = ageGc + 12) %>% 
  ggplot(aes(x        = age,
             y        = prob,
             group    = gender)) +
  geom_ribbon(aes(ymin = lower.CL,
                  ymax = upper.CL,
                  fill = gender),
              alpha = .2) +
  geom_line(aes(linetype = gender,
                color    = gender),
            size = 1.5) +
  theme_bw() +
  labs(title = "Generalized Estimating Equation: Model #2, unstructured",
       x     = "Child's Age, years",
       y     = "Predicted Proportion with Obesity",
       linetype = "Gender",
       fill = "Gender",
       color = "Gender") +
  theme(legend.position = c(0, 1),
        legend.justification = c(-0.05, 1.05),
        legend.background = element_rect(color = "black"),
        legend.key.width = unit(2, "cm")) +
  scale_x_continuous(breaks = seq(from = 6, to = 18, by = 3)) 

  
plot_pred_gee

20.7 GLMM Analysis

IT IS GENERALLY NOT RECOMMENDED THAT RANDOM-SLOPES BE ESTIMATED FOR BINOMIAL GLMMS

fit_glmm_1 <- lme4::glmer(obesity_num ~ ageGc*gender + I(ageGc^2)*gender + (1|id), 
                          data = df_long,
                          family      = binomial(link = "logit")) 

fit_glmm_2 <- lme4::glmer(obesity_num ~ gender + ageGc + I(ageGc^2) + (1|id), 
                          data = df_long,
                          family      = binomial(link = "logit")) 

Indicates smaller model is better, no interaction at higher level necessary

anova(fit_glmm_1, fit_glmm_2)
Data: df_long
Models:
fit_glmm_2: obesity_num ~ gender + ageGc + I(ageGc^2) + (1 | id)
fit_glmm_1: obesity_num ~ ageGc * gender + I(ageGc^2) * gender + (1 | id)
           npar AIC BIC logLik -2*log(L) Chisq Df Pr(>Chisq)
fit_glmm_2    5 883 908   -436       873                    
fit_glmm_1    7 885 920   -436       871  1.54  2       0.46
apaSupp::tab_glmers(list("With" = fit_glmm_1,
                         "Without" = fit_glmm_2),
                    caption = "GLMM Model Parameters: With and Without Interactions")
Table 20.5
GLMM Model Parameters: With and Without Interactions

With

Without

Variable

OR

[95% CI]

p

OR

[95% CI]

p

ageGc

1.21

[0.97, 1.50]

.087

1.25

[1.09, 1.44]

.002**

gender

Male

Female

1.54

[0.42, 5.66]

.518

2.05

[0.62, 6.83]

.242

I(ageGc^2)

0.97

[0.92, 1.02]

.171

0.98

[0.96, 1.01]

.287

ageGc * gender

ageGc * Female

1.08

[0.82, 1.43]

.594

gender * I(ageGc^2)

Female * I(ageGc^2)

1.03

[0.97, 1.10]

.326

id.var__(Intercept)

50.01

49.13

AIC

885.28

882.82

BIC

919.97

907.60

Log-likelihood

-435.64

-436.41

Note. N = 1050. CI = confidence interval.

* p < .05. ** p < .01. *** p < .001.

interactions::interact_plot(model = fit_glmm_2,
                            pred = ageGc,
                            modx = gender,
                            int.width = .685,
                            interval = TRUE)

Effect(c("gender", "ageGc"),fit_glmm_2) %>% 
  data.frame %>% 
  mutate(fit_exp = exp(fit))
   gender ageGc      fit       se    lower   upper fit_exp
1    Male    -6 0.000213 0.000243 2.27e-05 0.00199    1.00
2  Female    -6 0.000437 0.000515 4.32e-05 0.00440    1.00
3    Male    -3 0.000643 0.000566 1.14e-04 0.00360    1.00
4  Female    -3 0.001318 0.001225 2.13e-04 0.00812    1.00
5    Male     0 0.001451 0.001155 3.05e-04 0.00688    1.00
6  Female     0 0.002973 0.002531 5.59e-04 0.01565    1.00
7    Male     3 0.002449 0.001877 5.44e-04 0.01094    1.00
8  Female     3 0.005011 0.004142 9.87e-04 0.02502    1.01
9    Male     6 0.003091 0.002749 5.39e-04 0.01751    1.00
10 Female     6 0.006321 0.005971 9.86e-04 0.03939    1.01
plot_pred_glmm <- fit_glmm_2 %>% 
  emmeans::emmeans(~ gender*ageGc,
                   at = list(ageGc = seq(from = -6, to = 6, by = .1)),
                   type = "response",
                   level = .685) %>% 
  data.frame() %>% 
  mutate(gender = fct_rev(gender)) %>% 
  mutate(age = ageGc + 12) %>% 
  ggplot(aes(x        = age,
             y        = prob,
             group    = gender)) +
  geom_ribbon(aes(ymin = asymp.LCL,
                  ymax = asymp.UCL,
                  fill = gender),
              alpha = .2) +
  geom_line(aes(linetype = gender,
                color    = gender),
            size = 1.5) +
  theme_bw() +
  labs(title = "Generalized Linear Mixed Effects: Model #2, Random Interepts",
       x     = "Child's Age, years",
       y     = "Predicted Probability of Obesity",
       linetype = "Gender",
       fill = "Gender",
       color = "Gender") +
  theme(legend.position = c(0, 1),
        legend.justification = c(-0.05, 1.05),
        legend.background = element_rect(color = "black"),
        legend.key.width = unit(2, "cm")) +
  scale_x_continuous(breaks = seq(from = 6, to = 18, by = 3)) 

plot_pred_glmm 

20.8 Compare Methods

plot_pred_glm / plot_pred_gee / plot_pred_glmm

(plot_pred_glm + theme(legend.position = "none")) / 
  (plot_pred_gee + theme(legend.position = "none")) / 
  plot_pred_glmm +
  plot_annotation(tag_levels = "A") +
  plot_layout(guides = "auto")  

df_long %>% 
  dplyr::mutate(pred = predict(fit_glmm_2, re.form = ~ (1|id))) %>% 
  dplyr::mutate(odds = exp(pred)) %>% 
  dplyr::mutate(prob = odds/(1 + odds)) %>% 
  ggplot(aes(x = age_curr,
             y = prob,
             group = id)) +
  geom_line(aes(color = gender)) +
  theme_bw()