15 GEE, Continuous Outcome: Beat the Blues

15.1 PREPARATION

15.1.1 Load Packages

install.packages("devtools")
devtools::install_github("sarbearschwartz/apaSupp") # updated 10/14/2025

Load/activate these packages

library(tidyverse)    
library(flextable)
library(apaSupp)      # Not on CRAN, on GitHub (see above)
library(psych) 
library(lme4)         
library(lmerTest)
library(corrplot)     
library(gee)          
library(geepack)       
library(MuMIn)  
library(interactions)
library(performance)      

library(HSAUR)        # package with the dataset

15.1.2 Background

This dataset was used as an example in Chapter 11 of “A Handbook of Statistical Analysis using R” by Brian S. Everitt and Torsten Hothorn. The authors include this data set in their HSAUR package on CRAN.

The Background

“These data resulted from a clinical trial of an interactive multimedia program called ‘Beat the Blues’ designed to deliver cognitive behavioral therapy (CBT) to depressed patients via a computer terminal. Full details are given here: Proudfoot et. al (2003), but in essence Beat the Blues is an interactive program using multimedia techniques, in particular video vignettes.”

“In a randomized controlled trial of the program, patients with depression recruited in primary care were randomized to either the Beating the Blues program or to”Treatment as Usual” (TAU). Patients randomized to the BEat the Blues also received pharmacology and/or general practice (GP) support and practical/social help,offered as part of treatment as usual, with the exception of any face-to-face counseling or psychological intervention. Patients allocated to TAU received whatever treatment their GP prescribed. The latter included, besides any medication, discussion of problems with GP, provisions of practical/social help, referral to a counselor, referral to a practice nurse, referral to mental health professionals, or further physical examination.

The Research Question

Net of other factors (use of antidepressants and length of the current episode), does the Beat-the-Blues program results in better depression trajectories over treatment as usual?

The Data

The variables are as follows:

drug did the patient take anti-depressant drugs (No or Yes)
length the length of the current episode of depression, a factor with levels:
“<6m” less than six months
“>6m” more than six months
treatment treatment group, a factor with levels:
“TAU” treatment as usual
“BtheB” Beat the Blues
bdi.pre Beck Depression Inventory II, before treatment
bdi.2m Beck Depression Inventory II, after 2 months
bdi.4m Beck Depression Inventory II, after 4 months
bdi.6m Beck Depression Inventory II, after 6 months
bdi.8m Beck Depression Inventory II, after 8 months

15.1.3 Read in the data

data(BtheB, package = "HSAUR")

BtheB %>% 
  psych::headTail()

    drug length treatment bdi.pre bdi.2m bdi.4m bdi.6m bdi.8m
1     No    >6m       TAU      29      2      2   <NA>   <NA>
2    Yes    >6m     BtheB      32     16     24     17     20
3    Yes    <6m       TAU      25     20   <NA>   <NA>   <NA>
4     No    >6m     BtheB      21     17     16     10      9
... <NA>   <NA>      <NA>     ...    ...    ...    ...    ...
97   Yes    <6m       TAU      28   <NA>   <NA>   <NA>   <NA>
98    No    >6m     BtheB      11     22      9     11     11
99    No    <6m       TAU      13      5      5      0      6
100  Yes    <6m       TAU      43   <NA>   <NA>   <NA>   <NA>

15.1.3.1 Wide Format

Tidy up the dataset

WIDE format * One line per person * Good for descriptive tables & correlation matrices

df_btb_wide <- BtheB %>% 
  dplyr::mutate(id = row_number()) %>% # create a new variable to ID participants
  dplyr::select(id, treatment,         # specify that ID variable is first
                drug, length,
                bdi.pre, bdi.2m, bdi.4m, bdi.6m, bdi.8m)

df_btb_wide %>% 
  psych::headTail(top = 10, bottom = 10) %>% 
  flextable::flextable() %>% 
  apaSupp::theme_apa(caption = "Wide Format")

**Table 15.1**
Wide Format
id	treatment	drug	length	bdi.pre	bdi.2m	bdi.4m	bdi.6m	bdi.8m
1	TAU	No	>6m	29	2	2
2	BtheB	Yes	>6m	32	16	24	17	20
3	TAU	Yes	<6m	25	20
4	BtheB	No	>6m	21	17	16	10	9
5	BtheB	Yes	>6m	26	23
6	BtheB	Yes	<6m	7	0	0	0	0
7	TAU	Yes	<6m	17	7	7	3	7
8	TAU	No	>6m	20	20	21	19	13
9	BtheB	Yes	<6m	18	13	14	20	11
10	BtheB	Yes	>6m	20	5	5	8	12
...				...	...	...	...	...
91	TAU	No	<6m	16
92	BtheB	Yes	<6m	30	26	28
93	BtheB	Yes	<6m	17	8	7	12
94	BtheB	No	>6m	19	4	3	3	3
95	BtheB	No	>6m	16	11	4	2	3
96	BtheB	Yes	>6m	16	16	10	10	8
97	TAU	Yes	<6m	28
98	BtheB	No	>6m	11	22	9	11	11
99	TAU	No	<6m	13	5	5	0	6
100	TAU	Yes	<6m	43

15.1.3.2 Long Format

Restructure to long format

LONG FORMAT * One line per observation * Good for plots and modeling

df_btb_long <- df_btb_wide %>% 
  tidyr::pivot_longer(cols = c(bdi.2m, bdi.4m, bdi.6m, bdi.8m),
                      names_to = "month", 
                      names_pattern = "bdi.(.)m",
                      values_to = "bdi") %>% 
  dplyr::mutate(month = as.numeric(month)) %>% 
  dplyr::filter(complete.cases(id, bdi, treatment, month)) %>% 
  dplyr::arrange(id, month) %>% 
  dplyr::select(id, treatment, drug, length, bdi.pre, month, bdi)

df_btb_long %>% 
  psych::headTail(top = 10, bottom = 10) %>% 
  flextable::flextable() %>% 
  apaSupp::theme_apa(caption = "Long Format")

**Table 15.2**
Long Format
id	treatment	drug	length	bdi.pre	month	bdi
1	TAU	No	>6m	29	2	2
1	TAU	No	>6m	29	4	2
2	BtheB	Yes	>6m	32	2	16
2	BtheB	Yes	>6m	32	4	24
2	BtheB	Yes	>6m	32	6	17
2	BtheB	Yes	>6m	32	8	20
3	TAU	Yes	<6m	25	2	20
4	BtheB	No	>6m	21	2	17
4	BtheB	No	>6m	21	4	16
4	BtheB	No	>6m	21	6	10
...				...	...	...
96	BtheB	Yes	>6m	16	6	10
96	BtheB	Yes	>6m	16	8	8
98	BtheB	No	>6m	11	2	22
98	BtheB	No	>6m	11	4	9
98	BtheB	No	>6m	11	6	11
98	BtheB	No	>6m	11	8	11
99	TAU	No	<6m	13	2	5
99	TAU	No	<6m	13	4	5
99	TAU	No	<6m	13	6	0
99	TAU	No	<6m	13	8	6

15.2 EXPLORATORY DATA ANLAYSIS

15.2.1 Visualize: Person-profile Plots

Create spaghetti plots of the raw, observed data

df_btb_long %>% 
  dplyr::mutate(treatment = treatment %>% 
                  forcats::fct_recode("Treatment as Usual" = "TAU",
                                      "Beat the Blues Program" = "BtheB")) %>% 
  ggplot(aes(x = month,
             y = bdi)) +
  geom_point() +
  geom_line(aes(group = id), 
            size = 1, 
            alpha = 0.3) +
  geom_smooth(method = "lm") +
  theme_bw() +
  facet_grid(.~ treatment) +
  labs(x = "Time from Baseline, months",
       y = "Beck Depression Inventory Score")

Figure 15.1
Observed Trajectories of Depression Across Time with LM Smoother by Treatment

df_btb_long %>% 
  dplyr::mutate(treatment = treatment %>% 
                  forcats::fct_recode("Treatment as Usual" = "TAU",
                                      "Beat the Blues Program" = "BtheB")) %>% 
  dplyr::mutate(drug = drug %>% 
                  forcats::fct_recode("Not Using Antidepressants" = "No",
                                      "Using Antidepressants" = "Yes")) %>% 
  ggplot(aes(x = month,
             y = bdi)) +
  geom_line(aes(group = id), 
            size = 1, 
            alpha = 0.3) +
  geom_smooth(method = "lm") +
  facet_grid(drug ~ treatment) +
  theme_bw() +
  labs(x = "Time from Baseline, months",
       y = "Beck Depression Inventory Score")

Figure 15.2
Observed Trajectories of Depression Across Time with LM Smoother by Treatment and Antidepressant Use

df_btb_long %>% 
  dplyr::mutate(treatment = treatment %>% 
                  forcats::fct_recode("Treatment as Usual" = "TAU",
                                      "Beat the Blues Program" = "BtheB")) %>% 
  dplyr::mutate(drug = drug %>% 
                  forcats::fct_recode("Not Using Antidepressants" = "No",
                                      "Using Antidepressants" = "Yes")) %>% 
  ggplot(aes(x = month,
             y = bdi)) +
  geom_line(aes(group = id,
                color = length), 
            size = 1, 
            alpha = 0.3) +
  geom_smooth(aes(color = length),
              method = "lm",
              size = 1.25,
              se = FALSE) +
  facet_grid(drug ~ treatment) +
  theme_bw() +
  labs(x = "Time from Baseline, months",
       y = "Beck Depression Inventory Score",
       color = "Current Episode Length") +
  theme(legend.position = "inside",
        legend.position.inside = c(1, 1),
        legend.justification = c(1.1, 1.1),
        legend.key.width = unit(2, "cm"),
        legend.background = element_rect(color = "black"))

Figure 15.3
Observed Trajectories of Depression Across Time with LM Smoother by Treatment, Antidepressant Use, and Length of Current Episode

df_btb_long %>% 
  dplyr::mutate(treatment = treatment %>% 
                  forcats::fct_recode("Treatment as Usual" = "TAU",
                                      "Beat the Blues Program" = "BtheB")) %>% 
  ggplot(aes(x = month,
             y = bdi,
             color = treatment,
             fill = treatment)) +
  geom_smooth(method = "lm") +
  theme_bw()  +
  labs(x = "Time from Baseline, months",
       y = "Beck Depression Inventory Score",
       color = NULL,
       fill = NULL) +
  theme(legend.position = "inside",
        legend.position.inside = c(1, 1),
        legend.justification = c(1.1, 1.1),
        legend.key.width = unit(2, "cm"),
        legend.background = element_rect(color = "black"))

Figure 15.4
Linear Regression Line for Subgroup Analysis by Treatment

15.2.2 Calculate the Observed Correlation Structure

bdi_corr <- df_btb_wide %>% 
  dplyr::select(starts_with("bdi")) %>% 
  stats::cor(use="pairwise.complete.obs")

bdi_corr

          bdi.pre    bdi.2m    bdi.4m    bdi.6m    bdi.8m
bdi.pre 1.0000000 0.6142207 0.5691248 0.5077286 0.3835090
bdi.2m  0.6142207 1.0000000 0.7903346 0.7849188 0.7038158
bdi.4m  0.5691248 0.7903346 1.0000000 0.8166591 0.7220149
bdi.6m  0.5077286 0.7849188 0.8166591 1.0000000 0.8107773
bdi.8m  0.3835090 0.7038158 0.7220149 0.8107773 1.0000000

df_btb_wide %>% 
  dplyr::select(starts_with("bdi")) %>% 
  apaSupp::tab_cor(caption = "Correlation Between Repeated Measures of Depression")

**Table 15.3**
Correlation Between Repeated Measures of Depression
Variable Pair		r	p
bdi.pre	bdi.2m	.610	< .001***
bdi.pre	bdi.4m	.570	< .001***
bdi.pre	bdi.6m	.510	< .001***
bdi.pre	bdi.8m	.380	.005**
bdi.2m	bdi.4m	.790	< .001***
bdi.2m	bdi.6m	.780	< .001***
bdi.2m	bdi.8m	.700	< .001***
bdi.4m	bdi.6m	.820	< .001***
bdi.4m	bdi.8m	.720	< .001***
bdi.6m	bdi.8m	.810	< .001***
Note. N = 100. r = Pearson's Product-Moment correlation coefficient.
* p < .05. p < .01. * p < .001.

15.2.3 Plot the correlation matrix to get a better feel for the pattern

bdi_corr %>% 
  corrplot::corrplot.mixed(upper = "ellipse")

15.3 LINEAR REGRESSION, LM

This ignores any correlation between repeated measures on the same individual and treats all observations as independent.

15.3.1 Fit the models

btb_lm_1 <- stats::lm(bdi ~ bdi.pre + length + drug + treatment + month,
                      data = df_btb_long)

btb_lm_2 <- stats::lm(bdi ~ bdi.pre + length + drug + treatment*month,
                      data = df_btb_long)

btb_lm_3 <- stats::lm(bdi ~ bdi.pre + length + drug + treatment + drug*month,
                      data = df_btb_long)

btb_lm_4 <- stats::lm(bdi ~ bdi.pre + length + drug*treatment*month,
                      data = df_btb_long)

15.3.2 Parameter Estimates Table

apaSupp::tab_lms(list(btb_lm_1, btb_lm_2, btb_lm_3, btb_lm_4),
                 narrow = TRUE,
                 caption = "Multiple Linear Regression - Ignoring correlation WRONG!")

**Table 15.4**
Multiple Linear Regression - Ignoring correlation WRONG!
	Model 1		Model 2		Model 3		Model 4
Variable	b	(SE)	b	(SE)	b	(SE)	b	(SE)
(Intercept)	7.88	(1.78) ***	7.77	(2.08) ***	7.21	(2.03) ***	7.33	(2.30) **
bdi.pre	0.57	(0.05) ***	0.57	(0.05) ***	0.57	(0.05) ***	0.56	(0.05) ***
length
<6m	—	—	—	—	—	—	—	—
>6m	1.75	(1.11)	1.75	(1.11)	1.78	(1.11)	1.86	(1.10)
drug
No	—	—	—	—	—	—	—	—
Yes	-3.55	(1.14) **	-3.55	(1.15) **	-2.10	(2.39)	-2.00	(3.75)
treatment
TAU	—	—	—	—	—	—	—	—
BtheB	-3.35	(1.10) **	-3.13	(2.36)	-3.36	(1.10) **	-3.31	(3.13)
month	-0.96	(0.23) ***	-0.93	(0.34) **	-0.82	(0.31) **	-0.60	(0.40)
treatment * month
BtheB * month			-0.05	(0.47)			-0.56	(0.63)
drug * month
Yes * month					-0.32	(0.47)	-1.02	(0.73)
drug * treatment
Yes * BtheB							-0.23	(4.92)
drug * treatment * month
Yes * BtheB * month							1.31	(0.98)
AIC	2011.01		2013.00		2012.52		2009.78
BIC	2036.45		2042.08		2041.60		2049.77
R²	.398		.398		.399		.417
Adjusted R²	.387		.385		.386		.398
Note.
* p < .05. p < .01. * p < .001.

15.4 MULTILEVEL MODELING, MLM

15.4.1 Fit the models

btb_lmer_RI   <- lmerTest::lmer(bdi ~ bdi.pre + length + drug + treatment + 
                                  month + (1 | id), 
                                data = df_btb_long, 
                                REML = TRUE)

btb_lmer_RIAS <- lmerTest::lmer(bdi ~ bdi.pre + length + drug + treatment + 
                                  month + (month | id), 
                                data = df_btb_long, 
                                REML = TRUE,
                                control = lmerControl(optimizer = "Nelder_Mead"))

15.4.2 Parameter Estimates Table

apaSupp::tab_lmers(list("MLM - RI"   = btb_lmer_RI,
                        "MLM - RIAS" = btb_lmer_RIAS))

**Table 15.5**
Compare MLM Models
	MLM - RI			MLM - RIAS
Variable	b	(SE)	p	b	(SE)	p
(Intercept)	5.92	(2.31)	.012*	5.94	(2.30)	.011*
bdi.pre	0.64	(0.08)	< .001***	0.64	(0.08)	< .001***
length
<6m	—	—		—	—
>6m	0.24	(1.68)	.887	0.10	(1.67)	.954
drug
No	—	—		—	—
Yes	-2.79	(1.77)	.118	-2.89	(1.76)	.105
treatment
TAU	—	—		—	—
BtheB	-2.36	(1.71)	.171	-2.49	(1.71)	.149
month	-0.71	(0.15)	< .001***	-0.70	(0.16)	< .001***
id.var__(Intercept)	51.44			50.56
Residual.var__Observation	25.27			23.87
id.cov__(Intercept).month				-0.31
id.var__month				0.23
AIC	1,882			1,885
BIC	1,911			1,922
Log-likelihood	-933			-933
Note.
* p < .05. p < .01. * p < .001.

15.4.3 Likelihood Ratio Test

anova(btb_lmer_RI, 
      btb_lmer_RIAS, 
      refit = FALSE)

Data: df_btb_long
Models:
btb_lmer_RI: bdi ~ bdi.pre + length + drug + treatment + month + (1 | id)
btb_lmer_RIAS: bdi ~ bdi.pre + length + drug + treatment + month + (month | id)
              npar    AIC    BIC  logLik -2*log(L)  Chisq Df Pr(>Chisq)
btb_lmer_RI      8 1882.1 1911.2 -933.04    1866.1                     
btb_lmer_RIAS   10 1885.2 1921.5 -932.58    1865.2 0.9236  2     0.6301

15.4.4 Plot the model predictions

interactions::interact_plot(model = btb_lmer_RI,
                            pred = month,
                            modx = treatment,
                            modx.labels = c("Treatment as Usual",
                                            "Beat the Blues Program"),
                            mod2 = drug,
                            mod2.labels = c("Not Using Antidepressants",
                                            "Using Antidepressants"),
                            legend.main = "Treatment",
                            interval = TRUE) +
  theme_bw()  +
  labs(x = "Time from Baseline, months",
       y = "Estimated Margial Mean\nBeck Depression Inventory Score") +
  theme(legend.position = c(0, 0),
        legend.justification = c(-0.1, -0.1),
        legend.background = element_rect(color = "black"),
        legend.key.width = unit(1.5, "cm"))

Figure 15.5
Estimated Marginal Means from a Multilevel Model (MLM)

effects::Effect(c("treatment", "month", "drug"), 
                mod = btb_lmer_RI) %>% 
  data.frame %>%   
  dplyr::mutate(treatment = treatment %>% 
                  forcats::fct_recode("Treatment as Usual" = "TAU",
                                      "Beat the Blues Program" = "BtheB")) %>% 
  dplyr::mutate(drug = drug %>% 
                  forcats::fct_recode("Not Using Antidepressants" = "No",
                                      "Using Antidepressants" = "Yes")) %>% 
  dplyr::mutate(treatment = fct_reorder2(treatment, month, fit)) %>%
  ggplot(aes(x = month,
             y = fit)) +
  geom_line(aes(color = treatment,
                linetype = treatment),
            size = 1) +
  geom_ribbon(aes(ymin = lower,
                  ymax = upper,
                  fill = treatment), 
              alpha = 0.3) +
  theme_bw() +
  facet_grid(.~ drug)+
  labs(x = "Time from Baseline, months",
       y = "Estimated Margial Mean\nBeck Depression Inventory Score") +
  theme(legend.position = c(0, 0),
        legend.justification = c(-0.1, -0.1),
        legend.background = element_rect(color = "black"),
        legend.key.width = unit(1.5, "cm"))

15.5 GENERAL ESTIMATING EQUATIONS, GEE

15.5.1 Explore Various Correlation Structures

Main effects to determine correlation structure

15.5.1.1 Fit the models - `gee::gee()`

btb_gee_in <- gee::gee(bdi ~ bdi.pre + length + drug + treatment + month, 
                       data = df_btb_long, 
                       id = id,
                       family = gaussian, 
                       corstr = 'independence',
                                 scale.fix = TRUE, 
                                 scale.value = 1)

   (Intercept)        bdi.pre      length>6m        drugYes treatmentBtheB 
     7.8830747      0.5723729      1.7530800     -3.5460058     -3.3539662 
         month 
    -0.9608077

btb_gee_ex <- gee::gee(bdi ~ bdi.pre + length + drug + treatment + month, 
                       data = df_btb_long, 
                       id = id, 
                       family = gaussian, 
                       corstr = 'exchangeable',
                                 scale.fix = TRUE, 
                                 scale.value = 1)

   (Intercept)        bdi.pre      length>6m        drugYes treatmentBtheB 
     7.8830747      0.5723729      1.7530800     -3.5460058     -3.3539662 
         month 
    -0.9608077

# btb_gee_ar <- gee::gee(bdi ~ bdi.pre + length + drug + treatment + month,
#                        data = df_btb_long,
#                        id = id,
#                        family = gaussian,
#                        corstr = "AR-M", Mv = 1,
                                 # scale.fix = TRUE, 
                                 # scale.value = 1)
# 
# cgee: M-dependence, M=1, but clustsize=1
# fatal error for this model

btb_gee_un <- gee::gee(bdi ~ bdi.pre + length + drug + treatment + month, 
                       data = df_btb_long, 
                       id = id,
                       family = gaussian, 
                       corstr = 'unstructured',
                                 scale.fix = TRUE, 
                                 scale.value = 1)

   (Intercept)        bdi.pre      length>6m        drugYes treatmentBtheB 
     7.8830747      0.5723729      1.7530800     -3.5460058     -3.3539662 
         month 
    -0.9608077

summary(btb_gee_in)


 GEE:  GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
 gee S-function, version 4.13 modified 98/01/27 (1998) 

Model:
 Link:                      Identity 
 Variance to Mean Relation: Gaussian 
 Correlation Structure:     Independent 

Call:
gee::gee(formula = bdi ~ bdi.pre + length + drug + treatment + 
    month, id = id, data = df_btb_long, family = gaussian, corstr = "independence", 
    scale.fix = TRUE, scale.value = 1)

Summary of Residuals:
         Min           1Q       Median           3Q          Max 
-24.20158432  -5.31202378   0.01101526   5.29503741  27.77789553 


Coefficients:
                 Estimate  Naive S.E.   Naive z Robust S.E.  Robust z
(Intercept)     7.8830747 0.205750420  38.31377  2.19973944  3.583640
bdi.pre         0.5723729 0.006339623  90.28500  0.08853253  6.465114
length>6m       1.7530800 0.128096295  13.68564  1.41954159  1.234962
drugYes        -3.5460058 0.132278613 -26.80710  1.73069664 -2.048889
treatmentBtheB -3.3539662 0.126920001 -26.42583  1.71390982 -1.956909
month          -0.9608077 0.026882849 -35.74055  0.17688635 -5.431780

Estimated Scale Parameter:  1
Number of Iterations:  1

Working Correlation
     [,1] [,2] [,3] [,4]
[1,]    1    0    0    0
[2,]    0    1    0    0
[3,]    0    0    1    0
[4,]    0    0    0    1

summary(btb_gee_ex)


 GEE:  GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
 gee S-function, version 4.13 modified 98/01/27 (1998) 

Model:
 Link:                      Identity 
 Variance to Mean Relation: Gaussian 
 Correlation Structure:     Exchangeable 

Call:
gee::gee(formula = bdi ~ bdi.pre + length + drug + treatment + 
    month, id = id, data = df_btb_long, family = gaussian, corstr = "exchangeable", 
    scale.fix = TRUE, scale.value = 1)

Summary of Residuals:
        Min          1Q      Median          3Q         Max 
-25.4478843  -6.3276726  -0.8152833   4.3622258  25.4078115 


Coefficients:
                 Estimate  Naive S.E.    Naive z Robust S.E.   Robust z
(Intercept)     5.8855129 0.264574859  22.245171  2.10712166  2.7931529
bdi.pre         0.6399964 0.009146473  69.971936  0.07931263  8.0692874
length>6m       0.2084783 0.192618294   1.082339  1.48052530  0.1408137
drugYes        -2.7742506 0.203113138 -13.658647  1.64824318 -1.6831561
treatmentBtheB -2.3360241 0.196537140 -11.885917  1.66217026 -1.4054060
month          -0.7078407 0.016228921 -43.616005  0.15394156 -4.5981134

Estimated Scale Parameter:  1
Number of Iterations:  5

Working Correlation
          [,1]      [,2]      [,3]      [,4]
[1,] 1.0000000 0.6915241 0.6915241 0.6915241
[2,] 0.6915241 1.0000000 0.6915241 0.6915241
[3,] 0.6915241 0.6915241 1.0000000 0.6915241
[4,] 0.6915241 0.6915241 0.6915241 1.0000000

# summary(btb_gee_ar)

summary(btb_gee_un)


 GEE:  GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
 gee S-function, version 4.13 modified 98/01/27 (1998) 

Model:
 Link:                      Identity 
 Variance to Mean Relation: Gaussian 
 Correlation Structure:     Unstructured 

Call:
gee::gee(formula = bdi ~ bdi.pre + length + drug + treatment + 
    month, id = id, data = df_btb_long, family = gaussian, corstr = "unstructured", 
    scale.fix = TRUE, scale.value = 1)

Summary of Residuals:
        Min          1Q      Median          3Q         Max 
-25.1527937  -6.1091139  -0.5896205   4.7316139  25.9041542 


Coefficients:
                 Estimate  Naive S.E.    Naive z Robust S.E.   Robust z
(Intercept)     6.3905215 0.261722706  24.417146  2.15668950  2.9631162
bdi.pre         0.6171798 0.008860129  69.658110  0.08081777  7.6366846
length>6m       0.5834398 0.184908020   3.155297  1.46837275  0.3973377
drugYes        -2.7908835 0.194277260 -14.365467  1.63741987 -1.7044398
treatmentBtheB -2.4261698 0.187935122 -12.909613  1.65519523 -1.4657907
month          -0.7628336 0.020731817 -36.795308  0.15643591 -4.8763329

Estimated Scale Parameter:  1
Number of Iterations:  5

Working Correlation
          [,1]      [,2]      [,3]      [,4]
[1,] 1.0000000 0.7069560 0.5704892 0.4714744
[2,] 0.7069560 1.0000000 0.6086188 0.4637445
[3,] 0.5704892 0.6086188 1.0000000 0.5454963
[4,] 0.4714744 0.4637445 0.5454963 1.0000000

15.5.1.2 Fit the models - `geepack::geeglm()`

The output below each model is the ‘starting’ model assuming independence, so they will all be the same here.

btb_geeglm_in <- geepack::geeglm(bdi ~ bdi.pre + length + drug + treatment + month, 
                                 data = df_btb_long, 
                                 id = id,
                                 wave = month,
                                 family = gaussian, 
                                 corstr = 'independence')

btb_geeglm_ex <- geepack::geeglm(bdi ~ bdi.pre + length + drug + treatment + month, 
                                 data = df_btb_long, 
                                 id = id, 
                                 wave = month,
                                 family = gaussian, 
                                 corstr = 'exchangeable')

btb_geeglm_ar <- geepack::geeglm(bdi ~ bdi.pre + length + drug + treatment + month, 
                                 data = df_btb_long, 
                                 id = id, 
                                 wave = month,
                                 family = gaussian, 
                                 corstr = 'ar1')

btb_geeglm_un <- geepack::geeglm(bdi ~ bdi.pre + length + drug + treatment + month, 
                                 data = df_btb_long, 
                                 id = id,
                                 wave = month,
                                 family = gaussian, 
                                 corstr = 'unstructured')

15.5.1.3 Working Correlation Matrix

btb_geeglm_ex$geese$alpha

    alpha 
0.6945624

btb_geeglm_ar$geese$alpha

    alpha 
0.7992866

btb_geeglm_un$geese$alpha

alpha.1:2 alpha.1:3 alpha.1:4 alpha.2:3 alpha.2:4 alpha.3:4 
0.6767183 0.6991541 0.6506675 0.7408944 0.6382619 0.7488595

15.5.1.4 Parameter Estimates Table

apaSupp::tab_gees(list("Exchangeable" = btb_geeglm_ex,
                       "Autoregressive" = btb_geeglm_ar,
                       "Unstructured" = btb_geeglm_un),
                  narrow = TRUE)

**Table 15.6**
Compare GEE Models
	Exchangeable		Autoregressive		Unstructured
	b	95% CI	b	95% CI	b	95% CI
(Intercept)	5.88	[1.75, 10.01]**	6.62	[2.30, 10.93]**	6.07	[1.91, 10.23]**
bdi.pre	0.64	[0.48, 0.80]***	0.60	[0.44, 0.75]***	0.63	[0.47, 0.79]***
length
<6m	—	—	—	—	—	—
>6m	0.20	[-2.70, 3.11]	0.71	[-2.26, 3.68]	0.22	[-2.67, 3.11]
drug
No	—	—	—	—	—	—
Yes	-2.77	[-6.00, 0.46]	-2.37	[-5.55, 0.82]	-2.72	[-5.93, 0.49]
treatment
TAU	—	—	—	—	—	—
BtheB	-2.33	[-5.59, 0.92]	-2.51	[-5.74, 0.72]	-2.38	[-5.62, 0.86]
month	-0.71	[-1.01, -0.41]***	-0.74	[-1.05, -0.42]***	-0.71	[-1.01, -0.40]***
Note.
* p < .05. p < .01. * p < .001.

Can’t Use the Likelihood Ratio Test

The anova() function is used to compare nested models for parameters (fixed effects), not correlation structures.

anova(btb_geeglm_in, btb_geeglm_ex)

Models are identical

NULL

anova(btb_geeglm_in, btb_geeglm_ar)

Models are identical

NULL

anova(btb_geeglm_in, btb_geeglm_un)

Models are identical

NULL

15.5.1.5 Variaous QIC Measures of Fit

References:

Pan, W. 2001. Akaike’s information criterion in generalized estimating equations. Biometrics 57:120-125. https://onlinelibrary.wiley.com/doi/abs/10.1111/j.0006-341X.2001.00120.x
Burnham, K. P. and D. R. Anderson. 2002. Model selection and multimodel inference: a practical information-theoretic approach. Second edition. Springer Science and Business Media, Inc., New York. https://cds.cern.ch/record/1608735/files/9780387953649_TOC.pdf

The QIC() is one way to try to measure model fit. You can enter more than one model into a single function call.

QIC(I) based on independence model <– suggested by Pan (Biometric, March 2001), asymptotically unbiased estimator (choose the correlation stucture that produces the smallest QIC(I), p122)

MuMIn::QIC(btb_geeglm_in, 
           btb_geeglm_ex, 
           btb_geeglm_ar, 
           btb_geeglm_un, 
           typeR = FALSE)

                   QIC
btb_geeglm_in 306.5589
btb_geeglm_ex 296.2554
btb_geeglm_ar 298.2556
btb_geeglm_un 296.5794

QIC(R) is based on quasi-likelihood of a working correlation R model, can NOT be used to select the working correlation matrix.

MuMIn::QIC(btb_geeglm_in, 
           btb_geeglm_ex, 
           btb_geeglm_ar, 
           btb_geeglm_un, 
           typeR = TRUE)    # NOT the default

                   QIC
btb_geeglm_in 306.5589
btb_geeglm_ex 304.5003
btb_geeglm_ar 304.6425
btb_geeglm_un 304.4087

QIC_U(R) approximates QIC(R), and while both are useful for variable selection, they can NOT be applied to select the working correlation matrix.

MuMIn::QICu(btb_geeglm_in, 
            btb_geeglm_ex, 
            btb_geeglm_ar, 
            btb_geeglm_un)

                  QICu
btb_geeglm_in 292.0000
btb_geeglm_ex 283.7551
btb_geeglm_ar 285.6132
btb_geeglm_un 284.1707

MuMIn::model.sel(btb_geeglm_in, 
                 btb_geeglm_ex, 
                 btb_geeglm_ar, 
                 btb_geeglm_un, 
                 rank = "QIC")     #sorts the best to the TOP, uses QIC(I)

Model selection table 
              (Int) bdi.pre drg lng     mnt trt corstr qLik   QIC delta weight
btb_geeglm_ex 5.880  0.6402   +   + -0.7070   + exchng -140 296.3  0.00  0.450
btb_geeglm_un 6.068  0.6307   +   + -0.7061   + unstrc -140 296.6  0.32  0.382
btb_geeglm_ar 6.620  0.5956   +   + -0.7357   +    ar1 -140 298.3  2.00  0.165
btb_geeglm_in 7.883  0.5724   +   + -0.9608   + indpnd -140 306.6 10.30  0.003
Abbreviations:
 corstr: exchng = 'exchangeable', indpnd = 'independence', 
         unstrc = 'unstructured'
Models ranked by QIC(x)

15.5.2 Final “Best” Model

15.5.2.1 Tabulate Model Parameters

apaSupp::tab_gee(btb_geeglm_ex,
                 var_labels = c("bdi.pre" = "Baseline BDI",
                                "length" = "Current Episode Length",
                                "drug" = "Antidepressant Use",
                                "treatment" = "Intervention",
                                "month" = "Time, Months"),
                 caption = "Final GEE, exchangeable correlation")

**Table 15.7**
Final GEE, exchangeable correlation
	b	(SE)	p
(Intercept)	5.88	(2.11)	.005**
Baseline BDI	0.64	(0.08)	< .001***
Current Episode Length
<6m	—	—
>6m	0.20	(1.48)	.890
Antidepressant Use
No	—	—
Yes	-2.77	(1.65)	.093
Intervention
TAU	—	—
BtheB	-2.33	(1.66)	.160
Time, Months	-0.71	(0.15)	< .001***
Note. N = 280 observations on 97 participants. Correlation structure: exchangeable
* p < .05. p < .01. * p < .001.

15.5.2.2 Plot the Marginal Effects

interactions::interact_plot(model = btb_geeglm_ex,
                            pred = "month",
                            modx = "treatment")

Do not worry about confidence intervals.

expand.grid(bdi.pre = 23,
            length = "<6m",
            drug = "No",
            treatment = levels(df_btb_long$treatment),
            month = seq(from = 2, to = 8, by = 2)) %>%  
  dplyr::mutate(fit_in = predict(btb_geeglm_in,
                                 newdata = .,
                                 type = "response")) %>% 
  dplyr::mutate(fit_ex = predict(btb_geeglm_ex,
                                 newdata = .,
                                 type = "response")) %>% 
  dplyr::mutate(fit_ar = predict(btb_geeglm_ar,
                                 newdata = .,
                                 type = "response")) %>% 
  dplyr::mutate(fit_un = predict(btb_geeglm_un,
                                 newdata = .,
                                 type = "response")) %>% 
  tidyr::pivot_longer(cols = starts_with("fit_"),
                      names_to = "covR",
                      names_pattern = "fit_(.*)",
                      values_to = "fit") %>% 
  dplyr::mutate(covR = factor(covR, 
                              levels = c("un", "ar", "ex", "in"),
                              labels = c("Unstructured",
                                         "Auto-Regressive",
                                         "Compound Symetry",
                                         "Independence"))) %>% 
  ggplot(aes(x = month,
             y = fit,
             linetype = treatment)) +
  geom_line(alpha = 0.6) +
  theme_bw() +
  labs(title    = "BtheB - Predictions from four GEE models (geeglm)",
       x = "Month",
       y = "Predicted BDI",
       linetype = "Treatment:") +
  scale_linetype_manual(values = c("solid", "longdash")) +
  theme(legend.key.width = unit(1, "cm")) +
  facet_wrap(~ covR)

expand.grid(bdi.pre = 23,
            length = "<6m",
            drug = "No",
            treatment = levels(df_btb_long$treatment),
            month = seq(from = 2, to = 8, by = 2)) %>%  
  dplyr::mutate(fit_in = predict(btb_geeglm_in,
                                 newdata = .,
                                 type = "response")) %>% 
  dplyr::mutate(fit_ex = predict(btb_geeglm_ex,
                                 newdata = .,
                                 type = "response")) %>% 
  dplyr::mutate(fit_ar = predict(btb_geeglm_ar,
                                 newdata = .,
                                 type = "response")) %>% 
  dplyr::mutate(fit_un = predict(btb_geeglm_un,
                                 newdata = .,
                                 type = "response")) %>% 
  tidyr::pivot_longer(cols = starts_with("fit_"),
                      names_to = "covR",
                      names_pattern = "fit_(.*)",
                      values_to = "fit") %>% 
  dplyr::mutate(covR = factor(covR, 
                              levels = c("un", "ar", "ex", "in"),
                              labels = c("Unstructured",
                                         "Auto-Regressive",
                                         "Compound Symetry",
                                         "Independence"))) %>% 
  ggplot(aes(x = month,
             y = fit,
             color = covR,
             linetype = treatment)) +
  geom_line(alpha = 0.6) +
  theme_bw() +
  labs(title    = "BtheB - Predictions from four GEE models (geeglm)",
       x = "Month",
       y = "Predicted BDI",
       color = "Covariance Pattern:",
       linetype = "Treatment:") +
  scale_linetype_manual(values = c("solid", "longdash")) +
  scale_size_manual(values     = c(2, 1, 1, 1)) +
  scale_color_manual(values    = c("red", 
                                   "dodgerblue",
                                   "blue",
                                   "darkgreen")) +
  theme(legend.key.width = unit(1, "cm"))

15.5.3 Investigate interactions NOT with time (`month`)

btb_geeglm_ex_1 <- geepack::geeglm(bdi ~ bdi.pre*length + drug + treatment + month, 
                                   data = df_btb_long, 
                                   id = id, 
                                   wave = month,
                                   family = gaussian, 
                                   corstr = 'exchangeable')

btb_geeglm_ex_2 <- geepack::geeglm(bdi ~ bdi.pre*drug + length + treatment + month, 
                                   data = df_btb_long, 
                                   id = id, 
                                   wave = month,
                                   family = gaussian, 
                                   corstr = 'exchangeable')

btb_geeglm_ex_3 <- geepack::geeglm(bdi ~  bdi.pre*treatment + length + drug + month, 
                                   data = df_btb_long, 
                                   id = id, 
                                   wave = month,
                                   family = gaussian, 
                                   corstr = 'exchangeable')

apaSupp::tab_gees(list("Episode Length" = btb_geeglm_ex_1,
                       "Antidepressant" = btb_geeglm_ex_2,
                       "Intervention" = btb_geeglm_ex_3),
                  narrow = TRUE,
                  caption = "GEE (exchangable): Interactions, not with Time")

**Table 15.8**
GEE (exchangable): Interactions, not with Time
	Episode Length		Antidepressant		Intervention
	b	95% CI	b	95% CI	b	95% CI
(Intercept)	3.24	[-0.65, 7.14]	3.37	[-1.89, 8.62]	3.62	[-1.82, 9.06]
bdi.pre	0.76	[0.61, 0.90]***	0.75	[0.51, 0.99]***	0.74	[0.49, 0.98]***
length
<6m	—	—	—	—	—	—
>6m	5.66	[-0.75, 12.07]	0.47	[-2.40, 3.34]	0.22	[-2.67, 3.12]
drug
No	—	—	—	—	—	—
Yes	-2.25	[-5.42, 0.92]	1.47	[-4.80, 7.74]	-2.76	[-5.97, 0.45]
treatment
TAU	—	—	—	—	—	—
BtheB	-2.42	[-5.61, 0.77]	-2.19	[-5.45, 1.08]	1.24	[-5.40, 7.87]
month	-0.70	[-1.00, -0.40]***	-0.71	[-1.01, -0.41]***	-0.71	[-1.01, -0.41]***
bdi.pre * length
bdi.pre * >6m	-0.23	[-0.55, 0.08]
bdi.pre * drug
bdi.pre * Yes			-0.19	[-0.51, 0.14]
bdi.pre * treatment
bdi.pre * BtheB					-0.15	[-0.48, 0.17]
Note.
* p < .05. p < .01. * p < .001.

15.5.4 Investigate interactions with time (`month`)

btb_geeglm_ex_11 <- geepack::geeglm(bdi ~ bdi.pre + length + drug + treatment*month, 
                                    data = df_btb_long, 
                                    id = id, 
                                    wave = month,
                                    family = gaussian, 
                                    corstr = 'exchangeable')

btb_geeglm_ex_12 <- geepack::geeglm(bdi ~ bdi.pre + length + treatment + drug*month, 
                                    data = df_btb_long, 
                                    id = id, 
                                    wave = month,
                                    family = gaussian, 
                                    corstr = 'exchangeable')

btb_geeglm_ex_13 <- geepack::geeglm(bdi ~ bdi.pre + drug + treatment +  length*month, 
                                    data = df_btb_long, 
                                    id = id, 
                                    wave = month,
                                    family = gaussian, 
                                    corstr = 'exchangeable')

btb_geeglm_ex_14 <- geepack::geeglm(bdi ~ length + drug + treatment + bdi.pre*month, 
                                    data = df_btb_long, 
                                    id = id, 
                                    wave = month,
                                    family = gaussian, 
                                    corstr = 'exchangeable')

apaSupp::tab_gees(list("Intervention"   = btb_geeglm_ex_11,
                       "Antidepressant" = btb_geeglm_ex_12,
                       "Episode length" = btb_geeglm_ex_13,
                       "Baseline BDI"   = btb_geeglm_ex_14),
                  narrow = TRUE,
                  caption = "GEE (exchangable): Interactions with Time")

**Table 15.9**
GEE (exchangable): Interactions with Time
	Intervention		Antidepressant		Episode length		Baseline BDI
	b	95% CI	b	95% CI	b	95% CI	b	95% CI
(Intercept)	6.83	[2.48, 11.19]**	6.63	[2.29, 10.97]**	6.28	[2.07, 10.49]**	5.09	[0.50, 9.67]*
bdi.pre	0.64	[0.48, 0.80]***	0.64	[0.48, 0.80]***	0.64	[0.48, 0.79]***	0.67	[0.50, 0.85]***
length
<6m	—	—	—	—	—	—	—	—
>6m	0.25	[-2.67, 3.17]	0.16	[-2.76, 3.08]	-0.44	[-4.04, 3.15]	0.27	[-2.64, 3.18]
drug
No	—	—	—	—	—	—	—	—
Yes	-2.74	[-5.99, 0.50]	-4.31	[-8.25, -0.37]*	-2.79	[-6.01, 0.43]	-2.72	[-5.96, 0.53]
treatment
TAU	—	—	—	—	—	—	—	—
BtheB	-4.23	[-8.16, -0.30]*	-2.32	[-5.60, 0.96]	-2.31	[-5.56, 0.93]	-2.36	[-5.61, 0.90]
month	-0.95	[-1.45, -0.45]***	-0.88	[-1.28, -0.48]***	-0.80	[-1.11, -0.50]***	-0.50	[-1.18, 0.17]
treatment * month
BtheB * month	0.47	[-0.12, 1.06]
drug * month
Yes * month			0.38	[-0.21, 0.98]
length * month
>6m * month					0.16	[-0.40, 0.73]
bdi.pre * month							-0.01	[-0.04, 0.02]
Note.
* p < .05. p < .01. * p < .001.

15.6 Plot Best Model

Now only plot the significant variables for the ‘best’ model.

interactions::interact_plot(model = btb_geeglm_ex,
                            pred = "month",
                            modx = "bdi.pre")

interactions::interact_plot(model = btb_geeglm_ex,
                            pred = "month",
                            modx = "bdi.pre",
                            modx.values = c(10, 20, 30),
                            at = list(length = "<6m",
                                      drug = "No",
                                      treatment = "TAU"),
                            colors = rep("black", times = 3),
                            x.label = "Month",
                            y.label = "Predicted BDI",
                            legend.main    = "Baseline BDI:") +
  theme_bw() +
  theme(legend.position = "bottom",
        legend.key.width = unit(2, "cm")) +
  labs(title    = "BtheB - Predictions from the GEE model (exchangable)",
       subtitle = "Trajectory for a person with BL depression < 6 months and randomized to TAU")

15.7 Conclusion