14 Ex: Logistic - Depression (Hoffman)

Compiled: April 22, 2025

14.1 PREPARATION

14.1.1 Load Packages

# library(remotes)
# remotes::install_github("sarbearschwartz/apaSupp")  # updated: 4/15/25
# remotes::install_github("ddsjoberg/gtsummary")


library(tidyverse)
library(haven)        
library(naniar)
library(apaSupp)
library(performance) 
library(interactions)
library(GGally)

14.1.2 Load Data

This dataset comes from John Hoffman’s textbook: Regression Models for Categorical, Count, and Related Variables: An Applied Approach (2004) Amazon link, 2014 edition

Chapter 3: Logistic and Probit Regression Models

Dataset: The following example uses the SPSS data set Depress.sav. The dependent variable of interest is a measure of life satisfaction, labeled satlife.

Dependent Variable = satlife (numeric version) or lifesat (factor version)

df_depress <- haven::read_spss("https://raw.githubusercontent.com/CEHS-research/data/master/Hoffmann_datasets/depress.sav") %>% 
  haven::as_factor() %>%    # labelled to factors
  haven::zap_label() %>%    # remove SPSS junk
  haven::zap_formats() %>%  # remove SPSS junk
  haven::zap_widths() %>%   # remove SPSS junk
  dplyr::mutate(sex = forcats::fct_recode(sex,
                                          "Female" = "female",
                                          "Male" = "male")) %>% 
  dplyr::mutate(lifesat = forcats::fct_recode(lifesat,
                                              "Yes (1)" = "high",
                                              "No (0)" = "low"))
tibble::glimpse(df_depress)
Rows: 118
Columns: 14
$ id      <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,…
$ age     <dbl> 39, 41, 42, 30, 35, 44, 31, 39, 35, 33, 38, 31, 40, 44, 43, 32…
$ iq      <dbl> 94, 89, 83, 99, 94, 90, 94, 87, NA, 92, 92, 94, 91, 86, 90, NA…
$ anxiety <fct> medium low, medium low, medium high, medium low, medium low, N…
$ depress <fct> medium, medium, high, medium, low, low, medium, medium, medium…
$ sleep   <fct> low, low, low, low, high, low, NA, low, low, low, high, low, l…
$ sex     <fct> Female, Female, Female, Female, Female, Male, Female, Female, …
$ lifesat <fct> No (0), No (0), No (0), No (0), Yes (1), Yes (1), No (0), Yes …
$ weight  <dbl> 4.9, 2.2, 4.0, -2.6, -0.3, 0.9, -1.5, 3.5, -1.2, 0.8, -1.9, 5.…
$ satlife <dbl> 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0,…
$ male    <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,…
$ sleep1  <dbl> 0, 0, 0, 0, 1, 0, NA, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, N…
$ newiq   <dbl> 2.21, -2.79, -8.79, 7.21, 2.21, -1.79, 2.21, -4.79, NA, 0.21, …
$ newage  <dbl> 1.5424, 3.5424, 4.5424, -7.4576, -2.4576, 6.5424, -6.4576, 1.5…
df_depress %>% 
  dplyr::group_by(lifesat, satlife) %>% 
  dplyr::tally()
# A tibble: 3 × 3
  lifesat satlife     n
  <fct>     <dbl> <int>
1 Yes (1)       1    52
2 No (0)        0    65
3 <NA>         NA     1

14.2 EXPLORATORY DATA ANALYSIS

14.2.1 Missing Data

df_depress %>% 
  dplyr::select("Sex" = sex,
                "Life Satisfaction" = lifesat,
                "IQ, pts" = iq,
                "Age, yrs" = age,
                "Weight, lbs" = weight) %>% 
  naniar::miss_var_summary() %>%
  dplyr::select(Variable = variable,
                n = n_miss) %>% 
  flextable::flextable() %>% 
  apaSupp::theme_apa(caption = "Missing Data by Variable")
Table 14.1
Missing Data by Variable

Variable

n

Weight, lbs

11

IQ, pts

8

Life Satisfaction

1

Sex

0

Age, yrs

0

df_depress %>% 
  dplyr::select("Sex" = sex,
                "Life Satisfaction" = lifesat,
                "IQ, pts" = iq,
                "Age, yrs" = age,
                "Weight, lbs" = weight) %>% 
  naniar::gg_miss_upset()

14.2.2 Summary

df_depress %>% 
  dplyr::select("Sex" = sex,
                "Life Satisfaction" = lifesat) %>% 
  apaSupp::tab_freq(caption = "Descriptive Summary of Categorical Variables")
Table 14.2
Descriptive Summary of Categorical Variables

Statistic
(N=118)

Sex

Male

21 (17.8%)

Female

97 (82.2%)

Life Satisfaction

Yes (1)

52 (44.1%)

No (0)

65 (55.1%)

Missing

1 (0.8%)

df_depress %>% 
  dplyr::select("IQ, pts" = iq,
                "Age, yrs" = age,
                "Weight, lbs" = weight) %>% 
  apaSupp::tab_desc(caption = "Descriptive Summary of Continuous Variables")
Table 14.3
Descriptive Summary of Continuous Variables

NA

M

SD

min

Q1

Mdn

Q3

max

IQ, pts

8

91.79

4.53

82.00

89.00

92.00

94.75

106.00

Age, yrs

0

37.46

4.74

29.00

33.00

39.00

41.75

46.00

Weight, lbs

11

1.59

2.72

-4.90

-0.65

1.70

3.55

8.30

Note. N = 118. NA = not available or missing; Mdn = median; Q1 = 25th percentile; Q3 = 75th percentile.

df_depress %>% 
  dplyr::select("Life Satisfaction" = lifesat,
                "Sex" = sex,
                "IQ" = iq,
                "Age" = age,
                "Weight" = weight) %>%
  dplyr::mutate_all(as.numeric) %>% 
  apaSupp::tab_cor(caption = "Pairwise Correlations for Life Satisfaction, Sexd, IQ, Age, and Weight") %>% 
  flextable::hline(i = 4)
Table 14.4
Pairwise Correlations for Life Satisfaction, Sexd, IQ, Age, and Weight

Variable Pair

r

p

Sex

Life Satisfaction

.210

.024*

IQ

Life Satisfaction

.092

.344

Age

Life Satisfaction

.100

.269

Weight

Life Satisfaction

.059

.550

IQ

Sex

.024

.806

Age

Sex

< .001

.672

Weight

Sex

.004

.968

Age

IQ

< .001

< .001***

Weight

IQ

< .001

.003**

Weight

Age

.420

< .001***

Note. N = 118. r = Pearson's Product-Moment correlation coefficient.

* p < .05. ** p < .01. *** p < .001.

14.2.3 Visualize

df_depress %>%
  apaSupp::spicy_histos(var = iq,
                        split = sex,
                        lab = "IQ")

df_depress %>%
  apaSupp::spicy_histos(var = age,
                        split = sex,
                        lab = "Age, yrs")

df_depress %>%
  apaSupp::spicy_histos(var = weight,
                        split = sex,
                        lab = "Weight, lbs")

df_depress %>% 
  dplyr::filter(complete.cases(lifesat, sex, iq, age, weight)) %>% 
  dplyr::select("Satisfaction" = lifesat,
                "Sex" = sex,
                "IQ, pts" = iq,
                "Age, yrs" = age,
                "Weight, lbs" = weight) %>% 
  GGally::ggpairs(aes(colour = Sex),
                  diag = list(continuous = GGally::wrap("densityDiag",
                                                        alpha = .3)),
                  lower = list(continuous = GGally::wrap("smooth", 
                                                         shape = 16,
                                                         se = FALSE, 
                                                         size  = 0.75))) +
  theme_bw() +
  scale_fill_manual(values = c("blue", "coral")) +
  scale_color_manual(values = c("blue", "coral")) 

df_depress %>% 
  ggplot(aes(x = iq,
             y = satlife)) +
  geom_count() +
  geom_smooth(method = "lm") +
  theme_bw() +
  labs(x = "IQ Score",
       y = "Life Satisfaction, numeric") +
  scale_y_continuous(breaks = 0:1,
                     labels = c("No (0)", 
                                "Yes (1)"))

Figure 14.1
Hoffman’s Figure 2.3, top of page 46

Hoffman's Figure 2.3, top of page 46
df_depress %>% 
  ggplot(aes(x = weight,
             y = satlife)) +
  geom_count() +
  geom_smooth(method = "lm") +
  theme_bw() +
  labs(x = "Weight, lbs",
       y = "Life Satisfaction, numeric") +
  scale_y_continuous(breaks = 0:1,
                     labels = c("No (0)", 
                                "Yes (1)"))

Hoffman’s Figure 2.3, top of page 46

df_depress %>% 
  ggplot(aes(x = age,
             y = satlife)) +
  geom_count() +
  geom_smooth(method = "lm") +
  theme_bw() +
  labs(x = "Age in Years",
       y = "Life Satisfaction, numeric") +
  scale_y_continuous(breaks = 0:1,
                     labels = c("No (0)", 
                                "Yes (1)"))

df_depress %>% 
  ggplot(aes(x = age,
             y = satlife)) +
  geom_count() +
  geom_smooth(method = "lm") +
  theme_bw() +
  labs(x = "Age in Years",
       y = "Life Satisfaction, numeric") +
  facet_grid(~ sex) +
  theme(legend.position = "none") +
  scale_y_continuous(breaks = 0:1,
                     labels = c("No (0)", 
                                "Yes (1)"))

14.3 HAND CALCULATIONS

Probability, Odds, and Odds-Ratios

14.3.1 Marginal, over all the sample

Tally the number of participants who are satisfied vs. not…overall and by sex.

df_depress %>% 
  dplyr::select(sex,
                "Satisfied with Life" = lifesat) %>% 
  apaSupp::tab_freq(split = "sex",
                    caption = "Observed Life Satisfaction by Sex")
Table 14.5
Observed Life Satisfaction by Sex

Male
(N=21)

Female
(N=97)

Total
(N=118)

Satisfied with Life

Yes (1)

14 (66.7%)

38 (39.2%)

52 (44.1%)

No (0)

7 (33.3%)

58 (59.8%)

65 (55.1%)

Missing

1 (1.0%)

1 (0.8%)

14.3.1.1 Probability of being happy

\[ prob_{yes} = \frac{n_{yes}}{n_{total}} = \frac{n_{yes}}{n_{yes} + n_{no}} \]

prob <- 52 / 117

prob
[1] 0.4444444

14.3.1.2 Odds of being happy

\[ odds_{yes} = \frac{n_{yes}}{n_{no}} = \frac{n_{yes}}{n_{total} - n_{yes}} \]

odds <- 52/65

odds
[1] 0.8

\[ odds_{yes} = \frac{prob_{yes}}{prob_{no}} = \frac{prob_{yes}}{1 - prob_{yes}} \]

prob/(1 - prob)
[1] 0.8

14.3.2 Comparing by Sex

Cross-tabulate happiness (satlife) with sex (male vs. female).

df_depress %>% 
  dplyr::select(satlife, sex) %>% 
  table() %>% 
  addmargins()
       sex
satlife Male Female Sum
    0      7     58  65
    1     14     38  52
    Sum   21     96 117

14.3.2.1 Probability of being happy, by sex

Reference category = male

prob_male <- 14 / 21

prob_male
[1] 0.6666667

Comparison Category = female

prob_female <- 38 / 96

prob_female
[1] 0.3958333

14.3.2.2 Odds of being happy, by sex

Reference category = male

odds_male <- 14 / 7

odds_male
[1] 2

Comparison Category = female

odds_female <- 38 / 58


odds_female
[1] 0.6551724

14.3.2.3 Odds-Ratio for sex

\[ OR_{\text{female vs. male}} = \frac{odds_{female}}{odds_{male}} \]

odds_ratio <- odds_female / odds_male

odds_ratio
[1] 0.3275862

\[ OR_{\text{female vs. male}} = \frac{\frac{prob_{female}}{1 - prob_{female}}}{\frac{prob_{male}}{1 - prob_{male}}} \]

(prob_female / (1 - prob_female)) / (prob_male / (1 - prob_male))
[1] 0.3275862

\[ OR_{\text{female vs. male}} = \frac{\frac{n_{yes|female}}{n_{no|female}}}{\frac{n_{yes|male}}{n_{no|male}}} \]

((38 / 58)/(14 / 7))
[1] 0.3275862

14.4 SINGLE PREDICTOR

14.4.1 Fit the Model

fit_glm_1 <- glm(satlife ~ sex,
                 data = df_depress,
                 family = binomial(link = "logit"))

14.4.2 Parameter Table

apaSupp::tab_glm(fit_glm_1)
Table 14.6
Parameter Estimates for Generalized Linear Regression

Odds Ratio

Logit Scale

Variable

OR

95% CI

b

(SE)

p

sex

Male

Female

0.33

[0.11, 0.86]

< .0

(0.51)

.028*

(Intercept)

0.69

(0.46)

.134

pseudo-R²

.044

Note. N = 117. CI = confidence interval. Significance denotes Wald t-tests for parameter estimates. Coefficient of deterination displays Tjur's pseudo-R².

* p < .05. ** p < .01. *** p < .001.

apaSupp::tab_glm(fit_glm_1,
                 var_labels = c(sex = "Female vs. Male"),
                 pr2 = "both",
                 caption = "Parameter Etimates for Logistic Regressing for Life Satisfaction by Sex, Unadjusted Odds Ratio",
                 p_note = "apa1",
                 show_single_row = "sex")
Table 14.7
Parameter Etimates for Logistic Regressing for Life Satisfaction by Sex, Unadjusted Odds Ratio

Odds Ratio

Logit Scale

Variable

OR

95% CI

b

(SE)

p

Female vs. Male

0.33

[0.11, 0.86]

-1.12

(0.51)

.028*

(Intercept)

0.69

(0.46)

.134

pseudo-R²

Tjur

.044

McFadden

.032

Note. N = 117. CI = confidence interval. Significance denotes Wald t-tests for parameter estimates. Coefficient of deterination included for both Tjur and McFadden's pseudo-R².

* p < .05.

14.4.3 Model Fit

See this commentary by Paul Alison: https://statisticalhorizons.com/r2logistic/

performance::model_performance(fit_glm_1)
# A tibble: 1 × 10
    AIC  AICc   BIC R2_Tjur  RMSE Sigma Log_loss Score_log Score_spherical   PCP
  <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl>    <dbl>     <dbl>           <dbl> <dbl>
1  160.  160.  165.  0.0437 0.486     1    0.665     -30.1          0.0550 0.528
performance::r2(fit_glm_1)
# R2 for Logistic Regression
  Tjur's R2: 0.044
performance::r2_mcfadden(fit_glm_1)
# R2 for Generalized Linear Regression
       R2: 0.032
  adj. R2: 0.019

14.4.4 Predicted Probability

14.4.4.1 Logit scale

fit_glm_1 %>% 
  emmeans::emmeans(~ sex)
 sex    emmean    SE  df asymp.LCL asymp.UCL
 Male    0.693 0.463 Inf    -0.214    1.6004
 Female -0.423 0.209 Inf    -0.832   -0.0138

Results are given on the logit (not the response) scale. 
Confidence level used: 0.95 
fit_glm_1 %>% 
  emmeans::emmeans(~ sex) %>% 
  pairs()
 contrast      estimate    SE  df z.ratio p.value
 Male - Female     1.12 0.508 Inf   2.198  0.0280

Results are given on the log odds ratio (not the response) scale. 

14.4.4.2 Response Scale

Probability

fit_glm_1 %>% 
  emmeans::emmeans(~ sex,
                   type = "response")
 sex     prob     SE  df asymp.LCL asymp.UCL
 Male   0.667 0.1030 Inf     0.447     0.832
 Female 0.396 0.0499 Inf     0.303     0.497

Confidence level used: 0.95 
Intervals are back-transformed from the logit scale 
fit_glm_1 %>% 
  emmeans::emmeans(~ sex,
                   type = "response") %>% 
  pairs()
 contrast      odds.ratio   SE  df null z.ratio p.value
 Male / Female       3.05 1.55 Inf    1   2.198  0.0280

Tests are performed on the log odds ratio scale 

14.4.5 Interpretation

  • On average, two out of every three males is satisfied, b = 0.667, odds = 1.95, 95% CI [1.58, 2.40].

  • Females have 67% lower odds of being depressed, compared to men, b = -0.27, OR = 0.77, 95% CI [0.61, 0.96], p = .028.

14.4.6 Diagnostics

14.4.6.1 Influential values

Influential values are extreme individual data points that can alter the quality of the logistic regression model.

The most extreme values in the data can be examined by visualizing the Cook’s distance values. Here we label the top 7 largest values:

plot(fit_glm_1, which = 4, id.n = 7)

Note that, not all outliers are influential observations. To check whether the data contains potential influential observations, the standardized residual error can be inspected. Data points with an absolute standardized residuals above 3 represent possible outliers and may deserve closer attention.

14.4.6.2 Standardized Residuals

The following R code computes the standardized residuals (.std.resid) using the R function augment() [broom package].

fit_glm_1 %>% 
  broom::augment() %>% 
  ggplot(aes(x = .rownames, .std.resid)) + 
  geom_point(aes(color = sex), alpha = .5) +
  theme_bw()

14.5 MULTIPLE PREDICTORS

14.5.1 Fit the Model

fit_glm_2 <- glm(satlife ~ sex + iq + age + weight,
                 data = df_depress,
                 family = binomial(link = "logit"))

14.5.2 Parameter Table

EXAMPLE 3.3 A Logistic Regression Model of Life Satisfaction with Multiple Independent Variables, middle of page 52

apaSupp::tab_glm(fit_glm_2)
Table 14.8
Parameter Estimates for Generalized Linear Regression

Odds Ratio

Logit Scale

Variable

OR

95% CI

b

(SE)

Wald

LRT

VIF

sex

.018*

1.01

Male

Female

0.28

[0.09, 0.81]

< .0

(0.56)

.022*

iq

0.93

[0.83, 1.03]

-0.07

(0.05)

.174

.167

1.29

age

0.96

[0.86, 1.06]

-0.04

(0.05)

.433

.431

1.42

weight

0.96

[0.81, 1.15]

-0.04

(0.09)

.671

.671

1.23

(Intercept)

9.02

(6.02)

.134

pseudo-R²

.075

Note. N = 99. CI = confidence interval; VIF = variance inflation factor. Significance denotes Wald t-tests for individual parameter estimates, as well as Likelihood Ratio Tests (LRT) for single-predictor deletion. Coefficient of deterination displays Tjur's pseudo-R².

* p < .05. ** p < .01. *** p < .001.

apaSupp::tab_glm(fit_glm_2, vif = FALSE, pr2 = "both")
Table 14.9
Parameter Estimates for Generalized Linear Regression

Odds Ratio

Logit Scale

Variable

OR

95% CI

b

(SE)

Wald

LRT

sex

.018*

Male

Female

0.28

[0.09, 0.81]

< .0

(0.56)

.022*

iq

0.93

[0.83, 1.03]

-0.07

(0.05)

.174

.167

age

0.96

[0.86, 1.06]

-0.04

(0.05)

.433

.431

weight

0.96

[0.81, 1.15]

-0.04

(0.09)

.671

.671

(Intercept)

9.02

(6.02)

.134

pseudo-R²

Tjur

.075

McFadden

.207

Note. N = 99. CI = confidence interval. Significance denotes Wald t-tests for individual parameter estimates, as well as Likelihood Ratio Tests (LRT) for single-predictor deletion. Coefficient of deterination included for both Tjur and McFadden's pseudo-R².

* p < .05. ** p < .01. *** p < .001.

apaSupp::tab_glm(fit_glm_2, fit = c("AIC", "BIC"))
Table 14.10
Parameter Estimates for Generalized Linear Regression

Odds Ratio

Logit Scale

Variable

OR

95% CI

b

(SE)

Wald

LRT

VIF

sex

.018*

1.01

Male

Female

0.28

[0.09, 0.81]

< .0

(0.56)

.022*

iq

0.93

[0.83, 1.03]

-0.07

(0.05)

.174

.167

1.29

age

0.96

[0.86, 1.06]

-0.04

(0.05)

.433

.431

1.42

weight

0.96

[0.81, 1.15]

-0.04

(0.09)

.671

.671

1.23

AIC

138

BIC

150

(Intercept)

9.02

(6.02)

.134

pseudo-R²

.075

Note. N = 99. CI = confidence interval; VIF = variance inflation factor. Significance denotes Wald t-tests for individual parameter estimates, as well as Likelihood Ratio Tests (LRT) for single-predictor deletion. Coefficient of deterination displays Tjur's pseudo-R².

* p < .05. ** p < .01. *** p < .001.

apaSupp::tab_glm(fit_glm_2, vif = FALSE, lrt = FALSE)
Table 14.11
Parameter Estimates for Generalized Linear Regression

Odds Ratio

Logit Scale

Variable

OR

95% CI

b

(SE)

p

sex

Male

Female

0.28

[0.09, 0.81]

< .0

(0.56)

.022*

iq

0.93

[0.83, 1.03]

-0.07

(0.05)

.174

age

0.96

[0.86, 1.06]

-0.04

(0.05)

.433

weight

0.96

[0.81, 1.15]

-0.04

(0.09)

.671

(Intercept)

9.02

(6.02)

.134

pseudo-R²

.075

Note. N = 99. CI = confidence interval. Significance denotes Wald t-tests for parameter estimates. Coefficient of deterination displays Tjur's pseudo-R².

* p < .05. ** p < .01. *** p < .001.

apaSupp::tab_glm(fit_glm_2,
                 var_labels = c(sex = "Female vs. Male",
                                iq = "IQ, pts",
                                age = "Age, yrs",
                                weight = "Weight, lbs"), 
                 pr2 = "both",
                 show_single_row = "sex",
                 p_note = "apa1",
                 caption = "Parameter Etimates for Logistic Regressing for Life Satisfaction by Sex, Controlling fro IQ, Age, and Weight") %>% 
  flextable::bold(i = c(2))
Table 14.12
Parameter Etimates for Logistic Regressing for Life Satisfaction by Sex, Controlling fro IQ, Age, and Weight

Odds Ratio

Logit Scale

Variable

OR

95% CI

b

(SE)

Wald

LRT

VIF

Female vs. Male

0.28

[0.09, 0.81]

-1.28

(0.56)

.022*

.018*

1.01

IQ, pts

0.93

[0.83, 1.03]

-0.07

(0.05)

.174

.167

1.29

Age, yrs

0.96

[0.86, 1.06]

-0.04

(0.05)

.433

.431

1.42

Weight, lbs

0.96

[0.81, 1.15]

-0.04

(0.09)

.671

.671

1.23

(Intercept)

9.02

(6.02)

.134

pseudo-R²

Tjur

.075

McFadden

.207

Note. N = 99. CI = confidence interval; VIF = variance inflation factor. Significance denotes Wald t-tests for individual parameter estimates, as well as Likelihood Ratio Tests (LRT) for single-predictor deletion. Coefficient of deterination included for both Tjur and McFadden's pseudo-R².

* p < .05.

apaSupp::tab_glm_fits(list("Univariate"   = fit_glm_1, 
                           "Multivariate" = fit_glm_2))
Table 14.13
Comparison of Generalized Linear Model Performane Metrics

pseudo- R2R^2

Model

N

k

McFadden

Tjur

AIC

BIC

RMSE

Univariate

117

2

.032

.044

159.62

165.14

0.49

Multivariate

99

5

.207

.075

137.52

150.50

0.47

Note. Models fit to different samples. k = number of parameters estimated in each model. Larger R2R^2 values indicated better performance. Smaller values indicated better performance for Akaike's Information Criteria (AIC), Bayesian information criteria (BIC), and Root Mean Squared Error (RMSE).

14.6 COMPARE MODELS

14.6.1 Refit to Complete Cases

Restrict the data to only participant that have all four of these predictors.

df_depress_model <- df_depress %>% 
  dplyr::filter(complete.cases(satlife, 
                               sex, iq, age, weight))

Refit Model 1 with only participant complete on all the predictors.

fit_glm_1_redo <- glm(satlife ~ sex,
                      data = df_depress_model,
                      family = binomial(link = "logit"))

fit_glm_2_redo <- glm(satlife ~ sex + iq + age + weight,
                      data = df_depress_model,
                      family = binomial(link = "logit"))

14.6.2 Parameter Table

apaSupp::tab_glms(list(fit_glm_1_redo, fit_glm_2_redo))
Table 14.14
Compare Generalized Regression Models

Model 1

Model 2

Variable

OR

95% CI

p

OR

95% CI

p

sex

Male

Female

0.29

[0.09, 0.84]

.026*

0.28

[0.09, 0.81]

.022*

iq

0.93

[0.83, 1.03]

.174

age

0.96

[0.86, 1.06]

.433

weight

0.96

[0.81, 1.15]

.671

Fit Metrics

AIC

133.7

137.5

BIC

138.9

150.5

pseudo-R²

Tjur

.053

.075

McFadden

.039

.055

Note. N = 99. CI = confidence interval.Coefficient of deterination estiamted with both Tjur and McFadden's psuedo R-squared

* p < .05. ** p < .01. *** p < .001.

apaSupp::tab_glms(list("Univariate"   = fit_glm_1_redo, 
                       "Multivariate" = fit_glm_2_redo),
                  var_labels = c(sex = "Sex",
                                 iq = "IQ Score",
                                 age = "Age, yrs",
                                 weight = "Weight, lbs"),
                  fit = "AIC",
                  pr2 = "tjur",
                  narrow = TRUE) %>% 
  flextable::bold(i = 3)
Table 14.15
Compare Generalized Regression Models

Univariate

Multivariate

Variable

OR

95% CI

OR

95% CI

Sex

Male

Female

0.29

[0.09, 0.84]*

0.28

[0.09, 0.81]*

IQ Score

0.93

[0.83, 1.03]

Age, yrs

0.96

[0.86, 1.06]

Weight, lbs

0.96

[0.81, 1.15]

AIC

133.70

137.52

pseudo-R²

.053

.075

Note. N = 99. CI = confidence interval.Coefficient of deterination estiamted by Tjur's psuedo R-squared

* p < .05. ** p < .01. *** p < .001.

14.6.3 Comparison Criteria

apaSupp::tab_glm_fits(list("Univariate, Initial"   = fit_glm_1, 
                           "Univariate, Restricted" = fit_glm_1_redo, 
                           "Multivariate" = fit_glm_2_redo))
Table 14.16
Comparison of Generalized Linear Model Performane Metrics

pseudo- R2R^2

Model

N

k

McFadden

Tjur

AIC

BIC

RMSE

Univariate, Initial

117

2

.032

.044

159.62

165.14

0.49

Univariate, Restricted

99

2

.039

.053

133.70

138.89

0.48

Multivariate

99

5

.055

.075

137.52

150.50

0.47

Note. Models fit to different samples. k = number of parameters estimated in each model. Larger R2R^2 values indicated better performance. Smaller values indicated better performance for Akaike's Information Criteria (AIC), Bayesian information criteria (BIC), and Root Mean Squared Error (RMSE).

anova(fit_glm_1_redo, 
      fit_glm_2_redo,
      test = "LRT")
# A tibble: 2 × 5
  `Resid. Df` `Resid. Dev`    Df Deviance `Pr(>Chi)`
        <dbl>        <dbl> <dbl>    <dbl>      <dbl>
1          97         130.    NA    NA        NA    
2          94         128.     3     2.18      0.537

14.6.4 Interpretation

  • Only sex is predictive of depression. There is no evidence IQ, age, or weight are associated with depression, LRT \(\chi^2\)(3) = 2.17, p = .537.

14.7 CHANGING REFERENCE CATEGORY

levels(df_depress$sex)
[1] "Male"   "Female"
df_depress_ref <- df_depress %>% 
  dplyr::mutate(male   = sex %>% forcats::fct_relevel("Female", after = 0)) %>%   dplyr::mutate(female = sex %>% forcats::fct_relevel("Male",   after = 0))
levels(df_depress_ref$male)
[1] "Female" "Male"  
levels(df_depress_ref$female)
[1] "Male"   "Female"
fit_glm_2_male <- glm(satlife ~ male + iq + age + weight,
                      data = df_depress_ref,
                      family = binomial(link = "logit"))

fit_glm_2_female <- glm(satlife ~ female + iq + age + weight,
                        data = df_depress_ref,
                        family = binomial(link = "logit"))
apaSupp::tab_glms(list("Reference = Male"  = fit_glm_2_female,
                       "Reference = Female" = fit_glm_2_male)) %>% 
  flextable::bold(i = c(3, 9))
Table 14.17
Compare Generalized Regression Models

Reference = Male

Reference = Female

Variable

OR

95% CI

p

OR

95% CI

p

female

Male

Female

0.28

[0.09, 0.81]

.022*

iq

0.93

[0.83, 1.03]

.174

0.93

[0.83, 1.03]

.174

age

0.96

[0.86, 1.06]

.433

0.96

[0.86, 1.06]

.433

weight

0.96

[0.81, 1.15]

.671

0.96

[0.81, 1.15]

.671

male

Female

Male

3.59

[1.24, 11.49]

.022*

Fit Metrics

AIC

137.5

137.5

BIC

150.5

150.5

pseudo-R²

Tjur

.075

.075

McFadden

.207

.207

Note. N = 99. CI = confidence interval.Coefficient of deterination estiamted with both Tjur and McFadden's psuedo R-squared

* p < .05. ** p < .01. *** p < .001.