14 Logistic Regression - Ex: Depression (Hoffman)

Compiled: April 10, 2025

14.1 PREPARATION

14.1.1 Load Packages

# library(remotes)
# remotes::install_github("sarbearschwartz/apaSupp")  # updated: 4/10/25
# remotes::install_github("ddsjoberg/gtsummary")


library(tidyverse)
library(haven)        # read in SPSS dataset
library(apaSupp)
library(pscl)         # psudo R-squared function
library(performance)  # r-squared values
library(GGally)

14.1.2 Load Data

This dataset comes from John Hoffman’s textbook: Regression Models for Categorical, Count, and Related Variables: An Applied Approach (2004) Amazon link, 2014 edition

Chapter 3: Logistic and Probit Regression Models

Dataset: The following example uses the SPSS data set Depress.sav. The dependent variable of interest is a measure of life satisfaction, labeled satlife.

df_depress <- haven::read_spss("https://raw.githubusercontent.com/CEHS-research/data/master/Hoffmann_datasets/depress.sav") %>% 
  haven::as_factor() %>%    # labelled to factors
  haven::zap_label() %>%    # remove SPSS junk
  haven::zap_formats() %>%  # remove SPSS junk
  haven::zap_widths() %>%   # remove SPSS junk
  dplyr::mutate(sex = forcats::fct_recode(sex,
                                          "Female" = "female",
                                          "Male" = "male")) %>% 
  dplyr::mutate(lifesat = forcats::fct_recode(lifesat,
                                              "Yes (1)" = "high",
                                              "No (0)" = "low"))
tibble::glimpse(df_depress)
Rows: 118
Columns: 14
$ id      <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,…
$ age     <dbl> 39, 41, 42, 30, 35, 44, 31, 39, 35, 33, 38, 31, 40, 44, 43, 32…
$ iq      <dbl> 94, 89, 83, 99, 94, 90, 94, 87, NA, 92, 92, 94, 91, 86, 90, NA…
$ anxiety <fct> medium low, medium low, medium high, medium low, medium low, N…
$ depress <fct> medium, medium, high, medium, low, low, medium, medium, medium…
$ sleep   <fct> low, low, low, low, high, low, NA, low, low, low, high, low, l…
$ sex     <fct> Female, Female, Female, Female, Female, Male, Female, Female, …
$ lifesat <fct> No (0), No (0), No (0), No (0), Yes (1), Yes (1), No (0), Yes …
$ weight  <dbl> 4.9, 2.2, 4.0, -2.6, -0.3, 0.9, -1.5, 3.5, -1.2, 0.8, -1.9, 5.…
$ satlife <dbl> 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0,…
$ male    <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,…
$ sleep1  <dbl> 0, 0, 0, 0, 1, 0, NA, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, N…
$ newiq   <dbl> 2.21, -2.79, -8.79, 7.21, 2.21, -1.79, 2.21, -4.79, NA, 0.21, …
$ newage  <dbl> 1.5424, 3.5424, 4.5424, -7.4576, -2.4576, 6.5424, -6.4576, 1.5…
df_depress %>% 
  dplyr::group_by(lifesat, satlife) %>% 
  dplyr::tally()
# A tibble: 3 × 3
  lifesat satlife     n
  <fct>     <dbl> <int>
1 Yes (1)       1    52
2 No (0)        0    65
3 <NA>         NA     1

14.2 EXPLORATORY DATA ANALYSIS

Dependent Variable = satlife (numeric version) or lifesat (factor version)

14.2.1 Summary

df_depress %>% 
  dplyr::select("Sex" = sex,
                "Life Satisfaction" = lifesat) %>% 
  apaSupp::tab_freq(caption = "Descriptive Summary of Categorical Variables")
Table 14.1
Descriptive Summary of Categorical Variables

Statistic
(N=118)

Sex

Male

21 (17.8%)

Female

97 (82.2%)

Life Satisfaction

Yes (1)

52 (44.1%)

No (0)

65 (55.1%)

Missing

1 (0.8%)

df_depress %>% 
  dplyr::select("IQ, pts" = iq,
                "Age, yrs" = age,
                "Weight, lbs" = weight) %>% 
  apaSupp::tab_desc(caption = "Descriptive Summary of Continuous Variables")
Table 14.2
Descriptive Summary of Continuous Variables

Variable

NA

M

SD

min

Q1

Mdn

Q3

max

IQ, pts

8

91.79

4.53

82.00

89.00

92.00

94.75

106.00

Age, yrs

0

37.46

4.74

29.00

33.00

39.00

41.75

46.00

Weight, lbs

11

1.59

2.72

-4.90

-0.65

1.70

3.55

8.30

Note. NA = not available or missing. Mdn = median. Q1 = 25th percentile, Q3 = 75th percentile. N = 118.

df_depress %>% 
  dplyr::select("Life Satisfaction" = lifesat,
                "Sex" = sex,
                "IQ" = iq,
                "Age" = age,
                "Weight" = weight) %>%
  dplyr::mutate_all(as.numeric) %>% 
  apaSupp::tab_cor(caption = "Pairwise Correlations for Life Satisfaction, Sexd, IQ, Age, and Weight") %>% 
  flextable::hline(i = 4)
Table 14.3
Pairwise Correlations for Life Satisfaction, Sexd, IQ, Age, and Weight

Variables

r

p

Life Satisfaction

Sex

0.210

.024

*

Life Satisfaction

IQ

0.092

.344

Life Satisfaction

Age

0.100

.269

Life Satisfaction

Weight

0.059

.550

Sex

IQ

0.024

.806

Sex

Age

-0.039

.672

Sex

Weight

0.004

.968

IQ

Age

-0.430

< .001

***

IQ

Weight

-0.290

.003

**

Age

Weight

0.420

< .001

***

Note. r = Pearson's Product-Moment correlation coefficient. N = 118.

* p < .05. ** p < .01. *** p < .001.

14.2.2 Visualize

df_depress %>%
  apaSupp::spicy_histos(var = iq,
                        split = sex,
                        lab = "IQ")

df_depress %>%
  apaSupp::spicy_histos(var = age,
                        split = sex,
                        lab = "Age, yrs")

df_depress %>%
  apaSupp::spicy_histos(var = weight,
                        split = sex,
                        lab = "Weight, lbs")

df_depress %>% 
  dplyr::filter(complete.cases(lifesat, sex, iq, age, weight)) %>% 
  dplyr::select("Satisfaction" = lifesat,
                "Sex" = sex,
                "IQ, pts" = iq,
                "Age, yrs" = age,
                "Weight, lbs" = weight) %>% 
  GGally::ggpairs(aes(colour = Sex),
                  diag = list(continuous = GGally::wrap("densityDiag",
                                                        alpha = .3)),
                  lower = list(continuous = GGally::wrap("smooth", 
                                                         shape = 16,
                                                         se = FALSE, 
                                                         size  = 0.75))) +
  theme_bw() +
  scale_fill_manual(values = c("blue", "coral")) +
  scale_color_manual(values = c("blue", "coral")) 

df_depress %>% 
  ggplot(aes(x = iq,
             y = satlife)) +
  geom_count() +
  geom_smooth(method = "lm") +
  theme_bw() +
  labs(x = "IQ Score",
       y = "Life Satisfaction, numeric") +
  scale_y_continuous(breaks = 0:1,
                     labels = c("No (0)", 
                                "Yes (1)"))

Figure 14.1
Hoffman’s Figure 2.3, top of page 46

Hoffman's Figure 2.3, top of page 46
df_depress %>% 
  ggplot(aes(x = weight,
             y = satlife)) +
  geom_count() +
  geom_smooth(method = "lm") +
  theme_bw() +
  labs(x = "Weight, lbs",
       y = "Life Satisfaction, numeric") +
  scale_y_continuous(breaks = 0:1,
                     labels = c("No (0)", 
                                "Yes (1)"))

Hoffman’s Figure 2.3, top of page 46

df_depress %>% 
  ggplot(aes(x = age,
             y = satlife)) +
  geom_count() +
  geom_smooth(method = "lm") +
  theme_bw() +
  labs(x = "Age in Years",
       y = "Life Satisfaction, numeric") +
  scale_y_continuous(breaks = 0:1,
                     labels = c("No (0)", 
                                "Yes (1)"))

df_depress %>% 
  ggplot(aes(x = age,
             y = satlife)) +
  geom_count() +
  geom_smooth(method = "lm") +
  theme_bw() +
  labs(x = "Age in Years",
       y = "Life Satisfaction, numeric") +
  facet_grid(~ sex) +
  theme(legend.position = "none") +
  scale_y_continuous(breaks = 0:1,
                     labels = c("No (0)", 
                                "Yes (1)"))

14.3 HAND CALCULATIONS

Probability, Odds, and Odds-Ratios

14.3.1 Marginal, over all the sample

Tally the number of participants who are satisfied vs. not…overall and by sex.

df_depress %>% 
  dplyr::select(sex,
                "Satisfied with Life" = lifesat) %>% 
  apaSupp::tab_freq(split = "sex",
                    caption = "Observed Life Satisfaction by Sex")
Table 14.4
Observed Life Satisfaction by Sex

Male
(N=21)

Female
(N=97)

Total
(N=118)

Satisfied with Life

Yes (1)

14 (66.7%)

38 (39.2%)

52 (44.1%)

No (0)

7 (33.3%)

58 (59.8%)

65 (55.1%)

Missing

1 (1.0%)

1 (0.8%)

14.3.1.1 Probability of being happy

\[ prob_{yes} = \frac{n_{yes}}{n_{total}} = \frac{n_{yes}}{n_{yes} + n_{no}} \]

prob <- 52 / 117

prob
[1] 0.4444444

14.3.1.2 Odds of being happy

\[ odds_{yes} = \frac{n_{yes}}{n_{no}} = \frac{n_{yes}}{n_{total} - n_{yes}} \]

odds <- 52/65

odds
[1] 0.8

\[ odds_{yes} = \frac{prob_{yes}}{prob_{no}} = \frac{prob_{yes}}{1 - prob_{yes}} \]

prob/(1 - prob)
[1] 0.8

14.3.2 Comparing by Sex

Cross-tabulate happiness (satlife) with sex (male vs. female).

df_depress %>% 
  dplyr::select(satlife, sex) %>% 
  table() %>% 
  addmargins()
       sex
satlife Male Female Sum
    0      7     58  65
    1     14     38  52
    Sum   21     96 117

14.3.2.1 Probability of being happy, by sex

Reference category = male

prob_male <- 14 / 21

prob_male
[1] 0.6666667

Comparison Category = female

prob_female <- 38 / 96

prob_female
[1] 0.3958333

14.3.2.2 Odds of being happy, by sex

Reference category = male

odds_male <- 14 / 7

odds_male
[1] 2

Comparison Category = female

odds_female <- 38 / 58


odds_female
[1] 0.6551724

14.3.2.3 Odds-Ratio for sex

\[ OR_{\text{female vs. male}} = \frac{odds_{female}}{odds_{male}} \]

odds_ratio <- odds_female / odds_male

odds_ratio
[1] 0.3275862

\[ OR_{\text{female vs. male}} = \frac{\frac{prob_{female}}{1 - prob_{female}}}{\frac{prob_{male}}{1 - prob_{male}}} \]

(prob_female / (1 - prob_female)) / (prob_male / (1 - prob_male))
[1] 0.3275862

\[ OR_{\text{female vs. male}} = \frac{\frac{n_{yes|female}}{n_{no|female}}}{\frac{n_{yes|male}}{n_{no|male}}} \]

((38 / 58)/(14 / 7))
[1] 0.3275862

14.4 SINGLE PREDICTOR

14.4.1 Fit the Model

fit_glm_1 <- glm(satlife ~ sex,
                 data = df_depress,
                 family = binomial(link = "logit"))

14.4.2 Parameter Table

apaSupp::tab_glm(fit_glm_1,
                 var_labels = c(sex = "Sex"),
                 caption = "Parameter Etimates for Logistic Regressing for Life Satisfaction by Sex, Unadjusted Odds Ratio")
Table 14.5
Parameter Etimates for Logistic Regressing for Life Satisfaction by Sex, Unadjusted Odds Ratio

Odds Ratio

Logit Scale

Variable

OR

95% CI

b

(SE)

p

(Intercept)

0.69

(0.46)

.134

Sex

Male

Female

0.33

[0.11, 0.86]

-1.12

(0.51)

.028*

Tjur's R²

.044

Note. N = 117. CI = confidence interval. Significance denotes Wald t-tests for parameter estimates.

* p < .05. ** p < .01. *** p < .001.

14.4.3 Model Fit

See this commentary by Paul Alison: https://statisticalhorizons.com/r2logistic/

performance::model_performance(fit_glm_1)
# A tibble: 1 × 10
    AIC  AICc   BIC R2_Tjur  RMSE Sigma Log_loss Score_log Score_spherical   PCP
  <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl>    <dbl>     <dbl>           <dbl> <dbl>
1  160.  160.  165.  0.0437 0.486     1    0.665     -30.1          0.0550 0.528
performance::r2(fit_glm_1)
# R2 for Logistic Regression
  Tjur's R2: 0.044
performance::r2_mcfadden(fit_glm_1)
# R2 for Generalized Linear Regression
       R2: 0.032
  adj. R2: 0.019

14.4.4 Predicted Probability

14.4.4.1 Logit scale

fit_glm_1 %>% 
  emmeans::emmeans(~ sex)
 sex    emmean    SE  df asymp.LCL asymp.UCL
 Male    0.693 0.463 Inf    -0.214    1.6004
 Female -0.423 0.209 Inf    -0.832   -0.0138

Results are given on the logit (not the response) scale. 
Confidence level used: 0.95 
fit_glm_1 %>% 
  emmeans::emmeans(~ sex) %>% 
  pairs()
 contrast      estimate    SE  df z.ratio p.value
 Male - Female     1.12 0.508 Inf   2.198  0.0280

Results are given on the log odds ratio (not the response) scale. 

14.4.4.2 Response Scale

Probability

fit_glm_1 %>% 
  emmeans::emmeans(~ sex,
                   type = "response")
 sex     prob     SE  df asymp.LCL asymp.UCL
 Male   0.667 0.1030 Inf     0.447     0.832
 Female 0.396 0.0499 Inf     0.303     0.497

Confidence level used: 0.95 
Intervals are back-transformed from the logit scale 
fit_glm_1 %>% 
  emmeans::emmeans(~ sex,
                   type = "response") %>% 
  pairs()
 contrast      odds.ratio   SE  df null z.ratio p.value
 Male / Female       3.05 1.55 Inf    1   2.198  0.0280

Tests are performed on the log odds ratio scale 

14.4.5 Interpretation

  • On average, two out of every three males is satisfied, b = 0.667, odds = 1.95, 95% CI [1.58, 2.40].

  • Females have 67% lower odds of being depressed, compared to men, b = -0.27, OR = 0.77, 95% CI [0.61, 0.96], p = .028.

14.4.6 Diagnostics

14.4.6.1 Influential values

Influential values are extreme individual data points that can alter the quality of the logistic regression model.

The most extreme values in the data can be examined by visualizing the Cook’s distance values. Here we label the top 7 largest values:

plot(fit_glm_1, which = 4, id.n = 7)

Note that, not all outliers are influential observations. To check whether the data contains potential influential observations, the standardized residual error can be inspected. Data points with an absolute standardized residuals above 3 represent possible outliers and may deserve closer attention.

14.4.6.2 Standardized Residuals

The following R code computes the standardized residuals (.std.resid) using the R function augment() [broom package].

fit_glm_1 %>% 
  broom::augment() %>% 
  ggplot(aes(x = .rownames, .std.resid)) + 
  geom_point(aes(color = sex), alpha = .5) +
  theme_bw()

14.5 MULTIPLE PREDICTORS

14.5.1 Fit the Model

fit_glm_2 <- glm(satlife ~ sex + iq + age + weight,
                 data = df_depress,
                 family = binomial(link = "logit"))

14.5.2 Parameter Table

EXAMPLE 3.3 A Logistic Regression Model of Life Satisfaction with Multiple Independent Variables, middle of page 52

apaSupp::tab_glm(fit_glm_2)
Table 14.6
Generalized Regression Model

Odds Ratio

Logit Scale

p

Variable

OR

95% CI

b

(SE)

Wald

LRT

VIF

(Intercept)

9.02

(6.02)

.134

sex

.018*

1.01

Male

Female

0.28

[0.09, 0.81]

-1.28

(0.56)

.022*

iq

0.93

[0.83, 1.03]

-0.07

(0.05)

.174

.167

1.29

age

0.96

[0.86, 1.06]

-0.04

(0.05)

.433

.431

1.42

weight

0.96

[0.81, 1.15]

-0.04

(0.09)

.671

.671

1.23

Tjur's R²

.075

Note. N = 99. CI = confidence interval; VIF = variance inflation factor. Significance denotes Wald t-tests for individual parameter estimates, as well as Likelihood Ratio Tests (LRT) for single-predictor deletion.

* p < .05. ** p < .01. *** p < .001.

apaSupp::tab_glm(fit_glm_2, vif = FALSE)
Table 14.7
Generalized Regression Model

Odds Ratio

Logit Scale

p

Variable

OR

95% CI

b

(SE)

Wald

LRT

(Intercept)

9.02

(6.02)

.134

sex

.018*

Male

Female

0.28

[0.09, 0.81]

-1.28

(0.56)

.022*

iq

0.93

[0.83, 1.03]

-0.07

(0.05)

.174

.167

age

0.96

[0.86, 1.06]

-0.04

(0.05)

.433

.431

weight

0.96

[0.81, 1.15]

-0.04

(0.09)

.671

.671

Tjur's R²

.075

Note. N = 99. CI = confidence interval. Significance denotes Wald t-tests for individual parameter estimates, as well as Likelihood Ratio Tests (LRT) for single-predictor deletion.

* p < .05. ** p < .01. *** p < .001.

apaSupp::tab_glm(fit_glm_2, fit = c("AIC", "BIC"))
Table 14.8
Generalized Regression Model

Odds Ratio

Logit Scale

p

Variable

OR

95% CI

b

(SE)

Wald

LRT

VIF

(Intercept)

9.02

(6.02)

.134

sex

.018*

1.01

Male

Female

0.28

[0.09, 0.81]

-1.28

(0.56)

.022*

iq

0.93

[0.83, 1.03]

-0.07

(0.05)

.174

.167

1.29

age

0.96

[0.86, 1.06]

-0.04

(0.05)

.433

.431

1.42

weight

0.96

[0.81, 1.15]

-0.04

(0.09)

.671

.671

1.23

AIC

138

BIC

150

Tjur's R²

.075

Note. N = 99. CI = confidence interval; VIF = variance inflation factor. Significance denotes Wald t-tests for individual parameter estimates, as well as Likelihood Ratio Tests (LRT) for single-predictor deletion.

* p < .05. ** p < .01. *** p < .001.

apaSupp::tab_glm(fit_glm_2, vif = FALSE, lrt = FALSE)
Table 14.9
Generalized Regression Model

Odds Ratio

Logit Scale

Variable

OR

95% CI

b

(SE)

p

(Intercept)

9.02

(6.02)

.134

sex

Male

Female

0.28

[0.09, 0.81]

-1.28

(0.56)

.022*

iq

0.93

[0.83, 1.03]

-0.07

(0.05)

.174

age

0.96

[0.86, 1.06]

-0.04

(0.05)

.433

weight

0.96

[0.81, 1.15]

-0.04

(0.09)

.671

Tjur's R²

.075

Note. N = 99. CI = confidence interval. Significance denotes Wald t-tests for parameter estimates.

* p < .05. ** p < .01. *** p < .001.

apaSupp::tab_glm(fit_glm_2,
                 var_labels = c(sex = "Sex",
                                iq = "IQ, pts",
                                age = "Age, yrs",
                                weight = "Weight, lbs"),
                 caption = "Parameter Etimates for Logistic Regressing for Life Satisfaction by Sex, Controlling fro IQ, Age, and Weight") %>% 
  flextable::bold(i = c(2:4))
Table 14.10
Parameter Etimates for Logistic Regressing for Life Satisfaction by Sex, Controlling fro IQ, Age, and Weight

Odds Ratio

Logit Scale

p

Variable

OR

95% CI

b

(SE)

Wald

LRT

VIF

(Intercept)

9.02

(6.02)

.134

Sex

.018*

1.01

Male

Female

0.28

[0.09, 0.81]

-1.28

(0.56)

.022*

IQ, pts

0.93

[0.83, 1.03]

-0.07

(0.05)

.174

.167

1.29

Age, yrs

0.96

[0.86, 1.06]

-0.04

(0.05)

.433

.431

1.42

Weight, lbs

0.96

[0.81, 1.15]

-0.04

(0.09)

.671

.671

1.23

Tjur's R²

.075

Note. N = 99. CI = confidence interval; VIF = variance inflation factor. Significance denotes Wald t-tests for individual parameter estimates, as well as Likelihood Ratio Tests (LRT) for single-predictor deletion.

* p < .05. ** p < .01. *** p < .001.

apaSupp::tab_glm_fits(list("Univariate"   = fit_glm_1, 
                           "Multivariate" = fit_glm_2))
Table 14.11
Comparison of Generalized Linear Model Performane Metrics

pseudo-R²

Model

N

k

McFadden

Tjur

AIC

BIC

RMSE

Univariate

117

2

.032

.044

159.62

165.14

0.49

Multivariate

99

5

.207

.075

137.52

150.50

0.47

Note. Models fit to different samples. k = number of parameters estimated in each model. Larger values indicated better performance for pseudo R-squared values. Smaller values indicated better performance for Akaike's Information Criteria (AIC), Bayesian information criteria (BIC), and Root Mean Squared Error (RMSE).

14.6 COMPARE MODELS

14.6.1 Refit to Complete Cases

Restrict the data to only participant that have all four of these predictors.

df_depress_model <- df_depress %>% 
  dplyr::filter(complete.cases(satlife, 
                               sex, iq, age, weight))

Refit Model 1 with only participant complete on all the predictors.

fit_glm_1_redo <- glm(satlife ~ sex,
                      data = df_depress_model,
                      family = binomial(link = "logit"))

fit_glm_2_redo <- glm(satlife ~ sex + iq + age + weight,
                      data = df_depress_model,
                      family = binomial(link = "logit"))

14.6.2 Parameter Table

apaSupp::tab_glms(list(fit_glm_1_redo, fit_glm_2_redo))
Table 14.12
Compare Regression Models

Model 1

Model 2

Variable

OR

95% CI

p

OR

95% CI

p

sex

Male

Female

0.29

[0.09, 0.84]

.026*

0.28

[0.09, 0.81]

.022*

iq

0.93

[0.83, 1.03]

.174

age

0.96

[0.86, 1.06]

.433

weight

0.96

[0.81, 1.15]

.671

* p < .05. ** p < .01. *** p < .001.

apaSupp::tab_glms(list("Univariate"   = fit_glm_1_redo, 
                       "Multivariate" = fit_glm_2_redo),
                  var_labels = c(sex = "Sex",
                                 iq = "IQ Score",
                                 age = "Age, yrs",
                                 weight = "Weight, lbs"),
                  fit = c("AIC", "BIC"),
         narrow = TRUE) %>% 
  flextable::bold(i = 3)
Table 14.13
Compare Regression Models

Univariate

Multivariate

Variable

OR

95% CI

OR

95% CI

Sex

Male

Female

0.29

[0.09, 0.84]*

0.28

[0.09, 0.81]*

IQ Score

0.93

[0.83, 1.03]

Age, yrs

0.96

[0.86, 1.06]

Weight, lbs

0.96

[0.81, 1.15]

AIC

133.70

137.52

BIC

138.89

150.50

* p < .05. ** p < .01. *** p < .001.

14.6.3 Comparison Criteria

apaSupp::tab_glm_fits(list("Univariate, Initial"   = fit_glm_1, 
                           "Univariate, Restricted" = fit_glm_1_redo, 
                           "Multivariate" = fit_glm_2_redo))
Table 14.14
Comparison of Generalized Linear Model Performane Metrics

pseudo-R²

Model

N

k

McFadden

Tjur

AIC

BIC

RMSE

Univariate, Initial

117

2

.032

.044

159.62

165.14

0.49

Univariate, Restricted

99

2

.039

.053

133.70

138.89

0.48

Multivariate

99

5

.055

.075

137.52

150.50

0.47

Note. Models fit to different samples. k = number of parameters estimated in each model. Larger values indicated better performance for pseudo R-squared values. Smaller values indicated better performance for Akaike's Information Criteria (AIC), Bayesian information criteria (BIC), and Root Mean Squared Error (RMSE).

anova(fit_glm_1_redo, 
      fit_glm_2_redo,
      test = "LRT")
# A tibble: 2 × 5
  `Resid. Df` `Resid. Dev`    Df Deviance `Pr(>Chi)`
        <dbl>        <dbl> <dbl>    <dbl>      <dbl>
1          97         130.    NA    NA        NA    
2          94         128.     3     2.18      0.537

14.6.4 Interpretation

  • Only sex is predictive of depression. There is no evidence IQ, age, or weight are associated with depression, LRT \(\chi^2\)(3) = 2.17, p = .537.

14.7 CHANGING REFERENCE CATEGORY

levels(df_depress$sex)
[1] "Male"   "Female"
df_depress_ref <- df_depress %>% 
  dplyr::mutate(male   = sex %>% forcats::fct_relevel("Female", after = 0)) %>%   dplyr::mutate(female = sex %>% forcats::fct_relevel("Male",   after = 0))
levels(df_depress_ref$male)
[1] "Female" "Male"  
levels(df_depress_ref$female)
[1] "Male"   "Female"
fit_glm_2_male <- glm(satlife ~ male + iq + age + weight,
                      data = df_depress_ref,
                      family = binomial(link = "logit"))

fit_glm_2_female <- glm(satlife ~ female + iq + age + weight,
                      data = df_depress_ref,
                      family = binomial(link = "logit"))
apaSupp::tab_glms(list("Reference = Male"  = fit_glm_2_female,
                       "Reference = Female" = fit_glm_2_male),
                  fit = c("AIC", "BIC")) %>% 
  flextable::bold(i = c(3, 9))
Table 14.15
Compare Regression Models

Reference = Male

Reference = Female

Variable

OR

95% CI

p

OR

95% CI

p

female

Male

Female

0.28

[0.09, 0.81]

.022*

iq

0.93

[0.83, 1.03]

.174

0.93

[0.83, 1.03]

.174

age

0.96

[0.86, 1.06]

.433

0.96

[0.86, 1.06]

.433

weight

0.96

[0.81, 1.15]

.671

0.96

[0.81, 1.15]

.671

male

Female

Male

3.59

[1.24, 11.49]

.022*

AIC

137.52

137.52

BIC

150.50

150.50

* p < .05. ** p < .01. *** p < .001.

apaSupp::tab_glm_fits(list("Reference = Female" = fit_glm_2_male,
                           "Reference = Male"  = fit_glm_2_female))
Table 14.16
Comparison of Generalized Linear Model Performane Metrics

pseudo-R²

Model

N

k

McFadden

Tjur

AIC

BIC

RMSE

Reference = Female

99

5

.207

.075

137.52

150.50

0.47

Reference = Male

99

5

.207

.075

137.52

150.50

0.47

Note. k = number of parameters estimated in each model. Larger values indicated better performance for pseudo R-squared values. Smaller values indicated better performance for Akaike's Information Criteria (AIC), Bayesian information criteria (BIC), and Root Mean Squared Error (RMSE).