16 Ex: Logistic - Maternal Risk (Hoffman)

Maternal Risk Factor for Low Birth Weight Delivery

Compiled: April 22, 2025

16.1 PREPARATION

16.1.1 Load Packages

# library(remotes)
# remotes::install_github("sarbearschwartz/apaSupp")  # updated: 4/12/25
# remotes::install_github("ddsjoberg/gtsummary")


library(tidyverse)
library(haven)        
library(naniar)
library(apaSupp)
library(performance) 
library(interactions)
library(GGally)

16.1.2 Load Data

More complex example demonstrating modeling decisions

Another set of data from a study investigating predictors of low birth weight

  • id infant’s unique identification number

Dependent variable (DV) or outcome

  • low Low birth weight (outcome)

  • 0 = birth weight >2500 g (normal)

  • 1 = birth weight < 2500 g (low))

  • bwt actual infant birth weight in grams (ignore for now)

Independent variables (IV) or predictors

  • age Age of mother, in years
  • lwt Mother’s weight at last menstrual period, in pounds
  • race Race: 1 = White, 2 = Black, 3 = Other
  • smoke Smoking status during pregnancy:1 = Yes, 0 = No
  • ptl History of premature labor: 0 = None, 1 = One, 2 = two, 3 = three
  • ht History of hypertension: 1 = Yes, 0 = No
  • ui Uterine irritability: 1 = Yes, 0 = No
  • ftv Number of physician visits in 1st trimester: 0 = None, 1 = One, … 6 = six

The data is saved in a text file (.txt) without any labels.

df_txt <- read.table("https://raw.githubusercontent.com/CEHS-research/data/master/Regression/lowbwt.txt", 
                     header = TRUE, 
                     sep = "", 
                     na.strings = "NA", 
                     dec = ".", 
                     strip.white = TRUE)

tibble::glimpse(df_txt)
Rows: 189
Columns: 11
$ id    <int> 85, 86, 87, 88, 89, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101…
$ low   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ age   <int> 19, 33, 20, 21, 18, 21, 22, 17, 29, 26, 19, 19, 22, 30, 18, 18, …
$ lwt   <int> 182, 155, 105, 108, 107, 124, 118, 103, 123, 113, 95, 150, 95, 1…
$ race  <int> 2, 3, 1, 1, 1, 3, 1, 3, 1, 1, 3, 3, 3, 3, 1, 1, 2, 1, 3, 1, 3, 1…
$ smoke <int> 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0…
$ ptl   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
$ ht    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ ui    <int> 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1…
$ ftv   <int> 0, 3, 1, 2, 0, 0, 1, 1, 1, 0, 0, 1, 0, 2, 0, 0, 0, 3, 0, 1, 2, 3…
$ bwt   <int> 2523, 2551, 2557, 2594, 2600, 2622, 2637, 2637, 2663, 2665, 2722…

16.1.3 Wrangle Data

df_mom <- df_txt %>% 
  dplyr::mutate(id = factor(id)) %>% 
  dplyr::mutate(low = low %>% 
                  factor() %>% 
                  forcats::fct_recode("birth weight >2500 g (normal)" = "0",
                                      "birth weight < 2500 g (low)"   = "1")) %>% 
  dplyr::mutate(race = race %>% 
                  factor() %>% 
                  forcats::fct_recode("White" = "1",
                                      "Black" = "2",
                                      "Other" = "3")) %>% 
  dplyr::mutate(ptl_any = as.numeric(ptl > 0)) %>%         # collapse into 0 = none vs. 1 = at least one
  dplyr::mutate(ptl = factor(ptl)) %>%                     # declare the number of pre-term labors to be a factor: 0, 1, 2, 3
  dplyr::mutate_at(vars(smoke, ht, ui, ptl_any),           # declare all there variables to be factors with the same two levels
                   factor,
                   levels = 0:1,
                   labels = c("No", "Yes")) 

Display the structure of the ‘clean’ version of the dataset

str(df_mom)
'data.frame':   189 obs. of  12 variables:
 $ id     : Factor w/ 189 levels "4","10","11",..: 60 61 62 63 64 65 66 67 68 69 ...
 $ low    : Factor w/ 2 levels "birth weight >2500 g (normal)",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ age    : int  19 33 20 21 18 21 22 17 29 26 ...
 $ lwt    : int  182 155 105 108 107 124 118 103 123 113 ...
 $ race   : Factor w/ 3 levels "White","Black",..: 2 3 1 1 1 3 1 3 1 1 ...
 $ smoke  : Factor w/ 2 levels "No","Yes": 1 1 2 2 2 1 1 1 2 2 ...
 $ ptl    : Factor w/ 4 levels "0","1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
 $ ht     : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
 $ ui     : Factor w/ 2 levels "No","Yes": 2 1 1 2 2 1 1 1 1 1 ...
 $ ftv    : int  0 3 1 2 0 0 1 1 1 0 ...
 $ bwt    : int  2523 2551 2557 2594 2600 2622 2637 2637 2663 2665 ...
 $ ptl_any: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
tibble::glimpse(df_mom)
Rows: 189
Columns: 12
$ id      <fct> 85, 86, 87, 88, 89, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 1…
$ low     <fct> birth weight >2500 g (normal), birth weight >2500 g (normal), …
$ age     <int> 19, 33, 20, 21, 18, 21, 22, 17, 29, 26, 19, 19, 22, 30, 18, 18…
$ lwt     <int> 182, 155, 105, 108, 107, 124, 118, 103, 123, 113, 95, 150, 95,…
$ race    <fct> Black, Other, White, White, White, Other, White, Other, White,…
$ smoke   <fct> No, No, Yes, Yes, Yes, No, No, No, Yes, Yes, No, No, No, No, Y…
$ ptl     <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,…
$ ht      <fct> No, No, No, No, No, No, No, No, No, No, No, No, Yes, No, No, N…
$ ui      <fct> Yes, No, No, Yes, Yes, No, No, No, No, No, No, No, No, Yes, No…
$ ftv     <int> 0, 3, 1, 2, 0, 0, 1, 1, 1, 0, 0, 1, 0, 2, 0, 0, 0, 3, 0, 1, 2,…
$ bwt     <int> 2523, 2551, 2557, 2594, 2600, 2622, 2637, 2637, 2663, 2665, 27…
$ ptl_any <fct> No, No, No, No, No, No, No, No, No, No, No, No, No, Yes, No, N…

16.2 EXPLORATORY DATA ANALYSIS

16.2.1 Missing Data

df_mom %>% 
  dplyr::select("Low Birth Weight Delivery" = low,
                "Age, years" = age, 
                "Weight, pounds" = lwt, 
                "Race" = race, 
                "Smoking During pregnancy" = smoke, 
                "History of Premature Labor, any" = ptl_any, 
                "History of Premature Labor, number" = ptl, 
                "History of Hypertension" = ht, 
                "Uterince Irritability" = ui, 
                "1st Tri Dr Visits" = ftv) %>% 
  naniar::miss_var_summary() %>%
  dplyr::select(Variable = variable,
                n = n_miss) %>% 
  flextable::flextable() %>% 
  apaSupp::theme_apa(caption = "Missing Data by Variable")
Table 16.1
Missing Data by Variable

Variable

n

Low Birth Weight Delivery

0

Age, years

0

Weight, pounds

0

Race

0

Smoking During pregnancy

0

History of Premature Labor, any

0

History of Premature Labor, number

0

History of Hypertension

0

Uterince Irritability

0

1st Tri Dr Visits

0

16.2.2 Summary

df_mom %>% 
  dplyr::select("Low Birth Weight Delivery" = low,
                "Race" = race, 
                "Smoking During pregnancy" = smoke, 
                "History of Premature Labor, any" = ptl_any, 
                "History of Premature Labor, number" = ptl, 
                "History of Hypertension" = ht, 
                "Uterince Irritability" = ui) %>% 
  apaSupp::tab_freq(caption = "Summary of Categorical Variables")
Table 16.2
Summary of Categorical Variables

Statistic
(N=189)

Low Birth Weight Delivery

birth weight >2500 g (normal)

130 (68.8%)

birth weight < 2500 g (low)

59 (31.2%)

Race

White

96 (50.8%)

Black

26 (13.8%)

Other

67 (35.4%)

Smoking During pregnancy

No

115 (60.8%)

Yes

74 (39.2%)

History of Premature Labor, any

No

159 (84.1%)

Yes

30 (15.9%)

History of Premature Labor, number

0

159 (84.1%)

1

24 (12.7%)

2

5 (2.6%)

3

1 (0.5%)

History of Hypertension

No

177 (93.7%)

Yes

12 (6.3%)

Uterince Irritability

No

161 (85.2%)

Yes

28 (14.8%)

df_mom %>% 
  dplyr::select("Age, yrs" = age, 
                "Weight, lbs" = lwt, 
                "1st Tri Dr Visits" = ftv) %>% 
  apaSupp::tab_desc(caption = "Summary of Continuous Variables")
Table 16.3
Summary of Continuous Variables

NA

M

SD

min

Q1

Mdn

Q3

max

Age, yrs

0

23.24

5.30

14

19.00

23

26.00

45

Weight, lbs

0

129.81

30.58

80

110.00

121

140.00

250

1st Tri Dr Visits

0

0.79

1.06

0

0.00

0

1.00

6

Note. N = 189. NA = not available or missing; Mdn = median; Q1 = 25th percentile; Q3 = 75th percentile.

16.3 UNADJUSTED

Unadjusted Models

16.3.1 Fit Models

fit_glm_age   <- glm(low ~ age,     family = binomial(link = "logit"), data = df_mom)
fit_glm_lwt   <- glm(low ~ lwt,     family = binomial(link = "logit"), data = df_mom)
fit_glm_race  <- glm(low ~ race,    family = binomial(link = "logit"), data = df_mom)
fit_glm_smoke <- glm(low ~ smoke,   family = binomial(link = "logit"), data = df_mom)
fit_glm_ptl   <- glm(low ~ ptl_any, family = binomial(link = "logit"), data = df_mom)
fit_glm_ht    <- glm(low ~ ht,      family = binomial(link = "logit"), data = df_mom)
fit_glm_ui    <- glm(low ~ ui,      family = binomial(link = "logit"), data = df_mom)
fit_glm_ftv   <- glm(low ~ ftv,     family = binomial(link = "logit"), data = df_mom)

16.3.2 Parameter Tables

Note: the parameter estimates here are for the LOGIT scale, not the odds ration (OR) or even the probability.

apaSupp::tab_glms(list(fit_glm_age, fit_glm_lwt, fit_glm_race, fit_glm_smoke),
                  narrow = TRUE,
                  fit = NA,
                  pr2 = "tjur")
Table 16.4
Compare Generalized Regression Models

Model 1

Model 2

Model 3

Model 4

Variable

OR

95% CI

OR

95% CI

OR

95% CI

OR

95% CI

age

0.95

[0.89, 1.01]

lwt

0.99

[0.97, 1.00]*

race

White

Black

2.33

[0.93, 5.77]

Other

1.89

[0.96, 3.76]

smoke

No

Yes

2.02

[1.08, 3.80]*

pseudo-R²

.014

.031

.026

.026

Note. N = 189. CI = confidence interval.Coefficient of deterination estiamted by Tjur's psuedo R-squared

* p < .05. ** p < .01. *** p < .001.

apaSupp::tab_glms(list(fit_glm_ptl, fit_glm_ht, fit_glm_ui, fit_glm_ftv),
                  narrow = TRUE,
                  fit = NA,
                  pr2 = "tjur")
Table 16.5
Compare Generalized Regression Models

Model 1

Model 2

Model 3

Model 4

Variable

OR

95% CI

OR

95% CI

OR

95% CI

OR

95% CI

ptl_any

No

Yes

4.32

[1.94, 9.95]***

ht

No

Yes

3.37

[1.03, 11.83]*

ui

No

Yes

2.58

[1.13, 5.88]*

ftv

0.87

[0.63, 1.17]

pseudo-R²

.073

.023

.029

.004

Note. N = 189. CI = confidence interval.Coefficient of deterination estiamted by Tjur's psuedo R-squared

* p < .05. ** p < .01. *** p < .001.

16.4 MAIN EFFECTS ONLY

Main-effects multiple logistic regression model

fit_glm_mains <- glm(low ~ age + lwt + race + smoke + ptl_any + ht + ui,
                     family = binomial(link = "logit"), 
                     data = df_mom)
apaSupp::tab_glm(fit_glm_mains,
                 show_single_row = c("smoke", "ptl_any", "ht", "ui"))
Table 16.6
Parameter Estimates for Generalized Linear Regression

Odds Ratio

Logit Scale

Variable

OR

95% CI

b

(SE)

Wald

LRT

VIF

age

0.96

[0.89, 1.04]

-0.04

(0.04)

.318

.313

1.09

lwt

0.99

[0.97, 1.00]

-0.01

(0.01)

.034*

.025*

1.31

race

.041*

1.52

White

Black

3.36

[1.19, 9.73]

1.213

(0.53)

.023*

Other

2.23

[0.94, 5.50]

.804

(0.45)

.073

smoke

2.33

[1.06, 5.29]

0.85

(0.41)

.038*

.036*

1.35

ptl_any

3.39

[1.38, 8.60]

1.22

(0.46)

.008**

.008**

1.10

ht

6.29

[1.64, 27.26]

1.84

(0.70)

.009**

.007**

1.16

ui

2.04

[0.81, 5.06]

0.71

(0.46)

.125

.128

1.05

(Intercept)

0.64

(1.23)

.605

pseudo-R²

.190

Note. N = 189. CI = confidence interval; VIF = variance inflation factor. Significance denotes Wald t-tests for individual parameter estimates, as well as Likelihood Ratio Tests (LRT) for single-predictor deletion. Coefficient of deterination displays Tjur's pseudo-R².

* p < .05. ** p < .01. *** p < .001.

16.5 INTERACTIONS

Before removing non-significant main effects, test plausible interactions

Try the following interactions:

  • Age and Weight

  • Age and Smoking

  • Weight and Smoking

fit_glm_mains_aw <- glm(low ~ age + lwt + race + smoke + ptl_any + ht + ui + age:lwt,
                        family = binomial(link = "logit"), 
                        data = df_mom)

fit_glm_mains_as <- glm(low ~ age + lwt + race + smoke + ptl_any + ht + ui + age:smoke,
                        family = binomial(link = "logit"), 
                        data = df_mom)

fit_glm_mains_ws <- glm(low ~ age + lwt + race + smoke + ptl_any + ht + ui + lwt:smoke,
                        family = binomial(link = "logit"), 
                        data = df_mom)
apaSupp::tab_glms(list("Age-Weight"     = fit_glm_mains_aw,
                       "Age-Smoking"    = fit_glm_mains_as,
                       "Weight-Smoking" = fit_glm_mains_ws),
                  show_single_row = c("smoke", "ptl_any", "ht", "ui"),
                  narrow = TRUE)
Table 16.7
Compare Generalized Regression Models

Age-Weight

Age-Smoking

Weight-Smoking

Variable

OR

95% CI

OR

95% CI

OR

95% CI

age

0.94

[0.67, 1.33]

0.93

[0.84, 1.03]

0.96

[0.89, 1.04]

lwt

0.98

[0.92, 1.04]

0.99

[0.97, 1.00]*

0.98

[0.95, 1.00]*

race

White

Black

3.34

[1.18, 9.71]*

3.08

[1.06, 9.11]*

3.42

[1.21, 9.95]*

Other

2.22

[0.93, 5.50]

2.16

[0.90, 5.33]

2.05

[0.85, 5.07]

smoke

2.32

[1.05, 5.28]*

0.55

[0.02, 17.99]

0.32

[0.01, 9.36]

ptl_any

3.41

[1.38, 8.66]**

3.31

[1.35, 8.36]**

3.54

[1.44, 9.02]**

ht

6.29

[1.64, 27.28]**

6.43

[1.67, 28.04]**

5.72

[1.48, 24.94]*

ui

2.04

[0.81, 5.09]

2.19

[0.86, 5.56]

2.23

[0.88, 5.63]

age * lwt

1.00

[1.00, 1.00]

age * smoke

age * Yes

1.07

[0.92, 1.24]

lwt * smoke

lwt * Yes

1.02

[0.99, 1.04]

Fit Metrics

AIC

216.8

216.1

215.4

BIC

249.2

248.6

247.9

pseudo-R²

Tjur

.190

.192

.196

McFadden

.161

.164

.167

Note. N = 189. CI = confidence interval.Coefficient of deterination estiamted with both Tjur and McFadden's psuedo R-squared

* p < .05. ** p < .01. *** p < .001.

apaSupp::tab_glm_fits(list("Only Mains"     = fit_glm_mains,
                           "Age-Weight"     = fit_glm_mains_aw,
                           "Age-Smoking"    = fit_glm_mains_as,
                           "Weight-Smoking" = fit_glm_mains_ws))
Table 16.8
Comparison of Generalized Linear Model Performane Metrics

pseudo- R2R^2

Model

N

k

McFadden

Tjur

AIC

BIC

RMSE

Only Mains

189

9

.161

.190

214.83

244.01

0.42

Age-Weight

189

10

.161

.190

216.82

249.23

0.42

Age-Smoking

189

10

.164

.192

216.14

248.55

0.42

Weight-Smoking

189

10

.167

.196

215.44

247.86

0.42

Note. k = number of parameters estimated in each model. Larger R2R^2 values indicated better performance. Smaller values indicated better performance for Akaike's Information Criteria (AIC), Bayesian information criteria (BIC), and Root Mean Squared Error (RMSE).

anova(fit_glm_mains, fit_glm_mains_aw, test = 'LRT')
# A tibble: 2 × 5
  `Resid. Df` `Resid. Dev`    Df Deviance `Pr(>Chi)`
        <dbl>        <dbl> <dbl>    <dbl>      <dbl>
1         180         197.    NA  NA          NA    
2         179         197.     1   0.0170      0.896
anova(fit_glm_mains, fit_glm_mains_as, test = 'LRT')
# A tibble: 2 × 5
  `Resid. Df` `Resid. Dev`    Df Deviance `Pr(>Chi)`
        <dbl>        <dbl> <dbl>    <dbl>      <dbl>
1         180         197.    NA   NA         NA    
2         179         196.     1    0.699      0.403
anova(fit_glm_mains, fit_glm_mains_ws, test = 'LRT')
# A tibble: 2 × 5
  `Resid. Df` `Resid. Dev`    Df Deviance `Pr(>Chi)`
        <dbl>        <dbl> <dbl>    <dbl>      <dbl>
1         180         197.    NA    NA        NA    
2         179         195.     1     1.39      0.238

16.6 PARSAMONY

No interactions are significant Remove non-significant main effects

Since the mother’s age is theoretically a meaningful variable, it should probably be retained.

Remove “UI” since its not significant

fit_glm_trim <- glm(low ~ age + lwt + race + smoke + ptl_any + ht,
                    family = binomial(link = "logit"), 
                    data = df_mom)
apaSupp::tab_glm(fit_glm_trim)
Table 16.9
Parameter Estimates for Generalized Linear Regression

Odds Ratio

Logit Scale

Variable

OR

95% CI

b

(SE)

Wald

LRT

VIF

age

0.96

[0.89, 1.03]

-0.04

(0.04)

.255

.249

1.09

lwt

0.98

[0.97, 1.00]

-0.02

(0.01)

.028*

.020*

1.29

race

.044*

1.49

White

Black

3.22

[1.13, 9.31]

1.168

(0.53)

.028*

Other

2.26

[0.96, 5.50]

.815

(0.44)

.066

smoke

.032*

1.35

No

Yes

2.36

[1.08, 5.32]

.9

(0.40)

.034*

ptl_any

.003**

1.08

No

Yes

3.80

[1.57, 9.53]

1.3

(0.46)

.004**

ht

.011*

1.15

No

Yes

5.70

[1.49, 24.76]

1.7

(0.70)

.013*

(Intercept)

0.92

(1.20)

.442

pseudo-R²

.180

Note. N = 189. CI = confidence interval; VIF = variance inflation factor. Significance denotes Wald t-tests for individual parameter estimates, as well as Likelihood Ratio Tests (LRT) for single-predictor deletion. Coefficient of deterination displays Tjur's pseudo-R².

* p < .05. ** p < .01. *** p < .001.

16.7 CENTER & SCALE

Since the mother’s age is theoretically a meaningful variable, it should probably be retained.

Revise so that age is interpreted in 5-year and pre-pregnancy weight in 20 lb increments and the intercept has meaning.

fit_glm_final <- glm(low ~ I((age - 20)/5) + I((lwt - 125)/20) + race + smoke + ptl_any + ht,
                     family = binomial(link = "logit"), 
                     data = df_mom)
apaSupp::tab_glm(fit_glm_final,
                 var_labels = c("I((age - 20)/5)" = "Age, 5 yr",
                                "I((lwt - 125)/20)" = "Pre-Preg Weight, 20 lb",
                                race = "Race",
                                smoke = "Smoker",
                                ptl_any = "Prior Preterm Labor",
                                ht = "Hx Hypertension"),
                 p_note = "apa13",
                 show_single_row = c("smoke", "ptl_any", "ht"),
                 general_note = "Centering for age (20 yr) and weight (125 lbs)")
Table 16.10
Parameter Estimates for Generalized Linear Regression

Odds Ratio

Logit Scale

Variable

OR

95% CI

b

(SE)

Wald

LRT

VIF

Age, 5 yr

0.81

[0.55, 1.16]

-0.21

(0.19)

.255

.249

1.09

Pre-Preg Weight, 20 lb

0.73

[0.55, 0.95]

-0.31

(0.14)

.028*

.020*

1.29

Race

.044*

1.49

White

Black

3.22

[1.13, 9.31]

1.168

(0.53)

.028*

Other

2.26

[0.96, 5.50]

.815

(0.44)

.066

Smoker

2.36

[1.08, 5.32]

0.86

(0.40)

.034*

.032*

1.35

Prior Preterm Labor

3.80

[1.57, 9.53]

1.33

(0.46)

.004**

.003**

1.08

Hx Hypertension

5.70

[1.49, 24.76]

1.74

(0.70)

.013*

.011*

1.15

(Intercept)

-1.86

(0.41)

< .001***

pseudo-R²

.180

Note. N = 189. CI = confidence interval; VIF = variance inflation factor. Significance denotes Wald t-tests for individual parameter estimates, as well as Likelihood Ratio Tests (LRT) for single-predictor deletion. Coefficient of deterination displays Tjur's pseudo-R². Centering for age (20 yr) and weight (125 lbs)

* p < .05. *** p < .001.

16.7.1 Marginal Model Plot

16.7.1.1 Focus on: Mother’s Age, weight, and race

interactions::interact_plot(model = fit_glm_final,
                            pred = lwt,
                            modx = race,
                            mod2 = age)

effects::Effect(focal.predictors = c("age", "lwt", "race"),
                mod = fit_glm_final,
                xlevels = list(age = c(20, 30, 40),
                               lwt = seq(from = 80, to = 250, by = 5))) %>% 
  data.frame() %>% 
  dplyr::mutate(age_labels = glue::glue("Mother Age: {age}")) %>% 
  ggplot(aes(x = lwt,
             y = fit)) +
  geom_line(aes(color = race,
                linetype = race),
            size = 1) +
  theme_bw() +
  facet_grid(.~ age_labels) +
  labs(title = "Risk of Low Birth Weight",
       subtitle = "Illustates risk given mother is a non-smoker, without a history of pre-term labor or hypertension",
       x = "Mother's Weight Pre-Pregnancy, pounds",
       y = "Predicted Probability\nBaby has Low Birth Weight (< 2500 grams)",
       color    = "Mother's Race",
       linetype = "Mother's Race") +
  theme(legend.position = c(1, 1),
        legend.justification = c(1.1, 1.1),
        legend.background = element_rect(color = "black"),
        legend.key.width = unit(2, "cm")) +
  scale_linetype_manual(values = c("longdash", "dotted", "solid")) +
  scale_color_manual(values = c( "coral2", "dodger blue", "gray50"))

16.7.1.2 Focus on: Mother’s weight and smoking status during pregnancy, as well as history of any per-term labor and hypertension

interactions::interact_plot(model = fit_glm_final,
                            pred = lwt,
                            modx = smoke,
                            mod2 = ptl_any)

interactions::interact_plot(model = fit_glm_final,
                            pred = lwt,
                            modx = smoke,
                            mod2 = ht)

effects::Effect(focal.predictors = c("lwt", "smoke", "ptl_any", "ht"),
                fixed.predictors = list(age = 20),
                mod = fit_glm_final,
                xlevels = list(lwt = seq(from = 80, to = 250, by = 5))) %>% 
  data.frame() %>% 
  dplyr::mutate(smoke = forcats::fct_rev(smoke)) %>% 
  dplyr::mutate(ptl_any_labels = glue::glue("History of Preterm Labor: {ptl_any}")) %>% 
  dplyr::mutate(ht_labels = glue::glue("History of Hypertension: {ht}") %>% forcats::fct_rev()) %>% 
  ggplot(aes(x = lwt,
             y = fit)) +
  geom_line(aes(color = smoke,
                linetype = smoke),
            size = 1) +
  theme_bw() +
  facet_grid(ht_labels ~ ptl_any_labels) +
  labs(title = "Risk of Low Birth Weight",
       subtitle = "Illustates risk given the mother is 20 years old and white",
       x = "Mother's Weight Pre-Pregnancy, pounds",
       y = "Predicted Probability\nBaby has Low Birth Weight (< 2500 grams)",
       color    = "Mother Smoked",
       linetype = "Mother Smoked") +
  theme(legend.position = c(1, .5),
        legend.justification = c(1.1, 1.15),
        legend.background = element_rect(color = "black"),
        legend.key.width = unit(1.5, "cm")) +
  scale_linetype_manual(values = c("longdash", "solid")) +
  scale_color_manual(values = c( "coral2", "dodger blue"))