16 Ex: Logistic - Maternal Risk (Hoffman)

Maternal Risk Factor for Low Birth Weight Delivery

Compiled: April 22, 2025

16.1 PREPARATION

16.1.1 Load Packages

# library(remotes)
# remotes::install_github("sarbearschwartz/apaSupp")  # updated: 4/12/25
# remotes::install_github("ddsjoberg/gtsummary")


library(tidyverse)
library(haven)        
library(naniar)
library(apaSupp)
library(performance) 
library(interactions)
library(GGally)

16.1.2 Load Data

More complex example demonstrating modeling decisions

Another set of data from a study investigating predictors of low birth weight

id infant’s unique identification number

Dependent variable (DV) or outcome

low Low birth weight (outcome)
0 = birth weight >2500 g (normal)
1 = birth weight < 2500 g (low))
bwt actual infant birth weight in grams (ignore for now)

Independent variables (IV) or predictors

age Age of mother, in years
lwt Mother’s weight at last menstrual period, in pounds
race Race: 1 = White, 2 = Black, 3 = Other
smoke Smoking status during pregnancy:1 = Yes, 0 = No
ptl History of premature labor: 0 = None, 1 = One, 2 = two, 3 = three
ht History of hypertension: 1 = Yes, 0 = No
ui Uterine irritability: 1 = Yes, 0 = No
ftv Number of physician visits in 1st trimester: 0 = None, 1 = One, … 6 = six

The data is saved in a text file (.txt) without any labels.

df_txt <- read.table("https://raw.githubusercontent.com/CEHS-research/data/master/Regression/lowbwt.txt", 
                     header = TRUE, 
                     sep = "", 
                     na.strings = "NA", 
                     dec = ".", 
                     strip.white = TRUE)

tibble::glimpse(df_txt)

Rows: 189
Columns: 11
$ id    <int> 85, 86, 87, 88, 89, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101…
$ low   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ age   <int> 19, 33, 20, 21, 18, 21, 22, 17, 29, 26, 19, 19, 22, 30, 18, 18, …
$ lwt   <int> 182, 155, 105, 108, 107, 124, 118, 103, 123, 113, 95, 150, 95, 1…
$ race  <int> 2, 3, 1, 1, 1, 3, 1, 3, 1, 1, 3, 3, 3, 3, 1, 1, 2, 1, 3, 1, 3, 1…
$ smoke <int> 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0…
$ ptl   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
$ ht    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ ui    <int> 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1…
$ ftv   <int> 0, 3, 1, 2, 0, 0, 1, 1, 1, 0, 0, 1, 0, 2, 0, 0, 0, 3, 0, 1, 2, 3…
$ bwt   <int> 2523, 2551, 2557, 2594, 2600, 2622, 2637, 2637, 2663, 2665, 2722…

16.1.3 Wrangle Data

df_mom <- df_txt %>% 
  dplyr::mutate(id = factor(id)) %>% 
  dplyr::mutate(low = low %>% 
                  factor() %>% 
                  forcats::fct_recode("birth weight >2500 g (normal)" = "0",
                                      "birth weight < 2500 g (low)"   = "1")) %>% 
  dplyr::mutate(race = race %>% 
                  factor() %>% 
                  forcats::fct_recode("White" = "1",
                                      "Black" = "2",
                                      "Other" = "3")) %>% 
  dplyr::mutate(ptl_any = as.numeric(ptl > 0)) %>%         # collapse into 0 = none vs. 1 = at least one
  dplyr::mutate(ptl = factor(ptl)) %>%                     # declare the number of pre-term labors to be a factor: 0, 1, 2, 3
  dplyr::mutate_at(vars(smoke, ht, ui, ptl_any),           # declare all there variables to be factors with the same two levels
                   factor,
                   levels = 0:1,
                   labels = c("No", "Yes"))

Display the structure of the ‘clean’ version of the dataset

str(df_mom)

'data.frame':   189 obs. of  12 variables:
 $ id     : Factor w/ 189 levels "4","10","11",..: 60 61 62 63 64 65 66 67 68 69 ...
 $ low    : Factor w/ 2 levels "birth weight >2500 g (normal)",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ age    : int  19 33 20 21 18 21 22 17 29 26 ...
 $ lwt    : int  182 155 105 108 107 124 118 103 123 113 ...
 $ race   : Factor w/ 3 levels "White","Black",..: 2 3 1 1 1 3 1 3 1 1 ...
 $ smoke  : Factor w/ 2 levels "No","Yes": 1 1 2 2 2 1 1 1 2 2 ...
 $ ptl    : Factor w/ 4 levels "0","1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
 $ ht     : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
 $ ui     : Factor w/ 2 levels "No","Yes": 2 1 1 2 2 1 1 1 1 1 ...
 $ ftv    : int  0 3 1 2 0 0 1 1 1 0 ...
 $ bwt    : int  2523 2551 2557 2594 2600 2622 2637 2637 2663 2665 ...
 $ ptl_any: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...

tibble::glimpse(df_mom)

Rows: 189
Columns: 12
$ id      <fct> 85, 86, 87, 88, 89, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 1…
$ low     <fct> birth weight >2500 g (normal), birth weight >2500 g (normal), …
$ age     <int> 19, 33, 20, 21, 18, 21, 22, 17, 29, 26, 19, 19, 22, 30, 18, 18…
$ lwt     <int> 182, 155, 105, 108, 107, 124, 118, 103, 123, 113, 95, 150, 95,…
$ race    <fct> Black, Other, White, White, White, Other, White, Other, White,…
$ smoke   <fct> No, No, Yes, Yes, Yes, No, No, No, Yes, Yes, No, No, No, No, Y…
$ ptl     <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,…
$ ht      <fct> No, No, No, No, No, No, No, No, No, No, No, No, Yes, No, No, N…
$ ui      <fct> Yes, No, No, Yes, Yes, No, No, No, No, No, No, No, No, Yes, No…
$ ftv     <int> 0, 3, 1, 2, 0, 0, 1, 1, 1, 0, 0, 1, 0, 2, 0, 0, 0, 3, 0, 1, 2,…
$ bwt     <int> 2523, 2551, 2557, 2594, 2600, 2622, 2637, 2637, 2663, 2665, 27…
$ ptl_any <fct> No, No, No, No, No, No, No, No, No, No, No, No, No, Yes, No, N…

16.2 EXPLORATORY DATA ANALYSIS

16.2.1 Missing Data

df_mom %>% 
  dplyr::select("Low Birth Weight Delivery" = low,
                "Age, years" = age, 
                "Weight, pounds" = lwt, 
                "Race" = race, 
                "Smoking During pregnancy" = smoke, 
                "History of Premature Labor, any" = ptl_any, 
                "History of Premature Labor, number" = ptl, 
                "History of Hypertension" = ht, 
                "Uterince Irritability" = ui, 
                "1st Tri Dr Visits" = ftv) %>% 
  naniar::miss_var_summary() %>%
  dplyr::select(Variable = variable,
                n = n_miss) %>% 
  flextable::flextable() %>% 
  apaSupp::theme_apa(caption = "Missing Data by Variable")

**Table 16.1**
Missing Data by Variable
Variable	n
Low Birth Weight Delivery	0
Age, years	0
Weight, pounds	0
Race	0
Smoking During pregnancy	0
History of Premature Labor, any	0
History of Premature Labor, number	0
History of Hypertension	0
Uterince Irritability	0
1st Tri Dr Visits	0

16.2.2 Summary

df_mom %>% 
  dplyr::select("Low Birth Weight Delivery" = low,
                "Race" = race, 
                "Smoking During pregnancy" = smoke, 
                "History of Premature Labor, any" = ptl_any, 
                "History of Premature Labor, number" = ptl, 
                "History of Hypertension" = ht, 
                "Uterince Irritability" = ui) %>% 
  apaSupp::tab_freq(caption = "Summary of Categorical Variables")

**Table 16.2**
Summary of Categorical Variables
		Statistic (N=189)
Low Birth Weight Delivery
birth weight >2500 g (normal)		130 (68.8%)
birth weight < 2500 g (low)		59 (31.2%)
Race
White		96 (50.8%)
Black		26 (13.8%)
Other		67 (35.4%)
Smoking During pregnancy
No		115 (60.8%)
Yes		74 (39.2%)
History of Premature Labor, any
No		159 (84.1%)
Yes		30 (15.9%)
History of Premature Labor, number
0		159 (84.1%)
1		24 (12.7%)
2		5 (2.6%)
3		1 (0.5%)
History of Hypertension
No		177 (93.7%)
Yes		12 (6.3%)
Uterince Irritability
No		161 (85.2%)
Yes		28 (14.8%)

df_mom %>% 
  dplyr::select("Age, yrs" = age, 
                "Weight, lbs" = lwt, 
                "1st Tri Dr Visits" = ftv) %>% 
  apaSupp::tab_desc(caption = "Summary of Continuous Variables")

**Table 16.3**
Summary of Continuous Variables
	NA	M	SD	min	Q1	Mdn	Q3	max
Age, yrs	0	23.24	5.30	14	19.00	23	26.00	45
Weight, lbs	0	129.81	30.58	80	110.00	121	140.00	250
1st Tri Dr Visits	0	0.79	1.06	0	0.00	0	1.00	6
Note. N = 189. NA = not available or missing; Mdn = median; Q1 = 25th percentile; Q3 = 75th percentile.

16.3 UNADJUSTED

Unadjusted Models

16.3.1 Fit Models

fit_glm_age   <- glm(low ~ age,     family = binomial(link = "logit"), data = df_mom)
fit_glm_lwt   <- glm(low ~ lwt,     family = binomial(link = "logit"), data = df_mom)
fit_glm_race  <- glm(low ~ race,    family = binomial(link = "logit"), data = df_mom)
fit_glm_smoke <- glm(low ~ smoke,   family = binomial(link = "logit"), data = df_mom)
fit_glm_ptl   <- glm(low ~ ptl_any, family = binomial(link = "logit"), data = df_mom)
fit_glm_ht    <- glm(low ~ ht,      family = binomial(link = "logit"), data = df_mom)
fit_glm_ui    <- glm(low ~ ui,      family = binomial(link = "logit"), data = df_mom)
fit_glm_ftv   <- glm(low ~ ftv,     family = binomial(link = "logit"), data = df_mom)

16.3.2 Parameter Tables

Note: the parameter estimates here are for the LOGIT scale, not the odds ration (OR) or even the probability.

apaSupp::tab_glms(list(fit_glm_age, fit_glm_lwt, fit_glm_race, fit_glm_smoke),
                  narrow = TRUE,
                  fit = NA,
                  pr2 = "tjur")

**Table 16.4**
Compare Generalized Regression Models
	Model 1		Model 2		Model 3		Model 4
Variable	OR	95% CI	OR	95% CI	OR	95% CI	OR	95% CI
age	0.95	[0.89, 1.01]
lwt			0.99	[0.97, 1.00]*
race
White					—	—
Black					2.33	[0.93, 5.77]
Other					1.89	[0.96, 3.76]
smoke
No							—	—
Yes							2.02	[1.08, 3.80]*
pseudo-R²	.014		.031		.026		.026
Note. N = 189. CI = confidence interval.Coefficient of deterination estiamted by Tjur's psuedo R-squared
* p < .05. p < .01. * p < .001.

apaSupp::tab_glms(list(fit_glm_ptl, fit_glm_ht, fit_glm_ui, fit_glm_ftv),
                  narrow = TRUE,
                  fit = NA,
                  pr2 = "tjur")

**Table 16.5**
Compare Generalized Regression Models
	Model 1		Model 2		Model 3		Model 4
Variable	OR	95% CI	OR	95% CI	OR	95% CI	OR	95% CI
ptl_any
No	—	—
Yes	4.32	[1.94, 9.95]***
ht
No			—	—
Yes			3.37	[1.03, 11.83]*
ui
No					—	—
Yes					2.58	[1.13, 5.88]*
ftv							0.87	[0.63, 1.17]
pseudo-R²	.073		.023		.029		.004
Note. N = 189. CI = confidence interval.Coefficient of deterination estiamted by Tjur's psuedo R-squared
* p < .05. p < .01. * p < .001.

16.4 MAIN EFFECTS ONLY

Main-effects multiple logistic regression model

fit_glm_mains <- glm(low ~ age + lwt + race + smoke + ptl_any + ht + ui,
                     family = binomial(link = "logit"), 
                     data = df_mom)

apaSupp::tab_glm(fit_glm_mains,
                 show_single_row = c("smoke", "ptl_any", "ht", "ui"))

**Table 16.6**
Parameter Estimates for Generalized Linear Regression
	Odds Ratio		Logit Scale
Variable	OR	95% CI	b	(SE)	Wald	LRT	VIF
age	0.96	[0.89, 1.04]	-0.04	(0.04)	.318	.313	1.09
lwt	0.99	[0.97, 1.00]	-0.01	(0.01)	.034*	.025*	1.31
race						.041*	1.52
White	—	—	—	—
Black	3.36	[1.19, 9.73]	1.213	(0.53)	.023*
Other	2.23	[0.94, 5.50]	.804	(0.45)	.073
smoke	2.33	[1.06, 5.29]	0.85	(0.41)	.038*	.036*	1.35
ptl_any	3.39	[1.38, 8.60]	1.22	(0.46)	.008**	.008**	1.10
ht	6.29	[1.64, 27.26]	1.84	(0.70)	.009**	.007**	1.16
ui	2.04	[0.81, 5.06]	0.71	(0.46)	.125	.128	1.05
(Intercept)			0.64	(1.23)	.605
pseudo-R²	.190
Note. N = 189. CI = confidence interval; VIF = variance inflation factor. Significance denotes Wald t-tests for individual parameter estimates, as well as Likelihood Ratio Tests (LRT) for single-predictor deletion. Coefficient of deterination displays Tjur's pseudo-R².
* p < .05. p < .01. * p < .001.

16.5 INTERACTIONS

Before removing non-significant main effects, test plausible interactions

Try the following interactions:

Age and Weight
Age and Smoking
Weight and Smoking

fit_glm_mains_aw <- glm(low ~ age + lwt + race + smoke + ptl_any + ht + ui + age:lwt,
                        family = binomial(link = "logit"), 
                        data = df_mom)

fit_glm_mains_as <- glm(low ~ age + lwt + race + smoke + ptl_any + ht + ui + age:smoke,
                        family = binomial(link = "logit"), 
                        data = df_mom)

fit_glm_mains_ws <- glm(low ~ age + lwt + race + smoke + ptl_any + ht + ui + lwt:smoke,
                        family = binomial(link = "logit"), 
                        data = df_mom)

apaSupp::tab_glms(list("Age-Weight"     = fit_glm_mains_aw,
                       "Age-Smoking"    = fit_glm_mains_as,
                       "Weight-Smoking" = fit_glm_mains_ws),
                  show_single_row = c("smoke", "ptl_any", "ht", "ui"),
                  narrow = TRUE)

**Table 16.7**
Compare Generalized Regression Models
	Age-Weight		Age-Smoking		Weight-Smoking
Variable	OR	95% CI	OR	95% CI	OR	95% CI
age	0.94	[0.67, 1.33]	0.93	[0.84, 1.03]	0.96	[0.89, 1.04]
lwt	0.98	[0.92, 1.04]	0.99	[0.97, 1.00]*	0.98	[0.95, 1.00]*
race
White	—	—	—	—	—	—
Black	3.34	[1.18, 9.71]*	3.08	[1.06, 9.11]*	3.42	[1.21, 9.95]*
Other	2.22	[0.93, 5.50]	2.16	[0.90, 5.33]	2.05	[0.85, 5.07]
smoke	2.32	[1.05, 5.28]*	0.55	[0.02, 17.99]	0.32	[0.01, 9.36]
ptl_any	3.41	[1.38, 8.66]**	3.31	[1.35, 8.36]**	3.54	[1.44, 9.02]**
ht	6.29	[1.64, 27.28]**	6.43	[1.67, 28.04]**	5.72	[1.48, 24.94]*
ui	2.04	[0.81, 5.09]	2.19	[0.86, 5.56]	2.23	[0.88, 5.63]
age * lwt	1.00	[1.00, 1.00]
age * smoke
age * Yes			1.07	[0.92, 1.24]
lwt * smoke
lwt * Yes					1.02	[0.99, 1.04]
Fit Metrics
AIC	216.8		216.1		215.4
BIC	249.2		248.6		247.9
pseudo-R²
Tjur	.190		.192		.196
McFadden	.161		.164		.167
Note. N = 189. CI = confidence interval.Coefficient of deterination estiamted with both Tjur and McFadden's psuedo R-squared
* p < .05. p < .01. * p < .001.

apaSupp::tab_glm_fits(list("Only Mains"     = fit_glm_mains,
                           "Age-Weight"     = fit_glm_mains_aw,
                           "Age-Smoking"    = fit_glm_mains_as,
                           "Weight-Smoking" = fit_glm_mains_ws))

**Table 16.8**
Comparison of Generalized Linear Model Performane Metrics
			pseudo- $R^2$
Model	N	k	McFadden	Tjur	AIC	BIC	RMSE
Only Mains	189	9	.161	.190	214.83	244.01	0.42
Age-Weight	189	10	.161	.190	216.82	249.23	0.42
Age-Smoking	189	10	.164	.192	216.14	248.55	0.42
Weight-Smoking	189	10	.167	.196	215.44	247.86	0.42
Note. k = number of parameters estimated in each model. Larger $R^2$ values indicated better performance. Smaller values indicated better performance for Akaike's Information Criteria (AIC), Bayesian information criteria (BIC), and Root Mean Squared Error (RMSE).

anova(fit_glm_mains, fit_glm_mains_aw, test = 'LRT')

# A tibble: 2 × 5
  `Resid. Df` `Resid. Dev`    Df Deviance `Pr(>Chi)`
        <dbl>        <dbl> <dbl>    <dbl>      <dbl>
1         180         197.    NA  NA          NA    
2         179         197.     1   0.0170      0.896

anova(fit_glm_mains, fit_glm_mains_as, test = 'LRT')

# A tibble: 2 × 5
  `Resid. Df` `Resid. Dev`    Df Deviance `Pr(>Chi)`
        <dbl>        <dbl> <dbl>    <dbl>      <dbl>
1         180         197.    NA   NA         NA    
2         179         196.     1    0.699      0.403

anova(fit_glm_mains, fit_glm_mains_ws, test = 'LRT')

# A tibble: 2 × 5
  `Resid. Df` `Resid. Dev`    Df Deviance `Pr(>Chi)`
        <dbl>        <dbl> <dbl>    <dbl>      <dbl>
1         180         197.    NA    NA        NA    
2         179         195.     1     1.39      0.238

16.6 PARSAMONY

No interactions are significant Remove non-significant main effects

Since the mother’s age is theoretically a meaningful variable, it should probably be retained.

Remove “UI” since its not significant

fit_glm_trim <- glm(low ~ age + lwt + race + smoke + ptl_any + ht,
                    family = binomial(link = "logit"), 
                    data = df_mom)

apaSupp::tab_glm(fit_glm_trim)

**Table 16.9**
Parameter Estimates for Generalized Linear Regression
	Odds Ratio		Logit Scale
Variable	OR	95% CI	b	(SE)	Wald	LRT	VIF
age	0.96	[0.89, 1.03]	-0.04	(0.04)	.255	.249	1.09
lwt	0.98	[0.97, 1.00]	-0.02	(0.01)	.028*	.020*	1.29
race						.044*	1.49
White	—	—	—	—
Black	3.22	[1.13, 9.31]	1.168	(0.53)	.028*
Other	2.26	[0.96, 5.50]	.815	(0.44)	.066
smoke						.032*	1.35
No	—	—	—	—
Yes	2.36	[1.08, 5.32]	.9	(0.40)	.034*
ptl_any						.003**	1.08
No	—	—	—	—
Yes	3.80	[1.57, 9.53]	1.3	(0.46)	.004**
ht						.011*	1.15
No	—	—	—	—
Yes	5.70	[1.49, 24.76]	1.7	(0.70)	.013*
(Intercept)			0.92	(1.20)	.442
pseudo-R²	.180
Note. N = 189. CI = confidence interval; VIF = variance inflation factor. Significance denotes Wald t-tests for individual parameter estimates, as well as Likelihood Ratio Tests (LRT) for single-predictor deletion. Coefficient of deterination displays Tjur's pseudo-R².
* p < .05. p < .01. * p < .001.

16.7 CENTER & SCALE

Since the mother’s age is theoretically a meaningful variable, it should probably be retained.

Revise so that age is interpreted in 5-year and pre-pregnancy weight in 20 lb increments and the intercept has meaning.

fit_glm_final <- glm(low ~ I((age - 20)/5) + I((lwt - 125)/20) + race + smoke + ptl_any + ht,
                     family = binomial(link = "logit"), 
                     data = df_mom)

apaSupp::tab_glm(fit_glm_final,
                 var_labels = c("I((age - 20)/5)" = "Age, 5 yr",
                                "I((lwt - 125)/20)" = "Pre-Preg Weight, 20 lb",
                                race = "Race",
                                smoke = "Smoker",
                                ptl_any = "Prior Preterm Labor",
                                ht = "Hx Hypertension"),
                 p_note = "apa13",
                 show_single_row = c("smoke", "ptl_any", "ht"),
                 general_note = "Centering for age (20 yr) and weight (125 lbs)")

**Table 16.10**
Parameter Estimates for Generalized Linear Regression
	Odds Ratio		Logit Scale
Variable	OR	95% CI	b	(SE)	Wald	LRT	VIF
Age, 5 yr	0.81	[0.55, 1.16]	-0.21	(0.19)	.255	.249	1.09
Pre-Preg Weight, 20 lb	0.73	[0.55, 0.95]	-0.31	(0.14)	.028*	.020*	1.29
Race						.044*	1.49
White	—	—	—	—
Black	3.22	[1.13, 9.31]	1.168	(0.53)	.028*
Other	2.26	[0.96, 5.50]	.815	(0.44)	.066
Smoker	2.36	[1.08, 5.32]	0.86	(0.40)	.034*	.032*	1.35
Prior Preterm Labor	3.80	[1.57, 9.53]	1.33	(0.46)	.004**	.003**	1.08
Hx Hypertension	5.70	[1.49, 24.76]	1.74	(0.70)	.013*	.011*	1.15
(Intercept)			-1.86	(0.41)	< .001***
pseudo-R²	.180
Note. N = 189. CI = confidence interval; VIF = variance inflation factor. Significance denotes Wald t-tests for individual parameter estimates, as well as Likelihood Ratio Tests (LRT) for single-predictor deletion. Coefficient of deterination displays Tjur's pseudo-R². Centering for age (20 yr) and weight (125 lbs)
* p < .05. *** p < .001.

16.7.1 Marginal Model Plot

16.7.1.1 Focus on: Mother’s Age, weight, and race

interactions::interact_plot(model = fit_glm_final,
                            pred = lwt,
                            modx = race,
                            mod2 = age)

effects::Effect(focal.predictors = c("age", "lwt", "race"),
                mod = fit_glm_final,
                xlevels = list(age = c(20, 30, 40),
                               lwt = seq(from = 80, to = 250, by = 5))) %>% 
  data.frame() %>% 
  dplyr::mutate(age_labels = glue::glue("Mother Age: {age}")) %>% 
  ggplot(aes(x = lwt,
             y = fit)) +
  geom_line(aes(color = race,
                linetype = race),
            size = 1) +
  theme_bw() +
  facet_grid(.~ age_labels) +
  labs(title = "Risk of Low Birth Weight",
       subtitle = "Illustates risk given mother is a non-smoker, without a history of pre-term labor or hypertension",
       x = "Mother's Weight Pre-Pregnancy, pounds",
       y = "Predicted Probability\nBaby has Low Birth Weight (< 2500 grams)",
       color    = "Mother's Race",
       linetype = "Mother's Race") +
  theme(legend.position = c(1, 1),
        legend.justification = c(1.1, 1.1),
        legend.background = element_rect(color = "black"),
        legend.key.width = unit(2, "cm")) +
  scale_linetype_manual(values = c("longdash", "dotted", "solid")) +
  scale_color_manual(values = c( "coral2", "dodger blue", "gray50"))

16.7.1.2 Focus on: Mother’s weight and smoking status during pregnancy, as well as history of any per-term labor and hypertension

interactions::interact_plot(model = fit_glm_final,
                            pred = lwt,
                            modx = smoke,
                            mod2 = ptl_any)

interactions::interact_plot(model = fit_glm_final,
                            pred = lwt,
                            modx = smoke,
                            mod2 = ht)

effects::Effect(focal.predictors = c("lwt", "smoke", "ptl_any", "ht"),
                fixed.predictors = list(age = 20),
                mod = fit_glm_final,
                xlevels = list(lwt = seq(from = 80, to = 250, by = 5))) %>% 
  data.frame() %>% 
  dplyr::mutate(smoke = forcats::fct_rev(smoke)) %>% 
  dplyr::mutate(ptl_any_labels = glue::glue("History of Preterm Labor: {ptl_any}")) %>% 
  dplyr::mutate(ht_labels = glue::glue("History of Hypertension: {ht}") %>% forcats::fct_rev()) %>% 
  ggplot(aes(x = lwt,
             y = fit)) +
  geom_line(aes(color = smoke,
                linetype = smoke),
            size = 1) +
  theme_bw() +
  facet_grid(ht_labels ~ ptl_any_labels) +
  labs(title = "Risk of Low Birth Weight",
       subtitle = "Illustates risk given the mother is 20 years old and white",
       x = "Mother's Weight Pre-Pregnancy, pounds",
       y = "Predicted Probability\nBaby has Low Birth Weight (< 2500 grams)",
       color    = "Mother Smoked",
       linetype = "Mother Smoked") +
  theme(legend.position = c(1, .5),
        legend.justification = c(1.1, 1.15),
        legend.background = element_rect(color = "black"),
        legend.key.width = unit(1.5, "cm")) +
  scale_linetype_manual(values = c("longdash", "solid")) +
  scale_color_manual(values = c( "coral2", "dodger blue"))