16 Ex: Logistic - Maternal Risk (Hoffman)
Maternal Risk Factor for Low Birth Weight Delivery
Compiled: April 22, 2025
16.1 PREPARATION
16.1.2 Load Data
More complex example demonstrating modeling decisions
Another set of data from a study investigating predictors of low birth weight
id
infant’s unique identification number
Dependent variable (DV) or outcome
low
Low birth weight (outcome)0 = birth weight >2500 g (normal)
1 = birth weight < 2500 g (low))
bwt
actual infant birth weight in grams (ignore for now)
Independent variables (IV) or predictors
age
Age of mother, in yearslwt
Mother’s weight at last menstrual period, in poundsrace
Race: 1 = White, 2 = Black, 3 = Othersmoke
Smoking status during pregnancy:1 = Yes, 0 = Noptl
History of premature labor: 0 = None, 1 = One, 2 = two, 3 = threeht
History of hypertension: 1 = Yes, 0 = Noui
Uterine irritability: 1 = Yes, 0 = Noftv
Number of physician visits in 1st trimester: 0 = None, 1 = One, … 6 = six
The data is saved in a text file (.txt
) without any labels.
df_txt <- read.table("https://raw.githubusercontent.com/CEHS-research/data/master/Regression/lowbwt.txt",
header = TRUE,
sep = "",
na.strings = "NA",
dec = ".",
strip.white = TRUE)
tibble::glimpse(df_txt)
Rows: 189
Columns: 11
$ id <int> 85, 86, 87, 88, 89, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101…
$ low <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ age <int> 19, 33, 20, 21, 18, 21, 22, 17, 29, 26, 19, 19, 22, 30, 18, 18, …
$ lwt <int> 182, 155, 105, 108, 107, 124, 118, 103, 123, 113, 95, 150, 95, 1…
$ race <int> 2, 3, 1, 1, 1, 3, 1, 3, 1, 1, 3, 3, 3, 3, 1, 1, 2, 1, 3, 1, 3, 1…
$ smoke <int> 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0…
$ ptl <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
$ ht <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ ui <int> 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1…
$ ftv <int> 0, 3, 1, 2, 0, 0, 1, 1, 1, 0, 0, 1, 0, 2, 0, 0, 0, 3, 0, 1, 2, 3…
$ bwt <int> 2523, 2551, 2557, 2594, 2600, 2622, 2637, 2637, 2663, 2665, 2722…
16.1.3 Wrangle Data
df_mom <- df_txt %>%
dplyr::mutate(id = factor(id)) %>%
dplyr::mutate(low = low %>%
factor() %>%
forcats::fct_recode("birth weight >2500 g (normal)" = "0",
"birth weight < 2500 g (low)" = "1")) %>%
dplyr::mutate(race = race %>%
factor() %>%
forcats::fct_recode("White" = "1",
"Black" = "2",
"Other" = "3")) %>%
dplyr::mutate(ptl_any = as.numeric(ptl > 0)) %>% # collapse into 0 = none vs. 1 = at least one
dplyr::mutate(ptl = factor(ptl)) %>% # declare the number of pre-term labors to be a factor: 0, 1, 2, 3
dplyr::mutate_at(vars(smoke, ht, ui, ptl_any), # declare all there variables to be factors with the same two levels
factor,
levels = 0:1,
labels = c("No", "Yes"))
Display the structure of the ‘clean’ version of the dataset
'data.frame': 189 obs. of 12 variables:
$ id : Factor w/ 189 levels "4","10","11",..: 60 61 62 63 64 65 66 67 68 69 ...
$ low : Factor w/ 2 levels "birth weight >2500 g (normal)",..: 1 1 1 1 1 1 1 1 1 1 ...
$ age : int 19 33 20 21 18 21 22 17 29 26 ...
$ lwt : int 182 155 105 108 107 124 118 103 123 113 ...
$ race : Factor w/ 3 levels "White","Black",..: 2 3 1 1 1 3 1 3 1 1 ...
$ smoke : Factor w/ 2 levels "No","Yes": 1 1 2 2 2 1 1 1 2 2 ...
$ ptl : Factor w/ 4 levels "0","1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
$ ht : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
$ ui : Factor w/ 2 levels "No","Yes": 2 1 1 2 2 1 1 1 1 1 ...
$ ftv : int 0 3 1 2 0 0 1 1 1 0 ...
$ bwt : int 2523 2551 2557 2594 2600 2622 2637 2637 2663 2665 ...
$ ptl_any: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
Rows: 189
Columns: 12
$ id <fct> 85, 86, 87, 88, 89, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 1…
$ low <fct> birth weight >2500 g (normal), birth weight >2500 g (normal), …
$ age <int> 19, 33, 20, 21, 18, 21, 22, 17, 29, 26, 19, 19, 22, 30, 18, 18…
$ lwt <int> 182, 155, 105, 108, 107, 124, 118, 103, 123, 113, 95, 150, 95,…
$ race <fct> Black, Other, White, White, White, Other, White, Other, White,…
$ smoke <fct> No, No, Yes, Yes, Yes, No, No, No, Yes, Yes, No, No, No, No, Y…
$ ptl <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,…
$ ht <fct> No, No, No, No, No, No, No, No, No, No, No, No, Yes, No, No, N…
$ ui <fct> Yes, No, No, Yes, Yes, No, No, No, No, No, No, No, No, Yes, No…
$ ftv <int> 0, 3, 1, 2, 0, 0, 1, 1, 1, 0, 0, 1, 0, 2, 0, 0, 0, 3, 0, 1, 2,…
$ bwt <int> 2523, 2551, 2557, 2594, 2600, 2622, 2637, 2637, 2663, 2665, 27…
$ ptl_any <fct> No, No, No, No, No, No, No, No, No, No, No, No, No, Yes, No, N…
16.2 EXPLORATORY DATA ANALYSIS
16.2.1 Missing Data
df_mom %>%
dplyr::select("Low Birth Weight Delivery" = low,
"Age, years" = age,
"Weight, pounds" = lwt,
"Race" = race,
"Smoking During pregnancy" = smoke,
"History of Premature Labor, any" = ptl_any,
"History of Premature Labor, number" = ptl,
"History of Hypertension" = ht,
"Uterince Irritability" = ui,
"1st Tri Dr Visits" = ftv) %>%
naniar::miss_var_summary() %>%
dplyr::select(Variable = variable,
n = n_miss) %>%
flextable::flextable() %>%
apaSupp::theme_apa(caption = "Missing Data by Variable")
Variable | n |
---|---|
Low Birth Weight Delivery | 0 |
Age, years | 0 |
Weight, pounds | 0 |
Race | 0 |
Smoking During pregnancy | 0 |
History of Premature Labor, any | 0 |
History of Premature Labor, number | 0 |
History of Hypertension | 0 |
Uterince Irritability | 0 |
1st Tri Dr Visits | 0 |
16.2.2 Summary
df_mom %>%
dplyr::select("Low Birth Weight Delivery" = low,
"Race" = race,
"Smoking During pregnancy" = smoke,
"History of Premature Labor, any" = ptl_any,
"History of Premature Labor, number" = ptl,
"History of Hypertension" = ht,
"Uterince Irritability" = ui) %>%
apaSupp::tab_freq(caption = "Summary of Categorical Variables")
Statistic | ||
---|---|---|
Low Birth Weight Delivery | ||
birth weight >2500 g (normal) | 130 (68.8%) | |
birth weight < 2500 g (low) | 59 (31.2%) | |
Race | ||
White | 96 (50.8%) | |
Black | 26 (13.8%) | |
Other | 67 (35.4%) | |
Smoking During pregnancy | ||
No | 115 (60.8%) | |
Yes | 74 (39.2%) | |
History of Premature Labor, any | ||
No | 159 (84.1%) | |
Yes | 30 (15.9%) | |
History of Premature Labor, number | ||
0 | 159 (84.1%) | |
1 | 24 (12.7%) | |
2 | 5 (2.6%) | |
3 | 1 (0.5%) | |
History of Hypertension | ||
No | 177 (93.7%) | |
Yes | 12 (6.3%) | |
Uterince Irritability | ||
No | 161 (85.2%) | |
Yes | 28 (14.8%) |
df_mom %>%
dplyr::select("Age, yrs" = age,
"Weight, lbs" = lwt,
"1st Tri Dr Visits" = ftv) %>%
apaSupp::tab_desc(caption = "Summary of Continuous Variables")
NA | M | SD | min | Q1 | Mdn | Q3 | max | |
---|---|---|---|---|---|---|---|---|
Age, yrs | 0 | 23.24 | 5.30 | 14 | 19.00 | 23 | 26.00 | 45 |
Weight, lbs | 0 | 129.81 | 30.58 | 80 | 110.00 | 121 | 140.00 | 250 |
1st Tri Dr Visits | 0 | 0.79 | 1.06 | 0 | 0.00 | 0 | 1.00 | 6 |
Note. N = 189. NA = not available or missing; Mdn = median; Q1 = 25th percentile; Q3 = 75th percentile. |
16.3 UNADJUSTED
Unadjusted Models
16.3.1 Fit Models
fit_glm_age <- glm(low ~ age, family = binomial(link = "logit"), data = df_mom)
fit_glm_lwt <- glm(low ~ lwt, family = binomial(link = "logit"), data = df_mom)
fit_glm_race <- glm(low ~ race, family = binomial(link = "logit"), data = df_mom)
fit_glm_smoke <- glm(low ~ smoke, family = binomial(link = "logit"), data = df_mom)
fit_glm_ptl <- glm(low ~ ptl_any, family = binomial(link = "logit"), data = df_mom)
fit_glm_ht <- glm(low ~ ht, family = binomial(link = "logit"), data = df_mom)
fit_glm_ui <- glm(low ~ ui, family = binomial(link = "logit"), data = df_mom)
fit_glm_ftv <- glm(low ~ ftv, family = binomial(link = "logit"), data = df_mom)
16.3.2 Parameter Tables
Note: the parameter estimates here are for the LOGIT scale, not the odds ration (OR) or even the probability.
apaSupp::tab_glms(list(fit_glm_age, fit_glm_lwt, fit_glm_race, fit_glm_smoke),
narrow = TRUE,
fit = NA,
pr2 = "tjur")
| Model 1 | Model 2 | Model 3 | Model 4 | ||||
---|---|---|---|---|---|---|---|---|
Variable | OR | 95% CI | OR | 95% CI | OR | 95% CI | OR | 95% CI |
age | 0.95 | [0.89, 1.01] | ||||||
lwt | 0.99 | [0.97, 1.00]* | ||||||
race | ||||||||
White | — | — | ||||||
Black | 2.33 | [0.93, 5.77] | ||||||
Other | 1.89 | [0.96, 3.76] | ||||||
smoke | ||||||||
No | — | — | ||||||
Yes | 2.02 | [1.08, 3.80]* | ||||||
pseudo-R² | .014 | .031 | .026 | .026 | ||||
Note. N = 189. CI = confidence interval.Coefficient of deterination estiamted by Tjur's psuedo R-squared | ||||||||
* p < .05. ** p < .01. *** p < .001. |
apaSupp::tab_glms(list(fit_glm_ptl, fit_glm_ht, fit_glm_ui, fit_glm_ftv),
narrow = TRUE,
fit = NA,
pr2 = "tjur")
| Model 1 | Model 2 | Model 3 | Model 4 | ||||
---|---|---|---|---|---|---|---|---|
Variable | OR | 95% CI | OR | 95% CI | OR | 95% CI | OR | 95% CI |
ptl_any | ||||||||
No | — | — | ||||||
Yes | 4.32 | [1.94, 9.95]*** | ||||||
ht | ||||||||
No | — | — | ||||||
Yes | 3.37 | [1.03, 11.83]* | ||||||
ui | ||||||||
No | — | — | ||||||
Yes | 2.58 | [1.13, 5.88]* | ||||||
ftv | 0.87 | [0.63, 1.17] | ||||||
pseudo-R² | .073 | .023 | .029 | .004 | ||||
Note. N = 189. CI = confidence interval.Coefficient of deterination estiamted by Tjur's psuedo R-squared | ||||||||
* p < .05. ** p < .01. *** p < .001. |
16.4 MAIN EFFECTS ONLY
Main-effects multiple logistic regression model
fit_glm_mains <- glm(low ~ age + lwt + race + smoke + ptl_any + ht + ui,
family = binomial(link = "logit"),
data = df_mom)
Odds Ratio | Logit Scale | ||||||||
---|---|---|---|---|---|---|---|---|---|
Variable | OR | 95% CI | b | (SE) | Wald | LRT | VIF | ||
age | 0.96 | [0.89, 1.04] | -0.04 | (0.04) | .318 | .313 | 1.09 | ||
lwt | 0.99 | [0.97, 1.00] | -0.01 | (0.01) | .034* | .025* | 1.31 | ||
race | .041* | 1.52 | |||||||
White | — | — | — | — | |||||
Black | 3.36 | [1.19, 9.73] | 1.213 | (0.53) | .023* | ||||
Other | 2.23 | [0.94, 5.50] | .804 | (0.45) | .073 | ||||
smoke | 2.33 | [1.06, 5.29] | 0.85 | (0.41) | .038* | .036* | 1.35 | ||
ptl_any | 3.39 | [1.38, 8.60] | 1.22 | (0.46) | .008** | .008** | 1.10 | ||
ht | 6.29 | [1.64, 27.26] | 1.84 | (0.70) | .009** | .007** | 1.16 | ||
ui | 2.04 | [0.81, 5.06] | 0.71 | (0.46) | .125 | .128 | 1.05 | ||
(Intercept) | 0.64 | (1.23) | .605 | ||||||
pseudo-R² | .190 | ||||||||
Note. N = 189. CI = confidence interval; VIF = variance inflation factor. Significance denotes Wald t-tests for individual parameter estimates, as well as Likelihood Ratio Tests (LRT) for single-predictor deletion. Coefficient of deterination displays Tjur's pseudo-R². | |||||||||
* p < .05. ** p < .01. *** p < .001. |
16.5 INTERACTIONS
Before removing non-significant main effects, test plausible interactions
Try the following interactions:
Age and Weight
Age and Smoking
Weight and Smoking
fit_glm_mains_aw <- glm(low ~ age + lwt + race + smoke + ptl_any + ht + ui + age:lwt,
family = binomial(link = "logit"),
data = df_mom)
fit_glm_mains_as <- glm(low ~ age + lwt + race + smoke + ptl_any + ht + ui + age:smoke,
family = binomial(link = "logit"),
data = df_mom)
fit_glm_mains_ws <- glm(low ~ age + lwt + race + smoke + ptl_any + ht + ui + lwt:smoke,
family = binomial(link = "logit"),
data = df_mom)
apaSupp::tab_glms(list("Age-Weight" = fit_glm_mains_aw,
"Age-Smoking" = fit_glm_mains_as,
"Weight-Smoking" = fit_glm_mains_ws),
show_single_row = c("smoke", "ptl_any", "ht", "ui"),
narrow = TRUE)
| Age-Weight | Age-Smoking | Weight-Smoking | |||
---|---|---|---|---|---|---|
Variable | OR | 95% CI | OR | 95% CI | OR | 95% CI |
age | 0.94 | [0.67, 1.33] | 0.93 | [0.84, 1.03] | 0.96 | [0.89, 1.04] |
lwt | 0.98 | [0.92, 1.04] | 0.99 | [0.97, 1.00]* | 0.98 | [0.95, 1.00]* |
race | ||||||
White | — | — | — | — | — | — |
Black | 3.34 | [1.18, 9.71]* | 3.08 | [1.06, 9.11]* | 3.42 | [1.21, 9.95]* |
Other | 2.22 | [0.93, 5.50] | 2.16 | [0.90, 5.33] | 2.05 | [0.85, 5.07] |
smoke | 2.32 | [1.05, 5.28]* | 0.55 | [0.02, 17.99] | 0.32 | [0.01, 9.36] |
ptl_any | 3.41 | [1.38, 8.66]** | 3.31 | [1.35, 8.36]** | 3.54 | [1.44, 9.02]** |
ht | 6.29 | [1.64, 27.28]** | 6.43 | [1.67, 28.04]** | 5.72 | [1.48, 24.94]* |
ui | 2.04 | [0.81, 5.09] | 2.19 | [0.86, 5.56] | 2.23 | [0.88, 5.63] |
age * lwt | 1.00 | [1.00, 1.00] | ||||
age * smoke | ||||||
age * Yes | 1.07 | [0.92, 1.24] | ||||
lwt * smoke | ||||||
lwt * Yes | 1.02 | [0.99, 1.04] | ||||
Fit Metrics | ||||||
AIC | 216.8 | 216.1 | 215.4 | |||
BIC | 249.2 | 248.6 | 247.9 | |||
pseudo-R² | ||||||
Tjur | .190 | .192 | .196 | |||
McFadden | .161 | .164 | .167 | |||
Note. N = 189. CI = confidence interval.Coefficient of deterination estiamted with both Tjur and McFadden's psuedo R-squared | ||||||
* p < .05. ** p < .01. *** p < .001. |
apaSupp::tab_glm_fits(list("Only Mains" = fit_glm_mains,
"Age-Weight" = fit_glm_mains_aw,
"Age-Smoking" = fit_glm_mains_as,
"Weight-Smoking" = fit_glm_mains_ws))
pseudo- | |||||||
---|---|---|---|---|---|---|---|
Model | N | k | McFadden | Tjur | AIC | BIC | RMSE |
Only Mains | 189 | 9 | .161 | .190 | 214.83 | 244.01 | 0.42 |
Age-Weight | 189 | 10 | .161 | .190 | 216.82 | 249.23 | 0.42 |
Age-Smoking | 189 | 10 | .164 | .192 | 216.14 | 248.55 | 0.42 |
Weight-Smoking | 189 | 10 | .167 | .196 | 215.44 | 247.86 | 0.42 |
Note. k = number of parameters estimated in each model. Larger values indicated better performance. Smaller values indicated better performance for Akaike's Information Criteria (AIC), Bayesian information criteria (BIC), and Root Mean Squared Error (RMSE). |
# A tibble: 2 × 5
`Resid. Df` `Resid. Dev` Df Deviance `Pr(>Chi)`
<dbl> <dbl> <dbl> <dbl> <dbl>
1 180 197. NA NA NA
2 179 197. 1 0.0170 0.896
# A tibble: 2 × 5
`Resid. Df` `Resid. Dev` Df Deviance `Pr(>Chi)`
<dbl> <dbl> <dbl> <dbl> <dbl>
1 180 197. NA NA NA
2 179 196. 1 0.699 0.403
# A tibble: 2 × 5
`Resid. Df` `Resid. Dev` Df Deviance `Pr(>Chi)`
<dbl> <dbl> <dbl> <dbl> <dbl>
1 180 197. NA NA NA
2 179 195. 1 1.39 0.238
16.6 PARSAMONY
No interactions are significant Remove non-significant main effects
Since the mother’s age is theoretically a meaningful variable, it should probably be retained.
Remove “UI” since its not significant
fit_glm_trim <- glm(low ~ age + lwt + race + smoke + ptl_any + ht,
family = binomial(link = "logit"),
data = df_mom)
Odds Ratio | Logit Scale | ||||||||
---|---|---|---|---|---|---|---|---|---|
Variable | OR | 95% CI | b | (SE) | Wald | LRT | VIF | ||
age | 0.96 | [0.89, 1.03] | -0.04 | (0.04) | .255 | .249 | 1.09 | ||
lwt | 0.98 | [0.97, 1.00] | -0.02 | (0.01) | .028* | .020* | 1.29 | ||
race | .044* | 1.49 | |||||||
White | — | — | — | — | |||||
Black | 3.22 | [1.13, 9.31] | 1.168 | (0.53) | .028* | ||||
Other | 2.26 | [0.96, 5.50] | .815 | (0.44) | .066 | ||||
smoke | .032* | 1.35 | |||||||
No | — | — | — | — | |||||
Yes | 2.36 | [1.08, 5.32] | .9 | (0.40) | .034* | ||||
ptl_any | .003** | 1.08 | |||||||
No | — | — | — | — | |||||
Yes | 3.80 | [1.57, 9.53] | 1.3 | (0.46) | .004** | ||||
ht | .011* | 1.15 | |||||||
No | — | — | — | — | |||||
Yes | 5.70 | [1.49, 24.76] | 1.7 | (0.70) | .013* | ||||
(Intercept) | 0.92 | (1.20) | .442 | ||||||
pseudo-R² | .180 | ||||||||
Note. N = 189. CI = confidence interval; VIF = variance inflation factor. Significance denotes Wald t-tests for individual parameter estimates, as well as Likelihood Ratio Tests (LRT) for single-predictor deletion. Coefficient of deterination displays Tjur's pseudo-R². | |||||||||
* p < .05. ** p < .01. *** p < .001. |
16.7 CENTER & SCALE
Since the mother’s age is theoretically a meaningful variable, it should probably be retained.
Revise so that age is interpreted in 5-year and pre-pregnancy weight in 20 lb increments and the intercept has meaning.
fit_glm_final <- glm(low ~ I((age - 20)/5) + I((lwt - 125)/20) + race + smoke + ptl_any + ht,
family = binomial(link = "logit"),
data = df_mom)
apaSupp::tab_glm(fit_glm_final,
var_labels = c("I((age - 20)/5)" = "Age, 5 yr",
"I((lwt - 125)/20)" = "Pre-Preg Weight, 20 lb",
race = "Race",
smoke = "Smoker",
ptl_any = "Prior Preterm Labor",
ht = "Hx Hypertension"),
p_note = "apa13",
show_single_row = c("smoke", "ptl_any", "ht"),
general_note = "Centering for age (20 yr) and weight (125 lbs)")
Odds Ratio | Logit Scale | ||||||||
---|---|---|---|---|---|---|---|---|---|
Variable | OR | 95% CI | b | (SE) | Wald | LRT | VIF | ||
Age, 5 yr | 0.81 | [0.55, 1.16] | -0.21 | (0.19) | .255 | .249 | 1.09 | ||
Pre-Preg Weight, 20 lb | 0.73 | [0.55, 0.95] | -0.31 | (0.14) | .028* | .020* | 1.29 | ||
Race | .044* | 1.49 | |||||||
White | — | — | — | — | |||||
Black | 3.22 | [1.13, 9.31] | 1.168 | (0.53) | .028* | ||||
Other | 2.26 | [0.96, 5.50] | .815 | (0.44) | .066 | ||||
Smoker | 2.36 | [1.08, 5.32] | 0.86 | (0.40) | .034* | .032* | 1.35 | ||
Prior Preterm Labor | 3.80 | [1.57, 9.53] | 1.33 | (0.46) | .004** | .003** | 1.08 | ||
Hx Hypertension | 5.70 | [1.49, 24.76] | 1.74 | (0.70) | .013* | .011* | 1.15 | ||
(Intercept) | -1.86 | (0.41) | < .001*** | ||||||
pseudo-R² | .180 | ||||||||
Note. N = 189. CI = confidence interval; VIF = variance inflation factor. Significance denotes Wald t-tests for individual parameter estimates, as well as Likelihood Ratio Tests (LRT) for single-predictor deletion. Coefficient of deterination displays Tjur's pseudo-R². Centering for age (20 yr) and weight (125 lbs) | |||||||||
* p < .05. *** p < .001. |
16.7.1 Marginal Model Plot
16.7.1.1 Focus on: Mother’s Age, weight, and race
effects::Effect(focal.predictors = c("age", "lwt", "race"),
mod = fit_glm_final,
xlevels = list(age = c(20, 30, 40),
lwt = seq(from = 80, to = 250, by = 5))) %>%
data.frame() %>%
dplyr::mutate(age_labels = glue::glue("Mother Age: {age}")) %>%
ggplot(aes(x = lwt,
y = fit)) +
geom_line(aes(color = race,
linetype = race),
size = 1) +
theme_bw() +
facet_grid(.~ age_labels) +
labs(title = "Risk of Low Birth Weight",
subtitle = "Illustates risk given mother is a non-smoker, without a history of pre-term labor or hypertension",
x = "Mother's Weight Pre-Pregnancy, pounds",
y = "Predicted Probability\nBaby has Low Birth Weight (< 2500 grams)",
color = "Mother's Race",
linetype = "Mother's Race") +
theme(legend.position = c(1, 1),
legend.justification = c(1.1, 1.1),
legend.background = element_rect(color = "black"),
legend.key.width = unit(2, "cm")) +
scale_linetype_manual(values = c("longdash", "dotted", "solid")) +
scale_color_manual(values = c( "coral2", "dodger blue", "gray50"))
16.7.1.2 Focus on: Mother’s weight and smoking status during pregnancy, as well as history of any per-term labor and hypertension
effects::Effect(focal.predictors = c("lwt", "smoke", "ptl_any", "ht"),
fixed.predictors = list(age = 20),
mod = fit_glm_final,
xlevels = list(lwt = seq(from = 80, to = 250, by = 5))) %>%
data.frame() %>%
dplyr::mutate(smoke = forcats::fct_rev(smoke)) %>%
dplyr::mutate(ptl_any_labels = glue::glue("History of Preterm Labor: {ptl_any}")) %>%
dplyr::mutate(ht_labels = glue::glue("History of Hypertension: {ht}") %>% forcats::fct_rev()) %>%
ggplot(aes(x = lwt,
y = fit)) +
geom_line(aes(color = smoke,
linetype = smoke),
size = 1) +
theme_bw() +
facet_grid(ht_labels ~ ptl_any_labels) +
labs(title = "Risk of Low Birth Weight",
subtitle = "Illustates risk given the mother is 20 years old and white",
x = "Mother's Weight Pre-Pregnancy, pounds",
y = "Predicted Probability\nBaby has Low Birth Weight (< 2500 grams)",
color = "Mother Smoked",
linetype = "Mother Smoked") +
theme(legend.position = c(1, .5),
legend.justification = c(1.1, 1.15),
legend.background = element_rect(color = "black"),
legend.key.width = unit(1.5, "cm")) +
scale_linetype_manual(values = c("longdash", "solid")) +
scale_color_manual(values = c( "coral2", "dodger blue"))