4 D&H Ch3b - Multiple Regression: “Sport Skill”
Compiled: October 15, 2025
Darlington & Hayes, Chapter 3’s second example
# install.packages("remotes")
# remotes::install_github("sarbearschwartz/apaSupp") # 9/17/2025
library(tidyverse)   
library(flextable)
library(apaSupp)       # not on CRAN, get from GitHub (above) 
library(GGally)    
library(ggpubr)  
library(interactions) 
library(effectsize)
library(performance)
library(ggResidpanel)4.1 PURPOSE
4.1.2 Data Description
4.1.2.1 Variables
Independent Variables (IV, \(X\))
* soft skill at softball
* basket skill at basketball
Dependent Variable (DV, \(Y\))
* pref On a scale from 1 to 9, which sport do you prefer?”
+ 1 = much prefer softball
+ 5 = no preference
+ 9 = much prefer basketball
df_sport <- tibble::tribble(~id, ~soft, ~basket, ~pref,
                            1,   4, 17, 1,
                            2,  56, 60, 3,
                            3,  25,  3, 8,
                            4,  50, 52, 4,
                            5,   5, 16, 2,
                            6,  72, 84, 2,
                            7, 100, 95, 5,
                            8,  39, 20, 8,
                            9,  81, 75, 5,
                            10,  61, 47, 7)df_sport %>% 
  dplyr::select("ID" = id,
                "Softball Skill" = soft,
                "Basketball Skill" = basket,
                "Preference Score" = pref) %>% 
  flextable::flextable() %>% 
  apaSupp::theme_apa(caption = "D&H TAble 3.5 (pg 78) Skill at Softball, Basketball, and Preference") %>% 
  flextable::colformat_double(digits = 0)ID  | Softball Skill  | Basketball Skill  | Preference Score  | 
|---|---|---|---|
1  | 4  | 17  | 1  | 
2  | 56  | 60  | 3  | 
3  | 25  | 3  | 8  | 
4  | 50  | 52  | 4  | 
5  | 5  | 16  | 2  | 
6  | 72  | 84  | 2  | 
7  | 100  | 95  | 5  | 
8  | 39  | 20  | 8  | 
9  | 81  | 75  | 5  | 
10  | 61  | 47  | 7  | 
4.2 EXPLORATORY DATA ANALYSIS
4.2.1 Univariate Statistics
Center: mean and median Spread: standard deviation, range (max - min), interquartile range (Q3 - Q1)
df_sport %>% 
  dplyr::select("Softball Skill" = soft,
                "Basketball Skill" = basket,
                "Preference Score" = pref) %>%
  apaSupp::tab_desc(caption = "Summary Statistics")NA  | M  | SD  | min  | Q1  | Mdn  | Q3  | max  | |
|---|---|---|---|---|---|---|---|---|
Softball Skill  | 0  | 49.30  | 31.59  | 4.00  | 28.50  | 53.00  | 69.25  | 100.00  | 
Basketball Skill  | 0  | 46.90  | 31.93  | 3.00  | 17.75  | 49.50  | 71.25  | 95.00  | 
Preference Score  | 0  | 4.50  | 2.55  | 1.00  | 2.25  | 4.50  | 6.50  | 8.00  | 
Note. N = 10. NA = not available or missing; Mdn = median; Q1 = 25th percentile; Q3 = 75th percentile.  | ||||||||
4.2.3 Bivariate Statistics
df_sport %>% 
  dplyr::select("Softball Skill" = soft,
                "Basketball Skill" = basket,
                "Preference Score" = pref) %>%
  apaSupp::tab_cor(caption = "Unadjusted, Pairwise Correlations")Variable Pair  | r  | p  | |
|---|---|---|---|
Softball Skill  | Basketball Skill  | .920  | < .001***  | 
Softball Skill  | Preference Score  | .210  | .562  | 
Basketball Skill  | Preference Score  | -.190  | .590  | 
Note. N = 10. r = Pearson's Product-Moment correlation coefficient.  | |||
* p < .05. ** p < .01. *** p < .001.  | |||
4.2.4 Bivariate Visualization
df_sport %>% 
  dplyr::select("Softball Skill" = soft,
                "Basketball Skill" = basket,
                "Preference Score" = pref) %>%
  data.frame %>% 
  GGally::ggscatmat() +
  theme_bw()
df_sport %>% 
  ggplot(aes(x = soft,        
             y = pref)) +   
  geom_point() +                    
  geom_smooth(method = "lm",
              formula = y ~ x) +
  ggpubr::stat_regline_equation(label.x = 75,
                                label.y = 1,
                                size = 6) +
  theme_bw() +
  labs(x = "Softball Skill",
       y = "Preference")
df_sport %>% 
  ggplot(aes(x = basket,        
             y = pref)) +   
  geom_point() +                    
  geom_smooth(method = "lm",
              formula = y ~ x) +
  ggpubr::stat_regline_equation(label.x = 75,
                                label.y = 1,
                                size = 6) +
  theme_bw() +
  labs(x = "Basketball Skill",
       y = "Preference")
4.3 REGRESSION ANALYSIS
- The dependent variable (DV) is preference (\(Y\))
 - The independent variable (IVs) are skill levels (\(X\))
 
4.3.1 Fit the Models
fit_lm_soft <- lm(pref ~ soft,
                  data = df_sport)  
fit_lm_basket <- lm(pref ~ basket,
                    data = df_sport)
fit_lm_both <- lm(pref ~ soft + basket,
                  data = df_sport)tab_lm3 <- apaSupp::tab_lms(list("Softball"   = fit_lm_soft, 
                                 "Basketball" = fit_lm_basket, 
                                 "Both"       = fit_lm_both),
                            var_labels = c("soft" = "Softball Skill",
                                           "basket" = "Basketball Skill"),
                            caption = "Parameter Estimates for Sport Preference Regression on Skill Level in Softball and Basketball",
                            general_note = "Dependent variable is preference rating on a scale of 1 (much prefer softball) to  9 (much prefer basketball).  Both softball and Baksetball skill levels are on a scale of 0 to 100.",
                            narrow = TRUE,
                            d = 3) 
tab_lm3
  | Softball  | Basketball  | Both  | |||
|---|---|---|---|---|---|---|
Variable  | b  | (SE)  | b  | (SE)  | b  | (SE)  | 
(Intercept)  | 3.669  | (1.610)  | 5.228  | (1.546) **  | 3.899  | (0.200) ***  | 
Softball Skill  | 0.017  | (0.028)  | 0.198  | (0.009) ***  | ||
Basketball Skill  | -0.016  | (0.028)  | -0.195  | (0.009) ***  | ||
AIC  | 51.597  | 51.658  | 10.495  | |||
BIC  | 52.504  | 52.565  | 11.706  | |||
R²  | .0437  | .0378  | .9872  | |||
Adjusted R²  | -.0759  | -.0824  | .9835  | |||
Note. Dependent variable is preference rating on a scale of 1 (much prefer softball) to 9 (much prefer basketball). Both softball and Baksetball skill levels are on a scale of 0 to 100.  | ||||||
* p < .05. ** p < .01. *** p < .001.  | ||||||
We call a set of regressors complementary if \(R^2\) for the set exceeds the sum of the individual values of \(r^2_{YX}\) . Thus, complementarity and collinearity are opposites, though either can occur only when regressors in a set are intercorrelated.
4.3.2 Visualize
interactions::interact_plot(model = fit_lm_both,
                            pred = soft,
                            modx = basket,
                            modx.values = c(25, 50, 75),
                            legend.main = "Basketball Skill\n(0-100)",
                            interval = TRUE) +
  theme_bw() +
  labs(x = "Softball Skill, (0-100)",
       y = "Estimated Marginal Mean Preference\n1 = much prefer softball\n9 = much prefer basketball") +
  coord_cartesian(ylim = c(1, 9)) +
  scale_y_continuous(breaks = 1:9) +
  theme(legend.position = c(0, 1),
        legend.justification = c(-.1, 1.1),
        legend.background = element_rect(color = "black"),
        legend.key.width = unit(1.5, "cm"))
4.3.3 Semipartial Correlation
b  | (SE)  | p  | 
  | 
  | 
  | |
|---|---|---|---|---|---|---|
(Intercept)  | 3.90  | (0.20)  | < .001***  | |||
soft  | 0.20  | (0.01)  | < .001***  | 2.45  | .949  | .987  | 
basket  | -0.20  | (0.01)  | < .001***  | -2.44  | .943  | .987  | 
R²  | .987  | |||||
Adjusted R²  | .983  | |||||
Note. N = 10. = standardize coefficient; = semi-partial correlation; = partial correlation; p = significance from Wald t-test for parameter estimate.  | ||||||
* p < .05. ** p < .01. *** p < .001.  | ||||||
b  | (SE)  | p  | 
  | 
  | 
  | |
|---|---|---|---|---|---|---|
(Intercept)  | 3.90  | (0.20)  | < .001***  | |||
soft  | 0.20  | (0.01)  | < .001***  | 2.45  | .949  | .987  | 
basket  | -0.20  | (0.01)  | < .001***  | -2.44  | .943  | .987  | 
R²  | .987  | |||||
Adjusted R²  | .983  | |||||
Note. N = 10. = standardize coefficient; = semi-partial correlation; = partial correlation; p = significance from Wald t-test for parameter estimate.  | ||||||
* p < .05. ** p < .01. *** p < .001.  | ||||||
# A tibble: 2 × 5
  Term   r2_semipartial    CI CI_low CI_high
  <chr>           <dbl> <dbl>  <dbl>   <dbl>
1 soft            0.949  0.95  0.756       1
2 basket          0.943  0.95  0.737       1
4.3.4 Venn Diagram - Variances
https://freetools.touchpoint.com/venn-diagram-template-generator
[1] 6.5
[1] 6.216137
[1] 6.254138
[1] 0.08349447
[1] 6.416506
[1] 0.1623674
[1] 0.2003681
[1] 6.05377
4.3.5 Venn Diagram - Proportions
Total Variance in Preference Explained by Both Skills
[1] 0.9871547
Variance in Preference Uniquely Explained by Softball skills, across all Basketball skill levels
[1] 0.0249796
Variance in Preference Uniquely Explained by Basketball skills, across all Softball skill levels
[1] 0.03082587
Variance in Preference Explained by Softball skills, when holding Basketball skill constant
[1] 0.6604009
[1] 0.7058631



