Skip to content

Instantly share code, notes, and snippets.

@andrewheiss
Last active April 15, 2025 16:40
Show Gist options
  • Save andrewheiss/604f435952f994f61dcf562894cfd5c5 to your computer and use it in GitHub Desktop.
Save andrewheiss/604f435952f994f61dcf562894cfd5c5 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(broom)
library(parameters)
library(marginaleffects)
library(palmerpenguins)

penguins <- penguins |> 
  drop_na(sex) |> 
  mutate(is_gentoo = species == "Gentoo")

thing1 <- t.test(body_mass_g ~ sex, data = penguins)

thing1_tidied <- tidy(thing1)

thing1_tidied
#> # A tibble: 1 × 10
#>   estimate estimate1 estimate2 statistic  p.value parameter conf.low conf.high
#>      <dbl>     <dbl>     <dbl>     <dbl>    <dbl>     <dbl>    <dbl>     <dbl>
#> 1    -683.     3862.     4546.     -8.55 4.79e-16      324.    -841.     -526.
#> # ℹ 2 more variables: method <chr>, alternative <chr>
thing1_tidied$estimate
#> [1] -683.4118

# The difference in means is `{r} thing1_tidied$estimate` grams

asdf <- model_parameters(thing1)
asdf
#> Welch Two Sample t-test
#> 
#> Parameter   | Group | sex = female | sex = male | Difference
#> ------------------------------------------------------------
#> body_mass_g |   sex |      3862.27 |    4545.68 |    -683.41
#> 
#> Parameter   |             95% CI | t(323.90) |      p
#> -----------------------------------------------------
#> body_mass_g | [-840.58, -526.25] |     -8.55 | < .001
#> 
#> Alternative hypothesis: true difference in means between group female and group male is not equal to 0

model1 <- lm(body_mass_g ~ flipper_length_mm + bill_length_mm + species, data = penguins)
summary(model1)
#> 
#> Call:
#> lm(formula = body_mass_g ~ flipper_length_mm + bill_length_mm + 
#>     species, data = penguins)
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -811.52 -227.81  -33.74  216.73 1053.26 
#> 
#> Coefficients:
#>                    Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)       -3864.073    533.592  -7.242 3.18e-12 ***
#> flipper_length_mm    27.544      3.209   8.583 3.77e-16 ***
#> bill_length_mm       60.117      7.207   8.341 2.06e-15 ***
#> speciesChinstrap   -732.417     82.063  -8.925  < 2e-16 ***
#> speciesGentoo       113.254     89.206   1.270    0.205    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 339.6 on 328 degrees of freedom
#> Multiple R-squared:  0.8243, Adjusted R-squared:  0.8222 
#> F-statistic: 384.7 on 4 and 328 DF,  p-value: < 2.2e-16
qwer <- tidy(model1, conf.int = TRUE)
qwer
#> # A tibble: 5 × 7
#>   term              estimate std.error statistic  p.value conf.low conf.high
#>   <chr>                <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
#> 1 (Intercept)        -3864.     534.       -7.24 3.18e-12  -4914.    -2814. 
#> 2 flipper_length_mm     27.5      3.21      8.58 3.77e-16     21.2      33.9
#> 3 bill_length_mm        60.1      7.21      8.34 2.06e-15     45.9      74.3
#> 4 speciesChinstrap    -732.      82.1      -8.93 3.22e-17   -894.     -571. 
#> 5 speciesGentoo        113.      89.2       1.27 2.05e- 1    -62.2     289.

qwer$estimate
#> [1] -3864.07323    27.54429    60.11732  -732.41667   113.25418

thing_to_include <- qwer |> filter(term == "(Intercept)") |> pull(estimate)

# The intercept is `{r} thing_to_include`

model_parameters(model1)
#> Parameter           | Coefficient |     SE |               95% CI | t(328) |      p
#> -----------------------------------------------------------------------------------
#> (Intercept)         |    -3864.07 | 533.59 | [-4913.77, -2814.38] |  -7.24 | < .001
#> flipper length mm   |       27.54 |   3.21 | [   21.23,    33.86] |   8.58 | < .001
#> bill length mm      |       60.12 |   7.21 | [   45.94,    74.30] |   8.34 | < .001
#> species [Chinstrap] |     -732.42 |  82.06 | [ -893.85,  -570.98] |  -8.93 | < .001
#> species [Gentoo]    |      113.25 |  89.21 | [  -62.23,   288.74] |   1.27 | 0.205
#> 
#> Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
#>   using a Wald t-distribution approximation.

gof <- glance(model1)

model1 |> 
  tidy(conf.int = TRUE) |> 
  filter(term != "(Intercept)") |> 
  ggplot(aes(x = estimate, y = term)) +
  annotate(geom = "text", label = round(gof$r.squared, 2), x = -600, y = "speciesGentoo") +
  geom_vline(xintercept = 0) +
  geom_pointrange(aes(xmin = conf.low, xmax = conf.high))

model2 <- glm(is_gentoo ~ bill_length_mm + sex, family = binomial(), data = penguins)
tidy(model2)
#> # A tibble: 3 × 5
#>   term           estimate std.error statistic  p.value
#>   <chr>             <dbl>     <dbl>     <dbl>    <dbl>
#> 1 (Intercept)     -12.4      1.48       -8.41 4.15e-17
#> 2 bill_length_mm    0.275    0.0335      8.21 2.13e-16
#> 3 sexmale          -1.03     0.302      -3.42 6.25e- 4
model_parameters(model2, exponentiate = TRUE)
#> Parameter      | Odds Ratio |       SE |       95% CI |     z |      p
#> ----------------------------------------------------------------------
#> (Intercept)    |   3.92e-06 | 5.81e-06 | [0.00, 0.00] | -8.41 | < .001
#> bill length mm |       1.32 |     0.04 | [1.24, 1.41] |  8.21 | < .001
#> sex [male]     |       0.36 |     0.11 | [0.19, 0.63] | -3.42 | < .001
#> 
#> Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
#>   computed using a Wald z-distribution approximation.

plot_predictions(model2, condition = c("bill_length_mm", "sex"))

plot_predictions(model2, condition = c("bill_length_mm", "sex")) +
  geom_vline(xintercept = c(40, 55))

avg_slopes(
  model2,
  newdata = datagrid(bill_length_mm = c(40, 55), sex = unique),
  by = c("bill_length_mm", "sex"),
  variables = "bill_length_mm"
)
#> 
#>            Term bill_length_mm    sex Estimate Std. Error    z Pr(>|z|)    S
#>  bill_length_mm             40 female   0.0427    0.00531 8.05   <0.001 50.1
#>  bill_length_mm             40 male     0.0198    0.00378 5.24   <0.001 22.6
#>  bill_length_mm             55 female   0.0163    0.00431 3.79   <0.001 12.7
#>  bill_length_mm             55 male     0.0369    0.00500 7.38   <0.001 42.5
#>    2.5 % 97.5 %
#>  0.03233 0.0531
#>  0.01241 0.0272
#>  0.00789 0.0248
#>  0.02713 0.0467
#> 
#> Type:  response 
#> Comparison: mean(dY/dX)
#> Columns: rowid, term, contrast, bill_length_mm, sex, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, predicted_lo, predicted_hi, predicted

Created on 2025-04-15 with reprex v2.1.1

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment