library(tidyverse)
library(broom)
library(parameters)
library(marginaleffects)
library(palmerpenguins)
penguins <- penguins |>
drop_na(sex) |>
mutate(is_gentoo = species == "Gentoo")
thing1 <- t.test(body_mass_g ~ sex, data = penguins)
thing1_tidied <- tidy(thing1)
thing1_tidied
#> # A tibble: 1 × 10
#> estimate estimate1 estimate2 statistic p.value parameter conf.low conf.high
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 -683. 3862. 4546. -8.55 4.79e-16 324. -841. -526.
#> # ℹ 2 more variables: method <chr>, alternative <chr>
thing1_tidied$estimate
#> [1] -683.4118
# The difference in means is `{r} thing1_tidied$estimate` grams
asdf <- model_parameters(thing1)
asdf
#> Welch Two Sample t-test
#>
#> Parameter | Group | sex = female | sex = male | Difference
#> ------------------------------------------------------------
#> body_mass_g | sex | 3862.27 | 4545.68 | -683.41
#>
#> Parameter | 95% CI | t(323.90) | p
#> -----------------------------------------------------
#> body_mass_g | [-840.58, -526.25] | -8.55 | < .001
#>
#> Alternative hypothesis: true difference in means between group female and group male is not equal to 0
model1 <- lm(body_mass_g ~ flipper_length_mm + bill_length_mm + species, data = penguins)
summary(model1)
#>
#> Call:
#> lm(formula = body_mass_g ~ flipper_length_mm + bill_length_mm +
#> species, data = penguins)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -811.52 -227.81 -33.74 216.73 1053.26
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -3864.073 533.592 -7.242 3.18e-12 ***
#> flipper_length_mm 27.544 3.209 8.583 3.77e-16 ***
#> bill_length_mm 60.117 7.207 8.341 2.06e-15 ***
#> speciesChinstrap -732.417 82.063 -8.925 < 2e-16 ***
#> speciesGentoo 113.254 89.206 1.270 0.205
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 339.6 on 328 degrees of freedom
#> Multiple R-squared: 0.8243, Adjusted R-squared: 0.8222
#> F-statistic: 384.7 on 4 and 328 DF, p-value: < 2.2e-16
qwer <- tidy(model1, conf.int = TRUE)
qwer
#> # A tibble: 5 × 7
#> term estimate std.error statistic p.value conf.low conf.high
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 (Intercept) -3864. 534. -7.24 3.18e-12 -4914. -2814.
#> 2 flipper_length_mm 27.5 3.21 8.58 3.77e-16 21.2 33.9
#> 3 bill_length_mm 60.1 7.21 8.34 2.06e-15 45.9 74.3
#> 4 speciesChinstrap -732. 82.1 -8.93 3.22e-17 -894. -571.
#> 5 speciesGentoo 113. 89.2 1.27 2.05e- 1 -62.2 289.
qwer$estimate
#> [1] -3864.07323 27.54429 60.11732 -732.41667 113.25418
thing_to_include <- qwer |> filter(term == "(Intercept)") |> pull(estimate)
# The intercept is `{r} thing_to_include`
model_parameters(model1)
#> Parameter | Coefficient | SE | 95% CI | t(328) | p
#> -----------------------------------------------------------------------------------
#> (Intercept) | -3864.07 | 533.59 | [-4913.77, -2814.38] | -7.24 | < .001
#> flipper length mm | 27.54 | 3.21 | [ 21.23, 33.86] | 8.58 | < .001
#> bill length mm | 60.12 | 7.21 | [ 45.94, 74.30] | 8.34 | < .001
#> species [Chinstrap] | -732.42 | 82.06 | [ -893.85, -570.98] | -8.93 | < .001
#> species [Gentoo] | 113.25 | 89.21 | [ -62.23, 288.74] | 1.27 | 0.205
#>
#> Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
#> using a Wald t-distribution approximation.
gof <- glance(model1)
model1 |>
tidy(conf.int = TRUE) |>
filter(term != "(Intercept)") |>
ggplot(aes(x = estimate, y = term)) +
annotate(geom = "text", label = round(gof$r.squared, 2), x = -600, y = "speciesGentoo") +
geom_vline(xintercept = 0) +
geom_pointrange(aes(xmin = conf.low, xmax = conf.high))
model2 <- glm(is_gentoo ~ bill_length_mm + sex, family = binomial(), data = penguins)
tidy(model2)
#> # A tibble: 3 × 5
#> term estimate std.error statistic p.value
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 (Intercept) -12.4 1.48 -8.41 4.15e-17
#> 2 bill_length_mm 0.275 0.0335 8.21 2.13e-16
#> 3 sexmale -1.03 0.302 -3.42 6.25e- 4
model_parameters(model2, exponentiate = TRUE)
#> Parameter | Odds Ratio | SE | 95% CI | z | p
#> ----------------------------------------------------------------------
#> (Intercept) | 3.92e-06 | 5.81e-06 | [0.00, 0.00] | -8.41 | < .001
#> bill length mm | 1.32 | 0.04 | [1.24, 1.41] | 8.21 | < .001
#> sex [male] | 0.36 | 0.11 | [0.19, 0.63] | -3.42 | < .001
#>
#> Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
#> computed using a Wald z-distribution approximation.
plot_predictions(model2, condition = c("bill_length_mm", "sex"))
plot_predictions(model2, condition = c("bill_length_mm", "sex")) +
geom_vline(xintercept = c(40, 55))
avg_slopes(
model2,
newdata = datagrid(bill_length_mm = c(40, 55), sex = unique),
by = c("bill_length_mm", "sex"),
variables = "bill_length_mm"
)
#>
#> Term bill_length_mm sex Estimate Std. Error z Pr(>|z|) S
#> bill_length_mm 40 female 0.0427 0.00531 8.05 <0.001 50.1
#> bill_length_mm 40 male 0.0198 0.00378 5.24 <0.001 22.6
#> bill_length_mm 55 female 0.0163 0.00431 3.79 <0.001 12.7
#> bill_length_mm 55 male 0.0369 0.00500 7.38 <0.001 42.5
#> 2.5 % 97.5 %
#> 0.03233 0.0531
#> 0.01241 0.0272
#> 0.00789 0.0248
#> 0.02713 0.0467
#>
#> Type: response
#> Comparison: mean(dY/dX)
#> Columns: rowid, term, contrast, bill_length_mm, sex, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, predicted_lo, predicted_hi, predicted
Created on 2025-04-15 with reprex v2.1.1