jrosell · October 23, 2024 10:35
diff --git a/visualizing-types-posteriors.R b/visualizing-types-posteriors.R
 # Original code: https://www.andrewheiss.com/blog/2022/09/26/guide-visualizing-types-posteriors/

 # Preparations -----------------------------------------------------------------

 cat("repos: ", getOption("repos"), "\n")
 if (!requireNamespace("rlang", quietly = TRUE)) stop("Please, install.packages('rlang')")
 if (!requireNamespace("pak", quietly = TRUE)) stop("Please, install.packages('pak')")
 pkgs <- rlang::chr(
  "tidyverse",        # ggplot, dplyr, purrr and friends
  "patchwork",        # Combine ggplot plots
  "ggtext",           # Fancier text in ggplot plots
  "scales",           # Labeling functions
  "brms",             # Bayesian modeling through Stan
  "tidybayes",        # Manipulate Stan objects in a tidy way
  "marginaleffects",  # Calculate marginal effects
  "modelr",           # For quick model grids
  "extraDistr",       # For dprop() beta distribution with mu/phi
  "distributional",   # For plotting distributions with ggdist
  "palmerpenguins",   # Penguins dataset
  "kableExtra",       # For nicer tables
  "MetBrewer",        # Colors
  "cmdstanr",         # Stan backend
  "broom.mixed",      # Extract tidy information from mixed models
 )
 rlang::check_installed(pkgs)
 purrr::walk(pkgs, library, character.only = TRUE, quietly = TRUE)

 options(mc.cores = 4, brms.backend = "cmdstanr")
 cmdstanr::install_cmdstan()
 set.seed(1234)
 bayes_seed <- 1234

 # Custom ggplot themes to make pretty plots
 # Get Roboto Condensed at https://fonts.google.com/specimen/Roboto+Condensed
 # Get Roboto Mono at https://fonts.google.com/specimen/Roboto+Mono
 clrs <- MetBrewer::met.brewer("Java")
 theme_pred <- function() {
  theme_minimal(base_family = "Roboto Condensed") +
    theme(panel.grid.minor = element_blank(),
          plot.background = element_rect(fill = "white", color = NA),
          plot.title = element_text(face = "bold"),
          strip.text = element_text(face = "bold"),
          strip.background = element_rect(fill = "grey80", color = NA),
          axis.title.x = element_text(hjust = 0),
          axis.title.y = element_text(hjust = 0),
          legend.title = element_text(face = "bold"))
 }
 theme_pred_dist <- function() {
  theme_pred() +
    theme(plot.title = element_markdown(family = "Roboto Condensed", face = "plain"),
          plot.subtitle = element_text(family = "Roboto Mono", size = rel(0.9), hjust = 0),
          axis.text.y = element_blank(),
          panel.grid.major.y = element_blank(),
          panel.grid.minor.y = element_blank())
 }
 theme_pred_range <- function() {
  theme_pred() +
    theme(plot.title = element_markdown(family = "Roboto Condensed", face = "plain"),
          plot.subtitle = element_text(family = "Roboto Mono", size = rel(0.9), hjust = 0),
          panel.grid.minor.y = element_blank())
 }
 update_geom_defaults("text", list(family = "Roboto Condensed", lineheight = 1))


 # Data -------------------------------------------------------------------------

 penguins <- penguins |> 
  drop_na(sex) |> 
  mutate(is_gentoo = species == "Gentoo") |> 
  mutate(bill_ratio = bill_depth_mm / bill_length_mm) |> 
  print()


 # Normal Gaussian model --------------------------------------------------------


 ggplot(penguins, aes(x = flipper_length_mm, y = body_mass_g)) +
  geom_point(size = 1, alpha = 0.7) +
  geom_smooth(method = "lm", color = clrs[5], se = FALSE) +
  scale_y_continuous(labels = label_comma()) +
  coord_cartesian(ylim = c(2000, 6000)) +
  labs(x = "Flipper length (mm)", y = "Body mass (g)") +
  theme_pred()

 model_normal <- brm(
  bf(body_mass_g ~ flipper_length_mm),
  family = gaussian(),
  data = penguins
 )

 # Calculate posterior distribution and get the means for the parameters --------
 broom.mixed::tidy(model_normal) |> 
  bind_cols(parameter = c("α", "β", "σ")) |> 
  select(parameter, term, estimate, std.error, conf.low, conf.high)
 # # A tibble: 3 × 6
 # parameter term              estimate std.error conf.low conf.high
 # <chr>     <chr>                <dbl>     <dbl>    <dbl>     <dbl>
 # 1 α         (Intercept)        -5874.     314.    -6488.    -5266. 
 # 2 β         flipper_length_mm     50.2      1.56     47.1      53.2
 # 3 σ         sd__Observation      395.      15.5     366.      427. 



 # Prediction of the outcome based on a single value of flipper lengths ---------

 penguins_avg_flipper <- penguins |> 
  summarize(flipper_length_mm = mean(flipper_length_mm)) |> 
  print()

 normal_linpred <- model_normal |> 
  linpred_draws(newdata = penguins_avg_flipper) |> 
  print()

 normal_epred <- model_normal |> 
  epred_draws(newdata = penguins_avg_flipper) |> 
  print()

 normal_predicted <- model_normal |> 
  predicted_draws(
    newdata = penguins_avg_flipper,
    seed = 12345
  ) |> 
  print()


 summary_normal_linpred <- normal_linpred |> 
  ungroup() |> 
  summarize(across(.linpred, lst(mean, sd, median), .names = "{.fn}"))

 summary_normal_epred <- normal_epred |> 
  ungroup() |> 
  summarize(across(.epred, lst(mean, sd, median), .names = "{.fn}"))

 summary_normal_predicted <- normal_predicted |> 
  ungroup() |> 
  summarize(across(.prediction, lst(mean, sd, median), .names = "{.fn}"))

 tribble(
  ~Function, ~`Model element`,
  "<code>posterior_linpred()</code>", "\\(\\mu\\) in the model",
  "<code>posterior_epred()</code>", "\\(\\operatorname{E(y)}\\) and \\(\\mu\\) in the model",
  "<code>posterior_predict()</code>", "Random draws from posterior \\(\\operatorname{Normal}(\\mu_i, \\sigma)\\)"
 ) |> 
  bind_cols(bind_rows(summary_normal_linpred, summary_normal_epred, summary_normal_predicted)) |> 
  kbl(escape = FALSE) |> 
  kable_styling()

 p1 <- ggplot(normal_linpred, aes(x = .linpred)) +
  stat_halfeye(fill = clrs[3]) +
  scale_x_continuous(labels = label_comma()) +
  coord_cartesian(xlim = c(4100, 4300)) +
  labs(x = "Body mass (g)", y = NULL,
       title = "**Linear predictor** <span style='font-size: 14px;'>*µ* in the model</span>",
       subtitle = "posterior_linpred(..., tibble(flipper_length_mm = 201))") +
  theme_pred_dist() +
  theme(plot.title = element_markdown())

 p2 <- ggplot(normal_epred, aes(x = .epred)) +
  stat_halfeye(fill = clrs[2]) +
  scale_x_continuous(labels = label_comma()) +
  coord_cartesian(xlim = c(4100, 4300)) +
  labs(x = "Body mass (g)", y = NULL,
       title = "**Expectation of the posterior** <span style='font-size: 14px;'>E[*y*] and *µ* in the model</span>",
       subtitle = "posterior_epred(..., tibble(flipper_length_mm = 201))") +
  theme_pred_dist()

 p3 <- ggplot(normal_predicted, aes(x = .prediction)) +
  stat_halfeye(fill = clrs[1]) +
  scale_x_continuous(labels = label_comma()) +
  coord_cartesian(xlim = c(2900, 5500)) +
  labs(x = "Body mass (g)", y = NULL,
       title = "**Posterior predictions** <span style='font-size: 14px;'>Random draws from posterior Normal(*µ*, *σ*)</span>",
       subtitle = "posterior_predict(..., tibble(flipper_length_mm = 201))") +
  theme_pred_dist()

 (p1 / plot_spacer() / p2 / plot_spacer() / p3) +
  plot_layout(heights = c(0.3, 0.05, 0.3, 0.05, 0.3)) +
  plot_annotation("For posterior_linpred() and posterior_epred(), the standard error is tiny and the range of plausible predicted values is really narrow.\nFor posterior_predict(), the standard error is substantially bigger, and the corresponding range of predicted values is huge.")


 # Both posterior_linpred() and posterior_epred() correspond to the mu part of the model
 # The distribution of the part of the model here does not incorporate information about sigma.
 # That’s why the distribution is so narrow.

 linpred_manual <- model_normal |> 
  spread_draws(b_Intercept, b_flipper_length_mm) |> 
  mutate(mu = b_Intercept + 
           (b_flipper_length_mm * penguins_avg_flipper$flipper_length_mm))

 p1_manual <- linpred_manual |> 
  ggplot(aes(x = mu)) + 
  stat_halfeye(fill = colorspace::lighten(clrs[3], 0.5)) +
  scale_x_continuous(labels = label_comma()) +
  coord_cartesian(xlim = c(4100, 4300)) +
  labs(x = "Body mass (g)", y = NULL,
        title = "**Linear predictor** <span style='font-size: 14px;'>*µ* in the model</span>",
        subtitle = "b_Intercept + (b_flipper_length_mm * 201)") +
  theme_pred_dist() +
  theme(plot.title = element_markdown())

 p1_manual | p1


 # The results from posterior_predict(), on the other hand, correspond to the part of the model.
 # Officially, they are draws from a random normal distribution using both the estimated mu and the estimated sigma. These results contain the full uncertainty of the posterior distribution of penguin weight.


 set.seed(12345)  # To get the same results as posterior_predict() from earlier

 postpred_manual <- model_normal |> 
  spread_draws(b_Intercept, b_flipper_length_mm, sigma) |> 
  mutate(mu = b_Intercept + 
           (b_flipper_length_mm * 
              penguins_avg_flipper$flipper_length_mm),  # This is posterior_linpred()
         y_new = rnorm(n(), mean = mu, sd = sigma))  # This is posterior_predict()

 p3_manual <- postpred_manual |> 
  ggplot(aes(x = y_new)) + 
  stat_halfeye(fill = colorspace::lighten(clrs[1], 0.5)) +
  scale_x_continuous(labels = label_comma()) +
  coord_cartesian(xlim = c(2900, 5500)) +
  labs(x = "Body mass (g)", y = NULL,
        title = "**Posterior predictions** <span style='font-size: 14px;'>Random draws from posterior Normal(*µ*, *σ*)</span>",
        subtitle = "rnorm(b_Intercept + (b_flipper_length_mm * 201), sigma)") +
  theme_pred_dist() +
  theme(plot.title = element_markdown())

 p3_manual | p3


 # Plug a flipper length of 201 mm into the posterior estimates of the intercept and slope to calculate the part of the model

 epred_manual <- model_normal |> 
  spread_draws(b_Intercept, b_flipper_length_mm, sigma) |> 
  mutate(
    mu = b_Intercept + (b_flipper_length_mm * penguins_avg_flipper$flipper_length_mm),
    y_new = rnorm(n(), mean = mu, sd = sigma)
  ) |> 
  print()


 # This is posterior_epred()
 epred_manual |> 
  summarize(epred = mean(y_new))


 # It's essentially the same as the actual posterior_epred()
 normal_epred |> 
  ungroup() |>
  summarize(epred = mean(.epred))

 epred_manual <- model_normal |> 
  spread_draws(b_Intercept, b_flipper_length_mm, sigma) |> 
  mutate(mu = b_Intercept + 
           (b_flipper_length_mm * 
              penguins_avg_flipper$flipper_length_mm),  # This is posterior_linpred()
         y_new = rnorm(n(), mean = mu, sd = sigma))  # This is posterior_predict()

 # This is posterior_epred()
 epred_manual |> 
  summarize(epred = mean(y_new))
 ## # A tibble: 1 × 1
 ##   epred
 ##   <dbl>
 ## 1 4204.

 # It's essentially the same as the actual posterior_epred()
 normal_epred |> 
  ungroup() |>
  summarize(epred = mean(.epred))
 ## # A tibble: 1 × 1
 ##   epred
 ##   <dbl>
 ## 1 4206.


 # Posterior predictions across a range of possible flipper lengths -------------

 p1 <- penguins |> 
  data_grid(flipper_length_mm = seq_range(flipper_length_mm, n = 100)) |> 
  add_linpred_draws(model_normal, ndraws = 100) |> 
  ggplot(aes(x = flipper_length_mm)) +
  stat_lineribbon(aes(y = .linpred), .width = 0.95,
                  alpha = 0.5, color = clrs[3], fill = clrs[3]) +
  geom_point(data = penguins, aes(y = body_mass_g), size = 1, alpha = 0.7) +
  scale_y_continuous(labels = label_comma()) +
  coord_cartesian(ylim = c(2000, 6000)) +
  labs(x = "Flipper length (mm)", y = "Body mass (g)",
       title = "**Linear predictor** <span style='font-size: 14px;'>*µ* in the model</span>",
       subtitle = "posterior_linpred()") +
  theme_pred_range()

 p2 <- penguins |> 
  data_grid(flipper_length_mm = seq_range(flipper_length_mm, n = 100)) |> 
  add_epred_draws(model_normal, ndraws = 100) |> 
  ggplot(aes(x = flipper_length_mm)) +
  stat_lineribbon(aes(y = .epred), .width = 0.95,
                  alpha = 0.5, color = clrs[2], fill = clrs[2]) +
  geom_point(data = penguins, aes(y = body_mass_g), size = 1, alpha = 0.7) +
  scale_y_continuous(labels = label_comma()) +
  coord_cartesian(ylim = c(2000, 6000)) +
  labs(x = "Flipper length (mm)", y = "Body mass (g)",
       title = "**Expectation of the posterior** <span style='font-size: 14px;'>E[*y*] and *µ* in the model</span>",
       subtitle = "posterior_epred()") +
  theme_pred_range()

 p3 <- penguins |> 
  data_grid(flipper_length_mm = seq_range(flipper_length_mm, n = 100)) |> 
  add_predicted_draws(model_normal, ndraws = 100) |> 
  ggplot(aes(x = flipper_length_mm)) +
  stat_lineribbon(aes(y = .prediction), .width = 0.95,
                  alpha = 0.5, color = clrs[1], fill = clrs[1]) +
  geom_point(data = penguins, aes(y = body_mass_g), size = 1, alpha = 0.7) +
  scale_y_continuous(labels = label_comma()) +
  coord_cartesian(ylim = c(2000, 6000)) +
  labs(x = "Flipper length (mm)", y = "Body mass (g)",
       title = "**Posterior predictions** <span style='font-size: 14px;'>Random draws from posterior Normal(*µ*, *σ*)</span>",
       subtitle = "posterior_predict()") +
  theme_pred_range()

 (p1 / plot_spacer() / p2 / plot_spacer() / p3) +
  plot_layout(heights = c(0.3, 0.05, 0.3, 0.05, 0.3))




 # Logistic regression example --------------------------------------------------

 # Generalized linear models: logistic, probit, ordered logistic, exponential, Poisson, negative binomial, etc.
 # Use special link functions (e.g. logit, log, etc.) to transform the likelihood of an outcome into a scale that is more amenable to linear regression.

 ggplot(penguins, aes(x = bill_length_mm, y = as.numeric(is_gentoo))) +
  geom_dots(aes(side = ifelse(is_gentoo, "bottom", "top")), 
            pch = 19, color = "grey20", scale = 0.2) +
  geom_smooth(method = "glm", method.args = list(family = binomial(link = "logit")),
              color = clrs[5], se = FALSE) +
  scale_y_continuous(labels = label_percent()) +
  labs(x = "Bill length (mm)", y = "Probability of being a Gentoo") +
  theme_pred()

 model_logit <- brm(
  bf(is_gentoo ~ bill_length_mm),
  family = bernoulli(link = "logit"),
  data = penguins
 )

 # Make a little dataset of just the average bill length and Extract different types of posteriors
 penguins_avg_bill <- penguins |> 
  summarize(bill_length_mm = mean(bill_length_mm))
 logit_linpred <- model_logit |> 
  linpred_draws(newdata = penguins_avg_bill)
 logit_epred <- model_logit |> 
  epred_draws(newdata = penguins_avg_bill)
 logit_predicted <- model_logit |> 
  predicted_draws(newdata = penguins_avg_bill)

 summary_logit_linpred <- logit_linpred |> 
  ungroup() |> 
  summarize(across(.linpred, lst(mean, sd, median), .names = "{.fn}"))

 summary_logit_epred <- logit_epred |> 
  ungroup() |> 
  summarize(across(.epred, lst(mean, sd, median), .names = "{.fn}"))

 summary_logit_predicted <- logit_predicted |> 
  ungroup() |> 
  summarize(across(.prediction, lst(mean), .names = "{.fn}"))

 tribble(
  ~Function, ~`Model element`, ~Values,
  "<code>posterior_linpred()</code>", "\\(\\operatorname{logit}(\\pi)\\) in the model", "Logits or log odds",
  "<code>posterior_linpred(transform = TRUE)</code> or <code>posterior_epred()</code>", "\\(\\operatorname{E(y)}\\) and \\(\\pi\\) in the model", "Probabilities",
  "<code>posterior_predict()</code>", "Random draws from posterior \\(\\operatorname{Binomial}(1, \\pi)\\)", "0s and 1s"
 ) |> 
  bind_cols(bind_rows(summary_logit_linpred, summary_logit_epred, summary_logit_predicted)) |> 
  kbl(escape = FALSE) |> 
  kable_styling()

 # The results from posterior_epred() and posterior_linpred() are on different scales
 # posterior_epred() provides results on the probability scale, un-logiting and back-transforming the results from posterior_linpred() (which provides results on the logit scale).
 # Technically, posterior_epred() isn’t just the back-transformed linear predictor (if you want that, you can use posterior_linpred(..., transform = TRUE)). More formally, posterior_epred() returns the expected values of the posterior, or , or the average of the posterior’s averages. But as with Gaussian regression, for mathy reasons this average-of-averages happens to be the same as the back-transformed 

 p1 <- ggplot(logit_linpred, aes(x = .linpred)) +
  stat_halfeye(fill = clrs[3]) +
  coord_cartesian(xlim = c(-1.5, -0.2)) +
  labs(x = "Logit-transformed probability of being a Gentoo", y = NULL,
       title = "**Linear predictor** <span style='font-size: 14px;'>logit(*π*) in the model</span>",
       subtitle = "posterior_linpred(..., tibble(bill_length_mm = 44))") +
  theme_pred_dist()

 p2 <- ggplot(logit_epred, aes(x = .epred)) +
  stat_halfeye(fill = clrs[2]) +
  scale_x_continuous(labels = label_percent()) +
  coord_cartesian(xlim = c(0.2, 0.45)) +
  labs(x = "Probability of being a Gentoo", y = NULL,
       title = "**Expectation of the posterior** <span style='font-size: 14px;'>E[*y*] and *π* in the model</span>",
       subtitle = "posterior_epred(..., tibble(bill_length_mm = 44))") +
  theme_pred_dist()

 p3 <- logit_predicted |> 
  count(is_gentoo = .prediction) |> 
  mutate(prop = n / sum(n),
         prop_nice = label_percent(accuracy = 0.1)(prop)) |> 
  ggplot(aes(x = factor(is_gentoo), y = n)) +
  geom_col(fill = clrs[1]) +
  geom_text(aes(label = prop_nice), nudge_y = -300, color = "white", size = 3) +
  scale_x_discrete(labels = c("Not Gentoo (0)", "Gentoo (1)")) +
  scale_y_continuous(labels = label_comma()) +
  labs(x = "Prediction of being a Gentoo", y = NULL,
       title = "**Posterior predictions** <span style='font-size: 14px;'>Random draws from posterior Binomial(1, *π*)</span>",
       subtitle = "posterior_predict(..., tibble(bill_length_mm = 44))") +
  theme_pred_range() +
  theme(panel.grid.major.x = element_blank())

 (p1 / plot_spacer() / p2 / plot_spacer() / p3) +
  plot_layout(heights = c(0.3, 0.05, 0.3, 0.05, 0.3))


 # Posterior predictions across a range of bill lengths  -----------------------

 pred_logit_gentoo <- tibble(bill_length_mm = c(35, 45, 55)) |> 
  add_predicted_draws(model_logit, ndraws = 500)

 pred_logit_gentoo_summary <- pred_logit_gentoo |> 
  group_by(bill_length_mm) |> 
  summarize(prop = mean(.prediction),
            prop_nice = paste0(label_percent(accuracy = 0.1)(prop), "\nGentoos"))

 p1 <- penguins |> 
  data_grid(bill_length_mm = seq_range(bill_length_mm, n = 100)) |> 
  add_linpred_draws(model_logit, ndraws = 100) |> 
  ggplot(aes(x = bill_length_mm)) +
  stat_lineribbon(aes(y = .linpred), .width = 0.95,
                  alpha = 0.5, color = clrs[3], fill = clrs[3]) +
  coord_cartesian(xlim = c(30, 60)) +
  labs(x = "Bill length (mm)", y = "Logit-transformed\nprobability of being a Gentoo",
       title = "**Linear predictor posterior** <span style='font-size: 14px;'>logit(*π*) in the model</span>",
       subtitle = "posterior_linpred()") +
  theme_pred_range()

 p2 <- penguins |> 
  data_grid(bill_length_mm = seq_range(bill_length_mm, n = 100)) |> 
  add_epred_draws(model_logit, ndraws = 100) |> 
  ggplot(aes(x = bill_length_mm)) +
  geom_dots(data = penguins, aes(y = as.numeric(is_gentoo), x = bill_length_mm, 
                                 side = ifelse(is_gentoo, "bottom", "top")), 
            pch = 19, color = "grey20", scale = 0.2) +
  stat_lineribbon(aes(y = .epred), .width = 0.95,
                  alpha = 0.5, color = clrs[2], fill = clrs[2]) +
  scale_y_continuous(labels = label_percent()) +
  coord_cartesian(xlim = c(30, 60)) +
  labs(x = "Bill length (mm)", y = "Probability of\nbeing a Gentoo",
       title = "**Expectation of the posterior** <span style='font-size: 14px;'>E[*y*] and *π* in the model</span>",
       subtitle = "posterior_epred()") +
  theme_pred_range()

 p3 <- ggplot(pred_logit_gentoo, aes(x = factor(bill_length_mm), y = .prediction)) +
  geom_point(position = position_jitter(width = 0.2, height = 0.1, seed = 1234),
             size = 0.75, alpha = 0.3, color = clrs[1]) +
  geom_text(data = pred_logit_gentoo_summary, aes(y = 0.5, label = prop_nice), size = 3) +  
  scale_y_continuous(breaks = c(0, 1), labels = c("Not\nGentoo", "Gentoo")) +
  labs(x = "Bill length (mm)", y = "Prediction of\nbeing a Gentoo",
       title = "**Posterior predictions** <span style='font-size: 14px;'>Random draws from posterior Binomial(1, *π*)</span>",
       subtitle = "posterior_predict()") +
  theme_pred_range() +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_blank(),
        axis.text.y = element_text(angle = 90, hjust = 0.5))

 (p1 / p2 / p3)


 # Beta regression example ------------------------------------------------------

 # Regression models often focus solely on the location parameter of the model (e.g., in ; in ). However, it is also possible to specify separate predictors for the scale or shape parameters of models (e.g., in , in ). In the world of brms, these are called distributional models.


 ggplot(penguins, aes(x = flipper_length_mm, y = bill_ratio)) +
  geom_point(size = 1, alpha = 0.7) +
  geom_smooth(method = "lm", color = clrs[5], se = FALSE) +
  labs(x = "Flipper length (mm)", y = "Ratio of bill depth / bill length") +
  theme_pred()

 model_beta <- brm(
  bf(bill_ratio ~ flipper_length_mm,
     phi ~ flipper_length_mm),
  family = Beta(),
  init = "0",
  data = penguins,
  prior = c(prior(normal(0, 1), class = "b"),
            prior(exponential(1), class = "b", dpar = "phi", lb = 0))
 )

 penguins_avg_flipper <- penguins |> 
  summarize(flipper_length_mm = mean(flipper_length_mm))

 # Extract different types of posteriors
 beta_linpred <- model_beta |> 
  linpred_draws(newdata = penguins_avg_flipper)
 beta_linpred_phi <- model_beta |> 
  linpred_draws(newdata = penguins_avg_flipper, dpar = "phi")
 beta_linpred_trans <- model_beta |> 
  linpred_draws(newdata = penguins_avg_flipper, transform = TRUE)
 beta_linpred_phi_trans <- model_beta |> 
  linpred_draws(newdata = penguins_avg_flipper, dpar = "phi", transform = TRUE)
 beta_epred <- model_beta |> 
  epred_draws(newdata = penguins_avg_flipper)
 beta_predicted <- model_beta |> 
  predicted_draws(newdata = penguins_avg_flipper)

 # Beta posteriors --------------------------------

 summary_beta_linpred <- beta_linpred |> 
  ungroup() |> 
  summarize(across(.linpred, lst(mean, sd, median), .names = "{.fn}"))

 summary_beta_linpred_phi <- beta_linpred_phi |> 
  ungroup() |> 
  summarize(across(phi, lst(mean, sd, median), .names = "{.fn}"))

 summary_beta_linpred_phi_trans <- beta_linpred_phi_trans |> 
  ungroup() |> 
  summarize(across(phi, lst(mean, sd, median), .names = "{.fn}"))

 summary_beta_epred <- beta_epred |> 
  ungroup() |> 
  summarize(across(.epred, lst(mean, sd, median), .names = "{.fn}"))

 summary_beta_predicted <- beta_predicted |> 
  ungroup() |> 
  summarize(across(.prediction, lst(mean, sd, median), .names = "{.fn}"))

 tribble(
  ~Function, ~`Model element`, ~Values,
  "<code>posterior_linpred()</code>", "\\(\\operatorname{logit}(\\mu)\\) in the model", "Logits or log odds",
  "<code>posterior_linpred(transform = TRUE)</code> or <code>posterior_epred()</code>", "\\(\\operatorname{E(y)}\\) and \\(\\mu\\) in the model", "Probabilities",
  '<code>posterior_linpred(dpar = "phi")</code>', "\\(\\log(\\phi)\\) in the model", "Logged precision values",
  '<code>posterior_linpred(dpar = "phi", transform = TRUE)</code>', "\\(\\phi\\) in the model", "Unlogged precision values",
  "<code>posterior_predict()</code>", "Random draws from posterior \\(\\operatorname{Beta}(\\mu, \\phi)\\)", "Values between 0–1"
 ) |> 
  bind_cols(bind_rows(summary_beta_linpred, summary_beta_epred, 
                      summary_beta_linpred_phi, summary_beta_linpred_phi_trans,
                      summary_beta_predicted)) |> 
  kbl(escape = FALSE) |> 
  kable_styling()

 p1 <- ggplot(beta_linpred, aes(x = .linpred)) +
  stat_halfeye(fill = clrs[3]) +
  labs(x = "Logit-scale ratio of bill depth / bill length", y = NULL,
       title = "**Linear predictor** <span style='font-size: 14px;'>logit(*µ*) in the model</span>",
       subtitle = "posterior_linpred(\n  ..., tibble(flipper_length_mm = 201))\n") +
  theme_pred_dist()

 p1a <- ggplot(beta_linpred_phi, aes(x = phi)) +
  stat_halfeye(fill = colorspace::lighten(clrs[3], 0.3)) +
  labs(x = "Log-scale precision parameter", y = NULL,
       title = "**Precision parameter** <span style='font-size: 14px;'>log(*φ*) in the model</span>",
       subtitle = 'posterior_linpred(\n  ..., tibble(flipper_length_mm = 201),\n  dpar = "phi")') +
  theme_pred_dist()

 p2 <- ggplot(beta_epred, aes(x = .epred)) +
  stat_halfeye(fill = clrs[2]) +
  labs(x = "Ratio of bill depth / bill length", y = NULL,
       title = "**Expectation of the posterior** <span style='font-size: 14px;'>E[*y*] or *µ* in the model</span>",
       subtitle = "posterior_epred(\n  ..., tibble(flipper_length_mm = 201)) # or \nposterior_linpred(..., transform = TRUE)") +
  theme_pred_dist()

 p2a <- ggplot(beta_linpred_phi_trans, aes(x = phi)) +
  stat_halfeye(fill = colorspace::lighten(clrs[2], 0.4)) +
  labs(x = "Precision parameter", y = NULL,
       title = "**Precision parameter** <span style='font-size: 14px;'>*φ* in the model</span>",
       subtitle = 'posterior_linpred(\n  ..., tibble(flipper_length_mm = 201),\n  dpar = "phi", transform = TRUE)\n') +
  theme_pred_dist()

 p3 <- ggplot(beta_predicted, aes(x = .prediction)) +
  stat_halfeye(fill = clrs[1]) +
  coord_cartesian(xlim = c(0.2, 0.6)) +
  labs(x = "Ratio of bill depth / bill length", y = NULL,
       title = "**Posterior predictions** <span style='font-size: 14px;'>Random draws from posterior Beta(*µ*, *φ*)</span>",
       subtitle = "posterior_predict()") +
  theme_pred_dist()

 layout <- "
 AB
 CC
 DE
 FF
 GG
 "

 p1 + p1a + plot_spacer() + p2 + p2a + plot_spacer() + p3 +
  plot_layout(design = layout, heights = c(0.3, 0.05, 0.3, 0.05, 0.3))

 # Posterior predictions for these different parameters across a range of flipper lengths


 p1 <- penguins |> 
  data_grid(flipper_length_mm = seq_range(flipper_length_mm, n = 100)) |> 
  add_linpred_draws(model_beta, ndraws = 100) |> 
  ggplot(aes(x = flipper_length_mm)) +
  geom_point(data = penguins, aes(y = qlogis(bill_ratio)), size = 1, alpha = 0.7) +
  stat_lineribbon(aes(y = .linpred), .width = 0.95,
                  alpha = 0.5, color = clrs[3], fill = clrs[3]) +
  coord_cartesian(xlim = c(170, 230)) +
  labs(x = "Flipper length (mm)", y = "Logit-scale ratio of\nbill depth / bill length",
       title = "**Linear predictor posterior** <span style='font-size: 14px;'>logit(*µ*) in the model</span>",
       subtitle = "posterior_linpred()") +
  theme_pred_range()

 p1a <- penguins |> 
  data_grid(flipper_length_mm = seq_range(flipper_length_mm, n = 100)) |> 
  add_linpred_draws(model_beta, ndraws = 100, dpar = "phi") |> 
  ggplot(aes(x = flipper_length_mm)) +
  stat_lineribbon(aes(y = phi), .width = 0.95, alpha = 0.5, 
                  color = colorspace::lighten(clrs[3], 0.3), fill = colorspace::lighten(clrs[3], 0.3)) +
  coord_cartesian(xlim = c(170, 230)) +
  labs(x = "Flipper length (mm)", y = "Log-scale\nprecision parameter",
       title = "**Precision parameter** <span style='font-size: 14px;'>log(*φ*) in the model</span>",
       subtitle = 'posterior_linpred(dpar = "phi")') +
  theme_pred_range()

 p2 <- penguins |> 
  data_grid(flipper_length_mm = seq_range(flipper_length_mm, n = 100)) |> 
  add_epred_draws(model_beta, ndraws = 100) |> 
  ggplot(aes(x = flipper_length_mm)) +
  geom_point(data = penguins, aes(y = bill_ratio), size = 1, alpha = 0.7) +
  stat_lineribbon(aes(y = .epred), .width = 0.95,
                  alpha = 0.5, color = clrs[2], fill = clrs[2]) +
  coord_cartesian(xlim = c(170, 230)) +
  labs(x = "Flipper length (mm)", y = "Ratio of\nbill depth / bill length",
       title = "**Expectation of the posterior** <span style='font-size: 14px;'>E[*y*] or *µ* in the model</span>",
       subtitle = 'posterior_epred()\nposterior_linpred(transform = TRUE)') +
  theme_pred_range()

 p2a <- penguins |> 
  data_grid(flipper_length_mm = seq_range(flipper_length_mm, n = 100)) |> 
  add_epred_draws(model_beta, ndraws = 100, dpar = "phi") |> 
  ggplot(aes(x = flipper_length_mm)) +
  stat_lineribbon(aes(y = phi), .width = 0.95, alpha = 0.5, 
                  color = colorspace::lighten(clrs[2], 0.4), fill = colorspace::lighten(clrs[2], 0.4)) +
  coord_cartesian(xlim = c(170, 230)) +
  labs(x = "Flipper length (mm)", y = "Precision parameter",
       title = "**Precision parameter** <span style='font-size: 14px;'>*φ* in the model</span>",
       subtitle = 'posterior_linpred(dpar = "phi",\n                  transform = TRUE)') +
  theme_pred_range()

 p3 <- penguins |> 
  data_grid(flipper_length_mm = seq_range(flipper_length_mm, n = 100)) |> 
  add_predicted_draws(model_beta, ndraws = 500) |> 
  ggplot(aes(x = flipper_length_mm)) +
  geom_point(data = penguins, aes(y = bill_ratio), size = 1, alpha = 0.7) +
  stat_lineribbon(aes(y = .prediction), .width = 0.95,
                  alpha = 0.5, color = clrs[1], fill = clrs[1]) +
  coord_cartesian(xlim = c(170, 230)) +
  labs(x = "Flipper length (mm)", y = "Ratio of\nbill depth / bill length",
       title = "**Posterior predictions** <span style='font-size: 14px;'>Random draws from posterior Beta(*µ*, *φ*)</span>",
       subtitle = "posterior_predict()") +
  theme_pred_range()

 layout <- "
 AB
 CC
 DE
 FF
 GG
 "

 p1 + p1a + plot_spacer() + p2 + p2a + plot_spacer() + p3 +
  plot_layout(design = layout, heights = c(0.3, 0.05, 0.3, 0.05, 0.3))


 # Playing with posterior beta parameters

 mu <- summary_beta_epred$mean
 phi <- summary_beta_linpred_phi_trans$mean

 ggplot(penguins, aes(x = bill_ratio)) +
  geom_density(aes(fill = "Actual data"), color = NA) +
  stat_function(
    aes(fill = glue::glue("Beta(µ = {round(mu, 3)}, φ = {round(phi, 2)})")),
    geom = "area", fun = ~ extraDistr::dprop(., mean = mu, size = phi),
    alpha = 0.7
  ) +
  scale_fill_manual(values = c(clrs[5], clrs[1]), name = NULL) +
  xlim(c(0.2, 0.65)) +
  labs(x = "Ratio of bill depth / bill length", y = NULL,
       title = "**Analytical posterior predictions** <span style='font-size: 14px;'>Average posterior *µ* and *φ* from the model</span>") +
  theme_pred_dist() +
  theme(legend.position = c(0, 0.9),
        legend.justification = "left",
        legend.key.size = unit(0.75, "lines"))


 muphi_to_shapes <- function(mu, phi) {
  shape1 <- mu * phi
  shape2 <- (1 - mu) * phi
  return(lst(shape1 = shape1, shape2 = shape2))
 }

 beta_posteriors <- tibble(flipper_length_mm = c(180, 200, 220)) |> 
  add_linpred_draws(model_beta, ndraws = 500, dpar = TRUE, transform = TRUE) |> 
  group_by(flipper_length_mm) |> 
  summarize(across(c(mu, phi), ~mean(.))) |> 
  ungroup() |> 
  mutate(shapes = map2(mu, phi, ~as_tibble(muphi_to_shapes(.x, .y)))) |> 
  unnest(shapes) |> 
  mutate(nice_label = glue::glue("Beta(µ = {round(mu, 3)}, φ = {round(phi, 2)})"))

 # Here are the parameters we'll use
 # We need to convert the mu and phi values to shape1 and shape2 so that we can
 # use dist_beta() to plot the halfeye distributions correctly
 beta_posteriors
 ## # A tibble: 3 × 6
 ##   flipper_length_mm    mu   phi shape1 shape2 nice_label                
 ##               <dbl> <dbl> <dbl>  <dbl>  <dbl> <glue>                    
 ## 1               180 0.485  57.3   27.8   29.5 Beta(µ = 0.485, φ = 57.29)
 ## 2               200 0.400 104.    41.5   62.4 Beta(µ = 0.4, φ = 103.92) 
 ## 3               220 0.320 191.    61.3  130.  Beta(µ = 0.32, φ = 191.31)

 penguins |> 
  data_grid(flipper_length_mm = seq_range(flipper_length_mm, n = 100)) |> 
  add_predicted_draws(model_beta, ndraws = 500) |> 
  ggplot(aes(x = flipper_length_mm)) +
  geom_point(data = penguins, aes(y = bill_ratio), size = 1, alpha = 0.7) +
  stat_halfeye(data = beta_posteriors, aes(ydist = dist_beta(shape1, shape2), y = NULL), 
                side = "bottom", fill = clrs[1], alpha = 0.75) +
  stat_lineribbon(aes(y = .prediction), .width = 0.95,
                  alpha = 0.1, color = clrs[1], fill = clrs[1]) +
  geom_text(data = beta_posteriors, 
            aes(x = flipper_length_mm, y = 0.9, label = nice_label),
            hjust = 0.5) +
  coord_cartesian(xlim = c(170, 230)) +
  labs(x = "Flipper length (mm)", y = "Ratio of\nbill depth / bill length",
        title = "**Analytical posterior predictions** <span style='font-size: 14px;'>Average posterior *µ* and *φ* from the model</span>") +
  theme_pred_range()