Last active
December 9, 2023 19:50
-
-
Save vankesteren/e270094fd59d2c9b73a5d453b3c38964 to your computer and use it in GitHub Desktop.
Marginal density ratio plot
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Marginal density ratio plot | |
#' | |
#' A plot to compare two (continuous) distributions in the | |
#' relative number of occurrences on a particular variable. | |
#' The transparency of the background histogram indicates | |
#' how much data is available at that location. | |
#' | |
#' @param dr_fit fitted model from the densityratio package | |
#' @param var <[`data-masked`][dplyr::dplyr_data_masking]> variable from the data | |
#' | |
#' @return A ggplot object | |
#' | |
#' @details | |
#' The smooth is computed marginally using a default thin plate spline model. | |
#' | |
#' @export | |
plot_univariate <- function(dr_fit, var, smooth = TRUE) { | |
df <- bind_rows( | |
as_tibble(dr_fit$df_numerator), | |
as_tibble(dr_fit$df_denominator) | |
) | |
df$ratio__ <- c(predict(dr_fit, newdata = df)) | |
df$lratio__ <- suppressWarnings(log(df$ratio__)) | |
df$trans__ <- 1 - binomial()$linkinv(df$lratio__) | |
df$src__ <- as_factor(c(rep("n", nrow(dr_fit$df_numerator)), rep("d", nrow(dr_fit$df_denominator)))) | |
frm <- as.formula(rlang::englue('lratio__ ~ s({{ var }}, bs = "tp")')) | |
gam_mod <- mgcv::gam(frm, data = df) | |
df$rsmooth__ <- predict(gam_mod, newdata = df) | |
df$tsmooth__ <- 1 - binomial()$linkinv(df$rsmooth__) | |
nbins_fd <- nclass.FD(df |> pull({{var}})) | |
ggplot(df, aes(x = {{ var }})) + | |
geom_histogram( | |
mapping = aes(fill = src__, alpha = after_stat(ncount)), | |
position = position_fill(), | |
bins = nbins_fd | |
) + | |
geom_hline(yintercept = 0.5, lty = 2, colour = "#343434") + | |
geom_point( | |
aes(y = trans__), | |
alpha = 0.1, | |
pch = 21, | |
fill = "black", | |
colour = "transparent", | |
size = 1.3, | |
na.rm = TRUE | |
) + | |
geom_line(aes(y = tsmooth__), alpha = 0.9) + | |
scale_x_continuous(expand = c(0, 0)) + | |
scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) + | |
scale_fill_manual(values = c("firebrick", "steelblue"), guide = "none") + | |
scale_alpha_continuous(guide = "none") + | |
theme_minimal() + | |
theme( | |
axis.text.y = element_blank(), | |
panel.border = element_rect(fill = NA, colour = "#343434", linewidth = .8) | |
) + | |
labs( | |
y = "", | |
fill = "", | |
alpha = "", | |
title = rlang::englue("Marginal distribution comparison: {{var}}") | |
) | |
} |
Author
vankesteren
commented
Dec 8, 2023
•
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment