Skip to content

Instantly share code, notes, and snippets.

@vankesteren
Last active December 9, 2023 19:50
Show Gist options
  • Save vankesteren/e270094fd59d2c9b73a5d453b3c38964 to your computer and use it in GitHub Desktop.
Save vankesteren/e270094fd59d2c9b73a5d453b3c38964 to your computer and use it in GitHub Desktop.
Marginal density ratio plot
#' Marginal density ratio plot
#'
#' A plot to compare two (continuous) distributions in the
#' relative number of occurrences on a particular variable.
#' The transparency of the background histogram indicates
#' how much data is available at that location.
#'
#' @param dr_fit fitted model from the densityratio package
#' @param var <[`data-masked`][dplyr::dplyr_data_masking]> variable from the data
#'
#' @return A ggplot object
#'
#' @details
#' The smooth is computed marginally using a default thin plate spline model.
#'
#' @export
plot_univariate <- function(dr_fit, var, smooth = TRUE) {
df <- bind_rows(
as_tibble(dr_fit$df_numerator),
as_tibble(dr_fit$df_denominator)
)
df$ratio__ <- c(predict(dr_fit, newdata = df))
df$lratio__ <- suppressWarnings(log(df$ratio__))
df$trans__ <- 1 - binomial()$linkinv(df$lratio__)
df$src__ <- as_factor(c(rep("n", nrow(dr_fit$df_numerator)), rep("d", nrow(dr_fit$df_denominator))))
frm <- as.formula(rlang::englue('lratio__ ~ s({{ var }}, bs = "tp")'))
gam_mod <- mgcv::gam(frm, data = df)
df$rsmooth__ <- predict(gam_mod, newdata = df)
df$tsmooth__ <- 1 - binomial()$linkinv(df$rsmooth__)
nbins_fd <- nclass.FD(df |> pull({{var}}))
ggplot(df, aes(x = {{ var }})) +
geom_histogram(
mapping = aes(fill = src__, alpha = after_stat(ncount)),
position = position_fill(),
bins = nbins_fd
) +
geom_hline(yintercept = 0.5, lty = 2, colour = "#343434") +
geom_point(
aes(y = trans__),
alpha = 0.1,
pch = 21,
fill = "black",
colour = "transparent",
size = 1.3,
na.rm = TRUE
) +
geom_line(aes(y = tsmooth__), alpha = 0.9) +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
scale_fill_manual(values = c("firebrick", "steelblue"), guide = "none") +
scale_alpha_continuous(guide = "none") +
theme_minimal() +
theme(
axis.text.y = element_blank(),
panel.border = element_rect(fill = NA, colour = "#343434", linewidth = .8)
) +
labs(
y = "",
fill = "",
alpha = "",
title = rlang::englue("Marginal distribution comparison: {{var}}")
)
}
@vankesteren
Copy link
Author

vankesteren commented Dec 8, 2023

library(densityratio)
library(tidyverse)
source("https://gist.githubusercontent.com/vankesteren/e270094fd59d2c9b73a5d453b3c38964/raw/4c6c09b0cd89df5f0c750f947138f8a04b8b86b5/drplot_marginal.R")
fit_u <- ulsif(numerator_data |> select(x3:x5), denominator_data |> select(x3:x5))
plot_univariate(fit_u, x5)

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment