Skip to content

Instantly share code, notes, and snippets.

@thomvolker
Last active January 11, 2024 09:51
Show Gist options
  • Save thomvolker/b217701a654ff732d4aa2da660f61b91 to your computer and use it in GitHub Desktop.
Save thomvolker/b217701a654ff732d4aa2da660f61b91 to your computer and use it in GitHub Desktop.
Density ratio estimation with and without an intercept

Density ratio estimation with and without intercept

When performing density ratio estimation, the regularization parameter $\lambda$ causes the estimated kernel weights to be shrunken towards zero. However, a ratio is least complex when it is one, rather than zero. Hence, the regularization yields a bias that implies more mass in the denominator samples (compared) to the numerator samples. By adding an intercept, we can mitigate the bias, but not completely remove it. This is the case because the intercept is also regularized, and hence shrunken towards zero.

Density ratio estimation without intercept ($n = 250$)

library(densityratio)
library(ggplot2)
library(furrr)
library(purrr)

set.seed(1)

plan(multisession)

dlaplace <- function(x, mu = 0, sd = 1) exp(-abs(x-mu)/(sd / sqrt(2))) / (2*(sd / sqrt(2))) 
rlaplace <- function(n, mu = 0, sd = 1) {
  p <- runif(n)
  b <- sd / sqrt(2)
  mu - b * sign(p - 0.5) * log(1 - 2*abs(p - 0.5))
}
dratio_lap_norm <- function(x, mu = 0, sd = 1) {
  dnorm(x, mu, sd) / dlaplace(x, mu, sd)
}

dratio_lnorm_norm <- function(x, mu = 0, sd = 1) {
  mean_rlnorm <- log(mu^2 / sqrt(mu^2 + sd^2))
  sd_rlnorm <- sqrt(log(1 + sd^2 / mu^2))
  dnorm(x, mu, sd) / dlnorm(x, mean_rlnorm, sd_rlnorm)
}

dratio_t_norm <- function(x, mu = 0, sd = 1) {
  df <- 2 / (1 - 1/sd^2)
  dnorm(x - mu, 0, sd) / dt(x - mu, df)
}

dratio_norm_norm <- function(x, mu = 0, sd = 1) {
  dnorm(x, mu, sd) / dnorm(x, mu, sd)
}

mu <- 1
sd <- sqrt(2)

nsim <- 100
n <- 250
mu <- 1
sd <- sqrt(2)

x_eval <- seq(-3, 5, length.out = 1000) |> matrix()

plot_dr <- function(x_eval, predict_list, dr_fun, dr_fun_args) {
  nsim <- length(predict_list)
  ggplot() +
    geom_line(mapping = aes(x = rep(x_eval, nsim),
                            y = unlist(predict_list),
                            group = rep(1:nrow(x_eval), each = nsim)),
              alpha = 0.1, col = "#014c61") +
    stat_function(fun = dr_fun, args = dr_fun_args) +
    theme_minimal() +
    ylab(expression(hat(italic(r)))) +
    xlab(NULL)
}

dat_lap_norm <- purrr::map(1:nsim, ~list(laplace = rlaplace(n, mu, sd),
                                         norm = rnorm(n, mu, sd)))

dat_lnorm_norm <- purrr::map(1:nsim, ~list(lnorm = rlnorm(n,
                                                          log(mu^2 / sqrt(mu^2 + sd^2)), 
                                                          sqrt(log(1 + sd^2/mu^2))),
                                           norm = rnorm(n, mu, sd)))

dat_t_norm <- purrr::map(1:nsim, ~list(t = rt(n, 2/(1 - 1/sd^2)) + mu,
                                       norm = rnorm(n, mu, sd)))

dat_norm_norm <- purrr::map(1:nsim, ~list(norm1 = rnorm(n, mu, sd),
                                          norm2 = rnorm(n, mu, sd)))

lap_norm_dr <- future_map(dat_lap_norm, ~ulsif(.x$norm, 
                                               .x$laplace,
                                               intercept = FALSE,
                                               progressbar = FALSE),
                          .options = furrr_options(seed = TRUE))

lnorm_norm_dr <- future_map(dat_lnorm_norm, ~ulsif(.x$norm,
                                                   .x$lnorm, 
                                                   intercept = FALSE,
                                                   progressbar = FALSE),
                            .options = furrr_options(seed = TRUE), .progress = TRUE)

t_norm_dr <- future_map(dat_t_norm, ~ulsif(.x$norm,
                                           .x$t, 
                                           intercept = FALSE,
                                           progressbar = FALSE),
                        .options = furrr_options(seed = TRUE))

norm_norm_dr <- future_map(dat_norm_norm, ~ulsif(.x$norm1,
                                                 .x$norm2, 
                                                 intercept = FALSE,
                                                 progressbar = FALSE),
                           .options = furrr_options(seed = TRUE))

lap_norm_predict   <- map(lap_norm_dr, ~predict(.x, newdata = x_eval))
lnorm_norm_predict <- map(lnorm_norm_dr, ~ predict(.x, newdata = x_eval))
t_norm_predict     <- map(t_norm_dr, ~predict(.x, newdata = x_eval))
norm_norm_predict  <- map(norm_norm_dr, ~ predict(.x, newdata = x_eval))

list(
  plot_dr(x_eval, lap_norm_predict, dratio_lap_norm, list(mu = mu, sd = sd)) +
    ggtitle(expression(italic(P[Normal])/italic(P[Laplace]))) +
    ylim(-1, 4) +
    theme(text = element_text(family = "LM Roman 10", size = 9)),
  plot_dr(x_eval, lnorm_norm_predict, dratio_lnorm_norm, list(mu = mu, sd = sd)) +
    ggtitle(expression(italic(P[Normal])/italic(P[`Log-normal`]))) +
    ylim(-1, 10) +
    theme(text = element_text(family = "LM Roman 10", size = 9)),
  plot_dr(x_eval, t_norm_predict, dratio_t_norm, list(mu = mu, sd = sd)) +
    ggtitle(expression(italic(P[Normal])/italic(P[lst]))) +
    ylim(-1, 4) +
    theme(text = element_text(family = "LM Roman 10", size = 9)),
  plot_dr(x_eval, norm_norm_predict, dratio_norm_norm, list(mu = mu, sd = sd)) +
    ggtitle(expression(italic(P[Normal])/italic(P[Normal]))) +
    ylim(-1, 4) +
    theme(text = element_text(family = "LM Roman 10", size = 9))
) |>
  patchwork::wrap_plots(ncol = 2, byrow = TRUE)

unnamed-chunk-1-1

Density ratio estimation with intercept ($n = 250$)

lap_norm_dr_int <- future_map(dat_lap_norm, ~ulsif(.x$norm, 
                                                   .x$laplace,
                                                   progressbar = FALSE),
                              .options = furrr_options(seed = TRUE))

lnorm_norm_dr_int <- future_map(dat_lnorm_norm, ~ulsif(.x$norm,
                                                       .x$lnorm, 
                                                       progressbar = FALSE),
                                .options = furrr_options(seed = TRUE), .progress = TRUE)

t_norm_dr_int <- future_map(dat_t_norm, ~ulsif(.x$norm,
                                               .x$t,
                                               progressbar = FALSE),
                            .options = furrr_options(seed = TRUE))

norm_norm_dr_int <- future_map(dat_norm_norm, ~ulsif(.x$norm1,
                                                     .x$norm2, 
                                                     progressbar = FALSE),
                               .options = furrr_options(seed = TRUE))

lap_norm_predict_int   <- map(lap_norm_dr_int, ~predict(.x, newdata = x_eval))
lnorm_norm_predict_int <- map(lnorm_norm_dr_int, ~ predict(.x, newdata = x_eval))
t_norm_predict_int     <- map(t_norm_dr_int, ~predict(.x, newdata = x_eval))
norm_norm_predict_int  <- map(norm_norm_dr_int, ~ predict(.x, newdata = x_eval))

list(
  plot_dr(x_eval, lap_norm_predict_int, dratio_lap_norm, list(mu = mu, sd = sd)) +
    ggtitle(expression(italic(P[Normal])/italic(P[Laplace]))) +
    ylim(-1, 4) +
    theme(text = element_text(family = "LM Roman 10", size = 9)),
  plot_dr(x_eval, lnorm_norm_predict_int, dratio_lnorm_norm, list(mu = mu, sd = sd)) +
    ggtitle(expression(italic(P[Normal])/italic(P[`Log-normal`]))) +
    ylim(-1, 10) +
    theme(text = element_text(family = "LM Roman 10", size = 9)),
  plot_dr(x_eval, t_norm_predict_int, dratio_t_norm, list(mu = mu, sd = sd)) +
    ggtitle(expression(italic(P[Normal])/italic(P[lst]))) +
    ylim(-1, 4) +
    theme(text = element_text(family = "LM Roman 10", size = 9)),
  plot_dr(x_eval, norm_norm_predict_int, dratio_norm_norm, list(mu = mu, sd = sd)) +
    ggtitle(expression(italic(P[Normal])/italic(P[Normal]))) +
    ylim(-1, 4) +
    theme(text = element_text(family = "LM Roman 10", size = 9))
) |>
  patchwork::wrap_plots(ncol = 2, byrow = TRUE)

unnamed-chunk-2-1

Density ratio estimation without intercept ($n = 1000$)

n <- 1000

dat_lap_norm <- purrr::map(1:nsim, ~list(laplace = rlaplace(n, mu, sd),
                                         norm = rnorm(n, mu, sd)))

dat_lnorm_norm <- purrr::map(1:nsim, ~list(lnorm = rlnorm(n,
                                                          log(mu^2 / sqrt(mu^2 + sd^2)), 
                                                          sqrt(log(1 + sd^2/mu^2))),
                                           norm = rnorm(n, mu, sd)))

dat_t_norm <- purrr::map(1:nsim, ~list(t = rt(n, 2/(1 - 1/sd^2)) + mu,
                                       norm = rnorm(n, mu, sd)))

dat_norm_norm <- purrr::map(1:nsim, ~list(norm1 = rnorm(n, mu, sd),
                                          norm2 = rnorm(n, mu, sd)))

lap_norm_dr <- future_map(dat_lap_norm, ~ulsif(.x$norm, 
                                               .x$laplace,
                                               intercept = FALSE,
                                               progressbar = FALSE),
                          .options = furrr_options(seed = TRUE))

lnorm_norm_dr <- future_map(dat_lnorm_norm, ~ulsif(.x$norm,
                                                   .x$lnorm, 
                                                   intercept = FALSE,
                                                   progressbar = FALSE),
                            .options = furrr_options(seed = TRUE), .progress = TRUE)

t_norm_dr <- future_map(dat_t_norm, ~ulsif(.x$norm,
                                           .x$t, 
                                           intercept = FALSE,
                                           progressbar = FALSE),
                        .options = furrr_options(seed = TRUE))

norm_norm_dr <- future_map(dat_norm_norm, ~ulsif(.x$norm1,
                                                 .x$norm2, 
                                                 intercept = FALSE,
                                                 progressbar = FALSE),
                           .options = furrr_options(seed = TRUE))

lap_norm_predict   <- map(lap_norm_dr, ~predict(.x, newdata = x_eval))
lnorm_norm_predict <- map(lnorm_norm_dr, ~ predict(.x, newdata = x_eval))
t_norm_predict     <- map(t_norm_dr, ~predict(.x, newdata = x_eval))
norm_norm_predict  <- map(norm_norm_dr, ~ predict(.x, newdata = x_eval))

list(
  plot_dr(x_eval, lap_norm_predict, dratio_lap_norm, list(mu = mu, sd = sd)) +
    ggtitle(expression(italic(P[Normal])/italic(P[Laplace]))) +
    ylim(-1, 4) +
    theme(text = element_text(family = "LM Roman 10", size = 9)),
  plot_dr(x_eval, lnorm_norm_predict, dratio_lnorm_norm, list(mu = mu, sd = sd)) +
    ggtitle(expression(italic(P[Normal])/italic(P[`Log-normal`]))) +
    ylim(-1, 10) +
    theme(text = element_text(family = "LM Roman 10", size = 9)),
  plot_dr(x_eval, t_norm_predict, dratio_t_norm, list(mu = mu, sd = sd)) +
    ggtitle(expression(italic(P[Normal])/italic(P[lst]))) +
    ylim(-1, 4) +
    theme(text = element_text(family = "LM Roman 10", size = 9)),
  plot_dr(x_eval, norm_norm_predict, dratio_norm_norm, list(mu = mu, sd = sd)) +
    ggtitle(expression(italic(P[Normal])/italic(P[Normal]))) +
    ylim(-1, 4) +
    theme(text = element_text(family = "LM Roman 10", size = 9))
) |>
  patchwork::wrap_plots(ncol = 2, byrow = TRUE)

unnamed-chunk-3-1

Density ratio estimation with intercept ($n = 1000$)

lap_norm_dr_int <- future_map(dat_lap_norm, ~ulsif(.x$norm, 
                                                   .x$laplace,
                                                   progressbar = FALSE),
                              .options = furrr_options(seed = TRUE))

lnorm_norm_dr_int <- future_map(dat_lnorm_norm, ~ulsif(.x$norm,
                                                       .x$lnorm, 
                                                       progressbar = FALSE),
                                .options = furrr_options(seed = TRUE), .progress = TRUE)

t_norm_dr_int <- future_map(dat_t_norm, ~ulsif(.x$norm,
                                               .x$t,
                                               progressbar = FALSE),
                            .options = furrr_options(seed = TRUE))

norm_norm_dr_int <- future_map(dat_norm_norm, ~ulsif(.x$norm1,
                                                     .x$norm2, 
                                                     progressbar = FALSE),
                               .options = furrr_options(seed = TRUE))

lap_norm_predict_int   <- map(lap_norm_dr_int, ~predict(.x, newdata = x_eval))
lnorm_norm_predict_int <- map(lnorm_norm_dr_int, ~ predict(.x, newdata = x_eval))
t_norm_predict_int     <- map(t_norm_dr_int, ~predict(.x, newdata = x_eval))
norm_norm_predict_int  <- map(norm_norm_dr_int, ~ predict(.x, newdata = x_eval))

list(
  plot_dr(x_eval, lap_norm_predict_int, dratio_lap_norm, list(mu = mu, sd = sd)) +
    ggtitle(expression(italic(P[Normal])/italic(P[Laplace]))) +
    ylim(-1, 4) +
    theme(text = element_text(family = "LM Roman 10", size = 9)),
  plot_dr(x_eval, lnorm_norm_predict_int, dratio_lnorm_norm, list(mu = mu, sd = sd)) +
    ggtitle(expression(italic(P[Normal])/italic(P[`Log-normal`]))) +
    ylim(-1, 10) +
    theme(text = element_text(family = "LM Roman 10", size = 9)),
  plot_dr(x_eval, t_norm_predict_int, dratio_t_norm, list(mu = mu, sd = sd)) +
    ggtitle(expression(italic(P[Normal])/italic(P[lst]))) +
    ylim(-1, 4) +
    theme(text = element_text(family = "LM Roman 10", size = 9)),
  plot_dr(x_eval, norm_norm_predict_int, dratio_norm_norm, list(mu = mu, sd = sd)) +
    ggtitle(expression(italic(P[Normal])/italic(P[Normal]))) +
    ylim(-1, 4) +
    theme(text = element_text(family = "LM Roman 10", size = 9))
) |>
  patchwork::wrap_plots(ncol = 2, byrow = TRUE)

unnamed-chunk-4-1

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment