Skip to content

Instantly share code, notes, and snippets.

@vankesteren
Last active October 20, 2022 10:56
Show Gist options
  • Save vankesteren/eb18bb7e93e66fab2300d68c5a1ea1ac to your computer and use it in GitHub Desktop.
Save vankesteren/eb18bb7e93e66fab2300d68c5a1ea1ac to your computer and use it in GitHub Desktop.
Low-dimensional visualisation and density ratio estimation
# Low-dimensional visualisation and density ratio estimation
library(tidyverse)
library(patchwork)
library(umap)
library(lfda)
library(densratio)
# generate data
N <- 500
P <- 3
rho_1 <- 0
mu_1 <- 0
rho_2 <- 0.5
mu_2 <- 0
S1 <- matrix(rho_1, P, P)
diag(S1) <- 1
X1 <- matrix(rnorm(N*P, mu_1), N) %*% chol(S1)
S2 <- matrix(rho_2, P, P)
diag(S2) <- 1
X2 <- matrix(rnorm(N*P, mu_2), N) %*% chol(S2)
id <- rep(c(0, 1), each = N)
# PCA
p_pca <-
rbind(X1, X2) |>
princomp() |>
(\(x) x$scores)() |>
as_tibble() |>
mutate(id = factor(id)) |>
ggplot(aes(x = Comp.1, y = Comp.2, fill = id, colour = id)) +
geom_point() +
theme_minimal() +
ggtitle("PCA")
# Local discriminant analysis
res_lfda <- lfda(x = rbind(X1, X2), y = id, r = 2, metric = "orthonormalized")
p_lfda <-
res_lfda$Z |>
as_tibble() |>
mutate(id = factor(id)) |>
ggplot(aes(x = V1, y = V2)) +
geom_point(aes(colour = id)) +
theme_minimal() +
ggtitle("LFDA")
# using umap
res_umap <- umap(rbind(X1, X2))
p_umap <-
res_umap$layout |>
as_tibble() |>
mutate(id = factor(id)) |>
ggplot(aes(x = V1, y = V2, fill = id, colour = id)) +
geom_point() +
theme_minimal() +
ggtitle("UMAP")
p_pca + p_lfda + p_umap %+% coord_fixed()
# Density ratio plotting and estimation (Sugiyama et al., 2018)
density_ratio_plot <- function(df_true, df_syn) {
# perform lfda
id <- factor(c(rep("true", nrow(df_true)), rep("syn", nrow(df_syn))))
projected <- lfda(x = bind_rows(df_true, df_syn), y = id, r = 2, metric = "orthonormalized")
Z <- as_tibble(projected$Z) |> set_names(c("dim_1", "dim_2"))
# compute density ratio on low-dimensional projection
dr <- densratio(x1 = Z |> filter(id == "true"), x2 = Z |> filter(id == "syn"), verbose = FALSE)
# estimate log-density ratio for synthetic data
dr_syn <- -log(dr$compute_density_ratio(Z |> filter(id == "syn")))
# create grid
dr_grid <- expand_grid(
dim_1 = seq(min(Z$dim_1), max(Z$dim_1), length.out = 100),
dim_2 = seq(min(Z$dim_2), max(Z$dim_2), length.out = 100)
)
# estimate DR for whole grid
dr_grid$R <- dr$compute_density_ratio(as.matrix(dr_grid))
# create plot
plt <-
ggplot(dr_grid, aes(x = dim_1, y = dim_2)) +
geom_contour_filled(aes(z = log(R)), alpha = 0.3) +
geom_contour(aes(z = log(R)), colour = "grey") +
geom_point(data = Z, aes(colour = id)) +
theme_minimal() +
labs(
title = "Density ratio plot",
subtitle = paste("E[log-DR] of synth. data:",
round(mean(dr_syn), 3), "\u00b1", # mean
round(sd(dr_syn)/sqrt(length(dr_syn)), 3)), # standard error
x = "Dimension 1",
y = "Dimension 2"
) +
scale_fill_viridis_d(guide = "none") +
coord_fixed()
plt
}
density_ratio_plot(as_tibble(X1), as_tibble(X2))
@vankesteren
Copy link
Author

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment