Last active
October 20, 2022 10:56
-
-
Save vankesteren/eb18bb7e93e66fab2300d68c5a1ea1ac to your computer and use it in GitHub Desktop.
Low-dimensional visualisation and density ratio estimation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Low-dimensional visualisation and density ratio estimation | |
library(tidyverse) | |
library(patchwork) | |
library(umap) | |
library(lfda) | |
library(densratio) | |
# generate data | |
N <- 500 | |
P <- 3 | |
rho_1 <- 0 | |
mu_1 <- 0 | |
rho_2 <- 0.5 | |
mu_2 <- 0 | |
S1 <- matrix(rho_1, P, P) | |
diag(S1) <- 1 | |
X1 <- matrix(rnorm(N*P, mu_1), N) %*% chol(S1) | |
S2 <- matrix(rho_2, P, P) | |
diag(S2) <- 1 | |
X2 <- matrix(rnorm(N*P, mu_2), N) %*% chol(S2) | |
id <- rep(c(0, 1), each = N) | |
# PCA | |
p_pca <- | |
rbind(X1, X2) |> | |
princomp() |> | |
(\(x) x$scores)() |> | |
as_tibble() |> | |
mutate(id = factor(id)) |> | |
ggplot(aes(x = Comp.1, y = Comp.2, fill = id, colour = id)) + | |
geom_point() + | |
theme_minimal() + | |
ggtitle("PCA") | |
# Local discriminant analysis | |
res_lfda <- lfda(x = rbind(X1, X2), y = id, r = 2, metric = "orthonormalized") | |
p_lfda <- | |
res_lfda$Z |> | |
as_tibble() |> | |
mutate(id = factor(id)) |> | |
ggplot(aes(x = V1, y = V2)) + | |
geom_point(aes(colour = id)) + | |
theme_minimal() + | |
ggtitle("LFDA") | |
# using umap | |
res_umap <- umap(rbind(X1, X2)) | |
p_umap <- | |
res_umap$layout |> | |
as_tibble() |> | |
mutate(id = factor(id)) |> | |
ggplot(aes(x = V1, y = V2, fill = id, colour = id)) + | |
geom_point() + | |
theme_minimal() + | |
ggtitle("UMAP") | |
p_pca + p_lfda + p_umap %+% coord_fixed() | |
# Density ratio plotting and estimation (Sugiyama et al., 2018) | |
density_ratio_plot <- function(df_true, df_syn) { | |
# perform lfda | |
id <- factor(c(rep("true", nrow(df_true)), rep("syn", nrow(df_syn)))) | |
projected <- lfda(x = bind_rows(df_true, df_syn), y = id, r = 2, metric = "orthonormalized") | |
Z <- as_tibble(projected$Z) |> set_names(c("dim_1", "dim_2")) | |
# compute density ratio on low-dimensional projection | |
dr <- densratio(x1 = Z |> filter(id == "true"), x2 = Z |> filter(id == "syn"), verbose = FALSE) | |
# estimate log-density ratio for synthetic data | |
dr_syn <- -log(dr$compute_density_ratio(Z |> filter(id == "syn"))) | |
# create grid | |
dr_grid <- expand_grid( | |
dim_1 = seq(min(Z$dim_1), max(Z$dim_1), length.out = 100), | |
dim_2 = seq(min(Z$dim_2), max(Z$dim_2), length.out = 100) | |
) | |
# estimate DR for whole grid | |
dr_grid$R <- dr$compute_density_ratio(as.matrix(dr_grid)) | |
# create plot | |
plt <- | |
ggplot(dr_grid, aes(x = dim_1, y = dim_2)) + | |
geom_contour_filled(aes(z = log(R)), alpha = 0.3) + | |
geom_contour(aes(z = log(R)), colour = "grey") + | |
geom_point(data = Z, aes(colour = id)) + | |
theme_minimal() + | |
labs( | |
title = "Density ratio plot", | |
subtitle = paste("E[log-DR] of synth. data:", | |
round(mean(dr_syn), 3), "\u00b1", # mean | |
round(sd(dr_syn)/sqrt(length(dr_syn)), 3)), # standard error | |
x = "Dimension 1", | |
y = "Dimension 2" | |
) + | |
scale_fill_viridis_d(guide = "none") + | |
coord_fixed() | |
plt | |
} | |
density_ratio_plot(as_tibble(X1), as_tibble(X2)) |
Author
vankesteren
commented
Oct 20, 2022
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment