Izbicki, Lee & Schafer (2014) show that the density ratio can be
computed through a eigen decomposition of the kernel gram matrix. However, for a data set of size
library(ggplot2)
library(patchwork)
set.seed(123)
N <- 500
P <- 2
rho <- 0.2
S1 <- rho + (1-rho) * diag(P)
S2 <- 2*rho + (1 - 2*rho) * diag(P)
F <- rnorm(N*P, 1) |> matrix(N) %*% chol(S1)
G <- rnorm(N*P) |> matrix(N) %*% chol(S2)
# G = de
colMeans(F)
#> [1] 1.034590 1.184426
colMeans(G)
#> [1] 0.02620075 0.06430701
KF <- densityratio:::distance(F, G) |>
densityratio:::kernel_gaussian(sigma = 1)
KG <- densityratio:::distance(G, G) |>
densityratio:::kernel_gaussian(sigma = 1)
J <- 20
E <- eigen(KG)
phi_eigen <- E$vectors[,1:J]
phihat_eigen <- KF %*% phi_eigen[,1:J] %*% diag(sqrt(N)/E$values[1:J])
Bhatj_eigen <- colMeans(phihat_eigen)
Bhat_eigen <- pmax(1e-6, phihat_eigen %*% Bhatj_eigen)
phihatpred_eigen <- rbind(KF, KG) %*% phi_eigen[,1:J] %*% diag(sqrt(N)/E$values[1:J])
dat <- rbind(F, G) |> as.data.frame()
dat$group <- c(rep("F", N), rep("G", N))
p1 <- ggplot(dat, aes(x = V1, y = V2, color = group)) +
geom_point() +
theme_minimal()
p2 <- ggplot(rbind(F, G) |> as.data.frame(),
aes(x = V1, y = V2, color = phihatpred_eigen %*% Bhatj_eigen)) +
geom_point(alpha = 0.6) +
theme_minimal() +
theme(legend.title = element_blank())
N_approx <- 200
eigen_approx <- eigen(KG[1:N_approx,1:N_approx])
phi_eigen_approx <- eigen_approx$vectors[,1:J]
phihat_eigen_approx <- KF[,1:N_approx] %*% phi_eigen_approx %*% diag(sqrt(N_approx)/eigen_approx$values[1:J])
Bhatj_eigen_approx <- colMeans(phihat_eigen_approx)
Bhat_svd_approx <- pmax(1e-6, phihat_eigen_approx %*% Bhatj_eigen_approx)
phihatpred_eigen_approx <- rbind(KF[,1:N_approx], KG[,1:N_approx]) %*% phi_eigen_approx %*% diag(sqrt(N_approx)/eigen_approx$values[1:J])
p3 <- ggplot(rbind(F, G) |> as.data.frame(),
aes(x = V1, y = V2, color = phihatpred_eigen_approx %*% Bhatj_eigen_approx)) +
geom_point(alpha = 0.6) +
theme_minimal() +
theme(legend.title = element_blank())
p1p2 + p3For what it's worth, it is also possible to obtain the original eigen-solution with a singular value decomposition.
However, to do a singular value decomposition on the kernel Gram matrix also requires a different calculation of the
sample size in psihat. How to do this is left for future work.
SVD <- svd(KG, nu = J, nv = J)
phi_svd <- SVD$v
phihat_svd <- KF %*% phi_svd %*% diag(sqrt(N)/SVD$d[1:J])
Bhatj_svd <- colMeans(phihat_svd)
Bhat_svd <- pmax(1e-6, phihat_svd %*% Bhatj_svd)
phihatpred_svd <- rbind(KF, KG) %*% phi_svd %*% diag(sqrt(N)/SVD$d[1:J])
p4 <- ggplot(rbind(F, G) |> as.data.frame(),
aes(x = V1, y = V2, color = phihatpred_svd %*% Bhatj_svd)) +
geom_point(alpha = 0.6) +
theme_minimal() +
theme(legend.title = element_blank())
p2 + p4Created on 2024-03-30 with reprex v2.0.2


