Spectral density ratio estimation

Izbicki, Lee & Schafer (2014) show that the density ratio can be computed through a eigen decomposition of the kernel gram matrix. However, for a data set of size $n$, the kernel Gram matrix takes on dimensions $n \times n$ and an eigen decomposition has complexity $\mathcal{O}(n^3)$. Fortunately, it is possible to approximate the solution with a subset of the kernel Gram matrix. That is, we can perform an eigen decomposition of a subset of $k \geq n$ rows and columns of the kernel Gram matrix to approximate the original solution.

library(ggplot2)
library(patchwork)

set.seed(123)

N <- 500
P <- 2

rho <- 0.2
S1 <- rho + (1-rho) * diag(P)
S2 <- 2*rho + (1 - 2*rho) * diag(P)

F <- rnorm(N*P, 1) |> matrix(N) %*% chol(S1)
G <- rnorm(N*P) |> matrix(N) %*% chol(S2)

# G = de

colMeans(F)
#> [1] 1.034590 1.184426
colMeans(G)
#> [1] 0.02620075 0.06430701


KF <- densityratio:::distance(F, G) |>
  densityratio:::kernel_gaussian(sigma = 1)

KG <- densityratio:::distance(G, G) |>
  densityratio:::kernel_gaussian(sigma = 1)


J <- 20
E <- eigen(KG)
phi_eigen <- E$vectors[,1:J]
phihat_eigen <- KF %*% phi_eigen[,1:J] %*% diag(sqrt(N)/E$values[1:J])

Bhatj_eigen <- colMeans(phihat_eigen)
Bhat_eigen <- pmax(1e-6, phihat_eigen %*% Bhatj_eigen)

phihatpred_eigen <- rbind(KF, KG) %*% phi_eigen[,1:J] %*% diag(sqrt(N)/E$values[1:J])

dat <- rbind(F, G) |> as.data.frame()
dat$group <- c(rep("F", N), rep("G", N))

p1 <- ggplot(dat, aes(x = V1, y = V2, color = group)) +
  geom_point() +
  theme_minimal()

p2 <- ggplot(rbind(F, G) |> as.data.frame(), 
       aes(x = V1, y = V2, color = phihatpred_eigen %*% Bhatj_eigen)) +
  geom_point(alpha = 0.6) +
  theme_minimal() +
  theme(legend.title = element_blank())


N_approx <- 200

eigen_approx <- eigen(KG[1:N_approx,1:N_approx])
phi_eigen_approx <- eigen_approx$vectors[,1:J]
phihat_eigen_approx <- KF[,1:N_approx] %*% phi_eigen_approx %*% diag(sqrt(N_approx)/eigen_approx$values[1:J])

Bhatj_eigen_approx <- colMeans(phihat_eigen_approx)
Bhat_svd_approx <- pmax(1e-6, phihat_eigen_approx %*% Bhatj_eigen_approx)

phihatpred_eigen_approx <- rbind(KF[,1:N_approx], KG[,1:N_approx]) %*% phi_eigen_approx %*% diag(sqrt(N_approx)/eigen_approx$values[1:J])

p3 <- ggplot(rbind(F, G) |> as.data.frame(),
       aes(x = V1, y = V2, color = phihatpred_eigen_approx %*% Bhatj_eigen_approx)) +
  geom_point(alpha = 0.6) +
  theme_minimal() +
  theme(legend.title = element_blank())

p1

p2 + p3

For what it's worth, it is also possible to obtain the original eigen-solution with a singular value decomposition. However, to do a singular value decomposition on the kernel Gram matrix also requires a different calculation of the sample size in psihat. How to do this is left for future work.

SVD <- svd(KG, nu = J, nv = J)

phi_svd <- SVD$v
phihat_svd <- KF %*% phi_svd %*% diag(sqrt(N)/SVD$d[1:J])

Bhatj_svd <- colMeans(phihat_svd)
Bhat_svd <- pmax(1e-6, phihat_svd %*% Bhatj_svd)

phihatpred_svd <- rbind(KF, KG) %*% phi_svd %*% diag(sqrt(N)/SVD$d[1:J])

p4 <- ggplot(rbind(F, G) |> as.data.frame(), 
             aes(x = V1, y = V2, color = phihatpred_svd %*% Bhatj_svd)) +
  geom_point(alpha = 0.6) +
  theme_minimal() +
  theme(legend.title = element_blank())

p2 + p4

^{Created on 2024-03-30 with reprex v2.0.2}

thomvolker/spectral_dre_svd.md

Spectral density ratio estimation