This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
def compute_H_inv_cubic(A, num_iters=10):
    """Iterate ``X <- 1.5 * X - 0.5 * (X @ A @ X @ X)`` starting from the identity.

    Every iterate is a polynomial in ``A`` and therefore commutes with it, so
    an invertible fixed point satisfies ``X @ X @ A == I``, i.e. ``X`` is an
    inverse square root of ``A``.

    Args:
        A: Tensor whose last two dimensions are square; any leading
            dimensions are treated as batch dimensions by the einsum
            ellipsis.
        num_iters: Number of update steps to run. ``0`` returns the
            (broadcast) identity.

    Returns:
        Tensor with the same shape, dtype and device as ``A``.

    NOTE(review): no scaling of ``A`` is done here. The iteration is only
    expected to converge when the spectrum of ``A`` is close enough to 1
    (e.g. symmetric positive definite with ``||I - A|| < 1``) -- confirm that
    callers normalise their input. The "H" in the name presumably refers to
    the Hermitian polar factor; verify against the caller.
    """
    # Identity broadcast (as a view) over any leading batch dimensions of A.
    # The view is never written to: each step rebinds X to a fresh tensor.
    X = torch.eye(A.shape[-1], dtype=A.dtype, device=A.device).expand(A.shape)
    for _ in range(num_iters):
        # Batched product X @ A @ X @ X.
        X_update = torch.einsum('...ij,...jk,...kl,...lm->...im', X, A, X, X)
        X = 1.5 * X - 0.5 * X_update
    return X
def compute_H_inv_quintic(A, num_iters=5): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
from sklearn.datasets import make_spd_matrix | |
""" | |
Factorized Newton–Schulz iterations for the inverse of an SPD matrix, from the paper
by Alexander Stotsky: https://arxiv.org/pdf/2208.04068
From efficiency equation EI = n^(1/np) where EI is the efficiency, n is the | |
order of the algorithm, and np is the number of matmuls in the algorithm, n=11 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
def _qdwh_qr_step(u, params): | |
a_minus_e_by_sqrt_c, sqrt_c, e = params | |
M, N = u.shape | |
eye_n = torch.eye(N, dtype=u.dtype, device=u.device) | |
y = torch.cat((sqrt_c * u, eye_n), dim=0) | |
q, _ = torch.linalg.qr(y, mode='reduced') | |
q1, q2 = q[:M, :], q[M:, :] | |
return e * u + a_minus_e_by_sqrt_c * (q1 @ q2.mT) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
from tqdm import tqdm | |
import numpy as np | |
import jax | |
import jax.numpy as jnp | |
from flax.training.train_state import TrainState | |
import optax | |
import optax.tree_utils as otu | |
import tensorflow as tf |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import jax | |
import jax.numpy as jnp | |
import flax.linen as nn | |
import optax | |
from tensorflow_probability.substrates.jax import distributions as tfd | |
""" | |
There's a typo in most B-TCVAE implementations on GitHub, so I thought I'd make a
quick gist of a working B-TCVAE.