#!/usr/bin/env python
from __future__ import absolute_import, division, print_function

import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import sparse_ops

import numpy as np
import scipy.linalg
def my_omexpovx_func(x):
    """
    A utility function to evaluate the scalar function (1-exp(-x))/x (elementwise on arrays)
    """
    # Start with the basic formula
    out = np.divide(1 - np.exp(-x), x)
    # If the denominator is small, resort to a Taylor expansion
    # TODO find a properly motivated threshold for switching to the Taylor expansion
    idx = (abs(x) < 1e-6)
    x2idx = np.square(x[idx])
    out[idx] = 1 - x[idx]/2 + x2idx/6
    return out
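
# Quick sanity check: for small x the Taylor branch should return a value close
# to 1, and at x = 1 the exact value is 1 - exp(-1). (The 1e-6 switch point is a
# heuristic rather than a carefully derived threshold, as the TODO above notes.)
assert np.allclose(my_omexpovx_func(np.array([1e-9, 1.0])),
                   [1.0, 1.0 - np.exp(-1.0)])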
def expm_grad_from_najfeld(M, expM):
    """
    Compute the derivative (full Jacobian) of the matrix exponential.
    For the general case in which the matrix may not be diagonalisable,
    Najfeld and Havel provide formulas using the adjoint of the matrix
    and matrix functions of it.
    """
    k2 = M.size
    k = int(np.sqrt(k2))
    assert k * k == k2
    # Some useful precomputations
    IkeM = np.kron(np.eye(k), expM)
    adM = np.kron(np.eye(k), M) - np.kron(M.T, np.eye(k))
    # Apply my_omexpovx_func as a matrix function to the adjoint
    funadM = scipy.linalg.funm(adM, my_omexpovx_func)
    # Use the first formula from Najfeld and Havel
    return np.dot(IkeM, funadM)
def expm_grad_from_frechet(M):
    """
    Compute the derivative (full Jacobian) of the matrix exponential by
    evaluating the directional (Frechet) derivative along each canonical
    basis direction.
    """
    k2 = M.size
    k = int(np.sqrt(k2))
    assert k * k == k2
    DexpM = np.zeros((k2, k2))
    for i in range(k):
        for j in range(k):
            E = np.zeros((k, k))
            E[i, j] = 1
            eFME = scipy.linalg.expm_frechet(M, E, compute_expm=False)
            # Column index j*k+i is the column-major (Fortran) index of E[i, j]
            DexpM[:, j*k+i] = vec(eFME)
    return DexpM
def vec(mat):
    """
    Vectorise a matrix into a vector (column-major / Fortran order)
    """
    return mat.ravel('F')

def unvec(v):
    """
    Unvectorise a vector (column-major / Fortran order) representing a square matrix
    """
    k = int(np.sqrt(len(v)))
    assert k * k == len(v)
    return v.reshape((k, k), order='F')

def cvec(mat):
    """
    Vectorise a matrix into a vector (row-major / C order)
    """
    return mat.ravel('C')

def uncvec(v):
    """
    Unvectorise a vector (row-major / C order) representing a square matrix
    """
    k = int(np.sqrt(len(v)))
    assert k * k == len(v)
    return v.reshape((k, k), order='C')
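
# The helpers above encode the two vectorisation conventions that this script
# keeps juggling: vec/unvec use Fortran (column-major) order, as assumed by the
# standard vec/Kronecker identities, while cvec/uncvec use numpy's default
# C (row-major) order. A small illustration:
_A = np.array([[1., 2.], [3., 4.]])
assert np.array_equal(vec(_A), [1., 3., 2., 4.])   # column-major
assert np.array_equal(cvec(_A), [1., 2., 3., 4.])  # row-major
assert np.array_equal(unvec(vec(_A)), _A)
assert np.array_equal(uncvec(cvec(_A)), _A)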
@ops.RegisterGradient("MatrixExponential")
def _expm_grad(op, grad):
    # We want the backward-mode gradient. Let X be the NxN input matrix.
    # Let J(X) be the N^2xN^2 complete Jacobian matrix of expm at X.
    # Let Y be the NxN incoming gradient in the backward AD. We want
    #   unvec( ( vec(Y)^T . J(X) )^T )
    #   = unvec( J(X)^T . vec(Y) )
    #   = unvec( J(X^T) . vec(Y) )
    # which is the forward-mode (Frechet) derivative of expm at X^T in the direction Y.
    grad_func = lambda x, y: scipy.linalg.expm_frechet(x, y, compute_expm=False)
    return tf.py_func(grad_func, [tf.transpose(op.inputs[0]), grad], tf.float64)
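
# The key step above is the identity J(X)^T = J(X^T) for the Jacobian of expm
# (it follows from the integral representation of the Frechet derivative of the
# matrix exponential). A quick numerical check of that identity on a small test
# matrix, using the explicit Jacobian construction defined earlier:
_M = np.array([[1., 2.], [3., 4.]])
print("J(X)^T == J(X^T):",
      np.allclose(np.transpose(expm_grad_from_frechet(_M)),
                  expm_grad_from_frechet(np.transpose(_M))))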
tf.enable_eager_execution()
tfe = tf.contrib.eager

#x = tfe.Variable([[1.,3.],[-7.,9.]])
x = tfe.Variable([[1., 2.], [3., 4.]], dtype=tf.float64)

with tf.GradientTape(persistent=True) as tape:
    x2 = tf.matmul(x, x)
    expx = tf.linalg.expm(x)

grad_x2 = tape.gradient(x2, x)
grad_exp = tape.gradient(expx, x)

print("\n{}^2=\n {}".format(x.numpy(), x2))
print("\ngrad x2 =\n {}".format(grad_x2))
# Get the full Jacobian of the matrix squaring.
# Note that the standard vec/Kronecker formula assumes column-major vectorisation
# while numpy defaults to row-major ordering.
Dx2 = np.kron(np.transpose(x.numpy()), np.eye(2)) + np.kron(np.eye(2), x.numpy())
print("Jac x^2 =\n {}".format(Dx2))
DxT2 = np.kron(x.numpy(), np.eye(2)) + np.kron(np.eye(2), np.transpose(x.numpy()))
print("Jac x^T^2 =\n {}".format(DxT2))
old_grad = np.ones((2, 2))
grad_x2_np = unvec(np.matmul(vec(old_grad), Dx2))
print("Manual grad x2 =\n {}".format(grad_x2_np))
print("expm({})=\n {}".format(x.numpy(),expx)) | |
print("grad expm =\n {}".format(grad_exp)) | |
manual_jac = expm_grad_from_frechet(x.numpy()) | |
gradj_np = unvec(np.matmul(vec(old_grad),manual_jac)) | |
print("Manual grad expm =\n {}".format(gradj_np)) | |
gradf_np = scipy.linalg.expm_frechet(np.transpose(x.numpy()),old_grad,compute_expm=False) | |
print("Manual grad expm v2 =\n {}".format(gradf_np)) |
#!/usr/bin/env python
from __future__ import absolute_import, division, print_function

import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import sparse_ops

import numpy as np
import scipy.linalg
def my_omexpovx_func(x):
    """
    A utility function to evaluate the scalar function (1-exp(-x))/x (elementwise on arrays)
    """
    # Start with the basic formula
    out = np.divide(1 - np.exp(-x), x)
    # If the denominator is small, resort to a Taylor expansion
    # TODO find a properly motivated threshold for switching to the Taylor expansion
    idx = (abs(x) < 1e-6)
    x2idx = np.square(x[idx])
    out[idx] = 1 - x[idx]/2 + x2idx/6
    return out
def expm_grad_from_najfeld(M, expM):
    """
    Compute the derivative (full Jacobian) of the matrix exponential.
    For the general case in which the matrix may not be diagonalisable,
    Najfeld and Havel provide formulas using the adjoint of the matrix
    and matrix functions of it.
    """
    k2 = M.size
    k = int(np.sqrt(k2))
    assert k * k == k2
    # Some useful precomputations
    IkeM = np.kron(np.eye(k), expM)
    adM = np.kron(np.eye(k), M) - np.kron(M.T, np.eye(k))
    # Apply my_omexpovx_func as a matrix function to the adjoint
    funadM = scipy.linalg.funm(adM, my_omexpovx_func)
    # Use the first formula from Najfeld and Havel
    return np.dot(IkeM, funadM)
def expm_grad_from_frechet(M):
    """
    Compute the derivative (full Jacobian) of the matrix exponential by
    evaluating the directional (Frechet) derivative along each canonical
    basis direction.
    """
    k2 = M.size
    k = int(np.sqrt(k2))
    assert k * k == k2
    DexpM = np.zeros((k2, k2))
    for i in range(k):
        for j in range(k):
            E = np.zeros((k, k))
            E[i, j] = 1
            eFME = scipy.linalg.expm_frechet(M, E, compute_expm=False)
            # Column index j*k+i is the column-major (Fortran) index of E[i, j]
            DexpM[:, j*k+i] = vec(eFME)
    return DexpM
def transposition_matrix(M, N):
    """
    Permutation matrix that maps the row-major vectorisation of an MxN
    matrix to the row-major vectorisation of its transpose.
    """
    MN = M*N
    vec_indices = np.arange(MN).reshape((M, N))
    new_indices = np.transpose(vec_indices)
    mat = np.zeros((MN, MN))
    mat[vec_indices.ravel(), new_indices.ravel()] = 1
    return mat

def col_to_row_major_derivation_matrix(m):
    """
    Convert an N^2xN^2 Jacobian expressed for one vectorisation ordering
    into the Jacobian for the other ordering, via the transposition matrix.
    """
    N = int(np.sqrt(m.shape[0]))
    P = transposition_matrix(N, N)
    return np.matmul(np.transpose(P), np.matmul(m, P))
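
# Quick check of the permutation built above: applied to the row-major
# vectorisation of a (possibly rectangular) matrix it yields the row-major
# vectorisation of the transpose, i.e. it swaps the two vectorisation orders.
_A = np.arange(6.).reshape(2, 3)
_P = transposition_matrix(2, 3)
assert np.array_equal(np.matmul(_P, _A.ravel()), _A.T.ravel())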
def vec(mat):
    """
    Vectorise a matrix into a vector (column-major / Fortran order)
    """
    return mat.ravel('F')

def unvec(v):
    """
    Unvectorise a vector (column-major / Fortran order) representing a square matrix
    """
    k = int(np.sqrt(len(v)))
    assert k * k == len(v)
    return v.reshape((k, k), order='F')

def cvec(mat):
    """
    Vectorise a matrix into a vector (row-major / C order)
    """
    return mat.ravel('C')

def uncvec(v):
    """
    Unvectorise a vector (row-major / C order) representing a square matrix
    """
    k = int(np.sqrt(len(v)))
    assert k * k == len(v)
    return v.reshape((k, k), order='C')
@ops.RegisterGradient("MatrixExponential")
def _expm_grad(op, grad):
    # We want the backward-mode gradient. Let X be the NxN input matrix.
    # Let J(X) be the N^2xN^2 complete Jacobian matrix of expm at X.
    # Let Y be the NxN incoming gradient in the backward AD. We want
    #   unvec( ( vec(Y)^T . J(X) )^T )
    #   = unvec( J(X)^T . vec(Y) )
    #   = unvec( J(X^T) . vec(Y) )
    # which is the forward-mode (Frechet) derivative of expm at X^T in the direction Y.
    grad_func = lambda x, y: scipy.linalg.expm_frechet(x, y, compute_expm=False)
    return tf.py_func(grad_func, [tf.transpose(op.inputs[0]), grad], tf.float64)  # List of one Tensor, since we have one input
tf.reset_default_graph()

N = 2
x = tf.placeholder(tf.float64, shape=[N, N])
y = tf.linalg.expm(x)
z = tf.matmul(x, x)
#z = tf.stack([tf.matmul(x,x),x])

# Build the full Jacobians one row per output element
# (one tf.gradients call per scalar output)
y_list = tf.unstack(tf.reshape(y, [-1]))
jacobian_list = [tf.reshape(tf.gradients(y_, x)[0], [-1]) for y_ in y_list]
jacobian_cmaj = tf.stack(jacobian_list)

z_list = tf.unstack(tf.reshape(z, [-1]))
jacobian2_list = [tf.reshape(tf.gradients(z_, x)[0], [-1]) for z_ in z_list]
jacobian2_cmaj = tf.stack(jacobian2_list)

sess = tf.Session()
#x_val = np.array([[1.,0.2],[0.3,-4.]])
x_val = np.eye(N) + np.random.randn(N, N)
jacobian_cmaj_val, jacobian2_cmaj_val, jacobian_list_val, jacobian2_list_val = sess.run(
    [jacobian_cmaj, jacobian2_cmaj, jacobian_list, jacobian2_list], feed_dict={x: x_val})
#manual_jac_rmaj_bis = expm_grad_from_najfeld(x_val,scipy.linalg.expm(x_val))
manual_jac_rmaj = expm_grad_from_frechet(x_val)

print("x=\n {}".format(x_val))
print("\nexpm\n=====\n")
print("\ntf cmaj jac=\n")
print(jacobian_cmaj_val)
print("\ntf jac list=\n")
print(jacobian_list_val)
print("\nmanual rmaj jac=\n")
print(manual_jac_rmaj)
#print(manual_jac_rmaj_bis)

#manual_cmaj_jac_bis = expm_grad_from_najfeld(np.transpose(x_val), scipy.linalg.expm(np.transpose(x_val)) )
manual_cmaj_jac = expm_grad_from_frechet(np.transpose(x_val))
print("\nmanual cmaj attempt jac=\n")
print(manual_cmaj_jac)

jc2r = col_to_row_major_derivation_matrix(jacobian_cmaj_val)
print("\ncol to row tf jac=\n")
print(jc2r)
print("\ndiff manual_jac_rmaj-jc2r:\n")
print(manual_jac_rmaj-jc2r)
print("\ndiff jacobian_cmaj_val-manual_cmaj_jac:\n")
print(jacobian_cmaj_val-manual_cmaj_jac)
print("\nx^2\n=====\n") | |
manual_jac2_rmaj = np.kron(np.transpose(x_val),np.eye(N))+np.kron(np.eye(N),x_val) | |
print("\ntf cmaj jac2=\n") | |
print(jacobian2_cmaj_val) | |
print("\ntf jac2 list=\n") | |
print(jacobian2_list_val) | |
print("\nmanual rmaj jac2=\n") | |
print(manual_jac2_rmaj) | |
manual_cmaj_jac2 = np.kron(x_val,np.eye(N))+np.kron(np.eye(N),np.transpose(x_val)) | |
print("\nmanual cmaj attempt jac2=\n") | |
print(manual_cmaj_jac2) | |
j2c2r = col_to_row_major_derivation_matrix(jacobian2_cmaj_val) | |
print("\ncol to row tf jac2=\n") | |
print(j2c2r) | |
print("\ndiff manual_jac2_rmaj-j2c2r:\n") | |
print(manual_jac2_rmaj-j2c2r) | |
print("\ndiff jacobian2_cmaj_val-manual_cmaj_jac2:\n") | |
print(jacobian2_cmaj_val-manual_cmaj_jac2) |