@tvercaut
Last active July 13, 2018 15:27
#!/usr/bin/env python
from __future__ import absolute_import, division, print_function
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import sparse_ops
import numpy as np
import scipy.linalg
def my_omexpovx_func(x):
    """
    A utility function to get the scalar function (1-exp(-x))/x
    """
    # Start by the basic formula
    out = np.divide((1 - np.exp(-x)), x)
    # If the denominator is small, resort to a Taylor expansion
    # TODO find a properly motivated threshold for switching to the Taylor expansion
    idx = (abs(x) < 1e-6)
    x2idx = np.square(x[idx])
    out[idx] = 1 - x[idx]/2 + x2idx/6
    return out
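# A hedged spot check (not part of the original gist): values on either side of the
# 1e-6 switch to the Taylor branch should both be close to 1 - x/2, i.e. just below 1.
print(my_omexpovx_func(np.array([2e-6, 5e-7])))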
def expm_grad_from_najfeld(M, expM):
    """
    Compute the derivative of the matrix exponential.
    For the general case in which the matrix may not be diagonalisable,
    Najfeld and Havel also provide two other formulas using the adjoint
    of the matrix and some matrix functions
    """
    k2 = M.size
    k = int(np.sqrt(k2))
    assert k * k == k2
    # Some useful precomputations
    IkeM = np.kron(np.eye(k), expM)
    adM = np.kron(np.eye(k), M) - np.kron(M.T, np.eye(k))
    # Apply my_omexpovx_func as a matrix function to the adjoint
    funadM = scipy.linalg.funm(adM, my_omexpovx_func)
    # Use the first formula from Najfeld and Havel
    return np.dot(IkeM, funadM)
def expm_grad_from_frechet(M):
    """
    Compute the derivative of the matrix exponential by looking
    at the directional derivatives (Frechet derivatives) along
    the canonical basis
    """
    k2 = M.size
    k = int(np.sqrt(k2))
    assert k * k == k2
    DexpM = np.zeros((k2, k2))
    for i in range(0, k):
        for j in range(0, k):
            E = np.zeros((k, k))
            E[i, j] = 1
            eFME = scipy.linalg.expm_frechet(M, E, compute_expm=False)
            DexpM[:, j*k+i] = vec(eFME)
    return DexpM
def vec(mat):
    """
    Vectorise a matrix into a vector (column-major / Fortran order)
    """
    return mat.ravel('F')
def unvec(v):
    """
    Unvectorise a vector (column-major / Fortran order) representing a square matrix
    """
    k = int(np.sqrt(len(v)))
    assert k * k == len(v)
    return v.reshape((k, k), order='F')
def cvec(mat):
    """
    Vectorise a matrix into a vector (row-major / C order)
    """
    return mat.ravel('C')
def uncvec(v):
    """
    Unvectorise a vector (row-major / C order) representing a square matrix
    """
    k = int(np.sqrt(len(v)))
    assert k * k == len(v)
    return v.reshape((k, k), order='C')
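# A hedged consistency check (not part of the original gist): by linearity of the
# Frechet derivative, multiplying the assembled Jacobian by vec(E) should reproduce
# scipy.linalg.expm_frechet(M, E) for an arbitrary direction E. The underscore names
# below are purely illustrative.
_M = np.random.randn(3, 3)
_E = np.random.randn(3, 3)
_J = expm_grad_from_frechet(_M)
assert np.allclose(unvec(np.dot(_J, vec(_E))),
                   scipy.linalg.expm_frechet(_M, _E, compute_expm=False))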
@ops.RegisterGradient("MatrixExponential")
def _expm_grad(op, grad):
# We want the backward-mode gradient. Let X be the NxN input matrix.
# Let J(X) be the the N^2xN^2 complete Jacobian matrix of expm at X.
# Let Y be the NxN previous gradient in the backward AD. We want
# unvec( ( vec(Y)^T . J(X) )^T )
# = unvec( J(X)^T . vec(Y) )
# = unvec( J(X^T) . vec(Y) )
# which is the forward-mode derivative applied to the transpose
grad_func = lambda x, y: scipy.linalg.expm_frechet(x, y, compute_expm=False)
return tf.py_func(grad_func, [tf.transpose(op.inputs[0]), grad], tf.float64)
tf.enable_eager_execution()
tfe = tf.contrib.eager
#x = tfe.Variable([[1.,3.],[-7.,9.]])
x = tfe.Variable([[1.,2.],[3.,4.]], dtype=tf.float64)
with tf.GradientTape(persistent=True) as tape:
    x2 = tf.matmul(x, x)
    expx = tf.linalg.expm(x)
grad_x2 = tape.gradient(x2, x)
grad_exp = tape.gradient(expx, x)
print("\n{}^2=\n {}".format(x.numpy(),x2))
print("\ngrad x2 =\n {}".format(grad_x2))
# Get the full Jacobian of the matrix squaring
# Note that the standard formula assumes column-major vectorisation
# while numpy uses row-major ordering
Dx2 = np.kron(np.transpose(x.numpy()),np.eye(2))+np.kron(np.eye(2),x.numpy())
print("Jac x^2 =\n {}".format(Dx2))
DxT2 = np.kron(x.numpy(),np.eye(2))+np.kron(np.eye(2),np.transpose(x.numpy()))
print("Jac x^T^2 =\n {}".format(Dx2))
old_grad = np.ones((2,2))
grad_x2_np = unvec(np.matmul(vec(old_grad),Dx2))
print("Manual grad x2 =\n {}".format(grad_x2_np))
print("expm({})=\n {}".format(x.numpy(),expx))
print("grad expm =\n {}".format(grad_exp))
manual_jac = expm_grad_from_frechet(x.numpy())
gradj_np = unvec(np.matmul(vec(old_grad),manual_jac))
print("Manual grad expm =\n {}".format(gradj_np))
gradf_np = scipy.linalg.expm_frechet(np.transpose(x.numpy()),old_grad,compute_expm=False)
print("Manual grad expm v2 =\n {}".format(gradf_np))
#!/usr/bin/env python
from __future__ import absolute_import, division, print_function
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import sparse_ops
import numpy as np
import scipy.linalg
def my_omexpovx_func(x):
    """
    A utility function to get the scalar function (1-exp(-x))/x
    """
    # Start by the basic formula
    out = np.divide((1 - np.exp(-x)), x)
    # If the denominator is small, resort to a Taylor expansion
    # TODO find a properly motivated threshold for switching to the Taylor expansion
    idx = (abs(x) < 1e-6)
    x2idx = np.square(x[idx])
    out[idx] = 1 - x[idx]/2 + x2idx/6
    return out
def expm_grad_from_najfeld(M, expM):
    """
    Compute the derivative of the matrix exponential.
    For the general case in which the matrix may not be diagonalisable,
    Najfeld and Havel also provide two other formulas using the adjoint
    of the matrix and some matrix functions
    """
    k2 = M.size
    k = int(np.sqrt(k2))
    assert k * k == k2
    # Some useful precomputations
    IkeM = np.kron(np.eye(k), expM)
    adM = np.kron(np.eye(k), M) - np.kron(M.T, np.eye(k))
    # Apply my_omexpovx_func as a matrix function to the adjoint
    funadM = scipy.linalg.funm(adM, my_omexpovx_func)
    # Use the first formula from Najfeld and Havel
    return np.dot(IkeM, funadM)
def expm_grad_from_frechet(M):
    """
    Compute the derivative of the matrix exponential by looking
    at the directional derivatives (Frechet derivatives) along
    the canonical basis
    """
    k2 = M.size
    k = int(np.sqrt(k2))
    assert k * k == k2
    DexpM = np.zeros((k2, k2))
    for i in range(0, k):
        for j in range(0, k):
            E = np.zeros((k, k))
            E[i, j] = 1
            eFME = scipy.linalg.expm_frechet(M, E, compute_expm=False)
            DexpM[:, j*k+i] = vec(eFME)
    return DexpM
def transposition_matrix(M, N):
    """
    Build the MN x MN permutation (commutation) matrix that maps the row-major
    flattened entries of an M x N matrix to the row-major flattened entries of
    its transpose
    """
    MN = M * N
    vec_indices = np.arange(MN).reshape((M, N))
    new_indices = np.transpose(vec_indices)
    mat = np.zeros((MN, MN))
    mat[vec_indices.ravel(), new_indices.ravel()] = 1
    return mat
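# A hedged illustration (not part of the original gist): under row-major (C-order)
# flattening, transposition_matrix(M, N) should map the flattened entries of a
# random M x N matrix to the flattened entries of its transpose. The underscore
# names are purely illustrative.
_A = np.random.randn(3, 4)
_P = transposition_matrix(3, 4)
assert np.allclose(np.matmul(_P, _A.ravel('C')), _A.T.ravel('C'))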
def col_to_row_major_derivation_matrix(m):
    """
    Conjugate an N^2 x N^2 Jacobian matrix by the transposition matrix so as to
    switch between row-major and column-major vectorisation conventions
    """
    N = int(np.sqrt(m.shape[0]))
    P = transposition_matrix(N, N)
    return np.matmul(np.transpose(P), np.matmul(m, P))
def vec(mat):
    """
    Vectorise a matrix into a vector (column-major / Fortran order)
    """
    return mat.ravel('F')
def unvec(v):
    """
    Unvectorise a vector (column-major / Fortran order) representing a square matrix
    """
    k = int(np.sqrt(len(v)))
    assert k * k == len(v)
    return v.reshape((k, k), order='F')
def cvec(mat):
    """
    Vectorise a matrix into a vector (row-major / C order)
    """
    return mat.ravel('C')
def uncvec(v):
    """
    Unvectorise a vector (row-major / C order) representing a square matrix
    """
    k = int(np.sqrt(len(v)))
    assert k * k == len(v)
    return v.reshape((k, k), order='C')
@ops.RegisterGradient("MatrixExponential")
def _expm_grad(op, grad):
# We want the backward-mode gradient. Let X be the NxN input matrix.
# Let J(X) be the the N^2xN^2 complete Jacobian matrix of expm at X.
# Let Y be the NxN previous gradient in the backward AD. We want
# unvec( ( vec(Y)^T . J(X) )^T )
# = unvec( J(X)^T . vec(Y) )
# = unvec( J(X^T) . vec(Y) )
# which is the forward-mode derivative applied to the transpose
grad_func = lambda x, y: scipy.linalg.expm_frechet(x, y, compute_expm=False)
return tf.py_func(grad_func, [tf.transpose(op.inputs[0]), grad], tf.float64) # List of one Tensor, since we have one input
tf.reset_default_graph()
N = 2
x = tf.placeholder(tf.float64, shape=[N, N])
y = tf.linalg.expm(x)
z = tf.matmul(x,x)
#z = tf.stack([tf.matmul(x,x),x])
y_list = tf.unstack(tf.reshape(y, [-1]))
jacobian_list = [tf.reshape(tf.gradients(y_, x)[0],[-1]) for y_ in y_list]
jacobian_cmaj = tf.stack(jacobian_list)
z_list = tf.unstack(tf.reshape(z, [-1]))
jacobian2_list = [tf.reshape(tf.gradients(z_, x)[0],[-1]) for z_ in z_list]
jacobian2_cmaj = tf.stack(jacobian2_list)
sess = tf.Session()
#x_val = np.array([[1.,0.2],[0.3,-4.]])
x_val = np.eye(N)+np.random.randn(N,N)
jacobian_cmaj_val, jacobian2_cmaj_val, jacobian_list_val, jacobian2_list_val = sess.run([jacobian_cmaj,jacobian2_cmaj,jacobian_list,jacobian2_list], feed_dict={x:x_val})
#manual_jac_rmaj_bis = expm_grad_from_najfeld(x_val,scipy.linalg.expm(x_val))
manual_jac_rmaj = expm_grad_from_frechet(x_val)
print("x=\n {}".format(x_val))
print("\nexpm\n=====\n")
print("\ntf cmaj jac=\n")
print(jacobian_cmaj_val)
print("\ntf jac list=\n")
print(jacobian_list_val)
print("\nmanual rmaj jac=\n")
print(manual_jac_rmaj)
#print(manual_jac_rmaj_bis)
#manual_cmaj_jac_bis = expm_grad_from_najfeld(np.transpose(x_val), scipy.linalg.expm(np.transpose(x_val)) )
manual_cmaj_jac = expm_grad_from_frechet(np.transpose(x_val))
print("\nmanual cmaj attempt jac=\n")
print(manual_cmaj_jac)
jc2r = col_to_row_major_derivation_matrix(jacobian_cmaj_val)
print("\ncol to row tf jac=\n")
print(jc2r)
print("\ndiff manual_jac_rmaj-jc2r:\n")
print(manual_jac_rmaj-jc2r)
print("\ndiff jacobian_cmaj_val-manual_cmaj_jac:\n")
print(jacobian_cmaj_val-manual_cmaj_jac)
print("\nx^2\n=====\n")
manual_jac2_rmaj = np.kron(np.transpose(x_val),np.eye(N))+np.kron(np.eye(N),x_val)
print("\ntf cmaj jac2=\n")
print(jacobian2_cmaj_val)
print("\ntf jac2 list=\n")
print(jacobian2_list_val)
print("\nmanual rmaj jac2=\n")
print(manual_jac2_rmaj)
manual_cmaj_jac2 = np.kron(x_val,np.eye(N))+np.kron(np.eye(N),np.transpose(x_val))
print("\nmanual cmaj attempt jac2=\n")
print(manual_cmaj_jac2)
j2c2r = col_to_row_major_derivation_matrix(jacobian2_cmaj_val)
print("\ncol to row tf jac2=\n")
print(j2c2r)
print("\ndiff manual_jac2_rmaj-j2c2r:\n")
print(manual_jac2_rmaj-j2c2r)
print("\ndiff jacobian2_cmaj_val-manual_cmaj_jac2:\n")
print(jacobian2_cmaj_val-manual_cmaj_jac2)
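# A hedged cross-check (not part of the original gist): row r of jacobian_cmaj_val was
# built as the gradient of the r-th entry of the row-major flattening of expm(x), so it
# should agree, up to finite-difference error, with central differences of
# scipy.linalg.expm taken in the same row-major ordering. The helper names (h, fd_jac,
# dX) are illustrative.
h = 1e-6
fd_jac = np.zeros((N * N, N * N))
for c in range(N * N):
    dX = np.zeros(N * N)
    dX[c] = h
    dX = dX.reshape(N, N)
    fd_jac[:, c] = (scipy.linalg.expm(x_val + dX)
                    - scipy.linalg.expm(x_val - dX)).ravel('C') / (2 * h)
print("\nmax abs diff, tf jac vs finite differences: {}".format(
    np.max(np.abs(jacobian_cmaj_val - fd_jac))))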