#!/usr/bin/env python
from __future__ import absolute_import, division, print_function

import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import sparse_ops

import numpy as np
import scipy.linalg
def my_omexpovx_func(x):
    """
    A utility function to evaluate the scalar function (1-exp(-x))/x (elementwise on arrays)
    """
    # Start with the basic formula
    out = np.divide(1 - np.exp(-x), x)
    # If the denominator is small, resort to a Taylor expansion
    # TODO find a properly motivated threshold for switching to the Taylor expansion
    idx = (abs(x) < 1e-6)
    x2idx = np.square(x[idx])
    out[idx] = 1 - x[idx]/2 + x2idx/6
    return out
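
# Quick sanity check: for small x the Taylor branch should return a value close
# to 1, and at x = 1 the exact value is 1 - exp(-1). (The 1e-6 switch point is a
# heuristic rather than a carefully derived threshold, as the TODO above notes.)
assert np.allclose(my_omexpovx_func(np.array([1e-9, 1.0])),
                   [1.0, 1.0 - np.exp(-1.0)])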
def expm_grad_from_najfeld(M, expM):
    """
    Compute the derivative (full Jacobian) of the matrix exponential.
    For the general case in which the matrix may not be diagonalisable,
    Najfeld and Havel provide formulas using the adjoint of the matrix
    and matrix functions of it.
    """
    k2 = M.size
    k = int(np.sqrt(k2))
    assert k * k == k2
    # Some useful precomputations
    IkeM = np.kron(np.eye(k), expM)
    adM = np.kron(np.eye(k), M) - np.kron(M.T, np.eye(k))
    # Apply my_omexpovx_func as a matrix function to the adjoint
    funadM = scipy.linalg.funm(adM, my_omexpovx_func)
    # Use the first formula from Najfeld and Havel
    return np.dot(IkeM, funadM)
def expm_grad_from_frechet(M):
    """
    Compute the derivative (full Jacobian) of the matrix exponential by
    evaluating the directional (Frechet) derivative along each canonical
    basis direction.
    """
    k2 = M.size
    k = int(np.sqrt(k2))
    assert k * k == k2
    DexpM = np.zeros((k2, k2))
    for i in range(k):
        for j in range(k):
            E = np.zeros((k, k))
            E[i, j] = 1
            eFME = scipy.linalg.expm_frechet(M, E, compute_expm=False)
            # Column index j*k+i is the column-major (Fortran) index of E[i, j]
            DexpM[:, j*k+i] = vec(eFME)
    return DexpM
def vec(mat):
    """
    Vectorise a matrix into a vector (column-major / Fortran order)
    """
    return mat.ravel('F')

def unvec(v):
    """
    Unvectorise a vector (column-major / Fortran order) representing a square matrix
    """
    k = int(np.sqrt(len(v)))
    assert k * k == len(v)
    return v.reshape((k, k), order='F')

def cvec(mat):
    """
    Vectorise a matrix into a vector (row-major / C order)
    """
    return mat.ravel('C')

def uncvec(v):
    """
    Unvectorise a vector (row-major / C order) representing a square matrix
    """
    k = int(np.sqrt(len(v)))
    assert k * k == len(v)
    return v.reshape((k, k), order='C')
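
# The helpers above encode the two vectorisation conventions that this script
# keeps juggling: vec/unvec use Fortran (column-major) order, as assumed by the
# standard vec/Kronecker identities, while cvec/uncvec use numpy's default
# C (row-major) order. A small illustration:
_A = np.array([[1., 2.], [3., 4.]])
assert np.array_equal(vec(_A), [1., 3., 2., 4.])   # column-major
assert np.array_equal(cvec(_A), [1., 2., 3., 4.])  # row-major
assert np.array_equal(unvec(vec(_A)), _A)
assert np.array_equal(uncvec(cvec(_A)), _A)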
@ops.RegisterGradient("MatrixExponential")
def _expm_grad(op, grad):
    # We want the backward-mode gradient. Let X be the NxN input matrix.
    # Let J(X) be the N^2xN^2 complete Jacobian matrix of expm at X.
    # Let Y be the NxN incoming gradient in the backward AD. We want
    #   unvec( ( vec(Y)^T . J(X) )^T )
    #   = unvec( J(X)^T . vec(Y) )
    #   = unvec( J(X^T) . vec(Y) )
    # which is the forward-mode (Frechet) derivative of expm at X^T in the direction Y.
    grad_func = lambda x, y: scipy.linalg.expm_frechet(x, y, compute_expm=False)
    return tf.py_func(grad_func, [tf.transpose(op.inputs[0]), grad], tf.float64)
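
# The key step above is the identity J(X)^T = J(X^T) for the Jacobian of expm
# (it follows from the integral representation of the Frechet derivative of the
# matrix exponential). A quick numerical check of that identity on a small test
# matrix, using the explicit Jacobian construction defined earlier:
_M = np.array([[1., 2.], [3., 4.]])
print("J(X)^T == J(X^T):",
      np.allclose(np.transpose(expm_grad_from_frechet(_M)),
                  expm_grad_from_frechet(np.transpose(_M))))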
tf.enable_eager_execution()
tfe = tf.contrib.eager

#x = tfe.Variable([[1.,3.],[-7.,9.]])
x = tfe.Variable([[1., 2.], [3., 4.]], dtype=tf.float64)

with tf.GradientTape(persistent=True) as tape:
    x2 = tf.matmul(x, x)
    expx = tf.linalg.expm(x)

grad_x2 = tape.gradient(x2, x)
grad_exp = tape.gradient(expx, x)

print("\n{}^2=\n {}".format(x.numpy(), x2))
print("\ngrad x2 =\n {}".format(grad_x2))
# Get the full Jacobian of the matrix squaring.
# Note that the standard vec/Kronecker formula assumes column-major vectorisation
# while numpy defaults to row-major ordering.
Dx2 = np.kron(np.transpose(x.numpy()), np.eye(2)) + np.kron(np.eye(2), x.numpy())
print("Jac x^2 =\n {}".format(Dx2))
DxT2 = np.kron(x.numpy(), np.eye(2)) + np.kron(np.eye(2), np.transpose(x.numpy()))
print("Jac x^T^2 =\n {}".format(DxT2))
old_grad = np.ones((2, 2))
grad_x2_np = unvec(np.matmul(vec(old_grad), Dx2))
print("Manual grad x2 =\n {}".format(grad_x2_np))
print("expm({})=\n {}".format(x.numpy(),expx)) | |
print("grad expm =\n {}".format(grad_exp)) | |
manual_jac = expm_grad_from_frechet(x.numpy()) | |
gradj_np = unvec(np.matmul(vec(old_grad),manual_jac)) | |
print("Manual grad expm =\n {}".format(gradj_np)) | |
gradf_np = scipy.linalg.expm_frechet(np.transpose(x.numpy()),old_grad,compute_expm=False) | |
print("Manual grad expm v2 =\n {}".format(gradf_np)) |
#!/usr/bin/env python
from __future__ import absolute_import, division, print_function

import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import sparse_ops

import numpy as np
import scipy.linalg
def my_omexpovx_func(x):
    """
    A utility function to evaluate the scalar function (1-exp(-x))/x (elementwise on arrays)
    """
    # Start with the basic formula
    out = np.divide(1 - np.exp(-x), x)
    # If the denominator is small, resort to a Taylor expansion
    # TODO find a properly motivated threshold for switching to the Taylor expansion
    idx = (abs(x) < 1e-6)
    x2idx = np.square(x[idx])
    out[idx] = 1 - x[idx]/2 + x2idx/6
    return out
def expm_grad_from_najfeld(M, expM):
    """
    Compute the derivative (full Jacobian) of the matrix exponential.
    For the general case in which the matrix may not be diagonalisable,
    Najfeld and Havel provide formulas using the adjoint of the matrix
    and matrix functions of it.
    """
    k2 = M.size
    k = int(np.sqrt(k2))
    assert k * k == k2
    # Some useful precomputations
    IkeM = np.kron(np.eye(k), expM)
    adM = np.kron(np.eye(k), M) - np.kron(M.T, np.eye(k))
    # Apply my_omexpovx_func as a matrix function to the adjoint
    funadM = scipy.linalg.funm(adM, my_omexpovx_func)
    # Use the first formula from Najfeld and Havel
    return np.dot(IkeM, funadM)
def expm_grad_from_frechet(M):
    """
    Compute the derivative (full Jacobian) of the matrix exponential by
    evaluating the directional (Frechet) derivative along each canonical
    basis direction.
    """
    k2 = M.size
    k = int(np.sqrt(k2))
    assert k * k == k2
    DexpM = np.zeros((k2, k2))
    for i in range(k):
        for j in range(k):
            E = np.zeros((k, k))
            E[i, j] = 1
            eFME = scipy.linalg.expm_frechet(M, E, compute_expm=False)
            # Column index j*k+i is the column-major (Fortran) index of E[i, j]
            DexpM[:, j*k+i] = vec(eFME)
    return DexpM
def transposition_matrix(M, N):
    """
    Permutation matrix that maps the row-major vectorisation of an MxN
    matrix to the row-major vectorisation of its transpose.
    """
    MN = M*N
    vec_indices = np.arange(MN).reshape((M, N))
    new_indices = np.transpose(vec_indices)
    mat = np.zeros((MN, MN))
    mat[vec_indices.ravel(), new_indices.ravel()] = 1
    return mat

def col_to_row_major_derivation_matrix(m):
    """
    Convert an N^2xN^2 Jacobian expressed for one vectorisation ordering
    into the Jacobian for the other ordering, via the transposition matrix.
    """
    N = int(np.sqrt(m.shape[0]))
    P = transposition_matrix(N, N)
    return np.matmul(np.transpose(P), np.matmul(m, P))
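
# Quick check of the permutation built above: applied to the row-major
# vectorisation of a (possibly rectangular) matrix it yields the row-major
# vectorisation of the transpose, i.e. it swaps the two vectorisation orders.
_A = np.arange(6.).reshape(2, 3)
_P = transposition_matrix(2, 3)
assert np.array_equal(np.matmul(_P, _A.ravel()), _A.T.ravel())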
def vec(mat):
    """
    Vectorise a matrix into a vector (column-major / Fortran order)
    """
    return mat.ravel('F')

def unvec(v):
    """
    Unvectorise a vector (column-major / Fortran order) representing a square matrix
    """
    k = int(np.sqrt(len(v)))
    assert k * k == len(v)
    return v.reshape((k, k), order='F')

def cvec(mat):
    """
    Vectorise a matrix into a vector (row-major / C order)
    """
    return mat.ravel('C')

def uncvec(v):
    """
    Unvectorise a vector (row-major / C order) representing a square matrix
    """
    k = int(np.sqrt(len(v)))
    assert k * k == len(v)
    return v.reshape((k, k), order='C')
@ops.RegisterGradient("MatrixExponential")
def _expm_grad(op, grad):
    # We want the backward-mode gradient. Let X be the NxN input matrix.
    # Let J(X) be the N^2xN^2 complete Jacobian matrix of expm at X.
    # Let Y be the NxN incoming gradient in the backward AD. We want
    #   unvec( ( vec(Y)^T . J(X) )^T )
    #   = unvec( J(X)^T . vec(Y) )
    #   = unvec( J(X^T) . vec(Y) )
    # which is the forward-mode (Frechet) derivative of expm at X^T in the direction Y.
    grad_func = lambda x, y: scipy.linalg.expm_frechet(x, y, compute_expm=False)
    return tf.py_func(grad_func, [tf.transpose(op.inputs[0]), grad], tf.float64)  # List of one Tensor, since we have one input
tf.reset_default_graph()

N = 2
x = tf.placeholder(tf.float64, shape=[N, N])
y = tf.linalg.expm(x)
z = tf.matmul(x, x)
#z = tf.stack([tf.matmul(x,x),x])

# Build the full Jacobians one row per output element
# (one tf.gradients call per scalar output)
y_list = tf.unstack(tf.reshape(y, [-1]))
jacobian_list = [tf.reshape(tf.gradients(y_, x)[0], [-1]) for y_ in y_list]
jacobian_cmaj = tf.stack(jacobian_list)

z_list = tf.unstack(tf.reshape(z, [-1]))
jacobian2_list = [tf.reshape(tf.gradients(z_, x)[0], [-1]) for z_ in z_list]
jacobian2_cmaj = tf.stack(jacobian2_list)

sess = tf.Session()
#x_val = np.array([[1.,0.2],[0.3,-4.]])
x_val = np.eye(N) + np.random.randn(N, N)
jacobian_cmaj_val, jacobian2_cmaj_val, jacobian_list_val, jacobian2_list_val = sess.run(
    [jacobian_cmaj, jacobian2_cmaj, jacobian_list, jacobian2_list], feed_dict={x: x_val})
#manual_jac_rmaj_bis = expm_grad_from_najfeld(x_val,scipy.linalg.expm(x_val))
manual_jac_rmaj = expm_grad_from_frechet(x_val)

print("x=\n {}".format(x_val))
print("\nexpm\n=====\n")
print("\ntf cmaj jac=\n")
print(jacobian_cmaj_val)
print("\ntf jac list=\n")
print(jacobian_list_val)
print("\nmanual rmaj jac=\n")
print(manual_jac_rmaj)
#print(manual_jac_rmaj_bis)

#manual_cmaj_jac_bis = expm_grad_from_najfeld(np.transpose(x_val), scipy.linalg.expm(np.transpose(x_val)) )
manual_cmaj_jac = expm_grad_from_frechet(np.transpose(x_val))
print("\nmanual cmaj attempt jac=\n")
print(manual_cmaj_jac)

jc2r = col_to_row_major_derivation_matrix(jacobian_cmaj_val)
print("\ncol to row tf jac=\n")
print(jc2r)
print("\ndiff manual_jac_rmaj-jc2r:\n")
print(manual_jac_rmaj-jc2r)
print("\ndiff jacobian_cmaj_val-manual_cmaj_jac:\n")
print(jacobian_cmaj_val-manual_cmaj_jac)
print("\nx^2\n=====\n") | |
manual_jac2_rmaj = np.kron(np.transpose(x_val),np.eye(N))+np.kron(np.eye(N),x_val) | |
print("\ntf cmaj jac2=\n") | |
print(jacobian2_cmaj_val) | |
print("\ntf jac2 list=\n") | |
print(jacobian2_list_val) | |
print("\nmanual rmaj jac2=\n") | |
print(manual_jac2_rmaj) | |
manual_cmaj_jac2 = np.kron(x_val,np.eye(N))+np.kron(np.eye(N),np.transpose(x_val)) | |
print("\nmanual cmaj attempt jac2=\n") | |
print(manual_cmaj_jac2) | |
j2c2r = col_to_row_major_derivation_matrix(jacobian2_cmaj_val) | |
print("\ncol to row tf jac2=\n") | |
print(j2c2r) | |
print("\ndiff manual_jac2_rmaj-j2c2r:\n") | |
print(manual_jac2_rmaj-j2c2r) | |
print("\ndiff jacobian2_cmaj_val-manual_cmaj_jac2:\n") | |
print(jacobian2_cmaj_val-manual_cmaj_jac2) |