tvercaut · May 8, 2019 07:36
diff --git a/exp_derivative_test.py b/exp_derivative_test.py
 #!/usr/bin/env python
 import numpy as np
 import scipy, scipy.linalg
 import warnings

 # Define some useful function first
 # see statsmodels/tsa/tsatools.py

 def vec(mat):
    """
    vectorise a matrix into a vector
    """
    return mat.ravel('F')

 def _triu_indices(n):
    """
    Get the indices corresponding to the upper triangular part of a matrix
    """
    rows, cols = np.triu_indices(n)
    return rows * n + cols

 def vech(mat):
    """
    Half-vectorisation for symmetric matrcies
    """
    # Gets Fortran-order
    return mat.T.take(_triu_indices(len(mat)))

 def unvec(v):
    """
    Unvectorise a vector representing a square matrix
    """
    k = int(np.sqrt(len(v)))
    assert(k * k == len(v))
    return v.reshape((k, k), order='F')

 def unvech(v):
    """
    Un-half-vectorise a vector representing a symmetric matrix matrix
    """
    # quadratic formula, correct fp error
    rows = .5 * (-1 + np.sqrt(1 + 8 * len(v)))
    rows = int(np.round(rows))

    result = np.zeros((rows, rows))
    result[np.triu_indices(rows)] = v
    result = result + result.T

    # divide diagonal elements by 2
    result[np.diag_indices(rows)] /= 2

    return result

 def duplication_matrix(n):
    """
    Create duplication matrix D_n which satisfies vec(S) = D_n vech(S) for
    symmetric matrix S
    Returns
    -------
    D_n : ndarray
    """
    tmp = np.eye(n * (n + 1) // 2)
    return np.array([unvech(x).ravel() for x in tmp]).T

 def elimination_matrix(n):
    """
    Create the elimination matrix L_n which satisfies vech(M) = L_n vec(M) for
    any matrix M
    Parameters
    ----------
    Returns
    -------
    """
    vech_indices = vec(np.tril(np.ones((n, n))))
    return np.eye(n * n)[vech_indices != 0]

 def expm_grad_from_frechet(M):
    """
    Compute the derivative of the matrix exponential by looking
    at the directional derivative (Frechet derivative) across
    the canonical basis
    """
    k2 = M.size
    k = int(np.sqrt(k2))
    assert(k * k == k2)
    DexpM = np.zeros((k2,k2))
    
    for i in range(0,k):
        for j in range(0,k):
            E = np.zeros((k,k))
            E[i,j] = 1
            [eM, eFME] = scipy.linalg.expm_frechet(M,E)
            DexpM[:,i*k+j] = eFME.ravel()
            
    return DexpM

 def my_loewner(x,y):
    """
    A utility function as part of the computation of the Loewner matrix
    to evaluate (exp(x)-exp(y))/(x-y) even when x is close to y
    """
    delta = x-y
    # Check if the difference between x and y is small
    # TODO make sure we use the right threshold for which
    # the Taylor expansion becomes more accurate
    if( abs(delta) < 1e-6 ):
        return np.exp(y)*(1 + delta/2 + delta*delta/6)
    return (np.exp(x)-np.exp(y))/(x-y)

 def expm_grad_from_loewner(M):
    """
    Compute the derivative of the matrix exponential
    When the matrix is diagonalisable, we can rely on the eigendecomposition and the
    Loewner matrix as shown in Najfeld and Havel 1995 (http: //dx.doi.org/10.1006/aama.1995.1017)
    Implementation inspired from theano
    https://github.com/Theano/Theano/blob/master/theano/tensor/slinalg.py
    """
    # Start by the eigendecomposition
    w, V = scipy.linalg.eig(M, right=True)
    U = scipy.linalg.inv(V).T
    
    # Compute the Loewner matrix from the eigenvalues
    myufunc = np.frompyfunc(my_loewner, 2, 1)
    ratio = myufunc.outer(w,w).astype(w.dtype)
    
    # Get the derivative matrix as per Najfeld and Havel
    return np.real( np.kron(V, U).dot( np.diag(ratio.ravel()) ).dot( np.kron(U.T,V.T) ) )

 def expm_frechet_from_loewner(M,dX):
    """
    Compute the derivative of the matrix exponential
    When the matrix is diagonalisable, we can rely on the eigendecomposition and the
    Loewner matrix as shown in Najfeld and Havel 1995 (http: //dx.doi.org/10.1006/aama.1995.1017)
    Implementation inspired from theano
    https://github.com/Theano/Theano/blob/master/theano/tensor/slinalg.py
    """
    # Start by the eigendecomposition
    w, V = scipy.linalg.eig(M, right=True)
    U = scipy.linalg.inv(V).T
    
    # Compute the Loewner matrix from the eigenvalues
    myufunc = np.frompyfunc(my_loewner, 2, 1)
    ratio = myufunc.outer(w,w).astype(w.dtype)
    
    # Get the derivative matrix as per Najfeld and Havel
    return np.real( U.dot( np.multiply( V.T.dot(dX).dot(U), ratio ) ).dot(V.T) )

 def my_omexpovx_func(x):
    """
    A utility function to get the scalar function (1-exp(-x))/x
    """
    # Start by the basic formula
    out = np.divide( (1-np.exp(-x)), x )

    # if the denominator is small, resort to a Taylor expansion
    # TODO find a properly motivated threshold for switching to the Taylor expansion
    idx = (abs(x) < 1e-6)
    x2idx = np.square(x[idx])
    out[idx] = 1 - x[idx]/2 + x2idx/6
    return out

 def expm_grad_from_najfeld(M, expM):
    """
    Compute the derivative of the matrix exponential
    For the general case in which the matrix may not be diagonalisable,
    Najfeld and Havel also provide two other formulas using the adjoint
    of the matrix and some matrix functionns
    """
    k2 = M.size
    k = int(np.sqrt(k2))
    assert(k * k == k2)

    # some useful precomputations
    IkeM = np.kron(np.eye(k), expM)
    adM = np.kron(np.eye(k), M) - np.kron(M.T, np.eye(k))

    # Apply my_func as a matrix function to the adjoint
    funadM = scipy.linalg.funm(adM, my_omexpovx_func )

    # Use the first formula from Najfeld and Havel
    return np.dot(IkeM, funadM)

 def my_sinch(x):
    """
    Stably evaluate sinch for vector inputs.

    Implementation adapted from scipy/sparse/linalg/matfuncs.py

    Notes
    -----
    The strategy of falling back to a sixth order Taylor expansion
    was suggested by the Spallation Neutron Source docs
    which was found on the internet by google search.
    http://www.ornl.gov/~t6p/resources/xal/javadoc/gov/sns/tools/math/ElementaryFunction.html
    The details of the cutoff point and the Horner-like evaluation
    was picked without reference to anything in particular.
    Note that sinch is not currently implemented in scipy.special,
    whereas the "engineer's" definition of sinc is implemented.
    The implementation of sinc involves a scaling factor of pi
    that distinguishes it from the "mathematician's" version of sinc.
    """

    # apply the basic formula by default
    x2 = np.square(x)
    out = np.divide( np.sinh(x), x )

    # If x is small then use sixth order Taylor expansion.
    # How small is small? I am using the point where the relative error
    # of the approximation is less than 1e-14.
    # If x is large then directly evaluate sinh(x) / x.
    idx = (abs(x) < 0.0135)
    out[idx] = 1 + (x2[idx]/6.)*(1 + (x2[idx]/20.)*(1 + (x2[idx]/42.)))
    return out

 def expm_grad_from_najfeld2(M):
    k2 = M.size
    k = int(np.sqrt(k2))
    assert(k * k == k2)
    
    # some useful precomputations
    adM = np.kron(np.eye(k), M) - np.kron(M.T, np.eye(k))
    eM2 =  scipy.linalg.expm(M/2)
    eM2k =  np.kron(eM2.T, eM2)
    funadM2 = scipy.linalg.funm(-adM/2, lambda x: my_sinch(x) )
    return np.dot(eM2k, funadM2)


 # Create a random symmetric matrix and get its exponential
 M = np.random.randn(3,3)
 M = M + M.T

 expM = scipy.linalg.expm(M)

 # Create another random matrix to check the directional
 # derivative along this matrix
 dX = 0.05 * np.random.randn(3,3)
 dX = dX + dX.T

 # Evaluate the method based on expm_frechet
 DexpM = expm_grad_from_frechet(M)

 print "M = \n", M
 print "exp(M) = \n", expM
 print "d(exp(Y))/dY|Y=M = \n", DexpM

 print "dX = \n", dX

 print "exp(M+dX) = \n", scipy.linalg.expm( M + dX )
 print "exp(M) + DexpM.dX = \n", expM + DexpM.dot(dX.ravel()).reshape(3,3)
 print "exp(M) + dX = \n", expM + dX

 [eM, eFMX] = scipy.linalg.expm_frechet(M,dX)
 print "eFMX = \n", eFMX
 print "DexpM.dX = \n", DexpM.dot(dX.ravel()).reshape(3,3)

 # Evaluate the Loewner matrix based method (as in theano)
 DexpM_theano = expm_grad_from_loewner(M)
 eFMX_theano = expm_frechet_from_loewner(M,dX)

 print "eFMX_theano = \n", eFMX_theano
 print "DexpM_theano = \n", DexpM_theano
 print "DexpM_theano.dX = \n", DexpM_theano.dot(dX.ravel()).reshape(3,3)


 # Evaluate the first formula from Najfeld and Havel
 DexpM_najfeld = expm_grad_from_najfeld(M,expM)

 print "DexpM_najfeld = \n", DexpM_najfeld
 print "eFMX_najfeld = DexpM_najfeld.dX = \n", DexpM_najfeld.dot(dX.ravel()).reshape(3,3)

 #print "exp(M) + DexpM_najfeld.dX = \n", expM + DexpM_najfeld.dot(dX.ravel()).reshape(3,3)


 # Now let's try the second formula of Najfeld and Havel
 DexpM_najfeld2 = expm_grad_from_najfeld2(M)


 print "DexpM_najfeld2 = \n", DexpM_najfeld2
 print "eFMX_najfeld2 = DexpM_najfeld2.dX = \n", DexpM_najfeld2.dot(dX.ravel()).reshape(3,3)

 #print "exp(M) + DexpM_najfeld2.dX = \n", expM + DexpM_najfeld2.dot(dX.ravel()).reshape(3,3)

 # we can also check the consistency of the result when using half-vectorisation
 # and the corresponding restricted matrix representation
 rest_DexpM = elimination_matrix(3).dot(DexpM_najfeld).dot(duplication_matrix(3))
 print "rest_DexpM = \n", rest_DexpM
 print "rest_DexpM.vech(dX) = \n", unvech(rest_DexpM.dot(vech(dX)))
	#!/usr/bin/env python
	import numpy as np
	import scipy, scipy.linalg
	import warnings

	# Define some useful function first
	# see statsmodels/tsa/tsatools.py

	def vec(mat):
	"""
	vectorise a matrix into a vector
	"""
	return mat.ravel('F')

	def _triu_indices(n):
	"""
	Get the indices corresponding to the upper triangular part of a matrix
	"""
	rows, cols = np.triu_indices(n)
	return rows * n + cols

	def vech(mat):
	"""
	Half-vectorisation for symmetric matrcies
	"""
	# Gets Fortran-order
	return mat.T.take(_triu_indices(len(mat)))

	def unvec(v):
	"""
	Unvectorise a vector representing a square matrix
	"""
	k = int(np.sqrt(len(v)))
	assert(k * k == len(v))
	return v.reshape((k, k), order='F')

	def unvech(v):
	"""
	Un-half-vectorise a vector representing a symmetric matrix matrix
	"""
	# quadratic formula, correct fp error
	rows = .5 * (-1 + np.sqrt(1 + 8 * len(v)))
	rows = int(np.round(rows))

	result = np.zeros((rows, rows))
	result[np.triu_indices(rows)] = v
	result = result + result.T

	# divide diagonal elements by 2
	result[np.diag_indices(rows)] /= 2

	return result

	def duplication_matrix(n):
	"""
	Create duplication matrix D_n which satisfies vec(S) = D_n vech(S) for
	symmetric matrix S
	Returns
	-------
	D_n : ndarray
	"""
	tmp = np.eye(n * (n + 1) // 2)
	return np.array([unvech(x).ravel() for x in tmp]).T

	def elimination_matrix(n):
	"""
	Create the elimination matrix L_n which satisfies vech(M) = L_n vec(M) for
	any matrix M
	Parameters
	----------
	Returns
	-------
	"""
	vech_indices = vec(np.tril(np.ones((n, n))))
	return np.eye(n * n)[vech_indices != 0]

	def expm_grad_from_frechet(M):
	"""
	Compute the derivative of the matrix exponential by looking
	at the directional derivative (Frechet derivative) across
	the canonical basis
	"""
	k2 = M.size
	k = int(np.sqrt(k2))
	assert(k * k == k2)
	DexpM = np.zeros((k2,k2))

	for i in range(0,k):
	for j in range(0,k):
	E = np.zeros((k,k))
	E[i,j] = 1
	[eM, eFME] = scipy.linalg.expm_frechet(M,E)
	DexpM[:,i*k+j] = eFME.ravel()

	return DexpM

	def my_loewner(x,y):
	"""
	A utility function as part of the computation of the Loewner matrix
	to evaluate (exp(x)-exp(y))/(x-y) even when x is close to y
	"""
	delta = x-y
	# Check if the difference between x and y is small
	# TODO make sure we use the right threshold for which
	# the Taylor expansion becomes more accurate
	if( abs(delta) < 1e-6 ):
	return np.exp(y)(1 + delta/2 + deltadelta/6)
	return (np.exp(x)-np.exp(y))/(x-y)

	def expm_grad_from_loewner(M):
	"""
	Compute the derivative of the matrix exponential
	When the matrix is diagonalisable, we can rely on the eigendecomposition and the
	Loewner matrix as shown in Najfeld and Havel 1995 (http: //dx.doi.org/10.1006/aama.1995.1017)
	Implementation inspired from theano
	https://github.com/Theano/Theano/blob/master/theano/tensor/slinalg.py
	"""
	# Start by the eigendecomposition
	w, V = scipy.linalg.eig(M, right=True)
	U = scipy.linalg.inv(V).T

	# Compute the Loewner matrix from the eigenvalues
	myufunc = np.frompyfunc(my_loewner, 2, 1)
	ratio = myufunc.outer(w,w).astype(w.dtype)

	# Get the derivative matrix as per Najfeld and Havel
	return np.real( np.kron(V, U).dot( np.diag(ratio.ravel()) ).dot( np.kron(U.T,V.T) ) )

	def expm_frechet_from_loewner(M,dX):
	"""
	Compute the derivative of the matrix exponential
	When the matrix is diagonalisable, we can rely on the eigendecomposition and the
	Loewner matrix as shown in Najfeld and Havel 1995 (http: //dx.doi.org/10.1006/aama.1995.1017)
	Implementation inspired from theano
	https://github.com/Theano/Theano/blob/master/theano/tensor/slinalg.py
	"""
	# Start by the eigendecomposition
	w, V = scipy.linalg.eig(M, right=True)
	U = scipy.linalg.inv(V).T

	# Compute the Loewner matrix from the eigenvalues
	myufunc = np.frompyfunc(my_loewner, 2, 1)
	ratio = myufunc.outer(w,w).astype(w.dtype)

	# Get the derivative matrix as per Najfeld and Havel
	return np.real( U.dot( np.multiply( V.T.dot(dX).dot(U), ratio ) ).dot(V.T) )

	def my_omexpovx_func(x):
	"""
	A utility function to get the scalar function (1-exp(-x))/x
	"""
	# Start by the basic formula
	out = np.divide( (1-np.exp(-x)), x )

	# if the denominator is small, resort to a Taylor expansion
	# TODO find a properly motivated threshold for switching to the Taylor expansion
	idx = (abs(x) < 1e-6)
	x2idx = np.square(x[idx])
	out[idx] = 1 - x[idx]/2 + x2idx/6
	return out

	def expm_grad_from_najfeld(M, expM):
	"""
	Compute the derivative of the matrix exponential
	For the general case in which the matrix may not be diagonalisable,
	Najfeld and Havel also provide two other formulas using the adjoint
	of the matrix and some matrix functionns
	"""
	k2 = M.size
	k = int(np.sqrt(k2))
	assert(k * k == k2)

	# some useful precomputations
	IkeM = np.kron(np.eye(k), expM)
	adM = np.kron(np.eye(k), M) - np.kron(M.T, np.eye(k))

	# Apply my_func as a matrix function to the adjoint
	funadM = scipy.linalg.funm(adM, my_omexpovx_func )

	# Use the first formula from Najfeld and Havel
	return np.dot(IkeM, funadM)

	def my_sinch(x):
	"""
	Stably evaluate sinch for vector inputs.

	Implementation adapted from scipy/sparse/linalg/matfuncs.py

	Notes
	-----
	The strategy of falling back to a sixth order Taylor expansion
	was suggested by the Spallation Neutron Source docs
	which was found on the internet by google search.
	http://www.ornl.gov/~t6p/resources/xal/javadoc/gov/sns/tools/math/ElementaryFunction.html
	The details of the cutoff point and the Horner-like evaluation
	was picked without reference to anything in particular.
	Note that sinch is not currently implemented in scipy.special,
	whereas the "engineer's" definition of sinc is implemented.
	The implementation of sinc involves a scaling factor of pi
	that distinguishes it from the "mathematician's" version of sinc.
	"""

	# apply the basic formula by default
	x2 = np.square(x)
	out = np.divide( np.sinh(x), x )

	# If x is small then use sixth order Taylor expansion.
	# How small is small? I am using the point where the relative error
	# of the approximation is less than 1e-14.
	# If x is large then directly evaluate sinh(x) / x.
	idx = (abs(x) < 0.0135)
	out[idx] = 1 + (x2[idx]/6.)(1 + (x2[idx]/20.)(1 + (x2[idx]/42.)))
	return out

	def expm_grad_from_najfeld2(M):
	k2 = M.size
	k = int(np.sqrt(k2))
	assert(k * k == k2)

	# some useful precomputations
	adM = np.kron(np.eye(k), M) - np.kron(M.T, np.eye(k))
	eM2 = scipy.linalg.expm(M/2)
	eM2k = np.kron(eM2.T, eM2)
	funadM2 = scipy.linalg.funm(-adM/2, lambda x: my_sinch(x) )
	return np.dot(eM2k, funadM2)


	# Create a random symmetric matrix and get its exponential
	M = np.random.randn(3,3)
	M = M + M.T

	expM = scipy.linalg.expm(M)

	# Create another random matrix to check the directional
	# derivative along this matrix
	dX = 0.05 * np.random.randn(3,3)
	dX = dX + dX.T

	# Evaluate the method based on expm_frechet
	DexpM = expm_grad_from_frechet(M)

	print "M = \n", M
	print "exp(M) = \n", expM
	print "d(exp(Y))/dY\|Y=M = \n", DexpM

	print "dX = \n", dX

	print "exp(M+dX) = \n", scipy.linalg.expm( M + dX )
	print "exp(M) + DexpM.dX = \n", expM + DexpM.dot(dX.ravel()).reshape(3,3)
	print "exp(M) + dX = \n", expM + dX

	[eM, eFMX] = scipy.linalg.expm_frechet(M,dX)
	print "eFMX = \n", eFMX
	print "DexpM.dX = \n", DexpM.dot(dX.ravel()).reshape(3,3)

	# Evaluate the Loewner matrix based method (as in theano)
	DexpM_theano = expm_grad_from_loewner(M)
	eFMX_theano = expm_frechet_from_loewner(M,dX)

	print "eFMX_theano = \n", eFMX_theano
	print "DexpM_theano = \n", DexpM_theano
	print "DexpM_theano.dX = \n", DexpM_theano.dot(dX.ravel()).reshape(3,3)


	# Evaluate the first formula from Najfeld and Havel
	DexpM_najfeld = expm_grad_from_najfeld(M,expM)

	print "DexpM_najfeld = \n", DexpM_najfeld
	print "eFMX_najfeld = DexpM_najfeld.dX = \n", DexpM_najfeld.dot(dX.ravel()).reshape(3,3)

	#print "exp(M) + DexpM_najfeld.dX = \n", expM + DexpM_najfeld.dot(dX.ravel()).reshape(3,3)


	# Now let's try the second formula of Najfeld and Havel
	DexpM_najfeld2 = expm_grad_from_najfeld2(M)


	print "DexpM_najfeld2 = \n", DexpM_najfeld2
	print "eFMX_najfeld2 = DexpM_najfeld2.dX = \n", DexpM_najfeld2.dot(dX.ravel()).reshape(3,3)

	#print "exp(M) + DexpM_najfeld2.dX = \n", expM + DexpM_najfeld2.dot(dX.ravel()).reshape(3,3)

	# we can also check the consistency of the result when using half-vectorisation
	# and the corresponding restricted matrix representation
	rest_DexpM = elimination_matrix(3).dot(DexpM_najfeld).dot(duplication_matrix(3))
	print "rest_DexpM = \n", rest_DexpM
	print "rest_DexpM.vech(dX) = \n", unvech(rest_DexpM.dot(vech(dX)))