Skip to content

Instantly share code, notes, and snippets.

@tvercaut
Last active May 8, 2019 07:36
Show Gist options
  • Save tvercaut/61832fd5af87d0278475d01d40860d82 to your computer and use it in GitHub Desktop.
Save tvercaut/61832fd5af87d0278475d01d40860d82 to your computer and use it in GitHub Desktop.
Simple test to evaluate different means of computing the derivative of the matrix exponential
#!/usr/bin/env python
import numpy as np
import scipy, scipy.linalg
import warnings
# Define some useful function first
# see statsmodels/tsa/tsatools.py
def vec(mat):
"""
vectorise a matrix into a vector
"""
return mat.ravel('F')
def _triu_indices(n):
"""
Get the indices corresponding to the upper triangular part of a matrix
"""
rows, cols = np.triu_indices(n)
return rows * n + cols
def vech(mat):
"""
Half-vectorisation for symmetric matrcies
"""
# Gets Fortran-order
return mat.T.take(_triu_indices(len(mat)))
def unvec(v):
"""
Unvectorise a vector representing a square matrix
"""
k = int(np.sqrt(len(v)))
assert(k * k == len(v))
return v.reshape((k, k), order='F')
def unvech(v):
"""
Un-half-vectorise a vector representing a symmetric matrix matrix
"""
# quadratic formula, correct fp error
rows = .5 * (-1 + np.sqrt(1 + 8 * len(v)))
rows = int(np.round(rows))
result = np.zeros((rows, rows))
result[np.triu_indices(rows)] = v
result = result + result.T
# divide diagonal elements by 2
result[np.diag_indices(rows)] /= 2
return result
def duplication_matrix(n):
"""
Create duplication matrix D_n which satisfies vec(S) = D_n vech(S) for
symmetric matrix S
Returns
-------
D_n : ndarray
"""
tmp = np.eye(n * (n + 1) // 2)
return np.array([unvech(x).ravel() for x in tmp]).T
def elimination_matrix(n):
"""
Create the elimination matrix L_n which satisfies vech(M) = L_n vec(M) for
any matrix M
Parameters
----------
Returns
-------
"""
vech_indices = vec(np.tril(np.ones((n, n))))
return np.eye(n * n)[vech_indices != 0]
def expm_grad_from_frechet(M):
"""
Compute the derivative of the matrix exponential by looking
at the directional derivative (Frechet derivative) across
the canonical basis
"""
k2 = M.size
k = int(np.sqrt(k2))
assert(k * k == k2)
DexpM = np.zeros((k2,k2))
for i in range(0,k):
for j in range(0,k):
E = np.zeros((k,k))
E[i,j] = 1
[eM, eFME] = scipy.linalg.expm_frechet(M,E)
DexpM[:,i*k+j] = eFME.ravel()
return DexpM
def my_loewner(x,y):
"""
A utility function as part of the computation of the Loewner matrix
to evaluate (exp(x)-exp(y))/(x-y) even when x is close to y
"""
delta = x-y
# Check if the difference between x and y is small
# TODO make sure we use the right threshold for which
# the Taylor expansion becomes more accurate
if( abs(delta) < 1e-6 ):
return np.exp(y)*(1 + delta/2 + delta*delta/6)
return (np.exp(x)-np.exp(y))/(x-y)
def expm_grad_from_loewner(M):
"""
Compute the derivative of the matrix exponential
When the matrix is diagonalisable, we can rely on the eigendecomposition and the
Loewner matrix as shown in Najfeld and Havel 1995 (http: //dx.doi.org/10.1006/aama.1995.1017)
Implementation inspired from theano
https://github.com/Theano/Theano/blob/master/theano/tensor/slinalg.py
"""
# Start by the eigendecomposition
w, V = scipy.linalg.eig(M, right=True)
U = scipy.linalg.inv(V).T
# Compute the Loewner matrix from the eigenvalues
myufunc = np.frompyfunc(my_loewner, 2, 1)
ratio = myufunc.outer(w,w).astype(w.dtype)
# Get the derivative matrix as per Najfeld and Havel
return np.real( np.kron(V, U).dot( np.diag(ratio.ravel()) ).dot( np.kron(U.T,V.T) ) )
def expm_frechet_from_loewner(M,dX):
"""
Compute the derivative of the matrix exponential
When the matrix is diagonalisable, we can rely on the eigendecomposition and the
Loewner matrix as shown in Najfeld and Havel 1995 (http: //dx.doi.org/10.1006/aama.1995.1017)
Implementation inspired from theano
https://github.com/Theano/Theano/blob/master/theano/tensor/slinalg.py
"""
# Start by the eigendecomposition
w, V = scipy.linalg.eig(M, right=True)
U = scipy.linalg.inv(V).T
# Compute the Loewner matrix from the eigenvalues
myufunc = np.frompyfunc(my_loewner, 2, 1)
ratio = myufunc.outer(w,w).astype(w.dtype)
# Get the derivative matrix as per Najfeld and Havel
return np.real( U.dot( np.multiply( V.T.dot(dX).dot(U), ratio ) ).dot(V.T) )
def my_omexpovx_func(x):
"""
A utility function to get the scalar function (1-exp(-x))/x
"""
# Start by the basic formula
out = np.divide( (1-np.exp(-x)), x )
# if the denominator is small, resort to a Taylor expansion
# TODO find a properly motivated threshold for switching to the Taylor expansion
idx = (abs(x) < 1e-6)
x2idx = np.square(x[idx])
out[idx] = 1 - x[idx]/2 + x2idx/6
return out
def expm_grad_from_najfeld(M, expM):
"""
Compute the derivative of the matrix exponential
For the general case in which the matrix may not be diagonalisable,
Najfeld and Havel also provide two other formulas using the adjoint
of the matrix and some matrix functionns
"""
k2 = M.size
k = int(np.sqrt(k2))
assert(k * k == k2)
# some useful precomputations
IkeM = np.kron(np.eye(k), expM)
adM = np.kron(np.eye(k), M) - np.kron(M.T, np.eye(k))
# Apply my_func as a matrix function to the adjoint
funadM = scipy.linalg.funm(adM, my_omexpovx_func )
# Use the first formula from Najfeld and Havel
return np.dot(IkeM, funadM)
def my_sinch(x):
"""
Stably evaluate sinch for vector inputs.
Implementation adapted from scipy/sparse/linalg/matfuncs.py
Notes
-----
The strategy of falling back to a sixth order Taylor expansion
was suggested by the Spallation Neutron Source docs
which was found on the internet by google search.
http://www.ornl.gov/~t6p/resources/xal/javadoc/gov/sns/tools/math/ElementaryFunction.html
The details of the cutoff point and the Horner-like evaluation
was picked without reference to anything in particular.
Note that sinch is not currently implemented in scipy.special,
whereas the "engineer's" definition of sinc is implemented.
The implementation of sinc involves a scaling factor of pi
that distinguishes it from the "mathematician's" version of sinc.
"""
# apply the basic formula by default
x2 = np.square(x)
out = np.divide( np.sinh(x), x )
# If x is small then use sixth order Taylor expansion.
# How small is small? I am using the point where the relative error
# of the approximation is less than 1e-14.
# If x is large then directly evaluate sinh(x) / x.
idx = (abs(x) < 0.0135)
out[idx] = 1 + (x2[idx]/6.)*(1 + (x2[idx]/20.)*(1 + (x2[idx]/42.)))
return out
def expm_grad_from_najfeld2(M):
k2 = M.size
k = int(np.sqrt(k2))
assert(k * k == k2)
# some useful precomputations
adM = np.kron(np.eye(k), M) - np.kron(M.T, np.eye(k))
eM2 = scipy.linalg.expm(M/2)
eM2k = np.kron(eM2.T, eM2)
funadM2 = scipy.linalg.funm(-adM/2, lambda x: my_sinch(x) )
return np.dot(eM2k, funadM2)
# Create a random symmetric matrix and get its exponential
M = np.random.randn(3,3)
M = M + M.T
expM = scipy.linalg.expm(M)
# Create another random matrix to check the directional
# derivative along this matrix
dX = 0.05 * np.random.randn(3,3)
dX = dX + dX.T
# Evaluate the method based on expm_frechet
DexpM = expm_grad_from_frechet(M)
print "M = \n", M
print "exp(M) = \n", expM
print "d(exp(Y))/dY|Y=M = \n", DexpM
print "dX = \n", dX
print "exp(M+dX) = \n", scipy.linalg.expm( M + dX )
print "exp(M) + DexpM.dX = \n", expM + DexpM.dot(dX.ravel()).reshape(3,3)
print "exp(M) + dX = \n", expM + dX
[eM, eFMX] = scipy.linalg.expm_frechet(M,dX)
print "eFMX = \n", eFMX
print "DexpM.dX = \n", DexpM.dot(dX.ravel()).reshape(3,3)
# Evaluate the Loewner matrix based method (as in theano)
DexpM_theano = expm_grad_from_loewner(M)
eFMX_theano = expm_frechet_from_loewner(M,dX)
print "eFMX_theano = \n", eFMX_theano
print "DexpM_theano = \n", DexpM_theano
print "DexpM_theano.dX = \n", DexpM_theano.dot(dX.ravel()).reshape(3,3)
# Evaluate the first formula from Najfeld and Havel
DexpM_najfeld = expm_grad_from_najfeld(M,expM)
print "DexpM_najfeld = \n", DexpM_najfeld
print "eFMX_najfeld = DexpM_najfeld.dX = \n", DexpM_najfeld.dot(dX.ravel()).reshape(3,3)
#print "exp(M) + DexpM_najfeld.dX = \n", expM + DexpM_najfeld.dot(dX.ravel()).reshape(3,3)
# Now let's try the second formula of Najfeld and Havel
DexpM_najfeld2 = expm_grad_from_najfeld2(M)
print "DexpM_najfeld2 = \n", DexpM_najfeld2
print "eFMX_najfeld2 = DexpM_najfeld2.dX = \n", DexpM_najfeld2.dot(dX.ravel()).reshape(3,3)
#print "exp(M) + DexpM_najfeld2.dX = \n", expM + DexpM_najfeld2.dot(dX.ravel()).reshape(3,3)
# we can also check the consistency of the result when using half-vectorisation
# and the corresponding restricted matrix representation
rest_DexpM = elimination_matrix(3).dot(DexpM_najfeld).dot(duplication_matrix(3))
print "rest_DexpM = \n", rest_DexpM
print "rest_DexpM.vech(dX) = \n", unvech(rest_DexpM.dot(vech(dX)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment