Last active
May 8, 2019 07:36
-
-
Save tvercaut/61832fd5af87d0278475d01d40860d82 to your computer and use it in GitHub Desktop.
Simple test to evaluate different means of computing the derivative of the matrix exponential
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import numpy as np | |
import scipy, scipy.linalg | |
import warnings | |
# Define some useful function first | |
# see statsmodels/tsa/tsatools.py | |
def vec(mat): | |
""" | |
vectorise a matrix into a vector | |
""" | |
return mat.ravel('F') | |
def _triu_indices(n): | |
""" | |
Get the indices corresponding to the upper triangular part of a matrix | |
""" | |
rows, cols = np.triu_indices(n) | |
return rows * n + cols | |
def vech(mat): | |
""" | |
Half-vectorisation for symmetric matrcies | |
""" | |
# Gets Fortran-order | |
return mat.T.take(_triu_indices(len(mat))) | |
def unvec(v): | |
""" | |
Unvectorise a vector representing a square matrix | |
""" | |
k = int(np.sqrt(len(v))) | |
assert(k * k == len(v)) | |
return v.reshape((k, k), order='F') | |
def unvech(v): | |
""" | |
Un-half-vectorise a vector representing a symmetric matrix matrix | |
""" | |
# quadratic formula, correct fp error | |
rows = .5 * (-1 + np.sqrt(1 + 8 * len(v))) | |
rows = int(np.round(rows)) | |
result = np.zeros((rows, rows)) | |
result[np.triu_indices(rows)] = v | |
result = result + result.T | |
# divide diagonal elements by 2 | |
result[np.diag_indices(rows)] /= 2 | |
return result | |
def duplication_matrix(n): | |
""" | |
Create duplication matrix D_n which satisfies vec(S) = D_n vech(S) for | |
symmetric matrix S | |
Returns | |
------- | |
D_n : ndarray | |
""" | |
tmp = np.eye(n * (n + 1) // 2) | |
return np.array([unvech(x).ravel() for x in tmp]).T | |
def elimination_matrix(n): | |
""" | |
Create the elimination matrix L_n which satisfies vech(M) = L_n vec(M) for | |
any matrix M | |
Parameters | |
---------- | |
Returns | |
------- | |
""" | |
vech_indices = vec(np.tril(np.ones((n, n)))) | |
return np.eye(n * n)[vech_indices != 0] | |
def expm_grad_from_frechet(M): | |
""" | |
Compute the derivative of the matrix exponential by looking | |
at the directional derivative (Frechet derivative) across | |
the canonical basis | |
""" | |
k2 = M.size | |
k = int(np.sqrt(k2)) | |
assert(k * k == k2) | |
DexpM = np.zeros((k2,k2)) | |
for i in range(0,k): | |
for j in range(0,k): | |
E = np.zeros((k,k)) | |
E[i,j] = 1 | |
[eM, eFME] = scipy.linalg.expm_frechet(M,E) | |
DexpM[:,i*k+j] = eFME.ravel() | |
return DexpM | |
def my_loewner(x,y): | |
""" | |
A utility function as part of the computation of the Loewner matrix | |
to evaluate (exp(x)-exp(y))/(x-y) even when x is close to y | |
""" | |
delta = x-y | |
# Check if the difference between x and y is small | |
# TODO make sure we use the right threshold for which | |
# the Taylor expansion becomes more accurate | |
if( abs(delta) < 1e-6 ): | |
return np.exp(y)*(1 + delta/2 + delta*delta/6) | |
return (np.exp(x)-np.exp(y))/(x-y) | |
def expm_grad_from_loewner(M): | |
""" | |
Compute the derivative of the matrix exponential | |
When the matrix is diagonalisable, we can rely on the eigendecomposition and the | |
Loewner matrix as shown in Najfeld and Havel 1995 (http: //dx.doi.org/10.1006/aama.1995.1017) | |
Implementation inspired from theano | |
https://github.com/Theano/Theano/blob/master/theano/tensor/slinalg.py | |
""" | |
# Start by the eigendecomposition | |
w, V = scipy.linalg.eig(M, right=True) | |
U = scipy.linalg.inv(V).T | |
# Compute the Loewner matrix from the eigenvalues | |
myufunc = np.frompyfunc(my_loewner, 2, 1) | |
ratio = myufunc.outer(w,w).astype(w.dtype) | |
# Get the derivative matrix as per Najfeld and Havel | |
return np.real( np.kron(V, U).dot( np.diag(ratio.ravel()) ).dot( np.kron(U.T,V.T) ) ) | |
def expm_frechet_from_loewner(M,dX): | |
""" | |
Compute the derivative of the matrix exponential | |
When the matrix is diagonalisable, we can rely on the eigendecomposition and the | |
Loewner matrix as shown in Najfeld and Havel 1995 (http: //dx.doi.org/10.1006/aama.1995.1017) | |
Implementation inspired from theano | |
https://github.com/Theano/Theano/blob/master/theano/tensor/slinalg.py | |
""" | |
# Start by the eigendecomposition | |
w, V = scipy.linalg.eig(M, right=True) | |
U = scipy.linalg.inv(V).T | |
# Compute the Loewner matrix from the eigenvalues | |
myufunc = np.frompyfunc(my_loewner, 2, 1) | |
ratio = myufunc.outer(w,w).astype(w.dtype) | |
# Get the derivative matrix as per Najfeld and Havel | |
return np.real( U.dot( np.multiply( V.T.dot(dX).dot(U), ratio ) ).dot(V.T) ) | |
def my_omexpovx_func(x): | |
""" | |
A utility function to get the scalar function (1-exp(-x))/x | |
""" | |
# Start by the basic formula | |
out = np.divide( (1-np.exp(-x)), x ) | |
# if the denominator is small, resort to a Taylor expansion | |
# TODO find a properly motivated threshold for switching to the Taylor expansion | |
idx = (abs(x) < 1e-6) | |
x2idx = np.square(x[idx]) | |
out[idx] = 1 - x[idx]/2 + x2idx/6 | |
return out | |
def expm_grad_from_najfeld(M, expM): | |
""" | |
Compute the derivative of the matrix exponential | |
For the general case in which the matrix may not be diagonalisable, | |
Najfeld and Havel also provide two other formulas using the adjoint | |
of the matrix and some matrix functionns | |
""" | |
k2 = M.size | |
k = int(np.sqrt(k2)) | |
assert(k * k == k2) | |
# some useful precomputations | |
IkeM = np.kron(np.eye(k), expM) | |
adM = np.kron(np.eye(k), M) - np.kron(M.T, np.eye(k)) | |
# Apply my_func as a matrix function to the adjoint | |
funadM = scipy.linalg.funm(adM, my_omexpovx_func ) | |
# Use the first formula from Najfeld and Havel | |
return np.dot(IkeM, funadM) | |
def my_sinch(x): | |
""" | |
Stably evaluate sinch for vector inputs. | |
Implementation adapted from scipy/sparse/linalg/matfuncs.py | |
Notes | |
----- | |
The strategy of falling back to a sixth order Taylor expansion | |
was suggested by the Spallation Neutron Source docs | |
which was found on the internet by google search. | |
http://www.ornl.gov/~t6p/resources/xal/javadoc/gov/sns/tools/math/ElementaryFunction.html | |
The details of the cutoff point and the Horner-like evaluation | |
was picked without reference to anything in particular. | |
Note that sinch is not currently implemented in scipy.special, | |
whereas the "engineer's" definition of sinc is implemented. | |
The implementation of sinc involves a scaling factor of pi | |
that distinguishes it from the "mathematician's" version of sinc. | |
""" | |
# apply the basic formula by default | |
x2 = np.square(x) | |
out = np.divide( np.sinh(x), x ) | |
# If x is small then use sixth order Taylor expansion. | |
# How small is small? I am using the point where the relative error | |
# of the approximation is less than 1e-14. | |
# If x is large then directly evaluate sinh(x) / x. | |
idx = (abs(x) < 0.0135) | |
out[idx] = 1 + (x2[idx]/6.)*(1 + (x2[idx]/20.)*(1 + (x2[idx]/42.))) | |
return out | |
def expm_grad_from_najfeld2(M): | |
k2 = M.size | |
k = int(np.sqrt(k2)) | |
assert(k * k == k2) | |
# some useful precomputations | |
adM = np.kron(np.eye(k), M) - np.kron(M.T, np.eye(k)) | |
eM2 = scipy.linalg.expm(M/2) | |
eM2k = np.kron(eM2.T, eM2) | |
funadM2 = scipy.linalg.funm(-adM/2, lambda x: my_sinch(x) ) | |
return np.dot(eM2k, funadM2) | |
# Create a random symmetric matrix and get its exponential | |
M = np.random.randn(3,3) | |
M = M + M.T | |
expM = scipy.linalg.expm(M) | |
# Create another random matrix to check the directional | |
# derivative along this matrix | |
dX = 0.05 * np.random.randn(3,3) | |
dX = dX + dX.T | |
# Evaluate the method based on expm_frechet | |
DexpM = expm_grad_from_frechet(M) | |
print "M = \n", M | |
print "exp(M) = \n", expM | |
print "d(exp(Y))/dY|Y=M = \n", DexpM | |
print "dX = \n", dX | |
print "exp(M+dX) = \n", scipy.linalg.expm( M + dX ) | |
print "exp(M) + DexpM.dX = \n", expM + DexpM.dot(dX.ravel()).reshape(3,3) | |
print "exp(M) + dX = \n", expM + dX | |
[eM, eFMX] = scipy.linalg.expm_frechet(M,dX) | |
print "eFMX = \n", eFMX | |
print "DexpM.dX = \n", DexpM.dot(dX.ravel()).reshape(3,3) | |
# Evaluate the Loewner matrix based method (as in theano) | |
DexpM_theano = expm_grad_from_loewner(M) | |
eFMX_theano = expm_frechet_from_loewner(M,dX) | |
print "eFMX_theano = \n", eFMX_theano | |
print "DexpM_theano = \n", DexpM_theano | |
print "DexpM_theano.dX = \n", DexpM_theano.dot(dX.ravel()).reshape(3,3) | |
# Evaluate the first formula from Najfeld and Havel | |
DexpM_najfeld = expm_grad_from_najfeld(M,expM) | |
print "DexpM_najfeld = \n", DexpM_najfeld | |
print "eFMX_najfeld = DexpM_najfeld.dX = \n", DexpM_najfeld.dot(dX.ravel()).reshape(3,3) | |
#print "exp(M) + DexpM_najfeld.dX = \n", expM + DexpM_najfeld.dot(dX.ravel()).reshape(3,3) | |
# Now let's try the second formula of Najfeld and Havel | |
DexpM_najfeld2 = expm_grad_from_najfeld2(M) | |
print "DexpM_najfeld2 = \n", DexpM_najfeld2 | |
print "eFMX_najfeld2 = DexpM_najfeld2.dX = \n", DexpM_najfeld2.dot(dX.ravel()).reshape(3,3) | |
#print "exp(M) + DexpM_najfeld2.dX = \n", expM + DexpM_najfeld2.dot(dX.ravel()).reshape(3,3) | |
# we can also check the consistency of the result when using half-vectorisation | |
# and the corresponding restricted matrix representation | |
rest_DexpM = elimination_matrix(3).dot(DexpM_najfeld).dot(duplication_matrix(3)) | |
print "rest_DexpM = \n", rest_DexpM | |
print "rest_DexpM.vech(dX) = \n", unvech(rest_DexpM.dot(vech(dX))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment