tensorflow utils
### Adapted from TF repo

import tensorflow as tf
from tensorflow import gradients
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
def hessian_vector_product(ys, xs, v):
  """Multiply the Hessian of `ys` wrt `xs` by `v`.

  This is an efficient construction that uses a backprop-like approach
  to compute the product between the Hessian and another vector. The
  Hessian is usually too large to be explicitly computed or even
  represented, but this method allows us to at least multiply by it
  for the same big-O cost as backprop.

  Implicit Hessian-vector products are the main practical, scalable way
  of using second derivatives with neural networks. They allow us to
  do things like construct Krylov subspaces and approximate conjugate
  gradient descent.

  Example: if `y` = 1/2 `x`^T A `x`, then `hessian_vector_product(y,
  x, v)` will return an expression that evaluates to the same values
  as (A + A.T) `v`.

  Args:
    ys: A scalar value, or a tensor or list of tensors to be summed to
      yield a scalar.
    xs: A list of tensors that we should construct the Hessian over.
    v: A list of tensors, with the same shapes as xs, that we want to
      multiply by the Hessian.

  Returns:
    A list of tensors containing the product between the Hessian and `v`.

  Raises:
    ValueError: `xs` and `v` have different lengths.
  """
  # Validate the input.
  length = len(xs)
  if len(v) != length:
    raise ValueError("xs and v must have the same length.")

  # First backprop: gradients of `ys` with respect to each tensor in `xs`.
  grads = gradients(ys, xs)
  assert len(grads) == length

  # Dot each gradient with the corresponding `v` element; `v` is treated
  # as a constant, so the second backprop differentiates only the gradients.
  elemwise_products = [
      math_ops.multiply(grad_elem, array_ops.stop_gradient(v_elem))
      for grad_elem, v_elem in zip(grads, v) if grad_elem is not None
  ]

  # Second backprop: differentiating the gradient-vector product gives H v.
  grads_with_none = gradients(elemwise_products, xs)
  return_grads = [
      grad_elem if grad_elem is not None else tf.zeros_like(x)
      for x, grad_elem in zip(xs, grads_with_none)
  ]
  return return_grads
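
# A minimal usage sketch for `hessian_vector_product`, assuming TF 1.x graph
# mode; the quadratic form, the constants, and the helper name below are
# illustrative, not part of the utility itself. For y = 1/2 x^T A x, the
# product should evaluate to (A + A.T) v, per the docstring example above.
def _example_hessian_vector_product():
  A = tf.constant([[1.0, 2.0], [3.0, 4.0]])
  x = tf.Variable([1.0, 1.0])
  v = tf.constant([0.5, -0.5])
  # y = 1/2 x^T A x, written with matmul on a column vector.
  y = 0.5 * tf.reduce_sum(x * tf.matmul(A, tf.expand_dims(x, 1))[:, 0])
  hvp = hessian_vector_product(y, [x], [v])[0]
  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # (A + A.T) v = [[2, 5], [5, 8]] @ [0.5, -0.5] = [-1.5, -1.5]
    print(sess.run(hvp))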
def _AsList(x):
  """Wraps a single tensor in a list; leaves lists and tuples unchanged."""
  return x if isinstance(x, (list, tuple)) else [x]
def hessians(ys, xs, name="hessians", colocate_gradients_with_ops=False,
             gate_gradients=False, aggregation_method=None):
  """Constructs the Hessian of sum of `ys` with respect to `x` in `xs`.

  `hessians()` adds ops to the graph to output the Hessian matrix of `ys`
  with respect to `xs`. It returns a list of `Tensor` of length `len(xs)`
  where each tensor is the Hessian of `sum(ys)`. This function currently
  only supports evaluating the Hessian with respect to (a list of) one-
  dimensional tensors.

  The Hessian is a matrix of second-order partial derivatives of a scalar
  tensor (see https://en.wikipedia.org/wiki/Hessian_matrix for more details).

  Args:
    ys: A `Tensor` or list of tensors to be differentiated.
    xs: A `Tensor` or list of tensors to be used for differentiation.
    name: Optional name to use for grouping all the gradient ops together;
      defaults to 'hessians'.
    colocate_gradients_with_ops: See `gradients()` documentation for details.
    gate_gradients: See `gradients()` documentation for details.
    aggregation_method: See `gradients()` documentation for details.

  Returns:
    A list of Hessian matrices of `sum(ys)` for each `x` in `xs`.

  Raises:
    LookupError: if one of the operations between `xs` and `ys` does not
      have a registered gradient function.
    ValueError: if the arguments are invalid or not supported. Currently,
      this function only supports one-dimensional `x` in `xs`.
  """
  xs = _AsList(xs)
  kwargs = {
      'colocate_gradients_with_ops': colocate_gradients_with_ops,
      'gate_gradients': gate_gradients,
      'aggregation_method': aggregation_method
  }
  # Compute a Hessian matrix for each x in xs.
  hessians = []
  for i, x in enumerate(xs):
    # Check dimensions.
    ndims = x.get_shape().ndims
    if ndims is None:
      raise ValueError('Cannot compute Hessian because the dimensionality of '
                       'element number %d of `xs` cannot be determined' % i)
    elif ndims != 1:
      raise ValueError('Computing hessians is currently only supported for '
                       'one-dimensional tensors. Element number %d of `xs` '
                       'has %d dimensions.' % (i, ndims))
    with ops.name_scope(name + '_first_derivative'):
      # Compute the partial derivatives of the input with respect to all
      # elements of `x`.
      _gradients = tf.gradients(ys, x, **kwargs)[0]
      # Unstack the gradients into a list so we can take derivatives with
      # respect to each element.
      _gradients = array_ops.unstack(_gradients)
    with ops.name_scope(name + '_second_derivative'):
      # Compute the partial derivatives with respect to each element of
      # the list.
      _hess = [tf.gradients(_gradient, x, **kwargs)[0]
               for _gradient in _gradients]
      # Stack the list into a matrix and add it to the list of Hessians.
      hessians.append(array_ops.stack(_hess, name=name))
  return hessians
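
# A minimal usage sketch for `hessians`, assuming TF 1.x graph mode; the
# diagonal quadratic and the helper name below are illustrative. For
# y = 1/2 x^T A x with symmetric A, the Hessian is exactly A.
def _example_hessians():
  A = tf.constant([[2.0, 0.0], [0.0, 6.0]])
  x = tf.Variable([1.0, -1.0])
  y = 0.5 * tf.reduce_sum(x * tf.matmul(A, tf.expand_dims(x, 1))[:, 0])
  hess = hessians(y, x)[0]  # a (2, 2) matrix
  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(hess))  # expect [[2., 0.], [0., 6.]]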