Skip to content

Instantly share code, notes, and snippets.

@enijkamp
Created May 16, 2018 22:33
Show Gist options
  • Save enijkamp/cd5f874c878d26743048b3a84ea2cd4f to your computer and use it in GitHub Desktop.
Save enijkamp/cd5f874c878d26743048b3a84ea2cd4f to your computer and use it in GitHub Desktop.
import collections
import tensorflow as tf
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import tensor_array_ops
from tensorflow.python.ops import gen_array_ops
from tensorflow.python.ops import nn_ops
def dot(x, y):
return math_ops.reduce_sum(math_ops.conj(x) * y)
def l2norm_squared(v):
return constant_op.constant(2, dtype=v.dtype.base_dtype) * nn_ops.l2_loss(v)
def l2norm(v):
return math_ops.sqrt(l2norm_squared(v))
def l2normalize(v):
norm = l2norm(v)
return v / norm, norm
def lanczos(operator, operator_adj, k, shape, orthogonalize=False, starting_vector=None, name='Lanczos'):
def tarray(size, dtype, name):
return tensor_array_ops.TensorArray(dtype=dtype, size=size, tensor_array_name=name, clear_after_read=False)
def read_colvec(tarray, i):
return array_ops.expand_dims(tarray.read(i), -1)
def write_colvec(tarray, colvec, i):
return tarray.write(i, array_ops.squeeze(colvec))
lanzcos_bidiag_state = collections.namedtuple('LanczosBidiagState', ['u', 'v', 'alpha', 'beta'])
def update_state(old, i, u, v, alpha, beta):
return lanzcos_bidiag_state(
write_colvec(old.u, u, i + 1),
write_colvec(old.v, v, i),
old.alpha.write(i, alpha), old.beta.write(i, beta))
def gram_schmidt_step(j, basis, v):
v_shape = v.get_shape()
basis_vec = read_colvec(basis, j)
v -= math_ops.matmul(basis_vec, v, adjoint_a=True) * basis_vec
v.set_shape(v_shape)
return j + 1, basis, v
def orthogonalize_once(i, basis, v):
j = constant_op.constant(0, dtype=dtypes.int32)
_, _, v = control_flow_ops.while_loop(lambda j, basis, v: j < i, gram_schmidt_step, [j, basis, v])
return l2normalize(v)
def orthogonalize_(i, basis, v):
v_norm = l2norm(v)
v_new, v_new_norm = orthogonalize_once(i, basis, v)
# If the norm decreases more than 1/sqrt(2), run a second round of MGS.
# See proof in: B. N. Parlett, ``The Symmetric Eigenvalue Problem'', Prentice-Hall, Englewood Cliffs, NJ, 1980. pp. 105-109
return control_flow_ops.cond(v_new_norm < 0.7071 * v_norm, lambda: orthogonalize_once(i, basis, v), lambda: (v_new, v_new_norm))
def stopping_criterion(i, _):
return i < k
def lanczos_bidiag_step(i, ls):
u = read_colvec(ls.u, i)
r = operator_adj(u)
# The shape inference doesn't work across cond, save and reapply the shape.
r_shape = r.get_shape()
r = control_flow_ops.cond(
i > 0,
lambda: r - ls.beta.read(i - 1) * read_colvec(ls.v, i - 1),
lambda: r)
r.set_shape(r_shape)
if orthogonalize:
v, alpha = orthogonalize_(i - 1, ls.v, r)
else:
v, alpha = l2normalize(r)
p = operator(v) - alpha * u
if orthogonalize:
u, beta = orthogonalize_(i, ls.u, p)
else:
u, beta = l2normalize(p)
return i + 1, update_state(ls, i, u, v, alpha, beta)
with ops.name_scope(name):
dtype = tf.float32
if starting_vector is None:
starting_vector = random_ops.random_uniform(shape[:1], -1, 1, dtype=dtype)
u0, _ = l2normalize(starting_vector)
ls = lanzcos_bidiag_state(
u=write_colvec(tarray(k + 1, dtype, 'u'), u0, 0),
v=tarray(k, dtype, 'v'),
alpha=tarray(k, dtype, 'alpha'),
beta=tarray(k, dtype, 'beta'))
i = constant_op.constant(0, dtype=dtypes.int32)
_, ls = control_flow_ops.while_loop(stopping_criterion, lanczos_bidiag_step, [i, ls])
return lanzcos_bidiag_state(
array_ops.matrix_transpose(ls.u.stack()),
array_ops.matrix_transpose(ls.v.stack()),
ls.alpha.stack(), ls.beta.stack())
def hessian_vector_product(ys, xs, v):
length = len(xs)
if len(v) != length:
raise ValueError("xs and v must have the same length.")
grads = tf.gradients(ys, xs)
assert len(grads) == length
elemwise_products = [
math_ops.multiply(grad_elem, array_ops.stop_gradient(v_elem))
for grad_elem, v_elem in zip(grads, v)
if grad_elem is not None]
return tf.gradients(elemwise_products, xs)
def c(x):
return tf.constant(x, dtype=tf.float32)
def f(x):
return tf.pow(x[0], c(2)) + c(2) * x[0] * x[1] + c(3) * tf.pow(x[1], c(2)) + c(4) * x[0] + c(5) * x[1] + c(6)
tf.reset_default_graph()
x = tf.placeholder(shape=[2], dtype=tf.float32)
Hv = lambda v: hessian_vector_product(f(x), [x], [v])[0]
Hv_adj = gen_array_ops.conjugate_transpose(Hv, tf.int32)
v1 = tf.random_uniform([2])
v1 = v1 / tf.norm(v1)
lbs = lanczos(operator=Hv, operator_adj=Hv_adj, shape=[2,2], k=2)
#T = tf.diag(alphas) + tf.pad(betas_, [[1, 0], [0, 1]]) + tf.pad(betas_, [[0, 1], [1, 0]])
#e, v = tf.self_adjoint_eig(T)
with tf.Session().as_default() as sess:
print(sess.run(lbs, feed_dict={x: [1.0, 1.0]}))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment