Created
May 16, 2018 22:33
-
-
Save enijkamp/cd5f874c878d26743048b3a84ea2cd4f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import collections | |
import tensorflow as tf | |
from tensorflow.python.framework import constant_op | |
from tensorflow.python.framework import dtypes | |
from tensorflow.python.framework import ops | |
from tensorflow.python.ops import array_ops | |
from tensorflow.python.ops import control_flow_ops | |
from tensorflow.python.ops import math_ops | |
from tensorflow.python.ops import random_ops | |
from tensorflow.python.ops import tensor_array_ops | |
from tensorflow.python.ops import gen_array_ops | |
from tensorflow.python.ops import nn_ops | |
def dot(x, y): | |
return math_ops.reduce_sum(math_ops.conj(x) * y) | |
def l2norm_squared(v): | |
return constant_op.constant(2, dtype=v.dtype.base_dtype) * nn_ops.l2_loss(v) | |
def l2norm(v): | |
return math_ops.sqrt(l2norm_squared(v)) | |
def l2normalize(v): | |
norm = l2norm(v) | |
return v / norm, norm | |
def lanczos(operator, operator_adj, k, shape, orthogonalize=False, starting_vector=None, name='Lanczos'): | |
def tarray(size, dtype, name): | |
return tensor_array_ops.TensorArray(dtype=dtype, size=size, tensor_array_name=name, clear_after_read=False) | |
def read_colvec(tarray, i): | |
return array_ops.expand_dims(tarray.read(i), -1) | |
def write_colvec(tarray, colvec, i): | |
return tarray.write(i, array_ops.squeeze(colvec)) | |
lanzcos_bidiag_state = collections.namedtuple('LanczosBidiagState', ['u', 'v', 'alpha', 'beta']) | |
def update_state(old, i, u, v, alpha, beta): | |
return lanzcos_bidiag_state( | |
write_colvec(old.u, u, i + 1), | |
write_colvec(old.v, v, i), | |
old.alpha.write(i, alpha), old.beta.write(i, beta)) | |
def gram_schmidt_step(j, basis, v): | |
v_shape = v.get_shape() | |
basis_vec = read_colvec(basis, j) | |
v -= math_ops.matmul(basis_vec, v, adjoint_a=True) * basis_vec | |
v.set_shape(v_shape) | |
return j + 1, basis, v | |
def orthogonalize_once(i, basis, v): | |
j = constant_op.constant(0, dtype=dtypes.int32) | |
_, _, v = control_flow_ops.while_loop(lambda j, basis, v: j < i, gram_schmidt_step, [j, basis, v]) | |
return l2normalize(v) | |
def orthogonalize_(i, basis, v): | |
v_norm = l2norm(v) | |
v_new, v_new_norm = orthogonalize_once(i, basis, v) | |
# If the norm decreases more than 1/sqrt(2), run a second round of MGS. | |
# See proof in: B. N. Parlett, ``The Symmetric Eigenvalue Problem'', Prentice-Hall, Englewood Cliffs, NJ, 1980. pp. 105-109 | |
return control_flow_ops.cond(v_new_norm < 0.7071 * v_norm, lambda: orthogonalize_once(i, basis, v), lambda: (v_new, v_new_norm)) | |
def stopping_criterion(i, _): | |
return i < k | |
def lanczos_bidiag_step(i, ls): | |
u = read_colvec(ls.u, i) | |
r = operator_adj(u) | |
# The shape inference doesn't work across cond, save and reapply the shape. | |
r_shape = r.get_shape() | |
r = control_flow_ops.cond( | |
i > 0, | |
lambda: r - ls.beta.read(i - 1) * read_colvec(ls.v, i - 1), | |
lambda: r) | |
r.set_shape(r_shape) | |
if orthogonalize: | |
v, alpha = orthogonalize_(i - 1, ls.v, r) | |
else: | |
v, alpha = l2normalize(r) | |
p = operator(v) - alpha * u | |
if orthogonalize: | |
u, beta = orthogonalize_(i, ls.u, p) | |
else: | |
u, beta = l2normalize(p) | |
return i + 1, update_state(ls, i, u, v, alpha, beta) | |
with ops.name_scope(name): | |
dtype = tf.float32 | |
if starting_vector is None: | |
starting_vector = random_ops.random_uniform(shape[:1], -1, 1, dtype=dtype) | |
u0, _ = l2normalize(starting_vector) | |
ls = lanzcos_bidiag_state( | |
u=write_colvec(tarray(k + 1, dtype, 'u'), u0, 0), | |
v=tarray(k, dtype, 'v'), | |
alpha=tarray(k, dtype, 'alpha'), | |
beta=tarray(k, dtype, 'beta')) | |
i = constant_op.constant(0, dtype=dtypes.int32) | |
_, ls = control_flow_ops.while_loop(stopping_criterion, lanczos_bidiag_step, [i, ls]) | |
return lanzcos_bidiag_state( | |
array_ops.matrix_transpose(ls.u.stack()), | |
array_ops.matrix_transpose(ls.v.stack()), | |
ls.alpha.stack(), ls.beta.stack()) | |
def hessian_vector_product(ys, xs, v): | |
length = len(xs) | |
if len(v) != length: | |
raise ValueError("xs and v must have the same length.") | |
grads = tf.gradients(ys, xs) | |
assert len(grads) == length | |
elemwise_products = [ | |
math_ops.multiply(grad_elem, array_ops.stop_gradient(v_elem)) | |
for grad_elem, v_elem in zip(grads, v) | |
if grad_elem is not None] | |
return tf.gradients(elemwise_products, xs) | |
def c(x): | |
return tf.constant(x, dtype=tf.float32) | |
def f(x): | |
return tf.pow(x[0], c(2)) + c(2) * x[0] * x[1] + c(3) * tf.pow(x[1], c(2)) + c(4) * x[0] + c(5) * x[1] + c(6) | |
tf.reset_default_graph() | |
x = tf.placeholder(shape=[2], dtype=tf.float32) | |
Hv = lambda v: hessian_vector_product(f(x), [x], [v])[0] | |
Hv_adj = gen_array_ops.conjugate_transpose(Hv, tf.int32) | |
v1 = tf.random_uniform([2]) | |
v1 = v1 / tf.norm(v1) | |
lbs = lanczos(operator=Hv, operator_adj=Hv_adj, shape=[2,2], k=2) | |
#T = tf.diag(alphas) + tf.pad(betas_, [[1, 0], [0, 1]]) + tf.pad(betas_, [[0, 1], [1, 0]]) | |
#e, v = tf.self_adjoint_eig(T) | |
with tf.Session().as_default() as sess: | |
print(sess.run(lbs, feed_dict={x: [1.0, 1.0]})) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment