Optimizing f(x)=3*x using a single parameter (the model could therefore be written as m(a)=a*x). Testing the custom gradients functionality in TensorFlow.
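
For a single sample the loss is L(a) = (a*x - y)^2, so dL/da = 2*(a*x - y)*x by the chain rule. The custom gradient only has to supply the inner dm/da = x factor, because TensorFlow propagates the outer 2*(a*x - y) term through tf.square and tf.reduce_mean on its own. Below is a minimal NumPy sketch of that identity, separate from the main script; the function names used here are illustrative only.

import numpy as np

def analytic_grad(a, x, y):
    # full derivative dL/da for L(a) = (a*x - y)^2
    return 2.0 * (a * x - y) * x

def chain_rule_grad(a, x, y):
    # upstream gradient dL/dm for m = a*x, as TensorFlow would pass it in
    upstream = 2.0 * (a * x - y)
    # the custom gradient contributes only dm/da = x (cf. _my_linear_grad below)
    return upstream * x

a, x = 1.0, 0.5
y = 3.0 * x
assert np.isclose(analytic_grad(a, x, y), chain_rule_grad(a, x, y))
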
import tensorflow as tf
from tensorflow.python.framework import ops
import numpy as np
import time

ZERO_TOL = 1e-8
LOSS_TOL = 1e-3
SAMPLES = 100
EPOCHS = 100000

train_input = np.random.rand(SAMPLES)
train_label = 3 * train_input

class MyException(Exception):
    pass


def _my_linear_grad(op, grad):
    # op.inputs are [a, x]; the gradient w.r.t. a is grad * x
    # the gradient w.r.t. x is not needed here, so a zeroed value is returned with no side effects
    return grad * op.inputs[1], grad * 0.


def my_linear(a, x):
    return (a * x).astype(np.float32)

learning_rate = 1e-3
beta1 = 0.9999

x = tf.placeholder(dtype=tf.float32, shape=(), name='x')
y = tf.placeholder(dtype=tf.float32, shape=(), name='y')

a = tf.get_variable('a', dtype=tf.float32, initializer=1.)
tf_a = tf.get_variable('tf_a', dtype=tf.float32, initializer=1.)

with ops.op_scope([a, x], name="MyLinear") as name:
    # the custom gradient op name must not conflict with any other TF op name
    unique_name = 'PyFuncGrad@Unique'
    # tf.RegisterGradient registers _my_linear_grad as the backward pass for gradient ops named unique_name
    tf.RegisterGradient(unique_name)(_my_linear_grad)
    g = tf.get_default_graph()
    # context manager used to override gradients for nodes created in its block
    with g.gradient_override_map({"PyFunc": unique_name}):
        # my_linear is used for the forward pass - my_linear and _my_linear_grad are wrapped inside a single TF node
        p = tf.py_func(my_linear, [a, x], [tf.float32], stateful=True, name=name)

tf_p = tf_a * x

loss = tf.reduce_mean(tf.square(p - y))
tf_loss = tf.reduce_mean(tf.square(tf_p - y))

train_vars = [var for var in tf.trainable_variables()]

optim = tf.train.AdamOptimizer(learning_rate, beta1)

# compute_gradients returns a list, so the gradients for tf_loss can simply be concatenated onto it
grads_and_vars = optim.compute_gradients(loss, var_list=train_vars)
grads_and_vars += optim.compute_gradients(tf_loss, var_list=train_vars)
train_op = optim.apply_gradients(grads_and_vars)

tf.summary.scalar('loss', loss)

with tf.Session() as sess:
    train_writer = tf.summary.FileWriter('board', sess.graph)
    merge = tf.summary.merge_all()

    sess.run(tf.global_variables_initializer())

    try:
        for epoch in range(EPOCHS):
            overall_loss = 0.
            # update using each sample separately
            for i in range(SAMPLES):
                result = sess.run([loss, tf_loss, a, tf_a, merge, train_op], feed_dict={
                    x: train_input[i],
                    y: train_label[i]
                })

                if np.abs(result[0] - result[1]) > ZERO_TOL:
                    print('Invalid update!\nExpected: {}, Actual: {}'.format(result[1], result[0]))
                    raise MyException

                print('epoch: {}, iter: {}, loss: {}\na: {}\n'.format(epoch, i, result[0], result[2]))
                overall_loss += result[0]

            overall_loss /= float(SAMPLES)
            print('overall_loss: {}'.format(overall_loss))
            #time.sleep(2.0)

            # [NOTE] this check is slightly delayed, since it has to wait for the epoch to finish
            if overall_loss < LOSS_TOL:
                print('Found parameter!\n---------------\n')
                break
    except MyException:
        pass
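
One way to sanity-check the override without training is to compare the symbolic gradients directly. The snippet below is a sketch, not part of the original gist: it assumes the graph built above (loss, tf_loss, a, tf_a, and the placeholders) is available in the same process, and the names grad_custom, grad_native, and check_sess are introduced here purely for illustration.

# compare the overridden py_func gradient with TF's native autodiff gradient for tf_a * x
grad_custom = tf.gradients(loss, a)[0]        # flows through _my_linear_grad
grad_native = tf.gradients(tf_loss, tf_a)[0]  # TF's built-in gradient for a*x

with tf.Session() as check_sess:
    check_sess.run(tf.global_variables_initializer())
    g_c, g_n = check_sess.run([grad_custom, grad_native],
                              feed_dict={x: train_input[0], y: train_label[0]})
    print('custom gradient: {}, native gradient: {}'.format(g_c, g_n))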