Created
March 11, 2017 04:55
-
-
Save siemanko/551b502e1cf36a09c9c05385ccea5eb5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import numpy as np | |
import tensorflow as tf | |
from tensorflow.python.framework.errors import FailedPreconditionError | |
"""Code for data dependent initialization in Weight Normalization paper: | |
https://arxiv.org/abs/1602.07868 | |
""" | |
# Command-line interface: choose which initialization strategy to demonstrate.
parser = argparse.ArgumentParser()
parser.add_argument('mode', type=str, choices=['standard', 'onebyone'])
args = parser.parse_args()
def initialize_interdependent_variables(session, vars_list, feed_dict):
    """Initialize `vars_list` via repeated sweeps until every variable succeeds.

    Useful when some variables' initializers depend on the values of other
    variables (e.g. data-dependent initialization): each sweep initializes
    every variable whose dependencies are already satisfied, and retries the
    rest on the next sweep.
    """
    remaining = list(vars_list)
    while remaining:
        deferred = []
        for var in remaining:
            try:
                # Succeeds only once everything this initializer reads has
                # itself been initialized; otherwise TF raises and we retry.
                session.run(tf.variables_initializer([var]), feed_dict)
            except FailedPreconditionError:
                deferred.append(var)
        if len(deferred) >= len(remaining):
            # No progress this sweep: either the variables depend on each
            # other cyclically, or a variable outside vars_list still needs
            # to be initialized first.
            raise Exception("Cycle in variable dependencies, or external precondition unsatisfied.")
        remaining = deferred
def fully_connected(x,
                    num_outputs,
                    activation_fn=None,
                    init_scale=1.,
                    scope=None,
                    reuse=None):
    """Weight-normalized fully connected layer with data-dependent init.

    Implements the parameterization of Salimans & Kingma (2016),
    https://arxiv.org/abs/1602.07868: the effective weight is
    g * V / ||V||_2 (column-wise), with g and b initialized from statistics
    of the first batch fed through the layer.
    """
    with tf.variable_scope(scope, default_name="fully_connected", reuse=reuse):
        num_inputs = int(x.get_shape()[1])
        # Unnormalized direction matrix.
        V = tf.get_variable(
            'V',
            [num_inputs, num_outputs],
            tf.float32,
            tf.random_normal_initializer(0, 0.05), trainable=True
        )
        # Data-dependent initializers for g and b: scale so each output unit
        # has roughly unit variance and zero mean on the initialization batch.
        unit_directions = tf.nn.l2_normalize(V, [0])
        initial_out = tf.matmul(x, unit_directions)
        batch_mean, batch_var = tf.nn.moments(initial_out, [0])
        scale_init = init_scale / tf.sqrt(batch_var + 1e-10)
        g = tf.get_variable('g', dtype=tf.float32, initializer=scale_init, trainable=True)
        b = tf.get_variable('b', dtype=tf.float32, initializer=-batch_mean * scale_init, trainable=True)
        # Forward pass with weight normalization applied explicitly
        # (Salimans & Kingma, 2016).
        out = tf.matmul(x, V)
        scaler = g / tf.sqrt(tf.reduce_sum(tf.square(V), [0]))
        out = tf.reshape(scaler, [1, num_outputs]) * out + tf.reshape(b, [1, num_outputs])
        # Optional nonlinearity.
        if activation_fn is not None:
            out = activation_fn(out)
        return out
# Build a two-layer MLP on flattened 28x28 (MNIST-sized) inputs.
x = tf.placeholder(tf.float32, [None, 28 * 28])
h1 = fully_connected(x, num_outputs=200)
# BUG FIX: the output layer must consume the hidden layer's activations;
# previously it was fed the raw input `x` and `h1` was never used.
y = fully_connected(h1, num_outputs=10)
session = tf.Session()
if args.mode == 'standard':
    # OPTION 1: Use tensorflow init (throws FailedPreconditionError,
    # because the g/b initializers read V before it is initialized)
    session.run(tf.global_variables_initializer(), {
        x: np.ones((128, 28 * 28))
    })
elif args.mode == 'onebyone':
    # OPTION 2: Initialize variables one by one
    initialize_interdependent_variables(session, tf.global_variables(), {
        x: np.ones((128, 28 * 28))
    })
# Ideal solution: tf.variables_initializer topologically
# sorts variables w.r.t dependency graph,
# before initializing.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment