TensorFlow speed benchmark
# modified from slim
# Note: this is a fragment of slim's ops.py, so it assumes that file's
# surrounding context, e.g. (paths follow the local slim package used below):
#   import tensorflow as tf
#   from tensorflow.python.training import moving_averages
#   from slim import scopes, variables
#   from slim.ops import UPDATE_OPS_COLLECTION
@scopes.add_arg_scope
def batch_norm(inputs,
               decay=0.999,
               scale=False,
               epsilon=0.001,
               moving_vars='moving_vars',
               activation=None,
               is_training=True,
               trainable=True,
               restore=True,
               scope=None,
               data_format='NHWC',
               use_transpose=True):
  with tf.variable_op_scope([inputs], scope, 'BatchNorm'):
    # Optionally handle NCHW input by transposing to NHWC, normalizing there,
    # and transposing back at the end.
    transpose = False
    if use_transpose and data_format == 'NCHW':
      data_format = 'NHWC'
      transpose = True
      inputs = tf.transpose(inputs, [0, 2, 3, 1])
    inputs_shape = inputs.get_shape()
    if data_format == 'NHWC':
      axis = range(len(inputs_shape) - 1)
      params_shape = inputs_shape[-1:]
    elif data_format == 'NCHW':
      assert len(inputs_shape) == 4
      axis = [0, 2, 3]
      params_shape = (1, inputs_shape[1], 1, 1)
    with scopes.arg_scope([variables.variable], restore=restore):
      # Allocate parameters for the beta and gamma of the normalization.
      beta = variables.variable('beta',
                                params_shape,
                                initializer=tf.zeros_initializer,
                                trainable=trainable)
      if scale:
        gamma = variables.variable('gamma',
                                   params_shape,
                                   initializer=tf.ones_initializer,
                                   trainable=trainable)
      else:
        gamma = None
      # Create moving_mean and moving_variance and add them to the moving_vars
      # and GraphKeys.MOVING_AVERAGE_VARIABLES collections.
      with scopes.arg_scope([variables.variable], trainable=False,
                            collections=[
                                moving_vars,
                                tf.GraphKeys.MOVING_AVERAGE_VARIABLES]):
        moving_mean = variables.variable('moving_mean',
                                         params_shape,
                                         initializer=tf.zeros_initializer)
        moving_variance = variables.variable('moving_variance',
                                             params_shape,
                                             initializer=tf.ones_initializer)
    if is_training:
      # Calculate the moments based on the individual batch.
      if data_format == 'NCHW':
        mean, variance = tf.nn.moments(inputs, axis, keep_dims=True)
      elif data_format == 'NHWC':
        mean, variance = tf.nn.moments(inputs, axis)
      update_moving_mean = moving_averages.assign_moving_average(
          moving_mean, mean, decay)
      tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean)
      update_moving_variance = moving_averages.assign_moving_average(
          moving_variance, variance, decay)
      tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance)
    else:
      # Just use the moving_mean and moving_variance.
      mean = moving_mean
      variance = moving_variance
    outputs = tf.nn.batch_normalization(
        inputs, mean, variance, beta, gamma, epsilon)
    outputs.set_shape(inputs.get_shape())
    if activation:
      outputs = activation(outputs)
    if transpose:
      outputs = tf.transpose(outputs, [0, 3, 1, 2])
    return outputs
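The NCHW branch above can take two routes: normalize directly over axes [0, 2, 3] with (1, C, 1, 1)-shaped parameters, or (with use_transpose=True) transpose to NHWC, normalize over the trailing channel axis, and transpose back. A minimal NumPy sketch of why the two routes agree, independent of the slim/TensorFlow plumbing above (function names and the epsilon value are illustrative only):

import numpy as np

def batch_norm_nchw(x, eps=1e-3):
    # Normalize directly in NCHW: reduce over batch, height, width (axes 0, 2, 3),
    # keeping (1, C, 1, 1)-shaped statistics so they broadcast against x.
    mean = x.mean(axis=(0, 2, 3), keepdims=True)
    var = x.var(axis=(0, 2, 3), keepdims=True)
    return (x - mean) / np.sqrt(var + eps)

def batch_norm_via_transpose(x, eps=1e-3):
    # The use_transpose path: NCHW -> NHWC, normalize over axes (0, 1, 2) with
    # per-channel statistics, then transpose back to NCHW.
    x_nhwc = np.transpose(x, (0, 2, 3, 1))
    mean = x_nhwc.mean(axis=(0, 1, 2))
    var = x_nhwc.var(axis=(0, 1, 2))
    out = (x_nhwc - mean) / np.sqrt(var + eps)
    return np.transpose(out, (0, 3, 1, 2))

x = np.random.randn(8, 16, 32, 32).astype(np.float32)  # NCHW input
assert np.allclose(batch_norm_nchw(x), batch_norm_via_transpose(x), atol=1e-5)

The benchmark script follows; it presumably reaches this batch_norm through the batch_norm_params argument of slim.ops.conv2d.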
import tensorflow as tf
import numpy as np
import slim
from slim import scopes
import slim.ops
import copy
import time

FLAGS = tf.app.flags.FLAGS

batch_norm_params = {
    'decay': 0.9,
    'scale': True,
    'epsilon': 0.001,
}
@scopes.add_arg_scope
def residual(inp, num_filters_out, last_act=tf.nn.relu, is_training=True):
  with tf.variable_op_scope([inp], None, 'residual'):
    o = slim.ops.conv2d(inp, num_filters_out=num_filters_out, kernel_size=(3, 3),
                        batch_norm_params=batch_norm_params, is_training=is_training)
    o = slim.ops.conv2d(o, num_filters_out=num_filters_out, activation=None, kernel_size=(3, 3),
                        batch_norm_params=batch_norm_params, is_training=is_training)
    return last_act(inp + o)


@scopes.add_arg_scope
def down_residual(inp, num_filters_out, last_act=tf.nn.relu, is_training=True):
  with tf.variable_op_scope([inp], None, 'down_residual'):
    o = slim.ops.conv2d(inp, num_filters_out=num_filters_out, stride=2, kernel_size=(3, 3),
                        batch_norm_params=batch_norm_params, is_training=is_training)
    o = slim.ops.conv2d(o, num_filters_out=num_filters_out, activation=None, kernel_size=(3, 3),
                        batch_norm_params=batch_norm_params, is_training=is_training)
    # Strided 3x3 projection of the input so its shape matches the downsampled
    # branch for the residual sum.
    num_filters_in = inp.get_shape()[-1]
    weights_shape = [3, 3, num_filters_in, num_filters_out]
    weights = slim.variables.variable("weights", shape=weights_shape)
    proj = tf.nn.conv2d(inp, filter=weights, strides=(1, 2, 2, 1), padding="SAME")
    return last_act(o + proj)
def model(inp, num_labels=500, is_training=True):
  # Small residual network: a 32x32 input is downsampled three times (stride 2)
  # to 4x4 before the 4x4 average pool.
  o = slim.ops.conv2d(inp, num_filters_out=16, kernel_size=(3, 3), is_training=is_training,
                      batch_norm_params=batch_norm_params)
  o = residual(o, 16, is_training=is_training)
  o = down_residual(o, 32, is_training=is_training)
  o = residual(o, 32, is_training=is_training)
  o = down_residual(o, 64, is_training=is_training)
  o = residual(o, 64, is_training=is_training)
  o = down_residual(o, 128, is_training=is_training)
  o = residual(o, 128, is_training=is_training)
  avg = slim.ops.avg_pool(o, kernel_size=(4, 4), stride=1)
  flatten = slim.ops.flatten(avg)
  logits = slim.ops.fc(flatten, num_units_out=num_labels, activation=None,
                       is_training=is_training, batch_norm_params=batch_norm_params)
  return flatten, logits


def loss_func(logit, label):
  return tf.nn.sparse_softmax_cross_entropy_with_logits(logit, label)


def tower_loss(inp, labels, num_classes, scope, is_training=True):
  flatten, logits = model(inp, num_labels=num_classes, is_training=is_training)
  l = loss_func(logits, labels)
  return l, logits
def _average_gradients(tower_grads):
  # copied from the tensorflow examples
  """Calculate the average gradient for each shared variable across all towers.

  Note that this function provides a synchronization point across all towers.

  Args:
    tower_grads: List of lists of (gradient, variable) tuples. The outer list
      is over towers; the inner list is over the (gradient, variable) pairs
      for that tower's variables.
  Returns:
    List of pairs of (gradient, variable) where the gradient has been averaged
    across all towers.
  """
  average_grads = []
  for grad_and_vars in zip(*tower_grads):
    # Note that each grad_and_vars looks like the following:
    #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
    grads = []
    for g, _ in grad_and_vars:
      # Add a 0th dimension to the gradients to represent the tower.
      expanded_g = tf.expand_dims(g, 0)
      # Append on a 'tower' dimension which we will average over below.
      grads.append(expanded_g)
    # Average over the 'tower' dimension.
    grad = tf.concat(0, grads)
    grad = tf.reduce_mean(grad, 0)
    # Keep in mind that the Variables are redundant because they are shared
    # across towers. So we will just return the first tower's pointer to
    # the Variable.
    v = grad_and_vars[0][1]
    grad_and_var = (grad, v)
    average_grads.append(grad_and_var)
  return average_grads
def train():
  with tf.device("/gpu:0"):
    batch_size = 128
    num_gpu = 4
    num_classes = 500
    summaries = []
    with tf.Graph().as_default(), tf.device('/cpu:0'):
      means = tf.constant(np.array([123.68, 116.779, 103.939],
                                   dtype="float32").reshape((1, 1, 1, 3)))
      global_step = tf.get_variable('global_step', [], tf.int64,
                                    tf.constant_initializer(0), trainable=False)
      lr = tf.train.exponential_decay(0.002,
                                      global_step,
                                      int(1.2e6 / (batch_size * 4)),
                                      0.955,
                                      staircase=True)
      opt = tf.train.AdamOptimizer(lr)
      summaries.append(tf.scalar_summary("lr", lr))
      with tf.name_scope('model_towers') as scope, tf.device("/cpu:0"):
        #images, labels = data.get_inputs(batch_size*num_gpu)
        #images = tf.cast(images, tf.float32)
        # Fake data to just test speeds.
        with tf.device("/cpu:0"):
          images = tf.get_variable("images", [batch_size * num_gpu, 32, 32, 3],
                                   tf.float32, trainable=False)
          labels = tf.get_variable("labels", [batch_size * num_gpu, ], tf.int64,
                                   tf.constant_initializer(0), trainable=False)
        tower_grads = []
        infos = []
        for i in range(num_gpu):
          b_imgs = images[i * batch_size:(i + 1) * batch_size, :, :, :]
          b_labels = labels[i * batch_size:(i + 1) * batch_size]
          b_imgs -= means
          with tf.device("/gpu:%i" % i):
            with tf.name_scope("Tower_%d" % i):
              loss, logit = tower_loss(b_imgs, b_labels, num_classes, scope)
              top5 = tf.nn.in_top_k(predictions=logit, targets=b_labels, k=5)
              top5 = tf.cast(top5, tf.float32)
              infos.append((tf.reduce_mean(loss), tf.reduce_mean(top5)))
              grads = opt.compute_gradients(loss)
              tower_grads.append(grads)
              # Reuse variables for the next tower.
              tf.get_variable_scope().reuse_variables()
        grads = _average_gradients(tower_grads)
        for grad, var in grads:
          if grad is not None:
            summaries.append(
                tf.histogram_summary(var.op.name + '/gradients', grad))
        with tf.device("/gpu:0"):
          train_op = opt.apply_gradients(grads, global_step=global_step)
        update_ops = tf.get_collection(slim.ops.UPDATE_OPS_COLLECTION)
        input_summaries = copy.copy(tf.get_collection(tf.GraphKeys.SUMMARIES))
        summaries.extend(input_summaries)
        summaries = list(set(summaries))
        init = tf.initialize_all_variables()
        sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,
        ))
        sess.run(init)
        tf.train.start_queue_runners(sess=sess)
        avgs = []
        rate = 100
        while True:
          tstart = time.time()
          _ = sess.run([train_op] + update_ops)[1:3]
          avgs.append(time.time() - tstart)
          avgs = avgs[-rate:]
          print "Examples per second", float(1.0 / np.mean(avgs) * batch_size * num_gpu)


def main(argv=None):
  train()


if __name__ == "__main__":
  tf.app.run()
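For reference, the averaging performed by _average_gradients is just an element-wise mean per variable across towers. A small NumPy sketch with made-up gradient values (two towers, two variables; values are purely illustrative):

import numpy as np

# Hypothetical per-tower (gradient, variable) lists with the same nesting as
# the tower_grads built in train(): outer list over towers, inner list over
# the (gradient, variable) pairs returned by opt.compute_gradients().
tower_grads = [
    [(np.array([1.0, 2.0]), 'w'), (np.array([[0.5]]), 'b')],  # tower 0
    [(np.array([3.0, 4.0]), 'w'), (np.array([[1.5]]), 'b')],  # tower 1
]

average_grads = []
for grad_and_vars in zip(*tower_grads):
    # Stack the per-tower gradients along a new leading 'tower' axis and
    # average over it; tf.expand_dims + tf.concat + tf.reduce_mean above do
    # the same thing on tensors.
    stacked = np.stack([g for g, _ in grad_and_vars], axis=0)
    average_grads.append((stacked.mean(axis=0), grad_and_vars[0][1]))

print(average_grads)  # 'w' grad -> [2., 3.], 'b' grad -> [[1.]]

The reported metric in the training loop is then examples per second: batch_size * num_gpu divided by the mean wall-clock step time over the last rate steps.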