Created April 2, 2018 11:53
Test mixed precision training on Volta / CuDNN autotune
import tensorflow as tf
import numpy as np
import math
import os
def float32_variable_storage_getter(getter, name, shape=None, dtype=None,
                                    initializer=None, regularizer=None,
                                    trainable=True,
                                    *args, **kwargs):
    """Custom variable getter that forces trainable variables to be stored in
    float32 precision and then casts them to the training precision.
    """
    storage_dtype = tf.float32 if trainable else dtype
    variable = getter(name, shape, dtype=storage_dtype,
                      initializer=initializer, regularizer=regularizer,
                      trainable=trainable,
                      *args, **kwargs)
    if trainable and dtype != tf.float32:
        variable = tf.cast(variable, dtype)
    return variable
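# In __main__ below this getter is wired in through
# tf.variable_scope('fp32_storage', custom_getter=float32_variable_storage_getter),
# so variables requested inside that scope keep a float32 master copy and are
# handed back to the graph cast to the requested compute dtype (e.g. float16).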
def gradients_with_loss_scaling(loss, variables, loss_scale):
    """Gradient calculation with loss scaling to improve numerical stability
    when training with float16.
    """
    return [grad / loss_scale
            for grad in tf.gradients(loss * loss_scale, variables)]
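# Multiplying the loss by loss_scale multiplies every gradient by the same
# constant, which keeps small float16 gradients above the underflow threshold
# during backprop; dividing by loss_scale afterwards restores the original
# magnitudes before the optimizer applies them.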
def create_simple_model(nbatch, dtype):
    image_size = 224
    images = tf.Variable(tf.random_normal([nbatch,
                                           image_size,
                                           image_size, 3],
                                          dtype=dtype,
                                          stddev=1e-1))
    # conv1
    with tf.name_scope('conv1') as scope:
        kernel = tf.Variable(tf.truncated_normal([11, 11, 3, 64], dtype=dtype,
                                                 stddev=1e-1), name='weights')
        conv = tf.nn.conv2d(images, kernel, [1, 4, 4, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[64], dtype=dtype),
                             trainable=True, name='biases')
        bias = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(bias, name=scope)
    # pool1
    pool1 = tf.nn.max_pool(conv1,
                           ksize=[1, 3, 3, 1],
                           strides=[1, 2, 2, 1],
                           padding='VALID',
                           name='pool1')
    # conv2
    with tf.name_scope('conv2') as scope:
        kernel = tf.Variable(tf.truncated_normal([5, 5, 64, 192], dtype=dtype,
                                                 stddev=1e-1), name='weights')
        conv = tf.nn.conv2d(pool1, kernel, [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[192], dtype=dtype),
                             trainable=True, name='biases')
        bias = tf.nn.bias_add(conv, biases)
        conv2 = tf.nn.relu(bias, name=scope)
    # pool2
    pool2 = tf.nn.max_pool(conv2,
                           ksize=[1, 3, 3, 1],
                           strides=[1, 2, 2, 1],
                           padding='VALID',
                           name='pool2')
    # conv3
    with tf.name_scope('conv3') as scope:
        kernel = tf.Variable(tf.truncated_normal([3, 3, 192, 384],
                                                 dtype=dtype,
                                                 stddev=1e-1), name='weights')
        conv = tf.nn.conv2d(pool2, kernel, [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[384], dtype=dtype),
                             trainable=True, name='biases')
        bias = tf.nn.bias_add(conv, biases)
        conv3 = tf.nn.relu(bias, name=scope)
    # conv4
    with tf.name_scope('conv4') as scope:
        kernel = tf.Variable(tf.truncated_normal([3, 3, 384, 256],
                                                 dtype=dtype,
                                                 stddev=1e-1), name='weights')
        conv = tf.nn.conv2d(conv3, kernel, [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=dtype),
                             trainable=True, name='biases')
        bias = tf.nn.bias_add(conv, biases)
        conv4 = tf.nn.relu(bias, name=scope)
    # conv5
    with tf.name_scope('conv5') as scope:
        kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 256],
                                                 dtype=dtype,
                                                 stddev=1e-1), name='weights')
        conv = tf.nn.conv2d(conv4, kernel, [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=dtype),
                             trainable=True, name='biases')
        bias = tf.nn.bias_add(conv, biases)
        conv5 = tf.nn.relu(bias, name=scope)
    # pool5
    pool5 = tf.nn.max_pool(conv5,
                           ksize=[1, 3, 3, 1],
                           strides=[1, 2, 2, 1],
                           padding='VALID',
                           name='pool5')
    loss = tf.nn.l2_loss(pool5)
    return loss
if __name__ == '__main__':
    os.environ['TF_CUDNN_USE_AUTOTUNE'] = "0"  # disable cuDNN autotuning
    nbatch = 32
    nin = 100
    nout = 10
    learning_rate = 0.1
    momentum = 0.9
    loss_scale = 128
    dtype = tf.float16  # or tf.float32 for the full-precision baseline
    tf.set_random_seed(1234)
    np.random.seed(4321)
    # Create training graph
    with tf.device('/gpu:0'), tf.variable_scope('fp32_storage', custom_getter=float32_variable_storage_getter):
        loss = create_simple_model(nbatch, dtype)
        variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        grad = gradients_with_loss_scaling(loss, variables, loss_scale)
        optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)
        training_step_op = optimizer.apply_gradients(zip(grad, variables))
    # Run training
    import time
    # Build an initialization operation.
    init = tf.global_variables_initializer()
    # Start running operations on the Graph.
    config = tf.ConfigProto()
    config.gpu_options.allocator_type = 'BFC'
    sess = tf.Session(config=config)
    sess.run(init)
    total_duration = 0.0
    total_duration_squared = 0.0
    nminibatches = 5000
    for step in range(nminibatches):
        start_time = time.time()
        _ = sess.run(training_step_op)
        duration = time.time() - start_time
        total_duration += duration
        total_duration_squared += duration * duration
    mn = total_duration / float(nminibatches)
    vr = total_duration_squared / float(nminibatches) - mn * mn
    sd = math.sqrt(vr)
    print('Mean batch processing time {} +- {}'.format(mn, sd))
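Note that a variable-scope custom getter only intercepts variables created through tf.get_variable; the tf.Variable calls inside create_simple_model bypass the fp32_storage getter entirely. Below is a minimal sketch of a layer that would actually exercise the getter, assuming it is built inside the fp32_storage scope (the function and variable names are illustrative, not part of the gist):

def conv_relu(x, nfilters, dtype):
    # tf.get_variable routes through the scope's custom_getter, so the weights
    # are stored as float32 master copies and returned cast to `dtype`.
    nchannels = x.get_shape().as_list()[-1]
    kernel = tf.get_variable('weights', shape=[3, 3, nchannels, nfilters],
                             dtype=dtype,
                             initializer=tf.truncated_normal_initializer(stddev=1e-1))
    biases = tf.get_variable('biases', shape=[nfilters], dtype=dtype,
                             initializer=tf.zeros_initializer())
    conv = tf.nn.conv2d(x, kernel, [1, 1, 1, 1], padding='SAME')
    return tf.nn.relu(tf.nn.bias_add(conv, biases))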
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I always get the error below when running with training_step_op = optimizer.apply_gradients(zip(grad, variables)):

Tensor objects are only iterable when eager execution is enabled. To iterate over this tensor use tf.map_fn.

Does anybody have a clue? Thanks!
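That message is what TF 1.x graph mode raises when ordinary Python tries to iterate over a single Tensor, for example if zip() is handed a Tensor rather than a list. A quick, purely illustrative sanity check before apply_gradients:

# Debugging sketch: both arguments to zip() should be Python lists here.
print(type(grad), type(variables))
print(len(grad), len(variables))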