""" | |
Tesing code for CUDA unified memory | |
Run this script with CUDA unified memory by | |
``` | |
python cuda_unified_test.py --image_size=224 --batch_size=256 --gpu_id=1 --cuda_memory=5 | |
``` | |
""" | |
import os
import time

import numpy as np
import tensorflow as tf
from tensorflow.python.client import device_lib
import tensorflow.contrib.slim as slim
import tensorflow.contrib.slim.nets as slimNet

tf.logging.set_verbosity(tf.logging.INFO)
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('f', '', 'kernel')  # dummy flag so the script also runs under Jupyter/IPython
tf.app.flags.DEFINE_string("gpu_id", "0", "index of the GPU to use")
tf.app.flags.DEFINE_string("model", "resnet50", "select from resnet50, googlenet")
tf.app.flags.DEFINE_integer("batch_size", 512, "batch size")
tf.app.flags.DEFINE_integer("image_size", 224, "image size")
tf.app.flags.DEFINE_float("cuda_memory", 1, "fraction of GPU memory to pre-allocate; values > 1 oversubscribe via CUDA unified memory")
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu_id
# synthetic data
x = np.random.randint(0, 1, size=(2048, FLAGS.image_size, FLAGS.image_size, 3))
x = x.astype("float32")
y = np.random.randint(0, 1000, size=2048)
y = tf.keras.utils.to_categorical(y, 1000)
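# Note: np.random.randint(0, 1) draws from [0, 1), so the images are all
# zeros; that is fine for a throughput benchmark, where only tensor shapes
# matter. Labels are uniform over 1000 classes and one-hot encoded.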
# define the input pipeline
dataset = tf.data.Dataset.from_tensor_slices((x, y))
dataset = dataset.batch(FLAGS.batch_size).filter(lambda features, labels: tf.equal(tf.shape(labels)[0], FLAGS.batch_size))
dataset = dataset.repeat(50)
iterator = dataset.make_one_shot_iterator()
inputs, labels = iterator.get_next()
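# The filter drops the final partial batch so every step sees exactly
# batch_size images, keeping the images/sec numbers comparable; repeat(50)
# provides more than enough batches for the 20 timed steps below.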
# build model
if FLAGS.model == "resnet50":
    with slim.arg_scope(slimNet.resnet_utils.resnet_arg_scope(batch_norm_decay=0.99)):
        _, layers_dict = slimNet.resnet_v1.resnet_v1_50(inputs, num_classes=1000, global_pool=True, is_training=True)
        logits = layers_dict['resnet_v1_50/logits']
        logits = tf.keras.layers.Flatten()(logits)
elif FLAGS.model == "googlenet":
    with slim.arg_scope(slimNet.inception.inception_v1_arg_scope()):
        _, layers_dict = slimNet.inception.inception_v1(inputs, num_classes=1000, is_training=True)
        logits = layers_dict['Logits']
else:
    raise ValueError("Unsupported model: %s (choose resnet50 or googlenet)" % FLAGS.model)
loss = tf.losses.softmax_cross_entropy(onehot_labels=labels,
                                       logits=logits)
# Create training op.
with tf.name_scope('optimizer'):
    optimizer = tf.train.AdagradOptimizer(learning_rate=0.01)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_step = optimizer.minimize(loss, global_step=tf.train.get_global_step())
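# Wrapping minimize() in a control dependency on UPDATE_OPS ensures the
# batch-norm moving averages are refreshed on every training step.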
# setup tf.ConfigProto()
config = tf.ConfigProto()
if FLAGS.cuda_memory > 1:
    config.gpu_options.per_process_gpu_memory_fraction = FLAGS.cuda_memory
    print("USING CUDA UNIFIED MEMORY")
else:
    print("USING GPU MEMORY ONLY")
# Start session and training
res = []
with tf.train.MonitoredTrainingSession(config=config) as sess:
    for b in range(20):
        t = time.time()
        sess.run(train_step)
        t1 = time.time()
        _loss = sess.run(loss)
        print("Num:", b, ", Loss: ", _loss, ", Elapsed time: ", t1 - t, "Images/sec: ", (FLAGS.batch_size / (t1 - t)))
        res.append(FLAGS.batch_size / (t1 - t))
# Report mean +- std of images/sec, skipping the first step (session warm-up).
print(np.mean(res[1:]), " +- ", np.std(res[1:]))