test code for IBM LMS
""" | |
Tesing code for IBM LMS / CUDA Unified Memory | |
Run this script with CUDA Unified Memory by | |
``` | |
python LMS_UM_test.py --image_size=224 --batch_size=256 --gpu_id=1 --cuda_memory=5 | |
``` | |
Run this script with IBM Large Model Support | |
``` | |
python LMS_UM_test.py --image_size=224 --batch_size=256 --gpu_id=1 --use_lms=True | |
``` | |
""" | |
import numpy as np
import time
import os
import tensorflow as tf
import tensorflow.contrib.slim as slim
import tensorflow.contrib.slim.nets as slimNet

tf.logging.set_verbosity(tf.logging.INFO)

FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('f', '', 'kernel')  # absorbs the kernel flag when run inside a notebook
tf.app.flags.DEFINE_string("gpu_id", "0", "index of the GPU to use")
tf.app.flags.DEFINE_string("model", "resnet50", "select from resnet50, googlenet")
tf.app.flags.DEFINE_integer("batch_size", 512, "batch size")
tf.app.flags.DEFINE_integer("image_size", 224, "image size")
tf.app.flags.DEFINE_float("cuda_memory", 1, "GPU memory fraction to pre-allocate; a value > 1 enables CUDA unified memory")
tf.app.flags.DEFINE_bool("use_lms", False, "whether to use IBM Large Model Support")

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu_id
# Generate synthetic data. Note: np.random.randint's upper bound is
# exclusive, so randint(0, 1) yields all-zero images; the data content
# does not affect the throughput measurement.
x = np.random.randint(0, 1, size=(FLAGS.batch_size, FLAGS.image_size, FLAGS.image_size, 3))
x = x.astype("float32")
y = np.random.randint(0, 1000, size=FLAGS.batch_size)
y = tf.keras.utils.to_categorical(y, 1000)

# Define the tf.data.Dataset, fed through placeholders.
features_placeholder = tf.placeholder(x.dtype, x.shape)
labels_placeholder = tf.placeholder(y.dtype, y.shape)
dataset = tf.data.Dataset.from_tensor_slices((features_placeholder, labels_placeholder))
# Drop any remainder batch so every step sees a full batch.
dataset = dataset.batch(FLAGS.batch_size).filter(lambda features, labels: tf.equal(tf.shape(labels)[0], FLAGS.batch_size))
dataset = dataset.repeat(500)
iterator = dataset.make_initializable_iterator()
inputs, labels = iterator.get_next()
# Build the model.
if FLAGS.model == "resnet50":
    with slim.arg_scope(slimNet.resnet_utils.resnet_arg_scope(batch_norm_decay=0.99)):
        _, layers_dict = slimNet.resnet_v1.resnet_v1_50(inputs, num_classes=1000, global_pool=True, is_training=True)
        logits = layers_dict['resnet_v1_50/logits']
        logits = tf.keras.layers.Flatten()(logits)
elif FLAGS.model == "googlenet":
    with slim.arg_scope(slimNet.inception.inception_v1_arg_scope()):
        _, layers_dict = slimNet.inception.inception_v1(inputs, spatial_squeeze=False, num_classes=1000, is_training=True)
        fmap = layers_dict['Logits']
        output = tf.keras.layers.GlobalAveragePooling2D()(fmap)
        logits = tf.keras.layers.Dense(1000)(output)
else:
    raise ValueError("Unsupported model: %s" % FLAGS.model)
loss = tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits)

# Create the training op. Note: the scope is named 'adam_optimizer' (the LMS
# setup below is keyed on this scope name) although the optimizer is Adagrad.
with tf.name_scope('adam_optimizer'):
    optimizer = tf.train.AdagradOptimizer(learning_rate=0.01)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_step = optimizer.minimize(loss, global_step=tf.train.get_global_step())
# import LMS and use
if FLAGS.use_lms:
    print("USING IBM LARGE MODEL SUPPORT")
    from tensorflow.contrib.lms import LMS
    lms_obj = LMS({'adam_optimizer'})
    lms_obj.run(graph=tf.get_default_graph())
# Set up tf.ConfigProto for CUDA Unified Memory: a
# per_process_gpu_memory_fraction greater than 1 turns on unified memory,
# allowing the allocator to oversubscribe GPU memory.
config = tf.ConfigProto()
if FLAGS.cuda_memory > 1:
    config.gpu_options.per_process_gpu_memory_fraction = FLAGS.cuda_memory
    print("USING CUDA UNIFIED MEMORY")
res = []

# Start the session and run training.
with tf.train.MonitoredTrainingSession(config=config) as sess:
    sess.run(iterator.initializer, feed_dict={features_placeholder: x,
                                              labels_placeholder: y})
    print("RUNNING WARMUP")
    for w in range(5):
        sess.run(train_step)
    print("WARMUP DONE")
    for b in range(1, 61):
        t = time.time()
        sess.run(train_step)
        t1 = time.time()
        _loss = sess.run(loss)
        if b % 10 == 0:
            print("Num:", b, ", Loss: ", _loss, ", Elapsed time: ", t1 - t, "Images/sec: ", (FLAGS.batch_size / (t1 - t)))
        res.append(FLAGS.batch_size / (t1 - t))
    print(np.mean(res), " +- ", np.std(res))
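
For a quick side-by-side comparison of the two modes, here is a minimal harness sketch (not part of the original script): it assumes the file is saved as `LMS_UM_test.py`, as in the docstring, runs one pass per mode with the flags shown there, and echoes each run's final `mean +- std` throughput line.

```
# Comparison harness sketch: runs LMS_UM_test.py once with CUDA Unified
# Memory and once with IBM LMS, then prints each run's final throughput
# line. Filename and flags are taken from the script's docstring.
import subprocess

COMMON = ["python", "LMS_UM_test.py",
          "--image_size=224", "--batch_size=256", "--gpu_id=1"]

MODES = [("CUDA Unified Memory", ["--cuda_memory=5"]),
         ("IBM LMS", ["--use_lms=True"])]

for label, extra in MODES:
    proc = subprocess.run(COMMON + extra, capture_output=True, text=True)
    lines = proc.stdout.strip().splitlines()
    # The script's last stdout line is "<mean>  +-  <std>" in images/sec.
    print(label, "->", lines[-1] if lines else "<no output>")
```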