test code for IBM LMS
""" | |
Tesing code for IBM LMS / CUDA Unified Memory | |
Run this script with CUDA Unified Memory by | |
``` | |
python LMS_UM_test.py --image_size=224 --batch_size=256 --gpu_id=1 --cuda_memory=5 | |
``` | |
Run this script with IBM Large Model Support | |
``` | |
python LMS_UM_test.py --image_size=224 --batch_size=256 --gpu_id=1 --use_lms=True | |
``` | |
""" | |
import numpy as np
import time
import os
import tensorflow as tf
import tensorflow.contrib.slim as slim
import tensorflow.contrib.slim.nets as slimNet

tf.logging.set_verbosity(tf.logging.INFO)

FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('f', '', 'kernel')  # absorbs the kernel flag when run inside a notebook
tf.app.flags.DEFINE_string("gpu_id", "0", "index of the GPU to use")
tf.app.flags.DEFINE_string("model", "resnet50", "select from resnet50, googlenet")
tf.app.flags.DEFINE_integer("batch_size", 512, "batch size")
tf.app.flags.DEFINE_integer("image_size", 224, "image size")
tf.app.flags.DEFINE_float("cuda_memory", 1, "GPU memory fraction to pre-allocate; a value > 1 enables CUDA unified memory")
tf.app.flags.DEFINE_bool("use_lms", False, "whether to use IBM Large Model Support")

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu_id
# Generate synthetic data. Note: np.random.randint's upper bound is
# exclusive, so randint(0, 1) yields all-zero images; the data content
# does not affect the throughput measurement.
x = np.random.randint(0, 1, size=(FLAGS.batch_size, FLAGS.image_size, FLAGS.image_size, 3))
x = x.astype("float32")
y = np.random.randint(0, 1000, size=FLAGS.batch_size)
y = tf.keras.utils.to_categorical(y, 1000)

# Define the tf.data.Dataset, fed through placeholders.
features_placeholder = tf.placeholder(x.dtype, x.shape)
labels_placeholder = tf.placeholder(y.dtype, y.shape)
dataset = tf.data.Dataset.from_tensor_slices((features_placeholder, labels_placeholder))
# Drop any remainder batch so every step sees a full batch.
dataset = dataset.batch(FLAGS.batch_size).filter(lambda features, labels: tf.equal(tf.shape(labels)[0], FLAGS.batch_size))
dataset = dataset.repeat(500)
iterator = dataset.make_initializable_iterator()
inputs, labels = iterator.get_next()
# Build the model.
if FLAGS.model == "resnet50":
    with slim.arg_scope(slimNet.resnet_utils.resnet_arg_scope(batch_norm_decay=0.99)):
        _, layers_dict = slimNet.resnet_v1.resnet_v1_50(inputs, num_classes=1000, global_pool=True, is_training=True)
        logits = layers_dict['resnet_v1_50/logits']
        logits = tf.keras.layers.Flatten()(logits)
elif FLAGS.model == "googlenet":
    with slim.arg_scope(slimNet.inception.inception_v1_arg_scope()):
        _, layers_dict = slimNet.inception.inception_v1(inputs, spatial_squeeze=False, num_classes=1000, is_training=True)
        fmap = layers_dict['Logits']
        output = tf.keras.layers.GlobalAveragePooling2D()(fmap)
        logits = tf.keras.layers.Dense(1000)(output)
else:
    raise ValueError("Unsupported model: %s" % FLAGS.model)
loss = tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits)

# Create the training op. Note: the scope is named 'adam_optimizer' (the LMS
# setup below is keyed on this scope name) although the optimizer is Adagrad.
with tf.name_scope('adam_optimizer'):
    optimizer = tf.train.AdagradOptimizer(learning_rate=0.01)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_step = optimizer.minimize(loss, global_step=tf.train.get_global_step())
# import LMS and use
if FLAGS.use_lms:
    print("USING IBM LARGE MODEL SUPPORT")
    from tensorflow.contrib.lms import LMS
    lms_obj = LMS({'adam_optimizer'})
    lms_obj.run(graph=tf.get_default_graph())
# Set up tf.ConfigProto for CUDA Unified Memory: a
# per_process_gpu_memory_fraction greater than 1 turns on unified memory,
# allowing the allocator to oversubscribe GPU memory.
config = tf.ConfigProto()
if FLAGS.cuda_memory > 1:
    config.gpu_options.per_process_gpu_memory_fraction = FLAGS.cuda_memory
    print("USING CUDA UNIFIED MEMORY")
res = []

# Start the session and run training.
with tf.train.MonitoredTrainingSession(config=config) as sess:
    sess.run(iterator.initializer, feed_dict={features_placeholder: x,
                                              labels_placeholder: y})
    print("RUNNING WARMUP")
    for w in range(5):
        sess.run(train_step)
    print("WARMUP DONE")
    for b in range(1, 61):
        t = time.time()
        sess.run(train_step)
        t1 = time.time()
        _loss = sess.run(loss)
        if b % 10 == 0:
            print("Num:", b, ", Loss: ", _loss, ", Elapsed time: ", t1 - t, "Images/sec: ", (FLAGS.batch_size / (t1 - t)))
        res.append(FLAGS.batch_size / (t1 - t))
    print(np.mean(res), " +- ", np.std(res))
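
For a quick side-by-side comparison of the two modes, here is a minimal harness sketch (not part of the original script): it assumes the file is saved as `LMS_UM_test.py`, as in the docstring, runs one pass per mode with the flags shown there, and echoes each run's final `mean +- std` throughput line.

```
# Comparison harness sketch: runs LMS_UM_test.py once with CUDA Unified
# Memory and once with IBM LMS, then prints each run's final throughput
# line. Filename and flags are taken from the script's docstring.
import subprocess

COMMON = ["python", "LMS_UM_test.py",
          "--image_size=224", "--batch_size=256", "--gpu_id=1"]

MODES = [("CUDA Unified Memory", ["--cuda_memory=5"]),
         ("IBM LMS", ["--use_lms=True"])]

for label, extra in MODES:
    proc = subprocess.run(COMMON + extra, capture_output=True, text=True)
    lines = proc.stdout.strip().splitlines()
    # The script's last stdout line is "<mean>  +-  <std>" in images/sec.
    print(label, "->", lines[-1] if lines else "<no output>")
```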