Created
August 23, 2018 20:18
-
-
Save william-r-s/2616917e49cecb3ec876844ca3548e74 to your computer and use it in GitHub Desktop.
GPU Check - verify that your job has access to enough GPUs with free memory
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# GPU check: verify that this job has access to enough GPUs with free memory.
#
# Usage: python gpu_check.py [n]
#
# `n` (default 1) is the number of GPUs the job needs.  The script:
#   1. prints the hostname and CUDA_VISIBLE_DEVICES,
#   2. asks nvidia-smi for per-GPU used memory and exits with status 1 when
#      fewer than `n` GPUs are reported or one of the first `n` is busy,
#   3. dumps `nvidia-smi` and `top -b -n 1` output for diagnostics,
#   4. lists the GPUs TensorFlow sees and runs a small matmul on each of the
#      first `n` GPUs.
import argparse
import os
import subprocess
import sys

# A GPU whose nvidia-smi `memory.used` value (reported in MiB when queried
# with `nounits`) exceeds this limit is treated as already in use.
MAX_USED_MEMORY_MIB = 1000


def parse_args(argv=None):
    """Parse the command line.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        Namespace with a single integer attribute ``n`` (default 1), the
        number of GPUs that must be available.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('n', type=int, nargs='?', default=1)
    return parser.parse_args(argv)


def run_captured(command):
    """Run *command* and return its ``CompletedProcess`` with captured output.

    Args:
        command: Program and arguments as a list (no shell is involved).

    Returns:
        The ``subprocess.CompletedProcess``; ``stdout``/``stderr`` are bytes.

    Exits with status 1, after printing whatever the command produced, when
    the program is missing or returns a non-zero status.  Without this the
    captured stderr would be lost inside the ``CalledProcessError``.
    """
    try:
        return subprocess.run(
            command,
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
    except FileNotFoundError:
        print("Command not found: {}".format(command[0]))
        sys.exit(1)
    except subprocess.CalledProcessError as err:
        print("{} exited with status {}".format(command[0], err.returncode))
        print((err.stdout or b"").decode(errors="replace"))
        print((err.stderr or b"").decode(errors="replace"))
        sys.exit(1)


def parse_memory_values(output):
    """Split nvidia-smi's one-value-per-line output into a list of strings.

    Splitting on lines (not on arbitrary whitespace) keeps a value such as
    ``[Not Supported]`` in one piece so GPU indices stay aligned.
    """
    stripped = (line.strip() for line in output.splitlines())
    return [value for value in stripped if value]


def find_memory_problem(mem_values, n, limit=MAX_USED_MEMORY_MIB):
    """Check that the first *n* GPUs exist and are not using too much memory.

    Args:
        mem_values: Used-memory readings, one string per GPU, in GPU order.
        n: Number of GPUs required.
        limit: Highest used-memory reading still considered free.

    Returns:
        ``None`` when everything is fine, otherwise a human-readable
        description of the first problem found.
    """
    if len(mem_values) < n:
        return "Found {} GPU(s) but {} required".format(len(mem_values), n)
    # zip with range(n) inspects only the first n GPUs (none when n <= 0).
    for i, value in zip(range(n), mem_values):
        try:
            used = int(value)
        except ValueError:
            return "GPU {} reported unreadable memory usage: {!r}".format(
                i, value)
        if used > limit:
            return "GPU {} has too much used memory".format(i)
    return None


def get_available_gpus():
    """Return the names of the GPU devices TensorFlow lists locally."""
    # Imported here so TensorFlow is only loaded after the nvidia-smi checks,
    # matching the original script's import placement.
    from tensorflow.python.client import device_lib
    local_device_protos = device_lib.list_local_devices()
    return [x.name for x in local_device_protos if x.device_type == 'GPU']


def run_matmul_on_gpus(n):
    """Build and run a 2x3 by 3x2 matmul under ``/gpu:i`` for each i < n.

    Prints each result.  Uses the TensorFlow 1.x graph/session API.
    """
    import tensorflow as tf
    for i in range(n):
        with tf.device('/gpu:{}'.format(i)):
            a = tf.constant(
                [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a')
            b = tf.constant(
                [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b')
            c = tf.matmul(a, b)
        # NOTE(review): the original indentation was lost; the session is
        # assumed to run once per GPU inside this loop -- confirm.
        with tf.Session() as sess:
            print(sess.run(c))


def main(argv=None):
    """Run every check in order; exit with status 1 on the first failure."""
    args = parse_args(argv)

    subprocess.run(["hostname"], check=True)
    print("CUDA_VISIBLE_DEVICES=" +
          os.environ.get("CUDA_VISIBLE_DEVICES", ""))

    result = run_captured([
        "nvidia-smi",
        "--query-gpu=memory.used",
        "--format=csv,noheader,nounits",
    ])
    mem_values = parse_memory_values(result.stdout.decode())
    print(mem_values)
    problem = find_memory_problem(mem_values, args.n)
    if problem is not None:
        print(problem)
        sys.exit(1)

    # Diagnostic dumps: full GPU status, then a one-shot process listing.
    for command in (["nvidia-smi"], ["top", "-b", "-n", "1"]):
        result = run_captured(command)
        print(result.stdout.decode(errors="replace"))
        print(result.stderr.decode(errors="replace"))

    print(get_available_gpus())
    run_matmul_on_gpus(args.n)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment