@william-r-s
Created August 23, 2018 20:18
GPU Check - verify that your job has access to enough GPUs with free memory
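To use it, save the script (e.g. as gpu_check.py, a file name assumed here) and pass the number of GPUs the job expects, for example python gpu_check.py 2 to check that the first two visible GPUs are free. The script exits with status 1 if too few GPUs are visible or any requested GPU is already in use.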
import argparse
import os
import subprocess
import sys

# How many GPUs the job expects; defaults to 1.
parser = argparse.ArgumentParser()
parser.add_argument('n', type=int, nargs='?', default=1)
args = parser.parse_args()

# Report which machine the job landed on and which GPUs the scheduler exposed.
subprocess.run("hostname", check=True)
print("CUDA_VISIBLE_DEVICES=" +
      os.environ.get("CUDA_VISIBLE_DEVICES", ""))
# Ask nvidia-smi for each visible GPU's used memory in MiB, one value per line.
result = subprocess.run(
    "nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits",
    check=True,
    shell=True,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE)
mem_values = result.stdout.decode().split()
print(mem_values)

# Fail if fewer GPUs are visible than requested.
if len(mem_values) < args.n:
    sys.exit(1)

# Fail if any requested GPU already has more than 1000 MiB in use,
# which suggests another job is occupying it.
for i in range(args.n):
    if int(mem_values[i]) > 1000:
        print("GPU {} has too much used memory".format(i))
        sys.exit(1)
# Print the full nvidia-smi report for the job log.
result = subprocess.run(
    "nvidia-smi", check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
print(result.stdout.decode())
print(result.stderr.decode())

# Snapshot the process table to spot other jobs running on the node.
result = subprocess.run(
    "top -b -n 1",
    shell=True,
    check=True,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE)
print(result.stdout.decode())
print(result.stderr.decode())
# Finally, confirm that TensorFlow itself can see and use the GPUs.
from tensorflow.python.client import device_lib


def get_available_gpus():
    local_device_protos = device_lib.list_local_devices()
    return [x.name for x in local_device_protos if x.device_type == 'GPU']


print(get_available_gpus())

import tensorflow as tf

# Run a small matmul pinned to each requested GPU to verify device placement.
for i in range(args.n):
    with tf.device('/gpu:{}'.format(i)):
        a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a')
        b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b')
        c = tf.matmul(a, b)
    with tf.Session() as sess:
        print(sess.run(c))
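Parsing nvidia-smi's text output works but is brittle. A library-based alternative is the NVIDIA Management Library bindings; a minimal sketch of the same used-memory check, assuming the pynvml package is installed (it is not used in the original gist):

# Hypothetical pynvml variant of the used-memory check above.
import pynvml

pynvml.nvmlInit()
for i in range(pynvml.nvmlDeviceGetCount()):
    handle = pynvml.nvmlDeviceGetHandleByIndex(i)
    info = pynvml.nvmlDeviceGetMemoryInfo(handle)
    # info.used is reported in bytes; convert to MiB to match nvidia-smi.
    print("GPU {}: {} MiB used".format(i, info.used // (1024 * 1024)))
pynvml.nvmlShutdown()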
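The matmul check above uses the TensorFlow 1.x API (tf.Session), which was current when this gist was written. A minimal sketch of the same placement check under TensorFlow 2.x, where eager execution replaces sessions (an assumed environment, not part of the original gist):

# Hypothetical TensorFlow 2.x version of the placement check.
import tensorflow as tf

# List the GPUs TensorFlow can see.
gpus = tf.config.list_physical_devices('GPU')
print(gpus)

# Pin a small matmul to each GPU; eager execution runs it immediately.
for i in range(len(gpus)):
    with tf.device('/GPU:{}'.format(i)):
        a = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
        b = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
        print(tf.matmul(a, b))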