Created
April 13, 2021 05:11
-
-
Save harisankarh/81c6f16cfea6caacee7adc4ce66fbc0d to your computer and use it in GitHub Desktop.
Python program to check the status of the GPUs in a server.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Report the status of all CUDA GPUs visible to PyTorch on this machine."""
import os

import torch


def check_gpus() -> int:
    """Print CUDA visibility, per-GPU memory stats, and an allocation smoke test.

    Returns:
        The number of visible CUDA devices (0 when CUDA is unavailable).
    """
    if 'CUDA_VISIBLE_DEVICES' in os.environ:
        print('CUDA_VISIBLE_DEVICES:', os.environ['CUDA_VISIBLE_DEVICES'])

    # Early exit: the original fell through and reported "0 gpus available".
    if not torch.cuda.is_available():
        print('no gpu available')
        return 0

    total_devices = torch.cuda.device_count()
    print(f'{total_devices} gpus available')
    for d in range(total_devices):
        print('=' * 10)
        print(f'gpu {d}')
        # Query device d; the original hard-coded device 0 in every iteration,
        # so GPUs 1..n-1 were never actually inspected.
        t = torch.cuda.get_device_properties(d).total_memory
        r = torch.cuda.memory_reserved(d)
        a = torch.cuda.memory_allocated(d)
        f = r - a  # free inside reserved
        print(f'total: {t} reserved: {r} allocated: {a} free: {f}')
        # Allocation smoke test on device d (original always targeted cuda:0).
        device = torch.device(f'cuda:{d}')
        probe = torch.zeros(4, 3)
        try:
            probe = probe.to(device)
            print(f'successfully allocated memory to {d}')
        except RuntimeError:
            # Narrowed from a bare except: CUDA allocation failures surface
            # as RuntimeError; anything else (e.g. KeyboardInterrupt) propagates.
            print(f'unable to allocate memory to gpu {d}')
    print('NOTE: the numbering of gpus within this program need not be consistent with global number of gpus used in CUDA_VISIBLE_DEVICES')
    return total_devices


if __name__ == '__main__':
    check_gpus()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment