Created September 12, 2017 16:11
A script to generate a per-line GPU memory usage trace. For more meaningful results, set `CUDA_LAUNCH_BLOCKING=1` so that CUDA kernel launches are synchronous and memory changes are attributed to the correct line.
```python
import datetime
import linecache
import os

import pynvml3
import torch

print_tensor_sizes = True
last_tensor_sizes = set()
gpu_profile_fn = f'{datetime.datetime.now():%d-%b-%y-%H:%M:%S}-gpu_mem_prof.txt'
if 'GPU_DEBUG' in os.environ:
    print('profiling gpu usage to ', gpu_profile_fn)

lineno = None
func_name = None
filename = None
module_name = None


def gpu_profile(frame, event, arg):
    # it is _about to_ execute (!)
    global last_tensor_sizes
    global lineno, func_name, filename, module_name

    if event == 'line':
        try:
            # about _previous_ line (!)
            if lineno is not None:
                pynvml3.nvmlInit()
                handle = pynvml3.nvmlDeviceGetHandleByIndex(int(os.environ['GPU_DEBUG']))
                meminfo = pynvml3.nvmlDeviceGetMemoryInfo(handle)
                line = linecache.getline(filename, lineno)
                where_str = module_name + ' ' + func_name + ':' + str(lineno)

                with open(gpu_profile_fn, 'a+') as f:
                    f.write(f"{where_str:<50}"
                            f":{meminfo.used/1024**2:<7.1f}Mb "
                            f"{line.rstrip()}\n")

                    if print_tensor_sizes is True:
                        # tag each live CUDA tensor with the line that first saw it
                        for tensor in get_tensors():
                            if not hasattr(tensor, 'dbg_alloc_where'):
                                tensor.dbg_alloc_where = where_str
                        new_tensor_sizes = {(type(x), tuple(x.size()), x.dbg_alloc_where)
                                            for x in get_tensors()}
                        # log tensors that appeared (+) or disappeared (-) since the last line
                        for t, s, loc in new_tensor_sizes - last_tensor_sizes:
                            f.write(f'+ {loc:<50} {str(s):<20} {str(t):<10}\n')
                        for t, s, loc in last_tensor_sizes - new_tensor_sizes:
                            f.write(f'- {loc:<50} {str(s):<20} {str(t):<10}\n')
                        last_tensor_sizes = new_tensor_sizes
                pynvml3.nvmlShutdown()

            # save details about line _to be_ executed
            lineno = None

            func_name = frame.f_code.co_name
            filename = frame.f_globals["__file__"]
            if (filename.endswith(".pyc")
                    or filename.endswith(".pyo")):
                filename = filename[:-1]
            module_name = frame.f_globals["__name__"]
            lineno = frame.f_lineno

            if 'gmwda-pytorch' not in os.path.dirname(os.path.abspath(filename)):
                lineno = None  # skip current line evaluation
            if ('car_datasets' in filename
                    or '_exec_config' in func_name
                    or 'gpu_profile' in module_name
                    or 'tee_stdout' in module_name):
                lineno = None  # skip current

            return gpu_profile

        except (KeyError, AttributeError):
            pass

    return gpu_profile


def get_tensors(gpu_only=True):
    """Yield every tensor tracked by the garbage collector (CUDA-only by default)."""
    import gc
    for obj in gc.get_objects():
        try:
            if torch.is_tensor(obj):
                tensor = obj
            elif hasattr(obj, 'data') and torch.is_tensor(obj.data):
                tensor = obj.data
            else:
                continue
            if not gpu_only or tensor.is_cuda:  # honor the gpu_only flag
                yield tensor
        except Exception:
            pass
```
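For context, the profiler is meant to be installed as a Python trace function before any GPU work runs. A minimal sketch of how that might look (the module name `gpu_profile.py` and the device index `0` are assumptions, not part of the gist):

```python
import os
import sys

# Both variables should be set before torch initializes CUDA: GPU_DEBUG selects
# the device whose memory is polled, and CUDA_LAUNCH_BLOCKING makes kernel
# launches synchronous so usage is attributed to the right source line.
os.environ['GPU_DEBUG'] = '0'
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

from gpu_profile import gpu_profile  # assuming the gist is saved as gpu_profile.py

sys.settrace(gpu_profile)  # trace every line executed from here on

# ... training code; per-line memory usage is appended to the trace file ...
```

Note that the hard-coded path filter (`'gmwda-pytorch'`) and the filename/function/module blacklists inside `gpu_profile` come from the author's own project and need to be adapted; otherwise every line is skipped and the trace file stays empty.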
https://github.com/li-js/gpu_memory_profiling/blob/master/example_mnist.py has a good example of how to use it.
Shouldn't pynvml3 be py3nvml?
There is a better version of this file, without all the typos and with some printing: https://github.com/li-js/gpu_memory_profiling/blob/master/gpu_profile.py
Could you please provide an explanation of the arguments `frame`, `event`, and `arg` passed to your function? Thanks.
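For reference, these are the standard arguments Python passes to every `sys.settrace` callback, not something this script defines: `frame` is the stack frame being executed (exposing `f_code`, `f_globals`, and `f_lineno`), `event` is a string such as `'call'`, `'line'`, `'return'`, or `'exception'`, and `arg` is an event-dependent payload that is `None` for `'line'` events. A minimal standalone sketch:

```python
import sys

def tracer(frame, event, arg):
    # frame: the frame object being executed (frame.f_lineno, frame.f_code.co_name, ...)
    # event: 'call', 'line', 'return', or 'exception'
    # arg:   event-dependent; None for 'line' events, the return value for 'return'
    if event == 'line':
        print(f'{frame.f_code.co_name}:{frame.f_lineno}')
    return tracer  # returning the callback keeps line-level tracing enabled

sys.settrace(tracer)

def f():
    x = 1
    return x

f()  # prints the function name and line number for each line executed inside f
sys.settrace(None)
```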