from __future__ import print_function

import torch
from torch.nn.parallel.data_parallel import DataParallel
from torchvision.models import resnet18
from torch.autograd import Variable

# Twenty DataParallel-wrapped resnet18 models; the gathered output of model i
# lands on GPU (i % device_count), so out_var alternates between GPUs 0 and 1.
model_list = [DataParallel(resnet18(), output_device=(i % torch.cuda.device_count())).cuda()
              for i in range(20)]

while True:
    in_var = Variable(torch.randn((2, 3, 224, 224)))
    answer_var = Variable(torch.randn((2, 1000)))

    for model in model_list:
        out_var = model(in_var)

        with torch.cuda.device_of(out_var):
            loss_var = torch.nn.MSELoss()(out_var, answer_var.cuda())
            print("out_var.get_device()", out_var.get_device())

            # Vary this set ({None}, {0}, {1}, {0, 1}) between runs to
            # reproduce the nvidia-smi readings documented below.
            if out_var.get_device() in {None}:
                print("backward")
                loss_var.backward()
# out_var switches between devices as expected:
# out_var.get_device() 0
# out_var.get_device() 1
# out_var.get_device() 0
# out_var.get_device() 1
# ...
# nvidia-smi | fgrep MiB
# ... in {None}
# |  0%   46C    P2    76W / 250W |   1487MiB /  8112MiB |     79%      Default |
# |  0%   46C    P2    81W / 250W |    607MiB /  8114MiB |     79%      Default |
#
# ... in {0}
# | 27%   52C    P2    91W / 250W |   2031MiB /  8112MiB |     96%      Default |
# | 26%   52C    P2    81W / 250W |    607MiB /  8114MiB |     95%      Default |
#
# ... in {0, 1}
# | 34%   56C    P2    92W / 250W |   2509MiB /  8112MiB |     96%      Default |
# | 28%   51C    P2    80W / 250W |    541MiB /  8114MiB |     94%      Default |
#
# ... in {1}
# | 40%   60C    P2   107W / 250W |   2037MiB /  8112MiB |     92%      Default |
# | 30%   52C    P2    94W / 250W |    607MiB /  8114MiB |     92%      Default |
#
# ... in {None} with 30 models (note GPU 1 RAM unchanged):
# | 49%   62C    P2    90W / 250W |   1987MiB /  8112MiB |     85%      Default |
# | 33%   51C    P2    79W / 250W |    607MiB /  8114MiB |     84%      Default |
#
# ... in {0, 1} with 30 models (note GPU 1 RAM unchanged):
# |  0%   49C    P2    90W / 250W |   3491MiB /  8112MiB |     96%      Default |
# |  0%   47C    P2   102W / 250W |    541MiB /  8114MiB |     97%      Default |
# Full output:
# Sat May  6 14:44:21 2017
# +-----------------------------------------------------------------------------+
# | NVIDIA-SMI 375.39                 Driver Version: 375.39                    |
# |-------------------------------+----------------------+----------------------+
# | GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
# | Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
# |===============================+======================+======================|
# |   0  GeForce GTX 1080    Off  | 0000:01:00.0     Off |                  N/A |
# | 40%   60C    P2   107W / 250W |   2037MiB /  8112MiB |     92%      Default |
# +-------------------------------+----------------------+----------------------+
# |   1  GeForce GTX 1080    Off  | 0000:02:00.0     Off |                  N/A |
# | 30%   52C    P2    94W / 250W |    607MiB /  8114MiB |     92%      Default |
# +-------------------------------+----------------------+----------------------+
#
# +-----------------------------------------------------------------------------+
# | Processes:                                                       GPU Memory |
# | GPU       PID  Type  Process name                                Usage      |
# |=============================================================================|
# |   0     30619    C   .venv/bin/python                               2035MiB |
# |   1     30619    C   .venv/bin/python                                605MiB |
# +-----------------------------------------------------------------------------+
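
# A minimal sketch of the same experiment against the post-0.4 PyTorch API,
# where Variable is merged into Tensor, so plain tensors are used. The function
# name and its parameters (run_modern_sketch, n_models, backward_devices) are
# mine, not from the original gist; DataParallel, output_device, get_device(),
# and .to() are assumed to behave as in the current documentation.
def run_modern_sketch(n_models=20, backward_devices=(0, 1)):
    # Assumes at least two visible GPUs, as in the original run.
    n_gpus = torch.cuda.device_count()

    # One DataParallel wrapper per model; gathered outputs alternate GPUs.
    model_list = [
        DataParallel(resnet18(), output_device=(i % n_gpus)).cuda()
        for i in range(n_models)
    ]
    loss_fn = torch.nn.MSELoss()

    in_t = torch.randn(2, 3, 224, 224)
    answer_t = torch.randn(2, 1000)

    for model in model_list:
        out_t = model(in_t)
        device_idx = out_t.get_device()
        loss_t = loss_fn(out_t, answer_t.to(out_t.device))
        print("out_t.get_device()", device_idx)

        # Pass e.g. (0,), (1,), or () to reproduce the variants above.
        if device_idx in backward_devices:
            print("backward")
            loss_t.backward()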
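
# Optional cross-check of the "GPU 1 RAM unchanged" observation from inside the
# process, assuming a PyTorch new enough to provide torch.cuda.memory_allocated()
# (>= 0.4). It counts memory held by tensors only, so the numbers come out lower
# than the nvidia-smi totals, which also include the CUDA context and the caching
# allocator's overhead. The helper name is mine.
def print_allocated_memory():
    for device_idx in range(torch.cuda.device_count()):
        allocated_mib = torch.cuda.memory_allocated(device_idx) / 2 ** 20
        print("cuda:{} allocated {:.0f} MiB".format(device_idx, allocated_mib))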