Last active
January 10, 2018 15:26
-
-
Save jojonki/fadf18da5f86d803f09493bc7e6a818d to your computer and use it in GitHub Desktop.
Performance on Titan V
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# see original code
# https://discuss.pytorch.org/t/solved-titan-v-on-pytorch-0-3-0-cuda-9-0-cudnn-7-0-is-much-slower-than-1080-ti/11320/10?u=jef
"""Benchmark FP32 vs FP16 training-step time for several torchvision models on a CUDA GPU.

For each architecture this times `num_runs` full training steps
(forward + loss + backward + optimizer step) at batch size 16 and
prints the average step time in milliseconds, once in FP32 and once
in FP16 (model and input cast with .half()).

Requires a CUDA-capable GPU; written for the PyTorch 0.3-era API
(torch.autograd.Variable).
"""
import torch
from torchvision.models import vgg16, densenet121, resnet152
from time import time
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.optim
from torch.autograd import Variable
import torchvision.models as models

# Let cuDNN pick the fastest convolution algorithms for the fixed input shape.
torch.backends.cudnn.benchmark = True

model_names = sorted(name for name in models.__dict__
                     if name.islower() and not name.startswith("__")
                     and callable(models.__dict__[name]))

print('cuda version=', torch.version.cuda)
print('cudnn version=', torch.backends.cudnn.version())


def _time_training_steps(arch, half, num_runs=100, batch_size=16):
    """Time `num_runs` training steps of torchvision model `arch` on CUDA.

    Args:
        arch: torchvision model name, looked up in models.__dict__.
        half: if True, benchmark in FP16 (model and inputs cast via .half()).
        num_runs: number of timed iterations (one extra warm-up run is added).
        batch_size: images per step (3x224x224 random input).

    Returns:
        List of per-step durations in seconds, length `num_runs`.
    """
    model = models.__dict__[arch]().cuda()
    if half:
        model = model.half()
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), 0.001,
                                momentum=0.9,
                                weight_decay=1e-5)
    durations = []
    for i in range(num_runs + 1):
        x = torch.rand(batch_size, 3, 224, 224)
        x_var = torch.autograd.Variable(x).cuda()
        if half:
            x_var = x_var.half()
        # Dummy class-1 labels; target stays LongTensor even in FP16.
        target = Variable(torch.LongTensor(batch_size).fill_(1).cuda())
        # Synchronize so the timer brackets only this step's GPU work.
        torch.cuda.synchronize()
        t1 = time()
        # Reset gradients each step; the original omitted this, so gradients
        # accumulated across all iterations and each step did extra stale work.
        optimizer.zero_grad()
        out = model(x_var)
        err = criterion(out, target)
        err.backward()
        optimizer.step()
        torch.cuda.synchronize()
        t2 = time()
        # treat the initial run as warm up and don't count
        if i > 0:
            durations.append(t2 - t1)
    return durations


for arch in ['densenet121', 'vgg16', 'resnet152']:
    durations = _time_training_steps(arch, half=False)
    print('{} FP 32 avg over {} runs: {} ms'.format(arch, len(durations), sum(durations) / len(durations) * 1000))

    durations = _time_training_steps(arch, half=True)
    print('{} FP 16 avg over {} runs: {} ms'.format(arch, len(durations), sum(durations) / len(durations) * 1000))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| cuda version= 9.1.85 | |
| cudnn version= 7005 | |
| densenet121 FP 32 avg over 100 runs: 85.31552791595459 ms | |
| densenet121 FP 16 avg over 100 runs: 66.39776229858398 ms | |
| vgg16 FP 32 avg over 100 runs: 108.35402250289917 ms | |
| vgg16 FP 16 avg over 100 runs: 67.54538059234619 ms | |
| resnet152 FP 32 avg over 100 runs: 168.9338254928589 ms | |
| resnet152 FP 16 avg over 100 runs: 107.77381420135498 ms |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment