Created September 13, 2018 22:40
import argparse
import os

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.utils.data
import torch.utils.data.distributed
from torch.nn.parallel import DistributedDataParallel
from torchvision.models import resnet50
def get_parser():
    parser = argparse.ArgumentParser(description='PyTorch Minimal Distributed')
    parser.add_argument('--disable-fp16', action='store_true',
                        help='Disable fp16 and run the model in fp32')
    parser.add_argument('--dist-url', default='env://', type=str,
                        help='url used to set up distributed training')
    parser.add_argument('--dist-backend', default='nccl', type=str, help='distributed backend')
    parser.add_argument('--local_rank', default=0, type=int,
                        help='Used for multi-process training. Can either be manually set '
                             'or automatically set by using \'python -m multiproc\'.')
    return parser

cudnn.benchmark = True
args = get_parser().parse_args()
# WORLD_SIZE and RANK are set in the environment by the launcher
def env_world_size(): return int(os.environ['WORLD_SIZE'])
def env_rank(): return int(os.environ['RANK'])

def sum_tensor(tensor):
    """All-reduce: sum `tensor` across all ranks; every rank receives the result."""
    rt = tensor.clone()
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    return rt
def BN_convert_float(module):
    """Recursively convert BatchNorm layers back to fp32 for numerical stability."""
    if isinstance(module, torch.nn.modules.batchnorm._BatchNorm):
        module.float()
    for child in module.children():
        BN_convert_float(child)
    return module

def network_to_half(network):
    """Convert a network to half precision, keeping BatchNorm layers in fp32."""
    return BN_convert_float(network.half())
def main():
    print('Distributed initializing process group')
    torch.cuda.set_device(args.local_rank)
    # init_process_group returns None; it sets up the default process group
    dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                            world_size=env_world_size(), rank=env_rank())
    assert env_world_size() == dist.get_world_size()
    print("Distributed: success (%d/%d)" % (args.local_rank, dist.get_world_size()))

    print('Loading model')
    model = resnet50().cuda()
    # if not args.disable_fp16: model = model.half()
    if not args.disable_fp16: model = network_to_half(model)

    print('Loading distributed')
    # DistributedDataParallel uses the default process group initialized above
    model = DistributedDataParallel(model, device_ids=[args.local_rank],
                                    output_device=args.local_rank)
    print('Distributed model loaded')

    print('Deadlock may happen here if fp16 enabled')
    tensor = torch.tensor([1.0]).float().cuda()
    print('Creating tensor:', tensor.item())
    output = sum_tensor(tensor)
    print('Able to sync machines:', output.item())

if __name__ == '__main__':
    main()
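To reproduce, launch one process per GPU. A minimal invocation, assuming the script is saved as minimal_distributed.py (a file name chosen here for illustration) and two GPUs on a single node:

python -m torch.distributed.launch --nproc_per_node=2 minimal_distributed.py

torch.distributed.launch sets the WORLD_SIZE and RANK environment variables that env_world_size() and env_rank() read, and passes --local_rank to each process. Add --disable-fp16 to run in fp32 and check whether the hang disappears.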