Bug in all to all
import torch
import torch.nn as nn
import sys
#sys.path.insert(0, '/usr/share/torch-xla-nightly/pytorch/xla/')
import torch_xla.distributed.xla_multiprocessing as xmp


def main(*a):
    import torch_xla.core.xla_model as xm
    device = xm.xla_device()
    o = torch.randn((8, 1), dtype=torch.float, device=device).reshape((8, 1))
    o *= xm.get_ordinal()
    t = xm.all_to_all(o, split_dimension=0, concat_dimension=1, split_count=8, groups=None)
    xm.mark_step()
    print('INPUT', xm.get_ordinal(), o.T, flush=True)
    xm.rendezvous('hi')
    print('RESULT', xm.get_ordinal(), t, flush=True)
    xm.rendezvous('hi')


if __name__ == '__main__':
    xmp.spawn(main, args=(), nprocs=8)
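Before the output, here is a minimal CPU-only sketch of what this all_to_all should compute, assuming the split/concat semantics of the call above (split each worker's (8, 1) input along dim 0, send chunk i to worker i, concatenate the eight received chunks along dim 1). The simulation below is only an illustration, not part of the repro:

import torch

world_size = 8
base = torch.randn(world_size, 1)
# Mirror `o *= xm.get_ordinal()`: every worker holds the same base vector,
# scaled by its ordinal (which is what the INPUT lines below show).
inputs = [base * k for k in range(world_size)]

def simulated_all_to_all(tensors, split_dim=0, concat_dim=1):
    # Split each worker's tensor into len(tensors) chunks along split_dim;
    # worker d then concatenates chunk d from every source along concat_dim.
    chunks = [t.chunk(len(tensors), dim=split_dim) for t in tensors]
    return [torch.cat([chunks[s][d] for s in range(len(tensors))], dim=concat_dim)
            for d in range(len(tensors))]

for k, r in enumerate(simulated_all_to_all(inputs)):
    print('RESULT', k, r)  # expected: base[k] * [0, 1, ..., 7], shape (1, 8)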
# OUTPUT
$ ./alltoall.sh | sort -n
INPUT 0 tensor([[0., 0., 0., -0., -0., 0., 0., 0.]], device='xla:1')
INPUT 1 tensor([[ 0.3602, 0.6926, 1.2409, -0.4852, -0.2514, 1.0309, 1.5637, 0.2221]], device='xla:0')
INPUT 2 tensor([[ 0.7204, 1.3852, 2.4818, -0.9704, -0.5028, 2.0619, 3.1275, 0.4442]], device='xla:0')
INPUT 3 tensor([[ 1.0806, 2.0779, 3.7226, -1.4556, -0.7542, 3.0928, 4.6912, 0.6663]], device='xla:0')
INPUT 4 tensor([[ 1.4408, 2.7705, 4.9635, -1.9408, -1.0056, 4.1238, 6.2550, 0.8884]], device='xla:0')
INPUT 5 tensor([[ 1.8010, 3.4631, 6.2044, -2.4260, -1.2570, 5.1547, 7.8187, 1.1105]], device='xla:0')
INPUT 6 tensor([[ 2.1612, 4.1557, 7.4453, -2.9112, -1.5084, 6.1857, 9.3825, 1.3326]], device='xla:0')
INPUT 7 tensor([[ 2.5214, 4.8484, 8.6862, -3.3964, -1.7598, 7.2166, 10.9462, 1.5547]], device='xla:0')
RESULT 0 tensor([[0.0000, 0.0000, 0.7204, 1.0806, 1.4408, 1.8010, 2.1612, 2.5214]], device='xla:1')
RESULT 1 tensor([[0.0000e+00, 1.3852e+00, 2.0779e+00, 2.7705e+00, 3.4631e+00, 4.1557e+00, 4.8484e+00, 1.5236e-35]], device='xla:0')
RESULT 2 tensor([[0.0000, 0.0000, 1.2409, 3.7226, 4.9635, 6.2044, 7.4453, 8.6862]], device='xla:0')
RESULT 3 tensor([[-0.0000, 0.0000, -0.9704, -1.4556, -1.9408, -2.4260, -2.9112, -3.3964]], device='xla:0')
RESULT 4 tensor([[-0.0000, 0.0000, -0.5028, -0.7542, -1.0056, -1.2570, -1.5084, -1.7598]], device='xla:0')
RESULT 5 tensor([[0.0000, 0.0000, 1.0309, 3.0928, 4.1238, 5.1547, 6.1857, 7.2166]], device='xla:0')
RESULT 6 tensor([[ 0.0000, 0.0000, 3.1275, 4.6912, 6.2550, 7.8187, 9.3825, 10.9462]], device='xla:0')
RESULT 7 tensor([[0.0000, 0.0000, 0.4442, 0.6663, 0.8884, 1.1105, 1.3326, 1.5547]], device='xla:0')
Oh, actually that's not correct either. RESULT 0 is wonky (position 1 should be 0.3602, not 0), and RESULT 1 is missing its position-1 value, with the tail shifted left by one and a ~1.5e-35 garbage value appended at the end.
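For reference, RESULT 1 should hold the second element of each ordinal's input, i.e. 0.6926 * k for k = 0..7:

  expected: [0.0000, 0.6926, 1.3852, 2.0779, 2.7705, 3.4631, 4.1557, 4.8484]
  observed: [0.0000, 1.3852, 2.0779, 2.7705, 3.4631, 4.1557, 4.8484, 1.5236e-35]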
Getting the correct result with this input (commenting out `o *= xm.get_ordinal()`):
def main(*a):
    import torch_xla.core.xla_model as xm
    device = xm.xla_device()
    o = torch.tensor([float(xm.get_ordinal()) for _ in range(8)],
                     dtype=torch.float, device=device).reshape((8, 1))
    #o = torch.randn((8, 1), dtype=torch.float, device=device).reshape((8, 1))
    #o *= xm.get_ordinal()
    t = xm.all_to_all(o, split_dimension=0, concat_dimension=1, split_count=8, groups=None)
    xm.mark_step()
    print('INPUT', xm.get_ordinal(), o.T, flush=True)
    xm.rendezvous('hi')
    print('RESULT', xm.get_ordinal(), t, flush=True)
    xm.rendezvous('hi')
INPUT 0 tensor([[0., 0., 0., 0., 0., 0., 0., 0.]], device='xla:1')
INPUT 1 tensor([[1., 1., 1., 1., 1., 1., 1., 1.]], device='xla:0')
INPUT 2 tensor([[2., 2., 2., 2., 2., 2., 2., 2.]], device='xla:0')
INPUT 3 tensor([[3., 3., 3., 3., 3., 3., 3., 3.]], device='xla:0')
INPUT 4 tensor([[4., 4., 4., 4., 4., 4., 4., 4.]], device='xla:0')
INPUT 5 tensor([[5., 5., 5., 5., 5., 5., 5., 5.]], device='xla:0')
INPUT 6 tensor([[6., 6., 6., 6., 6., 6., 6., 6.]], device='xla:0')
INPUT 7 tensor([[7., 7., 7., 7., 7., 7., 7., 7.]], device='xla:0')
RESULT 0 tensor([[0., 1., 2., 3., 4., 5., 6., 7.]], device='xla:1')
RESULT 1 tensor([[0., 1., 2., 3., 4., 5., 6., 7.]], device='xla:0')
RESULT 2 tensor([[0., 1., 2., 3., 4., 5., 6., 7.]], device='xla:0')
RESULT 3 tensor([[0., 1., 2., 3., 4., 5., 6., 7.]], device='xla:0')
RESULT 4 tensor([[0., 1., 2., 3., 4., 5., 6., 7.]], device='xla:0')
RESULT 5 tensor([[0., 1., 2., 3., 4., 5., 6., 7.]], device='xla:0')
RESULT 6 tensor([[0., 1., 2., 3., 4., 5., 6., 7.]], device='xla:0')
RESULT 7 tensor([[0., 1., 2., 3., 4., 5., 6., 7.]], device='xla:0')
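This is the expected transpose-like behavior: row i of ordinal k's input ends up at position k of ordinal i's result.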
Marking step before the all_to_all, the ordinal-squared inputs also give the correct result (as opposed to the failing run above):
def main(*a):
    import torch_xla.core.xla_model as xm
    device = xm.xla_device()
    o = torch.tensor([float(xm.get_ordinal()) for _ in range(8)],
                     dtype=torch.float, device=device).reshape((8, 1))
    #o = torch.randn((8, 1), dtype=torch.float, device=device).reshape((8, 1))
    o *= xm.get_ordinal()
    xm.mark_step()  # <- ADDED THIS
    t = xm.all_to_all(o, split_dimension=0, concat_dimension=1, split_count=8, groups=None)
    xm.mark_step()
    print('INPUT', xm.get_ordinal(), o.T, flush=True)
    xm.rendezvous('hi')
    print('RESULT', xm.get_ordinal(), t, flush=True)
    xm.rendezvous('hi')
$ ./alltoall.sh | sort -n
INPUT 0 tensor([[0., 0., 0., 0., 0., 0., 0., 0.]], device='xla:1')
INPUT 1 tensor([[1., 1., 1., 1., 1., 1., 1., 1.]], device='xla:0')
INPUT 2 tensor([[4., 4., 4., 4., 4., 4., 4., 4.]], device='xla:0')
INPUT 3 tensor([[9., 9., 9., 9., 9., 9., 9., 9.]], device='xla:0')
INPUT 4 tensor([[16., 16., 16., 16., 16., 16., 16., 16.]], device='xla:0')
INPUT 5 tensor([[25., 25., 25., 25., 25., 25., 25., 25.]], device='xla:0')
INPUT 6 tensor([[36., 36., 36., 36., 36., 36., 36., 36.]], device='xla:0')
INPUT 7 tensor([[49., 49., 49., 49., 49., 49., 49., 49.]], device='xla:0')
RESULT 0 tensor([[ 0., 1., 4., 9., 16., 25., 36., 49.]], device='xla:1')
RESULT 1 tensor([[ 0., 1., 4., 9., 16., 25., 36., 49.]], device='xla:0')
RESULT 2 tensor([[ 0., 1., 4., 9., 16., 25., 36., 49.]], device='xla:0')
RESULT 3 tensor([[ 0., 1., 4., 9., 16., 25., 36., 49.]], device='xla:0')
RESULT 4 tensor([[ 0., 1., 4., 9., 16., 25., 36., 49.]], device='xla:0')
RESULT 5 tensor([[ 0., 1., 4., 9., 16., 25., 36., 49.]], device='xla:0')
RESULT 6 tensor([[ 0., 1., 4., 9., 16., 25., 36., 49.]], device='xla:0')
RESULT 7 tensor([[ 0., 1., 4., 9., 16., 25., 36., 49.]], device='xla:0')
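So the corruption seems to hinge on whether the `o *= xm.get_ordinal()` multiply is still pending in the lazy graph when the all_to_all is traced; flushing it with mark_step first gives the right answer.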
The result also looks correct with random input plus marking step before the all_to_all:
def main(*a):
    import torch_xla.core.xla_model as xm
    device = xm.xla_device()
    #o = torch.tensor([float(xm.get_ordinal()) for _ in range(8)],
    #                 dtype=torch.float, device=device).reshape((8, 1))
    o = torch.randn((8, 1), dtype=torch.float, device=device).reshape((8, 1))
    o *= xm.get_ordinal()
    xm.mark_step()
    t = xm.all_to_all(o, split_dimension=0, concat_dimension=1, split_count=8, groups=None)
    xm.mark_step()
    print('INPUT', xm.get_ordinal(), o.T, flush=True)
    xm.rendezvous('hi')
    print('RESULT', xm.get_ordinal(), t, flush=True)
    xm.rendezvous('hi')
$ ./alltoall.sh | sort -n
INPUT 0 tensor([[0., 0., 0., -0., -0., 0., 0., 0.]], device='xla:1')
INPUT 1 tensor([[ 0.3602, 0.6926, 1.2409, -0.4852, -0.2514, 1.0309, 1.5637, 0.2221]], device='xla:0')
INPUT 2 tensor([[ 0.7204, 1.3852, 2.4818, -0.9704, -0.5028, 2.0619, 3.1275, 0.4442]], device='xla:0')
INPUT 3 tensor([[ 1.0806, 2.0779, 3.7226, -1.4556, -0.7542, 3.0928, 4.6912, 0.6663]], device='xla:0')
INPUT 4 tensor([[ 1.4408, 2.7705, 4.9635, -1.9408, -1.0056, 4.1238, 6.2550, 0.8884]], device='xla:0')
INPUT 5 tensor([[ 1.8010, 3.4631, 6.2044, -2.4260, -1.2570, 5.1547, 7.8187, 1.1105]], device='xla:0')
INPUT 6 tensor([[ 2.1612, 4.1557, 7.4453, -2.9112, -1.5084, 6.1857, 9.3825, 1.3326]], device='xla:0')
INPUT 7 tensor([[ 2.5214, 4.8484, 8.6862, -3.3964, -1.7598, 7.2166, 10.9462, 1.5547]], device='xla:0')
RESULT 0 tensor([[0.0000, 0.3602, 0.7204, 1.0806, 1.4408, 1.8010, 2.1612, 2.5214]], device='xla:1')
RESULT 1 tensor([[0.0000, 0.6926, 1.3852, 2.0779, 2.7705, 3.4631, 4.1557, 4.8484]], device='xla:0')
RESULT 2 tensor([[0.0000, 1.2409, 2.4818, 3.7226, 4.9635, 6.2044, 7.4453, 8.6862]], device='xla:0')
RESULT 3 tensor([[-0.0000, -0.4852, -0.9704, -1.4556, -1.9408, -2.4260, -2.9112, -3.3964]], device='xla:0')
RESULT 4 tensor([[-0.0000, -0.2514, -0.5028, -0.7542, -1.0056, -1.2570, -1.5084, -1.7598]], device='xla:0')
RESULT 5 tensor([[0.0000, 1.0309, 2.0619, 3.0928, 4.1238, 5.1547, 6.1857, 7.2166]], device='xla:0')
RESULT 6 tensor([[ 0.0000, 1.5637, 3.1275, 4.6912, 6.2550, 7.8187, 9.3825, 10.9462]], device='xla:0')
RESULT 7 tensor([[0.0000, 0.2221, 0.4442, 0.6663, 0.8884, 1.1105, 1.3326, 1.5547]], device='xla:0')
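To sum up: every failing run traced the pending `o *= xm.get_ordinal()` together with the collective, and every run that called xm.mark_step() in between came out correct. A minimal sketch of that workaround, using only the calls already shown above (the wrapper name is hypothetical, not a torch_xla API):

import torch_xla.core.xla_model as xm

def all_to_all_materialized(value, split_dimension, concat_dimension, split_count):
    # Hypothetical wrapper: flush the pending lazy graph so `value` is
    # materialized before the collective is traced; this is the workaround
    # that produced correct results in the runs above.
    xm.mark_step()
    return xm.all_to_all(value, split_dimension=split_dimension,
                         concat_dimension=concat_dimension,
                         split_count=split_count, groups=None)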