Created
February 23, 2022 21:54
-
-
Save jamesr66a/31c62bd5493586b112aeb234c4e77fac to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WARNING:torch.distributed.run: | |
***************************************** | |
Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. | |
***************************************** | |
REPLICATE config: False -> MultiUseParameterConfig.TRANSMIT | |
GraphModule( | |
(submod_0): GraphModule() | |
(submod_1): GraphModule() | |
(submod_2): GraphModule() | |
(_loss): MSELoss() | |
) | |
def forward(self, x, target): | |
submod_0 = self.submod_0(x) | |
getitem_2 = submod_0[2] | |
getitem = submod_0[0] | |
getitem_1 = submod_0[1] | |
submod_1 = self.submod_1(getitem, getitem_2) | |
getitem_4 = submod_1[1] | |
getitem_3 = submod_1[0] | |
submod_2 = self.submod_2(getitem_3, getitem_1, getitem_4) | |
_loss = self._loss(submod_2, target) | |
stage_backward = pippy_IR_stage_backward(stage_output = _loss, output_grads = None, input_values = [submod_2, target]); target = None | |
getitem_5 = stage_backward[0] | |
getitem_6 = stage_backward[1]; stage_backward = None | |
stage_backward_1 = pippy_IR_stage_backward(stage_output = submod_2, output_grads = getitem_5, input_values = [getitem_3, getitem_1, getitem_4]); submod_2 = getitem_5 = getitem_3 = getitem_1 = getitem_4 = None | |
getitem_7 = stage_backward_1[0] | |
getitem_8 = stage_backward_1[1] | |
getitem_9 = stage_backward_1[2]; stage_backward_1 = None | |
stage_backward_2 = pippy_IR_stage_backward(stage_output = submod_1, output_grads = [getitem_7, getitem_9], input_values = [getitem, getitem_2]); submod_1 = getitem_7 = getitem_9 = getitem = getitem_2 = None | |
getitem_10 = stage_backward_2[0] | |
getitem_11 = stage_backward_2[1]; stage_backward_2 = None | |
stage_backward_3 = pippy_IR_stage_backward(stage_output = submod_0, output_grads = [getitem_10, getitem_8, getitem_11], input_values = [x]); submod_0 = getitem_10 = getitem_8 = getitem_11 = x = None | |
getitem_12 = stage_backward_3[0]; stage_backward_3 = None | |
return _loss | |
/fsx/users/jamesreed/pipeline_for_real/pippy/PipelineDriver.py:394: UserWarning: Running pipeline with 3 stages on world_size of 10. Remaining ranks will be idle. | |
warnings.warn(f'Running pipeline with {len(executor_descriptors)} stages on world_size of {self.world_size}. ' | |
(88625) Instantiating OwnerRRef GloballyUniqueId(created_on=0, local_id=0) with future 0x55ae1ef7c9f0 | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:747 GloballyUniqueId(created_on=0, local_id=0) | |
(88625) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=0) with future 0x55ae1ef7c9f0 | |
(88625) Populating OwnerRRef GloballyUniqueId(created_on=0, local_id=0) with future 0x55ae1ef7c9f0 | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=0) GloballyUniqueId(created_on=0, local_id=0) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=0) GloballyUniqueId(created_on=0, local_id=5) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=0) | |
(88625) Instantiating OwnerRRef GloballyUniqueId(created_on=0, local_id=6) with future 0x55ae1ef04700 | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:747 GloballyUniqueId(created_on=0, local_id=6) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=6) GloballyUniqueId(created_on=0, local_id=7) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=6) | |
(88625) Instantiating OwnerRRef GloballyUniqueId(created_on=0, local_id=8) with future 0x55ae1ef7df60 | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:747 GloballyUniqueId(created_on=0, local_id=8) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=0) GloballyUniqueId(created_on=0, local_id=5) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=6) GloballyUniqueId(created_on=0, local_id=9) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=6) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=6) GloballyUniqueId(created_on=0, local_id=7) | |
(88625) Instantiating OwnerRRef GloballyUniqueId(created_on=0, local_id=10) with future 0x55ae1efc0500 | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:747 GloballyUniqueId(created_on=0, local_id=10) | |
(88625) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=0) with future 0x55ae1ef7c9f0 | |
(88625) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=6) with future 0x55ae1ef04700 | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=6) GloballyUniqueId(created_on=0, local_id=11) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=6) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=6) GloballyUniqueId(created_on=0, local_id=9) | |
(88625) Instantiating OwnerRRef GloballyUniqueId(created_on=0, local_id=12) with future 0x55ae1ee97370 | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:747 GloballyUniqueId(created_on=0, local_id=12) | |
(88625) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=6) with future 0x55ae1ef04700 | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=6) GloballyUniqueId(created_on=0, local_id=11) | |
(88625) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=6) with future 0x55ae1ef04700 | |
(88625) Populating OwnerRRef GloballyUniqueId(created_on=0, local_id=6) with future 0x55ae1ef04700 | |
(88625) Populating OwnerRRef GloballyUniqueId(created_on=0, local_id=8) with future 0x55ae1ef7df60 | |
(88625) Populating OwnerRRef GloballyUniqueId(created_on=0, local_id=10) with future 0x55ae1efc0500 | |
(88625) Populating OwnerRRef GloballyUniqueId(created_on=0, local_id=12) with future 0x55ae1ee97370 | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=6) GloballyUniqueId(created_on=0, local_id=6) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=8) GloballyUniqueId(created_on=0, local_id=8) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=10) GloballyUniqueId(created_on=0, local_id=10) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=12) GloballyUniqueId(created_on=0, local_id=12) | |
(88626) Instantiating OwnerRRef GloballyUniqueId(created_on=0, local_id=1) with future 0x7f47cc006750 | |
(88627) Instantiating OwnerRRef GloballyUniqueId(created_on=0, local_id=3) with future 0x7f08d80152d0 | |
(88626) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=1) with future 0x7f47cc006750 | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=3) GloballyUniqueId(created_on=0, local_id=4) | |
(88627) Populating OwnerRRef GloballyUniqueId(created_on=0, local_id=3) with future 0x7f08d80152d0 | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=1) GloballyUniqueId(created_on=0, local_id=2) | |
(88626) Populating OwnerRRef GloballyUniqueId(created_on=0, local_id=1) with future 0x7f47cc006750 | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=10) GloballyUniqueId(created_on=0, local_id=15) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=10) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=8) GloballyUniqueId(created_on=0, local_id=16) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=8) | |
(88626) Instantiating OwnerRRef GloballyUniqueId(created_on=0, local_id=17) with future 0x7f47bc006ac0 | |
(88626) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=1) with future 0x7f47cc006750 | |
(88626) Instantiating OwnerRRef GloballyUniqueId(created_on=1, local_id=0) with future 0x7f47c80085f0 | |
(88626) Populating OwnerRRef GloballyUniqueId(created_on=1, local_id=0) with future 0x7f47c80085f0 | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=1, local_id=0) GloballyUniqueId(created_on=1, local_id=1) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=1, local_id=0) | |
(88626) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=17) with future 0x7f47bc006ac0 | |
(88627) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=3) with future 0x7f08d80152d0 | |
(88626) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=17) with future 0x7f47bc006ac0 | |
(88626) Instantiating OwnerRRef GloballyUniqueId(created_on=1, local_id=3) with future 0x7f47c8008a30 | |
(88626) Populating OwnerRRef GloballyUniqueId(created_on=1, local_id=3) with future 0x7f47c8008a30 | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=12) GloballyUniqueId(created_on=0, local_id=28) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=12) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=1, local_id=3) GloballyUniqueId(created_on=1, local_id=4) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=1, local_id=3) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=17) GloballyUniqueId(created_on=0, local_id=18) | |
(88627) ^^^^ Scenario 1 (created_on=0, local_id=30) | |
(88627) Instantiating OwnerRRef GloballyUniqueId(created_on=0, local_id=30) with future 0x7f085c0078a0 | |
(88627) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=3) with future 0x7f08d80152d0 | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=12) GloballyUniqueId(created_on=0, local_id=51) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=12) | |
(88627) Instantiating OwnerRRef GloballyUniqueId(created_on=2, local_id=0) with future 0x7f085c009990 | |
(88627) Populating OwnerRRef GloballyUniqueId(created_on=2, local_id=0) with future 0x7f085c009990 | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=2, local_id=0) GloballyUniqueId(created_on=2, local_id=1) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=2, local_id=0) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=10) GloballyUniqueId(created_on=0, local_id=68) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=10) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=8) GloballyUniqueId(created_on=0, local_id=69) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=8) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=30) GloballyUniqueId(created_on=2, local_id=2) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=30) | |
(88627) Instantiating OwnerRRef GloballyUniqueId(created_on=0, local_id=34) with future 0x7f0854007ee0 | |
(88627) ^^^^ Scenario 2 (created_on=0, local_id=30) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=34) GloballyUniqueId(created_on=0, local_id=35) | |
(88627) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=3) with future 0x7f08d80152d0 | |
(88627) Instantiating OwnerRRef GloballyUniqueId(created_on=2, local_id=3) with future 0x7f0854009ed0 | |
(88627) Populating OwnerRRef GloballyUniqueId(created_on=2, local_id=3) with future 0x7f0854009ed0 | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=0) GloballyUniqueId(created_on=0, local_id=78) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=0) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=2, local_id=3) GloballyUniqueId(created_on=2, local_id=4) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=6) GloballyUniqueId(created_on=0, local_id=79) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=2, local_id=3) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=6) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=34) GloballyUniqueId(created_on=2, local_id=5) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=34) | |
(88625) Instantiating OwnerRRef GloballyUniqueId(created_on=0, local_id=83) with future 0x55ae1f034820 | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:747 GloballyUniqueId(created_on=0, local_id=83) | |
(88627) Instantiating OwnerRRef GloballyUniqueId(created_on=2, local_id=6) with future 0x7f085400a2c0 | |
(88627) Populating OwnerRRef GloballyUniqueId(created_on=2, local_id=6) with future 0x7f085400a2c0 | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=2, local_id=6) GloballyUniqueId(created_on=2, local_id=7) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=2, local_id=6) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=30) GloballyUniqueId(created_on=2, local_id=8) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=30) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=83) GloballyUniqueId(created_on=0, local_id=84) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=83) | |
(88625) Instantiating OwnerRRef GloballyUniqueId(created_on=0, local_id=85) with future 0x55ae1f037130 | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:747 GloballyUniqueId(created_on=0, local_id=85) | |
(88627) Instantiating OwnerRRef GloballyUniqueId(created_on=0, local_id=39) with future 0x7f085400bd20 | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=39) GloballyUniqueId(created_on=0, local_id=40) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=0) GloballyUniqueId(created_on=0, local_id=78) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=83) GloballyUniqueId(created_on=0, local_id=84) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=6) GloballyUniqueId(created_on=0, local_id=79) | |
(88625) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=83) with future 0x55ae1f034820 | |
(88626) Instantiating OwnerRRef GloballyUniqueId(created_on=0, local_id=70) with future 0x7f47a4006ae0 | |
(88626) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=70) with future 0x7f47a4006ae0 | |
(88626) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=70) with future 0x7f47a4006ae0 | |
(88627) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=39) with future 0x7f085400bd20 | |
(88627) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=39) with future 0x7f085400bd20 | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:787 GloballyUniqueId(created_on=0, local_id=73) GloballyUniqueId(created_on=0, local_id=80) | |
(88627) ^^^^ Scenario 2 (created_on=0, local_id=30) | |
(88627) Instantiating OwnerRRef GloballyUniqueId(created_on=0, local_id=42) with future 0x7f083c008ee0 | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:787 GloballyUniqueId(created_on=0, local_id=76) GloballyUniqueId(created_on=0, local_id=82) | |
(88627) Instantiating OwnerRRef GloballyUniqueId(created_on=0, local_id=53) with future 0x7f08d800c410 | |
(88627) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=53) with future 0x7f08d800c410 | |
(88627) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=53) with future 0x7f08d800c410 | |
(88627) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=53) with future 0x7f08d800c410 | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:787 GloballyUniqueId(created_on=0, local_id=59) GloballyUniqueId(created_on=0, local_id=81) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=1, local_id=0) GloballyUniqueId(created_on=1, local_id=1) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=1, local_id=3) GloballyUniqueId(created_on=1, local_id=4) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:787 GloballyUniqueId(created_on=0, local_id=10) GloballyUniqueId(created_on=1, local_id=2) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:787 GloballyUniqueId(created_on=0, local_id=8) GloballyUniqueId(created_on=1, local_id=5) | |
(88626) Waiting on OwnerRRef GloballyUniqueId(created_on=1, local_id=0) with future 0x7f47c80085f0 | |
(88626) Waiting on OwnerRRef GloballyUniqueId(created_on=1, local_id=3) with future 0x7f47c8008a30 | |
(88626) ^^^^ Destructing OwnerRRef (created_on=1, local_id=0) | |
(88625) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=10) with future 0x55ae1efc0500 | |
(88626) ^^^^ Destructing OwnerRRef (created_on=1, local_id=3) | |
(88625) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=8) with future 0x55ae1ef7df60 | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=10) GloballyUniqueId(created_on=1, local_id=2) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=8) GloballyUniqueId(created_on=1, local_id=5) | |
(88626) Populating OwnerRRef GloballyUniqueId(created_on=0, local_id=17) with future 0x7f47bc006ac0 | |
(88626) Instantiating OwnerRRef GloballyUniqueId(created_on=0, local_id=20) with future 0x7f47bc0080c0 | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=20) GloballyUniqueId(created_on=0, local_id=21) | |
(88626) Populating OwnerRRef GloballyUniqueId(created_on=0, local_id=20) with future 0x7f47bc0080c0 | |
(88626) Instantiating OwnerRRef GloballyUniqueId(created_on=0, local_id=23) with future 0x7f47c0007c60 | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=23) GloballyUniqueId(created_on=0, local_id=24) | |
(88626) Populating OwnerRRef GloballyUniqueId(created_on=0, local_id=23) with future 0x7f47c0007c60 | |
(88625) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=0) with future 0x55ae1ef7c9f0 | |
(88625) Instantiating OwnerRRef GloballyUniqueId(created_on=0, local_id=86) with future 0x7f7ed4005e80 | |
(88625) Populating OwnerRRef GloballyUniqueId(created_on=0, local_id=86) with future 0x7f7ed4005e80 | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=86) GloballyUniqueId(created_on=0, local_id=87) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=86) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=6) GloballyUniqueId(created_on=0, local_id=88) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=6) | |
(88625) Instantiating OwnerRRef GloballyUniqueId(created_on=0, local_id=89) with future 0x7f7ed400e250 | |
(88625) Populating OwnerRRef GloballyUniqueId(created_on=0, local_id=89) with future 0x7f7ed400e250 | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=89) GloballyUniqueId(created_on=0, local_id=90) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=89) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=86) GloballyUniqueId(created_on=0, local_id=87) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=6) GloballyUniqueId(created_on=0, local_id=88) | |
(88625) Instantiating OwnerRRef GloballyUniqueId(created_on=0, local_id=92) with future 0x7f7ed400ee90 | |
(88625) Populating OwnerRRef GloballyUniqueId(created_on=0, local_id=92) with future 0x7f7ed400ee90 | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=92) GloballyUniqueId(created_on=0, local_id=93) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=92) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=89) GloballyUniqueId(created_on=0, local_id=90) | |
(88625) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=86) with future 0x7f7ed4005e80 | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=6) GloballyUniqueId(created_on=0, local_id=95) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=6) | |
(88625) Instantiating OwnerRRef GloballyUniqueId(created_on=0, local_id=96) with future 0x7f7ed400fd80(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:787 GloballyUniqueId(created_on=0, local_id=73) GloballyUniqueId(created_on=0, local_id=91) | |
(88625) Populating OwnerRRef GloballyUniqueId(created_on=0, local_id=96) with future 0x7f7ed400fd80 | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=96) GloballyUniqueId(created_on=0, local_id=97) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=96) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=92) GloballyUniqueId(created_on=0, local_id=93) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=6) GloballyUniqueId(created_on=0, local_id=95) | |
(88625) ^^^^ Destructing OwnerRRef (created_on=0, local_id=86) | |
(88625) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=89) with future 0x7f7ed400e250 | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:787 GloballyUniqueId(created_on=0, local_id=23) GloballyUniqueId(created_on=0, local_id=27) | |
(88625) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=6) with future 0x55ae1ef04700 | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:787 GloballyUniqueId(created_on=0, local_id=20) GloballyUniqueId(created_on=0, local_id=52) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:787 GloballyUniqueId(created_on=0, local_id=59) GloballyUniqueId(created_on=0, local_id=94) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:787 GloballyUniqueId(created_on=0, local_id=23) GloballyUniqueId(created_on=0, local_id=50) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=96) GloballyUniqueId(created_on=0, local_id=97) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:787 GloballyUniqueId(created_on=0, local_id=20) GloballyUniqueId(created_on=0, local_id=29) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=2, local_id=0) GloballyUniqueId(created_on=2, local_id=1) | |
(88627) ^^^^ Scenario 2 (created_on=0, local_id=30) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=30) GloballyUniqueId(created_on=2, local_id=2) | |
(88627) Waiting on OwnerRRef GloballyUniqueId(created_on=2, local_id=0) with future 0x7f085c009990 | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:787 GloballyUniqueId(created_on=0, local_id=76) GloballyUniqueId(created_on=0, local_id=98) | |
(88626) Instantiating OwnerRRef GloballyUniqueId(created_on=0, local_id=73) with future 0x7f47d4006c00 | |
(88625) ^^^^ Destructing OwnerRRef (created_on=0, local_id=89) | |
(88625) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=96) with future 0x7f7ed400fd80 | |
(88626) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=73) with future 0x7f47d4006c00 | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=30) GloballyUniqueId(created_on=2, local_id=9) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=30) | |
(88627) ^^^^ Destructing OwnerRRef (created_on=2, local_id=0) | |
(88625) ^^^^ Destructing OwnerRRef (created_on=0, local_id=96) | |
(88626) Instantiating OwnerRRef GloballyUniqueId(created_on=0, local_id=76) with future 0x7f47c8008b50 | |
(88626) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=76) with future 0x7f47c8008b50 | |
(88625) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=92) with future 0x7f7ed400ee90 | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=2, local_id=3) GloballyUniqueId(created_on=2, local_id=4) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=34) GloballyUniqueId(created_on=2, local_id=5) | |
(88627) Waiting on OwnerRRef GloballyUniqueId(created_on=2, local_id=3) with future 0x7f0854009ed0 | |
(88625) ^^^^ Destructing OwnerRRef (created_on=0, local_id=92) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=34) GloballyUniqueId(created_on=2, local_id=10) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=34) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:787 GloballyUniqueId(created_on=0, local_id=56) GloballyUniqueId(created_on=0, local_id=66) | |
(88627) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=3) with future 0x7f08d80152d0(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=2, local_id=6) GloballyUniqueId(created_on=2, local_id=7) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:787 GloballyUniqueId(created_on=0, local_id=62) GloballyUniqueId(created_on=0, local_id=67) | |
(88627) ^^^^ Scenario 2 (created_on=0, local_id=30)(88626) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=1) with future 0x7f47cc006750 | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=30) GloballyUniqueId(created_on=2, local_id=9) | |
(88627) Instantiating OwnerRRef GloballyUniqueId(created_on=0, local_id=59) with future 0x7f083c009490 | |
(88626) Instantiating OwnerRRef GloballyUniqueId(created_on=1, local_id=8) with future 0x7f47c800bb90 | |
(88626) Populating OwnerRRef GloballyUniqueId(created_on=1, local_id=8) with future 0x7f47c800bb90 | |
(88627) Instantiating OwnerRRef GloballyUniqueId(created_on=2, local_id=11) with future 0x7f08d8014060 | |
(88627) Populating OwnerRRef GloballyUniqueId(created_on=2, local_id=11) with future 0x7f08d8014060 | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=1, local_id=8) GloballyUniqueId(created_on=1, local_id=9) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=1, local_id=8) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=17) GloballyUniqueId(created_on=1, local_id=10) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=17) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=2, local_id=11) GloballyUniqueId(created_on=2, local_id=12) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=2, local_id=11) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=34) GloballyUniqueId(created_on=2, local_id=10) | |
(88627) ^^^^ Scenario 2 (created_on=0, local_id=30) | |
(88626) Instantiating OwnerRRef GloballyUniqueId(created_on=1, local_id=11) with future 0x7f47c80085f0(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=30) GloballyUniqueId(created_on=2, local_id=8) | |
(88626) Populating OwnerRRef GloballyUniqueId(created_on=1, local_id=11) with future 0x7f47c80085f0 | |
(88627) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=3) with future 0x7f08d80152d0 | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=30) GloballyUniqueId(created_on=2, local_id=13) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=1, local_id=11) GloballyUniqueId(created_on=1, local_id=12) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=30) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=1, local_id=11) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=1, local_id=8) GloballyUniqueId(created_on=1, local_id=9) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=17) GloballyUniqueId(created_on=1, local_id=10) | |
(88626) Instantiating OwnerRRef GloballyUniqueId(created_on=1, local_id=14) with future 0x7f47b8008620 | |
(88626) Populating OwnerRRef GloballyUniqueId(created_on=1, local_id=14) with future 0x7f47b8008620 | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=1, local_id=14) GloballyUniqueId(created_on=1, local_id=15) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=1, local_id=14) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=1, local_id=11) GloballyUniqueId(created_on=1, local_id=12) | |
(88627) Instantiating OwnerRRef GloballyUniqueId(created_on=2, local_id=14) with future 0x7f08540082b0 | |
(88627) Populating OwnerRRef GloballyUniqueId(created_on=2, local_id=14) with future 0x7f08540082b0 | |
(88626) Waiting on OwnerRRef GloballyUniqueId(created_on=1, local_id=8) with future 0x7f47c800bb90 | |
(88626) Instantiating OwnerRRef GloballyUniqueId(created_on=1, local_id=17) with future 0x7f47b80097b0(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:787 GloballyUniqueId(created_on=0, local_id=56) GloballyUniqueId(created_on=1, local_id=13) | |
(88626) Populating OwnerRRef GloballyUniqueId(created_on=1, local_id=17) with future 0x7f47b80097b0 | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=1, local_id=17) GloballyUniqueId(created_on=1, local_id=18) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=2, local_id=14) GloballyUniqueId(created_on=2, local_id=15) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=1, local_id=17) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=2, local_id=14) | |
(88627) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=34) with future 0x7f0854007ee0 | |
(88627) ^^^^ Destructing OwnerRRef (created_on=2, local_id=3) | |
(88627) Waiting on OwnerRRef GloballyUniqueId(created_on=2, local_id=6) with future 0x7f085400a2c0 | |
(88627) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=30) with future 0x7f085c0078a0 | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=17) GloballyUniqueId(created_on=1, local_id=19) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=17) | |
(88627) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=59) with future 0x7f083c009490 | |
(88626) Waiting on OwnerRRef GloballyUniqueId(created_on=1, local_id=11) with future 0x7f47c80085f0 | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=30) GloballyUniqueId(created_on=2, local_id=17) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=30) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=1, local_id=14) GloballyUniqueId(created_on=1, local_id=15) | |
(88627) Instantiating OwnerRRef GloballyUniqueId(created_on=2, local_id=18) with future 0x7f0830008100 | |
(88627) Populating OwnerRRef GloballyUniqueId(created_on=2, local_id=18) with future 0x7f0830008100 | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=1, local_id=17) GloballyUniqueId(created_on=1, local_id=18) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=2, local_id=18) GloballyUniqueId(created_on=2, local_id=19) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=2, local_id=18) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=2, local_id=11) GloballyUniqueId(created_on=2, local_id=12) | |
(88626) Instantiating OwnerRRef GloballyUniqueId(created_on=1, local_id=22) with future 0x7f47b8008200 | |
(88626) Populating OwnerRRef GloballyUniqueId(created_on=1, local_id=22) with future 0x7f47b8008200 | |
(88627) ^^^^ Scenario 2 (created_on=0, local_id=30) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=30) GloballyUniqueId(created_on=2, local_id=13) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=1, local_id=22) GloballyUniqueId(created_on=1, local_id=23) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=1, local_id=22) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=42) GloballyUniqueId(created_on=2, local_id=20) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=42) | |
(88627) ^^^^ Scenario 2 (created_on=0, local_id=30) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=30) GloballyUniqueId(created_on=2, local_id=17) | |
(88627) Waiting on OwnerRRef GloballyUniqueId(created_on=2, local_id=11) with future 0x7f08d8014060 | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=17) GloballyUniqueId(created_on=1, local_id=19) | |
(88626) ^^^^ Destructing OwnerRRef (created_on=1, local_id=8)(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:787 GloballyUniqueId(created_on=0, local_id=62) GloballyUniqueId(created_on=1, local_id=16) | |
(88627) Instantiating OwnerRRef GloballyUniqueId(created_on=2, local_id=21) with future 0x7f0854008160 | |
(88627) Populating OwnerRRef GloballyUniqueId(created_on=2, local_id=21) with future 0x7f0854008160 | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:787 GloballyUniqueId(created_on=0, local_id=10) GloballyUniqueId(created_on=1, local_id=20) | |
(88626) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=17) with future 0x7f47bc006ac0 | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=2, local_id=21) GloballyUniqueId(created_on=2, local_id=22) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=2, local_id=21) | |
(88626) Waiting on OwnerRRef GloballyUniqueId(created_on=1, local_id=17) with future 0x7f47b80097b0 | |
(88626) Waiting on OwnerRRef GloballyUniqueId(created_on=1, local_id=14) with future 0x7f47b8008620 | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=30) GloballyUniqueId(created_on=2, local_id=23) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=30) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=1, local_id=22) GloballyUniqueId(created_on=1, local_id=23) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=2, local_id=14) GloballyUniqueId(created_on=2, local_id=15) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=2, local_id=18) GloballyUniqueId(created_on=2, local_id=19) | |
(88626) ^^^^ Destructing OwnerRRef (created_on=1, local_id=11)(88627) Instantiating OwnerRRef GloballyUniqueId(created_on=0, local_id=56) with future 0x7f0864007a50 | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=70) GloballyUniqueId(created_on=0, local_id=71) | |
(88627) Instantiating OwnerRRef GloballyUniqueId(created_on=2, local_id=25) with future 0x7f08300093a0 | |
(88627) Populating OwnerRRef GloballyUniqueId(created_on=2, local_id=25) with future 0x7f08300093a0 | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=2, local_id=25) GloballyUniqueId(created_on=2, local_id=26) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=2, local_id=25) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=42) GloballyUniqueId(created_on=2, local_id=20) | |
(88627) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=30) with future 0x7f085c0078a0 | |
(88627) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=56) with future 0x7f0864007a50 | |
(88626) ^^^^ Destructing OwnerRRef (created_on=1, local_id=14) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:787 GloballyUniqueId(created_on=0, local_id=23) GloballyUniqueId(created_on=2, local_id=16) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:787 GloballyUniqueId(created_on=0, local_id=8) GloballyUniqueId(created_on=1, local_id=24) | |
(88627) ^^^^ Destructing OwnerRRef (created_on=2, local_id=6) | |
(88627) Waiting on OwnerRRef GloballyUniqueId(created_on=2, local_id=18) with future 0x7f0830008100 | |
(88627) Instantiating OwnerRRef GloballyUniqueId(created_on=0, local_id=62) with future 0x7f085c00a030 | |
(88626) Waiting on OwnerRRef GloballyUniqueId(created_on=1, local_id=22) with future 0x7f47b8008200 | |
(88626) ^^^^ Destructing OwnerRRef (created_on=1, local_id=17) | |
(88627) ^^^^ Scenario 2 (created_on=0, local_id=30) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=30) GloballyUniqueId(created_on=2, local_id=23) | |
(88625) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=10) with future 0x55ae1efc0500 | |
(88627) Instantiating OwnerRRef GloballyUniqueId(created_on=2, local_id=28) with future 0x7f0874008680 | |
(88627) Populating OwnerRRef GloballyUniqueId(created_on=2, local_id=28) with future 0x7f0874008680 | |
(88626) ^^^^ Destructing OwnerRRef (created_on=1, local_id=22) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=2, local_id=28) GloballyUniqueId(created_on=2, local_id=29) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=2, local_id=28) | |
(88627) ^^^^ Destructing OwnerRRef (created_on=2, local_id=11) | |
(88625) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=8) with future 0x55ae1ef7df60 | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=42) GloballyUniqueId(created_on=2, local_id=30) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=0, local_id=42) | |
(88627) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=62) with future 0x7f085c00a030 | |
(88627) Instantiating OwnerRRef GloballyUniqueId(created_on=2, local_id=31) with future 0x7f0830008b00 | |
(88627) Populating OwnerRRef GloballyUniqueId(created_on=2, local_id=31) with future 0x7f0830008b00 | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=10) GloballyUniqueId(created_on=1, local_id=20) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=2, local_id=31) GloballyUniqueId(created_on=2, local_id=32) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=2, local_id=31) | |
(88627) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=30) with future 0x7f085c0078a0 | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=8) GloballyUniqueId(created_on=1, local_id=24) | |
(88627) Waiting on OwnerRRef GloballyUniqueId(created_on=2, local_id=14) with future 0x7f08540082b0 | |
(88627) Instantiating OwnerRRef GloballyUniqueId(created_on=2, local_id=36) with future 0x7f083000ac10 | |
(88627) Populating OwnerRRef GloballyUniqueId(created_on=2, local_id=36) with future 0x7f083000ac10 | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=2, local_id=36) GloballyUniqueId(created_on=2, local_id=37) | |
(88627) ^^^^ Destructing OwnerRRef (created_on=2, local_id=18)(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:474 GloballyUniqueId(created_on=2, local_id=36) | |
(88627) ^^^^ Scenario 1 (created_on=0, local_id=30) | |
(88627) Instantiating OwnerRRef GloballyUniqueId(created_on=0, local_id=30) with future 0x7f0874009a90 | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=30) GloballyUniqueId(created_on=0, local_id=31) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=2, local_id=25) GloballyUniqueId(created_on=2, local_id=26) | |
(88627) ^^^^ Destructing OwnerRRef (created_on=2, local_id=14) | |
(88626) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=23) with future 0x7f47c0007c60 | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:787 GloballyUniqueId(created_on=0, local_id=23) GloballyUniqueId(created_on=2, local_id=27) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=2, local_id=21) GloballyUniqueId(created_on=2, local_id=22) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=42) GloballyUniqueId(created_on=2, local_id=30) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=2, local_id=31) GloballyUniqueId(created_on=2, local_id=32) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:769 GloballyUniqueId(created_on=0, local_id=53) GloballyUniqueId(created_on=0, local_id=54) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:787 GloballyUniqueId(created_on=0, local_id=12) GloballyUniqueId(created_on=2, local_id=24) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:787 GloballyUniqueId(created_on=0, local_id=12) GloballyUniqueId(created_on=2, local_id=34) | |
(88627) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=42) with future 0x7f083c008ee0 | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=2, local_id=28) GloballyUniqueId(created_on=2, local_id=29) | |
(88627) Waiting on OwnerRRef GloballyUniqueId(created_on=2, local_id=25) with future 0x7f08300093a0 | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=2, local_id=36) GloballyUniqueId(created_on=2, local_id=37) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:787 GloballyUniqueId(created_on=0, local_id=20) GloballyUniqueId(created_on=2, local_id=33) | |
(88627) Waiting on OwnerRRef GloballyUniqueId(created_on=2, local_id=21) with future 0x7f0854008160 | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:787 GloballyUniqueId(created_on=0, local_id=20) GloballyUniqueId(created_on=2, local_id=38) | |
(88627) ^^^^ Destructing OwnerRRef (created_on=2, local_id=25) | |
(88627) ^^^^ Destructing OwnerRRef (created_on=2, local_id=21) | |
(88626) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=23) with future 0x7f47c0007c60 | |
(88627) Waiting on OwnerRRef GloballyUniqueId(created_on=2, local_id=31) with future 0x7f0830008b00 | |
(88625) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=12) with future 0x55ae1ee97370 | |
(88627) Waiting on OwnerRRef GloballyUniqueId(created_on=2, local_id=28) with future 0x7f0874008680 | |
(88627) ^^^^ Destructing OwnerRRef (created_on=2, local_id=31) | |
(88625) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=12) with future 0x55ae1ee97370 | |
(88627) Waiting on OwnerRRef GloballyUniqueId(created_on=2, local_id=36) with future 0x7f083000ac10 | |
(88627) ^^^^ Destructing OwnerRRef (created_on=2, local_id=28) | |
(88626) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=20) with future 0x7f47bc0080c0 | |
(88627) ^^^^ Destructing OwnerRRef (created_on=2, local_id=36) | |
(88626) Waiting on OwnerRRef GloballyUniqueId(created_on=0, local_id=20) with future 0x7f47bc0080c0 | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=23) GloballyUniqueId(created_on=2, local_id=27) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=12) GloballyUniqueId(created_on=2, local_id=24) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=12) GloballyUniqueId(created_on=2, local_id=34) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=20) GloballyUniqueId(created_on=2, local_id=33) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=20) GloballyUniqueId(created_on=2, local_id=38) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=23) GloballyUniqueId(created_on=2, local_id=16) | |
(88627) Populating OwnerRRef GloballyUniqueId(created_on=0, local_id=30) with future 0x7f0874009a90 | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=85) GloballyUniqueId(created_on=0, local_id=85) | |
(88625) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=83) GloballyUniqueId(created_on=0, local_id=83) | |
Traceback (most recent call last): | |
File "/fsx/users/jamesreed/pipeline_for_real/test/local_test_forward_backward.py", line 105, in <module> | |
out = pipe_driver.run(input, target, chunks=CHUNKS, _debug_mask_minibatches = DEBUG_MASK_MINIBATCHES) | |
File "/fsx/users/jamesreed/pipeline_for_real/pippy/PipelineDriver.py", line 584, in run | |
return self._retrieve_output_values(microbatch_interpreters, last_nodes, _debug_mask_minibatches, splits_per_arg) | |
File "/fsx/users/jamesreed/pipeline_for_real/pippy/PipelineDriver.py", line 594, in _retrieve_output_values | |
local_results = [to_here(result) for result in output_vals] | |
File "/fsx/users/jamesreed/pipeline_for_real/pippy/PipelineDriver.py", line 594, in <listcomp> | |
local_results = [to_here(result) for result in output_vals] | |
File "/fsx/users/jamesreed/pipeline_for_real/pippy/PipelineDriver.py", line 45, in to_here | |
return a.to_here() | |
RuntimeError: RPCErr:1:RPC ran for more than set timeout (60000 ms) and will now be marked with an error | |
(88625) ^^^^ Destructing OwnerRRef (created_on=0, local_id=85) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=30) GloballyUniqueId(created_on=0, local_id=31) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=20) GloballyUniqueId(created_on=0, local_id=21) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=17) GloballyUniqueId(created_on=0, local_id=18) | |
(88627) ^^^^ Destructing OwnerRRef (created_on=0, local_id=30) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=39) GloballyUniqueId(created_on=0, local_id=40) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=42) GloballyUniqueId(created_on=0, local_id=43) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=23) GloballyUniqueId(created_on=0, local_id=24) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=70) GloballyUniqueId(created_on=0, local_id=71) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=73) GloballyUniqueId(created_on=0, local_id=74) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=45) GloballyUniqueId(created_on=0, local_id=46) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=76) GloballyUniqueId(created_on=0, local_id=77) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=53) GloballyUniqueId(created_on=0, local_id=54) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=56) GloballyUniqueId(created_on=0, local_id=57) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=59) GloballyUniqueId(created_on=0, local_id=60) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=62) GloballyUniqueId(created_on=0, local_id=63) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=34) GloballyUniqueId(created_on=0, local_id=35) | |
(88625) ^^^^ Destructing OwnerRRef (created_on=0, local_id=0) | |
(88626) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=1) GloballyUniqueId(created_on=0, local_id=2) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=3) GloballyUniqueId(created_on=0, local_id=4) | |
(88627) ^^^^ Destructing OwnerRRef (created_on=0, local_id=3) | |
(88626) ^^^^ Destructing OwnerRRef (created_on=0, local_id=1) | |
terminate called without an active exception | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=56) GloballyUniqueId(created_on=1, local_id=13) | |
(88627) ../torch/csrc/distributed/rpc/rref_context.cpp:814 GloballyUniqueId(created_on=0, local_id=62) GloballyUniqueId(created_on=1, local_id=16) | |
[W tensorpipe_agent.cpp:682] RPC agent for worker2 encountered error when reading incoming request from worker0: eof (this error originated at tensorpipe/transport/shm/connection_impl.cc:259) | |
[W tensorpipe_agent.cpp:682] RPC agent for worker1 encountered error when reading incoming request from worker0: eof (this error originated at tensorpipe/transport/shm/connection_impl.cc:259) | |
[W tensorpipe_agent.cpp:682] RPC agent for worker8 encountered error when reading incoming request from worker0: eof (this error originated at tensorpipe/transport/shm/connection_impl.cc:259) | |
[W tensorpipe_agent.cpp:682] RPC agent for worker7 encountered error when reading incoming request from worker0: eof (this error originated at tensorpipe/transport/shm/connection_impl.cc:259) | |
[W tensorpipe_agent.cpp:682] RPC agent for worker5 encountered error when reading incoming request from worker0: eof (this error originated at tensorpipe/transport/shm/connection_impl.cc:259) | |
[W tensorpipe_agent.cpp:682] RPC agent for worker4 encountered error when reading incoming request from worker0: eof (this error originated at tensorpipe/transport/shm/connection_impl.cc:259) | |
[W tensorpipe_agent.cpp:682] RPC agent for worker9 encountered error when reading incoming request from worker0: eof (this error originated at tensorpipe/transport/shm/connection_impl.cc:259) | |
[W tensorpipe_agent.cpp:682] RPC agent for worker6 encountered error when reading incoming request from worker0: eof (this error originated at tensorpipe/transport/shm/connection_impl.cc:259) | |
[W tensorpipe_agent.cpp:682] RPC agent for worker3 encountered error when reading incoming request from worker0: eof (this error originated at tensorpipe/transport/shm/connection_impl.cc:259) | |
WARNING:torch.distributed.elastic.multiprocessing.api:Sending process 88626 closing signal SIGTERM | |
WARNING:torch.distributed.elastic.multiprocessing.api:Sending process 88627 closing signal SIGTERM | |
WARNING:torch.distributed.elastic.multiprocessing.api:Sending process 88628 closing signal SIGTERM | |
WARNING:torch.distributed.elastic.multiprocessing.api:Sending process 88629 closing signal SIGTERM | |
WARNING:torch.distributed.elastic.multiprocessing.api:Sending process 88630 closing signal SIGTERM | |
WARNING:torch.distributed.elastic.multiprocessing.api:Sending process 88631 closing signal SIGTERM | |
WARNING:torch.distributed.elastic.multiprocessing.api:Sending process 88632 closing signal SIGTERM | |
WARNING:torch.distributed.elastic.multiprocessing.api:Sending process 88633 closing signal SIGTERM | |
WARNING:torch.distributed.elastic.multiprocessing.api:Sending process 88634 closing signal SIGTERM | |
ERROR:torch.distributed.elastic.multiprocessing.api:failed (exitcode: -6) local_rank: 0 (pid: 88625) of binary: /fsx/users/jamesreed/conda/bin/python | |
Traceback (most recent call last): | |
File "/fsx/users/jamesreed/conda/bin/torchrun", line 33, in <module> | |
sys.exit(load_entry_point('torch', 'console_scripts', 'torchrun')()) | |
File "/fsx/users/jamesreed/pytorch/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 345, in wrapper | |
return f(*args, **kwargs) | |
File "/fsx/users/jamesreed/pytorch/torch/distributed/run.py", line 724, in main | |
run(args) | |
File "/fsx/users/jamesreed/pytorch/torch/distributed/run.py", line 715, in run | |
elastic_launch( | |
File "/fsx/users/jamesreed/pytorch/torch/distributed/launcher/api.py", line 131, in __call__ | |
return launch_agent(self._config, self._entrypoint, list(args)) | |
File "/fsx/users/jamesreed/pytorch/torch/distributed/launcher/api.py", line 245, in launch_agent | |
raise ChildFailedError( | |
torch.distributed.elastic.multiprocessing.errors.ChildFailedError: | |
============================================================ | |
/fsx/users/jamesreed/pipeline_for_real/test/local_test_forward_backward.py FAILED | |
------------------------------------------------------------ | |
Failures: | |
<NO_OTHER_FAILURES> | |
------------------------------------------------------------ | |
Root Cause (first observed failure): | |
[0]: | |
time : 2022-02-23_21:50:37 | |
host : ip-10-200-31-5.ec2.internal | |
rank : 0 (local_rank: 0) | |
exitcode : -6 (pid: 88625) | |
error_file: <N/A> | |
traceback : Signal 6 (SIGABRT) received by PID 88625 | |
============================================================ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment