Last active
September 9, 2022 03:07
-
-
Save davidberard98/cd3a3fe1c41f30fe04194cbb60827003 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and will be removed in 0.15, please use 'weights' instead. | |
warnings.warn( | |
/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and will be removed in 0.15. The current behavior is equivalent to passing `weights=ResNet50_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet50_Weights.DEFAULT` to get the most up-to-date weights. | |
warnings.warn(msg) | |
WARNING:root:Using TorchDynamo with a context manager will be deprecated soon.Please read https://github.com/pytorch/torchdynamo#usage-example to use TorchDynamo using an annotation. | |
ERROR:root:Error while processing frame | |
Traceback (most recent call last): | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchdynamo/eval_frame.py", line 225, in catch_errors | |
log.debug(f"skipping {frame.f_code.co_name} {frame.f_code.co_filename}") | |
File "/fsx/users/dberard/scratch-local/bench-fast/benchmark/torchbenchmark/util/model.py", line 188, in invoke | |
self.train() | |
File "/fsx/users/dberard/scratch-local/bench-fast/benchmark/torchbenchmark/util/framework/vision/model_factory.py", line 65, in train | |
pred = self.model(data) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl | |
return forward_call(*input, **kwargs) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torch/nn/parallel/distributed.py", line 1040, in forward | |
output = self._run_ddp_forward(*inputs, **kwargs) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torch/nn/parallel/distributed.py", line 1000, in _run_ddp_forward | |
return module_to_run(*inputs[0], **kwargs[0]) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl | |
return forward_call(*input, **kwargs) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchvision/models/resnet.py", line 284, in forward | |
def forward(self, x: Tensor) -> Tensor: | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchdynamo/eval_frame.py", line 166, in _fn | |
return fn(*args, **kwargs) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/functorch/_src/aot_autograd.py", line 889, in forward | |
return compiled_f( | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/functorch/_src/aot_autograd.py", line 875, in new_func | |
compiled_fn = create_aot_dispatcher_function( | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchdynamo/utils.py", line 75, in time_wrapper | |
r = func(*args, **kwargs) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/functorch/_src/aot_autograd.py", line 450, in create_aot_dispatcher_function | |
aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/functorch/_src/aot_autograd.py", line 335, in aot_dispatch_autograd | |
compiled_fw_func = aot_config.fw_compiler(fw_module, flat_args) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/functorch/_src/aot_autograd.py", line 237, in f | |
out_f = compiler(fx_g, inps) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchdynamo/utils.py", line 75, in time_wrapper | |
r = func(*args, **kwargs) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchinductor/compile_fx.py", line 177, in fw_compiler | |
return compile_fx_inner( | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchdynamo/debug_utils.py", line 329, in debug_wrapper | |
compiled_fn = compiler(gm, example_inputs, **kwargs) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchinductor/debug.py", line 182, in inner | |
return fn(*args, **kwargs) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchinductor/compile_fx.py", line 60, in compile_fx_inner | |
compiled_fn = wrap(graph.compile_to_fn()) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchinductor/graph.py", line 333, in compile_to_fn | |
return self.compile_to_module().call | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchdynamo/utils.py", line 75, in time_wrapper | |
r = func(*args, **kwargs) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchinductor/graph.py", line 323, in compile_to_module | |
mod = PyCodeCache.load(code) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchinductor/codecache.py", line 162, in load | |
exec(code, mod.__dict__, mod.__dict__) | |
File "/tmp/torchinductor_dberard/s7/cs7yddkfqu2hqk2hbxty2b65q4zavo3yxmatm3yx2ie73ghiqkh7.py", line 18, in <module> | |
kernel0 = TritonCodeCache.load(''' | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchinductor/codecache.py", line 243, in load | |
return PyCodeCache.load(source_code) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchinductor/codecache.py", line 156, in load | |
key, path = write(source_code, "py") | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchinductor/codecache.py", line 46, in write | |
os.rename(tmp_path, path) | |
FileNotFoundError: [Errno 2] No such file or directory: '/tmp/torchinductor_dberard/hb/chbtiqye637eosh5eyoh7t3ve4mviybimx2m2uudsrm6bt22x3nd.py.1571306751' -> '/tmp/torchinductor_dberard/hb/chbtiqye637eosh5eyoh7t3ve4mviybimx2m2uudsrm6bt22x3nd.py' | |
submitit ERROR (2022-09-09 02:24:05,677) - Submitted job triggered an exception | |
ERROR:submitit:Submitted job triggered an exception | |
Traceback (most recent call last): | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/runpy.py", line 194, in _run_module_as_main | |
return _run_code(code, main_globals, None, | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/runpy.py", line 87, in _run_code | |
exec(code, run_globals) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/submitit/core/_submit.py", line 11, in <module> | |
submitit_main() | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/submitit/core/submission.py", line 72, in submitit_main | |
process_job(args.folder) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/submitit/core/submission.py", line 65, in process_job | |
raise error | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/submitit/core/submission.py", line 54, in process_job | |
result = delayed.result() | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/submitit/core/utils.py", line 133, in result | |
self._result = self.function(*self.args, **self.kwargs) | |
File "ddp_experiments.py", line 149, in __call__ | |
return trainer_class(self.args, model_class, model_args=self.model_args).measure() | |
File "/fsx/users/dberard/scratch-local/bench-fast/benchmark/torchbenchmark/util/distributed/core_model/trainer.py", line 79, in measure | |
self.benchmark.invoke() | |
File "/fsx/users/dberard/scratch-local/bench-fast/benchmark/torchbenchmark/util/model.py", line 188, in invoke | |
self.train() | |
File "/fsx/users/dberard/scratch-local/bench-fast/benchmark/torchbenchmark/util/framework/vision/model_factory.py", line 65, in train | |
pred = self.model(data) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl | |
return forward_call(*input, **kwargs) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torch/nn/parallel/distributed.py", line 1040, in forward | |
output = self._run_ddp_forward(*inputs, **kwargs) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torch/nn/parallel/distributed.py", line 1000, in _run_ddp_forward | |
return module_to_run(*inputs[0], **kwargs[0]) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl | |
return forward_call(*input, **kwargs) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchvision/models/resnet.py", line 284, in forward | |
def forward(self, x: Tensor) -> Tensor: | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchdynamo/eval_frame.py", line 166, in _fn | |
return fn(*args, **kwargs) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/functorch/_src/aot_autograd.py", line 889, in forward | |
return compiled_f( | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/functorch/_src/aot_autograd.py", line 875, in new_func | |
compiled_fn = create_aot_dispatcher_function( | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchdynamo/utils.py", line 75, in time_wrapper | |
r = func(*args, **kwargs) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/functorch/_src/aot_autograd.py", line 450, in create_aot_dispatcher_function | |
aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/functorch/_src/aot_autograd.py", line 335, in aot_dispatch_autograd | |
compiled_fw_func = aot_config.fw_compiler(fw_module, flat_args) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/functorch/_src/aot_autograd.py", line 237, in f | |
out_f = compiler(fx_g, inps) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchdynamo/utils.py", line 75, in time_wrapper | |
r = func(*args, **kwargs) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchinductor/compile_fx.py", line 177, in fw_compiler | |
return compile_fx_inner( | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchdynamo/debug_utils.py", line 329, in debug_wrapper | |
compiled_fn = compiler(gm, example_inputs, **kwargs) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchinductor/debug.py", line 182, in inner | |
return fn(*args, **kwargs) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchinductor/compile_fx.py", line 60, in compile_fx_inner | |
compiled_fn = wrap(graph.compile_to_fn()) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchinductor/graph.py", line 333, in compile_to_fn | |
return self.compile_to_module().call | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchdynamo/utils.py", line 75, in time_wrapper | |
r = func(*args, **kwargs) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchinductor/graph.py", line 323, in compile_to_module | |
mod = PyCodeCache.load(code) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchinductor/codecache.py", line 162, in load | |
exec(code, mod.__dict__, mod.__dict__) | |
File "/tmp/torchinductor_dberard/s7/cs7yddkfqu2hqk2hbxty2b65q4zavo3yxmatm3yx2ie73ghiqkh7.py", line 18, in <module> | |
kernel0 = TritonCodeCache.load(''' | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchinductor/codecache.py", line 243, in load | |
return PyCodeCache.load(source_code) | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchinductor/codecache.py", line 156, in load | |
key, path = write(source_code, "py") | |
File "/data/home/dberard/miniconda/envs/bench-fast/lib/python3.8/site-packages/torchinductor/codecache.py", line 46, in write | |
os.rename(tmp_path, path) | |
FileNotFoundError: [Errno 2] No such file or directory: '/tmp/torchinductor_dberard/hb/chbtiqye637eosh5eyoh7t3ve4mviybimx2m2uudsrm6bt22x3nd.py.1571306751' -> '/tmp/torchinductor_dberard/hb/chbtiqye637eosh5eyoh7t3ve4mviybimx2m2uudsrm6bt22x3nd.py' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This listing shows which of the 62606_* jobs failed. | |
# Jobs whose .err file is larger than 10,000 bytes failed (here: 62606_10 and 62606_13). | |
# An example failure, taken from 62606_13_log.err, is shown in ddp_error_cache.txt. | |
$ ls 62606* -l | |
-rw-rw-r-- 1 dberard dberard 1126 Sep 9 02:58 62606_0_log.err | |
-rw-rw-r-- 1 dberard dberard 28388 Sep 9 02:27 62606_0_log.out | |
-rw-rw-r-- 1 dberard dberard 12549 Sep 9 02:24 62606_10_log.err | |
-rw-rw-r-- 1 dberard dberard 25944 Sep 9 02:24 62606_10_log.out | |
-rw-rw-r-- 1 dberard dberard 5631 Sep 9 02:24 62606_10_result.pkl | |
-rw-rw-r-- 1 dberard dberard 1053 Sep 9 02:57 62606_11_log.err | |
-rw-rw-r-- 1 dberard dberard 24748 Sep 9 02:27 62606_11_log.out | |
-rw-rw-r-- 1 dberard dberard 1053 Sep 9 02:57 62606_12_log.err | |
-rw-rw-r-- 1 dberard dberard 27593 Sep 9 02:27 62606_12_log.out | |
-rw-rw-r-- 1 dberard dberard 12549 Sep 9 02:24 62606_13_log.err | |
-rw-rw-r-- 1 dberard dberard 23722 Sep 9 02:24 62606_13_log.out | |
-rw-rw-r-- 1 dberard dberard 5631 Sep 9 02:24 62606_13_result.pkl | |
-rw-rw-r-- 1 dberard dberard 1053 Sep 9 02:57 62606_14_log.err | |
-rw-rw-r-- 1 dberard dberard 26313 Sep 9 02:27 62606_14_log.out | |
-rw-rw-r-- 1 dberard dberard 1053 Sep 9 02:57 62606_15_log.err | |
-rw-rw-r-- 1 dberard dberard 24740 Sep 9 02:27 62606_15_log.out | |
-rw-rw-r-- 1 dberard dberard 1052 Sep 9 02:57 62606_1_log.err | |
-rw-rw-r-- 1 dberard dberard 23814 Sep 9 02:27 62606_1_log.out | |
-rw-rw-r-- 1 dberard dberard 1052 Sep 9 02:58 62606_2_log.err | |
-rw-rw-r-- 1 dberard dberard 26053 Sep 9 02:27 62606_2_log.out | |
-rw-rw-r-- 1 dberard dberard 1052 Sep 9 02:58 62606_3_log.err | |
-rw-rw-r-- 1 dberard dberard 25205 Sep 9 02:58 62606_3_log.out | |
-rw-rw-r-- 1 dberard dberard 1052 Sep 9 02:58 62606_4_log.err | |
-rw-rw-r-- 1 dberard dberard 27334 Sep 9 02:27 62606_4_log.out | |
-rw-rw-r-- 1 dberard dberard 1052 Sep 9 02:58 62606_5_log.err | |
-rw-rw-r-- 1 dberard dberard 23821 Sep 9 02:27 62606_5_log.out | |
-rw-rw-r-- 1 dberard dberard 1052 Sep 9 02:58 62606_6_log.err | |
-rw-rw-r-- 1 dberard dberard 26060 Sep 9 02:27 62606_6_log.out | |
-rw-rw-r-- 1 dberard dberard 1052 Sep 9 02:57 62606_7_log.err | |
-rw-rw-r-- 1 dberard dberard 24699 Sep 9 02:27 62606_7_log.out | |
-rw-rw-r-- 1 dberard dberard 1052 Sep 9 02:57 62606_8_log.err | |
-rw-rw-r-- 1 dberard dberard 27535 Sep 9 02:27 62606_8_log.out | |
-rw-rw-r-- 1 dberard dberard 1052 Sep 9 02:57 62606_9_log.err | |
-rw-rw-r-- 1 dberard dberard 23826 Sep 9 02:27 62606_9_log.out | |
-rw-rw-r-- 1 dberard dberard 821 Sep 9 02:20 62606_submission.sh | |
-rw-rw-r-- 1 dberard dberard 3477 Sep 9 02:20 62606_submitted.pkl |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment