cuda train hf_Longformer [2023-03-08 23:40:23,242] torch._dynamo.debug_utils: [WARNING] Compiled Fx GraphModule failed. Creating script to minify the error. | |
[2023-03-08 23:40:23,244] torch._dynamo.debug_utils: [WARNING] Writing minified repro to /scratch/dberard/bisectdynamo/pytorch/torch_compile_debug/run_2023_03_08_23_40_23_244562-pid_3089959/minifier/minifier_launcher.py | |
ERROR:common:inductor raised Exception: Please convert all Tensors to FakeTensors first or instantiate FakeTensorMode with 'allow_non_fake_inputs'. Found in aten.copy_.default(*(tensor([[[[0., 0., 0., ..., 0., 0., 0.]]], | |
[[[0., 0., 0., ..., 0., 0., 0.]]], | |
[[[0., 0., 0., ..., 0., 0., 0.]]], | |
[[[0., 0., 0., ..., 0., 0., 0.]]]], device='cuda:0'), FakeTensor(FakeTensor(..., device='meta', size=(4, 1, 1, 1024)), cuda:0)), **{}) | |
Set torch._dynamo.config.verbose=True for more information | |
Minifier script written to /scratch/dberard/bisectdynamo/pytorch/torch_compile_debug/run_2023_03_08_23_40_23_244562-pid_3089959/minifier/minifier_launcher.py. Run this script to find the smallest traced graph which reproduces this error. | |
You can suppress this exception and fall back to eager by setting: | |
torch._dynamo.config.suppress_errors = True | |
Traceback (most recent call last): | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/output_graph.py", line 708, in call_user_compiler | |
compiled_fn = compiler_fn(gm, self.fake_example_inputs()) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/debug_utils.py", line 1032, in debug_wrapper | |
run_fwd_maybe_bwd(compiled_gm, example_inputs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/debug_utils.py", line 633, in run_fwd_maybe_bwd | |
out = gm(args) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_functorch/aot_autograd.py", line 1222, in g | |
return f(*args) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/eval_frame.py", line 231, in _fn | |
return fn(*args, **kwargs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_functorch/aot_autograd.py", line 2853, in forward | |
return compiled_fn(full_args) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_functorch/aot_autograd.py", line 1222, in g | |
return f(*args) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_functorch/aot_autograd.py", line 2407, in debug_compiled_function | |
return compiled_function(*args) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_functorch/aot_autograd.py", line 1909, in runtime_wrapper | |
all_outs = call_func_with_args( | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_functorch/aot_autograd.py", line 1247, in call_func_with_args | |
out = normalize_as_list(f(args)) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_functorch/aot_autograd.py", line 1222, in g | |
return f(*args) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/autograd/function.py", line 506, in apply | |
return super().apply(*args, **kwargs) # type: ignore[misc] | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_functorch/aot_autograd.py", line 2162, in forward | |
fw_outs = call_func_with_args( | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_functorch/aot_autograd.py", line 1247, in call_func_with_args | |
out = normalize_as_list(f(args)) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_inductor/compile_fx.py", line 252, in run | |
return model(new_inputs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_inductor/compile_fx.py", line 271, in run | |
return compiled_fn(new_inputs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_inductor/compile_fx.py", line 351, in run | |
dst.copy_(src) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/utils/_stats.py", line 20, in wrapper | |
return fn(*args, **kwargs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_subclasses/fake_tensor.py", line 944, in __torch_dispatch__ | |
return func(*args, **kwargs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_ops.py", line 284, in __call__ | |
return self._op(*args, **kwargs or {}) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/utils/_stats.py", line 20, in wrapper | |
return fn(*args, **kwargs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_subclasses/fake_tensor.py", line 1057, in __torch_dispatch__ | |
return self.dispatch(func, types, args, kwargs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_subclasses/fake_tensor.py", line 1136, in dispatch | |
args, kwargs = self.validate_and_convert_non_fake_tensors( | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_subclasses/fake_tensor.py", line 1290, in validate_and_convert_non_fake_tensors | |
return tree_map_only( | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/utils/_pytree.py", line 266, in tree_map_only | |
return tree_map(map_only(ty)(fn), pytree) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/utils/_pytree.py", line 196, in tree_map | |
return tree_unflatten([fn(i) for i in flat_args], spec) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/utils/_pytree.py", line 196, in <listcomp> | |
return tree_unflatten([fn(i) for i in flat_args], spec) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/utils/_pytree.py", line 247, in inner | |
return f(x) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_subclasses/fake_tensor.py", line 1282, in validate | |
raise Exception( | |
Exception: Please convert all Tensors to FakeTensors first or instantiate FakeTensorMode with 'allow_non_fake_inputs'. Found in aten.copy_.default(*(tensor([[[[0., 0., 0., ..., 0., 0., 0.]]], | |
[[[0., 0., 0., ..., 0., 0., 0.]]], | |
[[[0., 0., 0., ..., 0., 0., 0.]]], | |
[[[0., 0., 0., ..., 0., 0., 0.]]]], device='cuda:0'), FakeTensor(FakeTensor(..., device='meta', size=(4, 1, 1, 1024)), cuda:0)), **{}) | |
The above exception was the direct cause of the following exception: | |
Traceback (most recent call last): | |
File "/scratch/dberard/bisectdynamo/pytorch/benchmarks/dynamo/common.py", line 1293, in check_accuracy | |
new_result = optimized_model_iter_fn(model_copy, example_inputs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/eval_frame.py", line 231, in _fn | |
return fn(*args, **kwargs) | |
File "/scratch/dberard/bisectdynamo/pytorch/benchmarks/dynamo/common.py", line 1160, in run_n_iterations | |
self.model_iter_fn(mod, inputs, collect_outputs=False) | |
File "benchmarks/dynamo/torchbench.py", line 365, in forward_and_backward_pass | |
cloned_inputs = clone_inputs(inputs) | |
File "benchmarks/dynamo/torchbench.py", line 366, in <graph break in forward_and_backward_pass> | |
self.optimizer_zero_grad(mod) | |
File "benchmarks/dynamo/torchbench.py", line 368, in <graph break in forward_and_backward_pass> | |
pred = mod(*cloned_inputs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/nn/modules/module.py", line 1533, in _call_impl | |
return forward_call(*args, **kwargs) | |
File "/data/home/dberard/miniconda/envs/bisectdynamo/lib/python3.8/site-packages/transformers/models/longformer/modeling_longformer.py", line 1848, in forward | |
outputs = self.longformer( | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/nn/modules/module.py", line 1533, in _call_impl | |
return forward_call(*args, **kwargs) | |
File "/data/home/dberard/miniconda/envs/bisectdynamo/lib/python3.8/site-packages/transformers/models/longformer/modeling_longformer.py", line 1742, in forward | |
extended_attention_mask: torch.Tensor = self.get_extended_attention_mask(attention_mask, input_shape)[ | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/eval_frame.py", line 368, in catch_errors | |
return callback(frame, cache_size, hooks) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/convert_frame.py", line 404, in _convert_frame | |
result = inner_convert(frame, cache_size, hooks) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/convert_frame.py", line 104, in _fn | |
return fn(*args, **kwargs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/convert_frame.py", line 262, in _convert_frame_assert | |
return _compile( | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/utils.py", line 164, in time_wrapper | |
r = func(*args, **kwargs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/convert_frame.py", line 324, in _compile | |
out_code = transform_code_object(code, transform) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/bytecode_transformation.py", line 530, in transform_code_object | |
transformations(instructions, code_options) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/convert_frame.py", line 311, in transform | |
tracer.run() | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/symbolic_convert.py", line 1862, in run | |
super().run() | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/symbolic_convert.py", line 619, in run | |
and self.step() | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/symbolic_convert.py", line 583, in step | |
getattr(self, inst.opname)(inst) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/symbolic_convert.py", line 379, in wrapper | |
self.output.compile_subgraph(self, reason=reason) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/output_graph.py", line 579, in compile_subgraph | |
self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/output_graph.py", line 626, in compile_and_call_fx_graph | |
compiled_fn = self.call_user_compiler(gm) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/utils.py", line 164, in time_wrapper | |
r = func(*args, **kwargs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/output_graph.py", line 713, in call_user_compiler | |
raise BackendCompilerFailed(self.compiler_fn, e) from e | |
torch._dynamo.exc.BackendCompilerFailed: inductor raised Exception: Please convert all Tensors to FakeTensors first or instantiate FakeTensorMode with 'allow_non_fake_inputs'. Found in aten.copy_.default(*(tensor([[[[0., 0., 0., ..., 0., 0., 0.]]], | |
[[[0., 0., 0., ..., 0., 0., 0.]]], | |
[[[0., 0., 0., ..., 0., 0., 0.]]], | |
[[[0., 0., 0., ..., 0., 0., 0.]]]], device='cuda:0'), FakeTensor(FakeTensor(..., device='meta', size=(4, 1, 1, 1024)), cuda:0)), **{}) | |
Set torch._dynamo.config.verbose=True for more information | |
Minifier script written to /scratch/dberard/bisectdynamo/pytorch/torch_compile_debug/run_2023_03_08_23_40_23_244562-pid_3089959/minifier/minifier_launcher.py. Run this script to find the smallest traced graph which reproduces this error. | |
You can suppress this exception and fall back to eager by setting: | |
torch._dynamo.config.suppress_errors = True | |
TorchDynamo optimized model failed to run because of following error | |
FAIL |
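
The first log ends in a FakeTensor mismatch: aten.copy_.default was called with one real cuda:0 tensor and one FakeTensor. Below is a minimal, illustrative sketch of that mismatch and of the allow_non_fake_inputs option named in the message; the shape (4, 1, 1, 1024) comes from the log, everything else is invented and uses CPU tensors.

```python
# Illustrative sketch only (not from the gist): mixing a real tensor with a
# FakeTensor under FakeTensorMode, and the allow_non_fake_inputs escape hatch.
import torch
from torch._subclasses.fake_tensor import FakeTensorMode

real = torch.zeros(4, 1, 1, 1024)      # plain (non-fake) tensor, like the cuda:0 arg above

with FakeTensorMode():
    fake = torch.empty(4, 1, 1, 1024)  # created under the mode -> FakeTensor
    try:
        real.copy_(fake)               # real + fake operands -> should raise as in the log
    except Exception as e:
        print("raised:", e)

# The escape hatch from the message: let the mode fake-ify non-fake inputs on the fly.
with FakeTensorMode(allow_non_fake_inputs=True):
    fake = torch.empty(4, 1, 1, 1024)
    real.copy_(fake)                   # no longer raises
```
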
cuda train hf_Longformer [2023-03-08 23:20:43,889] torch._inductor.utils: [WARNING] skipping cudagraphs due to multiple devices | |
[2023-03-08 23:20:49,843] torch._inductor.utils: [WARNING] skipping cudagraphs due to multiple devices | |
[2023-03-08 23:20:55,239] torch._inductor.utils: [WARNING] skipping cudagraphs due to multiple devices | |
[2023-03-08 23:20:55,280] torch._inductor.graph: [ERROR] Error from lowering | |
Traceback (most recent call last): | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_inductor/graph.py", line 354, in call_function | |
out = lowerings[target](*args, **kwargs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_inductor/lowering.py", line 229, in wrapped | |
validate_ir(out) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_inductor/ir.py", line 103, in validate_ir | |
_check_tensorbox(node_or_nodes) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_inductor/ir.py", line 88, in _check_tensorbox | |
assert isinstance( | |
AssertionError: Found <class 'torch._inductor.ir.DynamicScalar'>, which is not a supported top level IR node. See [Note: Inductor IR] | |
ERROR:common:inductor raised LoweringException: AssertionError: Found <class 'torch._inductor.ir.DynamicScalar'>, which is not a supported top level IR node. See [Note: Inductor IR] | |
target: aten._local_scalar_dense.default | |
args[0]: TensorBox(StorageBox( | |
Pointwise( | |
'cpu', | |
torch.int64, | |
tmp0 = constant(1024, torch.int64) | |
tmp1 = constant(512, torch.int64) | |
tmp2 = truncdiv(tmp0, tmp1) | |
return tmp2 | |
, | |
ranges=(), | |
origins={div} | |
) | |
)) | |
While executing %_local_scalar_dense : [#users=0] = call_function[target=torch.ops.aten._local_scalar_dense.default](args = (%div,), kwargs = {}) | |
Original traceback: | |
File "/data/home/dberard/miniconda/envs/bisectdynamo/lib/python3.8/site-packages/transformers/models/longformer/modeling_longformer.py", line 839, in <graph break in _sliding_chunks_query_key_matmul> | |
query = self._chunk(query, window_overlap, self.config.__dict__.get("onnx_export", False)) | |
File "/data/home/dberard/miniconda/envs/bisectdynamo/lib/python3.8/site-packages/transformers/models/longformer/modeling_longformer.py", line 769, in _chunk | |
hidden_states = hidden_states.view( | |
Set torch._dynamo.config.verbose=True for more information | |
You can suppress this exception and fall back to eager by setting: | |
torch._dynamo.config.suppress_errors = True | |
Traceback (most recent call last): | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_inductor/graph.py", line 354, in call_function | |
out = lowerings[target](*args, **kwargs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_inductor/lowering.py", line 229, in wrapped | |
validate_ir(out) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_inductor/ir.py", line 103, in validate_ir | |
_check_tensorbox(node_or_nodes) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_inductor/ir.py", line 88, in _check_tensorbox | |
assert isinstance( | |
AssertionError: Found <class 'torch._inductor.ir.DynamicScalar'>, which is not a supported top level IR node. See [Note: Inductor IR] | |
The above exception was the direct cause of the following exception: | |
Traceback (most recent call last): | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/output_graph.py", line 708, in call_user_compiler | |
compiled_fn = compiler_fn(gm, self.fake_example_inputs()) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/debug_utils.py", line 1055, in debug_wrapper | |
compiled_gm = compiler_fn(gm, example_inputs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/backends/inductor.py", line 9, in inductor | |
return compile_fx(*args, **kwargs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_inductor/compile_fx.py", line 488, in compile_fx | |
return aot_autograd( | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/backends/common.py", line 48, in compiler_fn | |
cg = aot_module_simplified(gm, example_inputs, **kwargs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_functorch/aot_autograd.py", line 2839, in aot_module_simplified | |
compiled_fn = create_aot_dispatcher_function( | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/utils.py", line 164, in time_wrapper | |
r = func(*args, **kwargs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_functorch/aot_autograd.py", line 2522, in create_aot_dispatcher_function | |
compiled_fn = compiler_fn(flat_fn, fake_flat_args, aot_config) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_functorch/aot_autograd.py", line 1724, in aot_wrapper_dedupe | |
return compiler_fn(flat_fn, leaf_flat_args, aot_config) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_functorch/aot_autograd.py", line 1335, in aot_dispatch_base | |
compiled_fw = aot_config.fw_compiler(fw_module, flat_args_with_views_handled) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/utils.py", line 164, in time_wrapper | |
r = func(*args, **kwargs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_inductor/compile_fx.py", line 462, in fw_compiler | |
return inner_compile( | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/debug_utils.py", line 595, in debug_wrapper | |
compiled_fn = compiler_fn(gm, example_inputs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_inductor/debug.py", line 239, in inner | |
return fn(*args, **kwargs) | |
File "/data/home/dberard/miniconda/envs/bisectdynamo/lib/python3.8/contextlib.py", line 75, in inner | |
return func(*args, **kwds) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_inductor/compile_fx.py", line 179, in compile_fx_inner | |
graph.run(*example_inputs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/utils.py", line 164, in time_wrapper | |
r = func(*args, **kwargs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_inductor/graph.py", line 211, in run | |
return super().run(*args) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/fx/interpreter.py", line 136, in run | |
self.env[node] = self.run_node(node) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_inductor/graph.py", line 436, in run_node | |
result = super().run_node(n) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/fx/interpreter.py", line 177, in run_node | |
return getattr(self, n.op)(n.target, args, kwargs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_inductor/graph.py", line 358, in call_function | |
raise LoweringException(e, target, args, kwargs) from e | |
torch._inductor.exc.LoweringException: AssertionError: Found <class 'torch._inductor.ir.DynamicScalar'>, which is not a supported top level IR node. See [Note: Inductor IR] | |
target: aten._local_scalar_dense.default | |
args[0]: TensorBox(StorageBox( | |
Pointwise( | |
'cpu', | |
torch.int64, | |
tmp0 = constant(1024, torch.int64) | |
tmp1 = constant(512, torch.int64) | |
tmp2 = truncdiv(tmp0, tmp1) | |
return tmp2 | |
, | |
ranges=(), | |
origins={div} | |
) | |
)) | |
While executing %_local_scalar_dense : [#users=0] = call_function[target=torch.ops.aten._local_scalar_dense.default](args = (%div,), kwargs = {}) | |
Original traceback: | |
File "/data/home/dberard/miniconda/envs/bisectdynamo/lib/python3.8/site-packages/transformers/models/longformer/modeling_longformer.py", line 839, in <graph break in _sliding_chunks_query_key_matmul> | |
query = self._chunk(query, window_overlap, self.config.__dict__.get("onnx_export", False)) | |
File "/data/home/dberard/miniconda/envs/bisectdynamo/lib/python3.8/site-packages/transformers/models/longformer/modeling_longformer.py", line 769, in _chunk | |
hidden_states = hidden_states.view( | |
The above exception was the direct cause of the following exception: | |
Traceback (most recent call last): | |
File "/scratch/dberard/bisectdynamo/pytorch/benchmarks/dynamo/common.py", line 1293, in check_accuracy | |
new_result = optimized_model_iter_fn(model_copy, example_inputs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/eval_frame.py", line 231, in _fn | |
return fn(*args, **kwargs) | |
File "/scratch/dberard/bisectdynamo/pytorch/benchmarks/dynamo/common.py", line 1160, in run_n_iterations | |
self.model_iter_fn(mod, inputs, collect_outputs=False) | |
File "benchmarks/dynamo/torchbench.py", line 365, in forward_and_backward_pass | |
cloned_inputs = clone_inputs(inputs) | |
File "benchmarks/dynamo/torchbench.py", line 366, in <graph break in forward_and_backward_pass> | |
self.optimizer_zero_grad(mod) | |
File "benchmarks/dynamo/torchbench.py", line 368, in <graph break in forward_and_backward_pass> | |
pred = mod(*cloned_inputs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/nn/modules/module.py", line 1533, in _call_impl | |
return forward_call(*args, **kwargs) | |
File "/data/home/dberard/miniconda/envs/bisectdynamo/lib/python3.8/site-packages/transformers/models/longformer/modeling_longformer.py", line 1848, in forward | |
outputs = self.longformer( | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/nn/modules/module.py", line 1533, in _call_impl | |
return forward_call(*args, **kwargs) | |
File "/data/home/dberard/miniconda/envs/bisectdynamo/lib/python3.8/site-packages/transformers/models/longformer/modeling_longformer.py", line 1742, in forward | |
extended_attention_mask: torch.Tensor = self.get_extended_attention_mask(attention_mask, input_shape)[ | |
File "/data/home/dberard/miniconda/envs/bisectdynamo/lib/python3.8/site-packages/transformers/models/longformer/modeling_longformer.py", line 1750, in <graph break in forward> | |
encoder_outputs = self.encoder( | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/nn/modules/module.py", line 1533, in _call_impl | |
return forward_call(*args, **kwargs) | |
File "/data/home/dberard/miniconda/envs/bisectdynamo/lib/python3.8/site-packages/transformers/models/longformer/modeling_longformer.py", line 1294, in forward | |
is_global_attn = is_index_global_attn.flatten().any().item() | |
File "/data/home/dberard/miniconda/envs/bisectdynamo/lib/python3.8/site-packages/transformers/models/longformer/modeling_longformer.py", line 1326, in <graph break in forward> | |
layer_outputs = layer_module( | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/nn/modules/module.py", line 1533, in _call_impl | |
return forward_call(*args, **kwargs) | |
File "/data/home/dberard/miniconda/envs/bisectdynamo/lib/python3.8/site-packages/transformers/models/longformer/modeling_longformer.py", line 1249, in forward | |
self_attn_outputs = self.attention( | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/nn/modules/module.py", line 1533, in _call_impl | |
return forward_call(*args, **kwargs) | |
File "/data/home/dberard/miniconda/envs/bisectdynamo/lib/python3.8/site-packages/transformers/models/longformer/modeling_longformer.py", line 1185, in forward | |
self_outputs = self.self( | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/nn/modules/module.py", line 1533, in _call_impl | |
return forward_call(*args, **kwargs) | |
File "/data/home/dberard/miniconda/envs/bisectdynamo/lib/python3.8/site-packages/transformers/models/longformer/modeling_longformer.py", line 574, in forward | |
attn_scores = self._sliding_chunks_query_key_matmul( | |
File "/data/home/dberard/miniconda/envs/bisectdynamo/lib/python3.8/site-packages/transformers/models/longformer/modeling_longformer.py", line 586, in <graph break in forward> | |
diagonal_mask = self._sliding_chunks_query_key_matmul( | |
File "/data/home/dberard/miniconda/envs/bisectdynamo/lib/python3.8/site-packages/transformers/models/longformer/modeling_longformer.py", line 839, in _sliding_chunks_query_key_matmul | |
query = self._chunk(query, window_overlap, self.config.__dict__.get("onnx_export", False)) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/eval_frame.py", line 368, in catch_errors | |
return callback(frame, cache_size, hooks) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/convert_frame.py", line 404, in _convert_frame | |
result = inner_convert(frame, cache_size, hooks) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/convert_frame.py", line 104, in _fn | |
return fn(*args, **kwargs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/convert_frame.py", line 262, in _convert_frame_assert | |
return _compile( | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/utils.py", line 164, in time_wrapper | |
r = func(*args, **kwargs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/convert_frame.py", line 324, in _compile | |
out_code = transform_code_object(code, transform) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/bytecode_transformation.py", line 530, in transform_code_object | |
transformations(instructions, code_options) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/convert_frame.py", line 311, in transform | |
tracer.run() | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/symbolic_convert.py", line 1862, in run | |
super().run() | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/symbolic_convert.py", line 619, in run | |
and self.step() | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/symbolic_convert.py", line 583, in step | |
getattr(self, inst.opname)(inst) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/symbolic_convert.py", line 379, in wrapper | |
self.output.compile_subgraph(self, reason=reason) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/output_graph.py", line 579, in compile_subgraph | |
self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/output_graph.py", line 626, in compile_and_call_fx_graph | |
compiled_fn = self.call_user_compiler(gm) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/utils.py", line 164, in time_wrapper | |
r = func(*args, **kwargs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_dynamo/output_graph.py", line 713, in call_user_compiler | |
raise BackendCompilerFailed(self.compiler_fn, e) from e | |
torch._dynamo.exc.BackendCompilerFailed: inductor raised LoweringException: AssertionError: Found <class 'torch._inductor.ir.DynamicScalar'>, which is not a supported top level IR node. See [Note: Inductor IR] | |
target: aten._local_scalar_dense.default | |
args[0]: TensorBox(StorageBox( | |
Pointwise( | |
'cpu', | |
torch.int64, | |
tmp0 = constant(1024, torch.int64) | |
tmp1 = constant(512, torch.int64) | |
tmp2 = truncdiv(tmp0, tmp1) | |
return tmp2 | |
, | |
ranges=(), | |
origins={div} | |
) | |
)) | |
While executing %_local_scalar_dense : [#users=0] = call_function[target=torch.ops.aten._local_scalar_dense.default](args = (%div,), kwargs = {}) | |
Original traceback: | |
File "/data/home/dberard/miniconda/envs/bisectdynamo/lib/python3.8/site-packages/transformers/models/longformer/modeling_longformer.py", line 839, in <graph break in _sliding_chunks_query_key_matmul> | |
query = self._chunk(query, window_overlap, self.config.__dict__.get("onnx_export", False)) | |
File "/data/home/dberard/miniconda/envs/bisectdynamo/lib/python3.8/site-packages/transformers/models/longformer/modeling_longformer.py", line 769, in _chunk | |
hidden_states = hidden_states.view( | |
Set torch._dynamo.config.verbose=True for more information | |
You can suppress this exception and fall back to eager by setting: | |
torch._dynamo.config.suppress_errors = True | |
TorchDynamo optimized model failed to run because of following error | |
FAIL |
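
The second log fails while lowering aten._local_scalar_dense, which Inductor turns into an unsupported DynamicScalar IR node. The op comes from a data-dependent .item() call in Longformer (is_global_attn = is_index_global_attn.flatten().any().item(), modeling_longformer.py line 1294). A hedged sketch of the same pattern, together with the suppress_errors fallback the log suggests, follows; the function name and shapes are invented for illustration.

```python
# Hedged sketch (function and shapes invented): a data-dependent .item() call
# inside a compiled region, the pattern behind the _local_scalar_dense lowering.
import torch
import torch._dynamo

def attention_branch(is_index_global_attn):
    # .item() pulls a Python bool out of a traced tensor; Longformer uses it to
    # pick the global-attention code path.
    is_global_attn = is_index_global_attn.flatten().any().item()
    if is_global_attn:
        return is_index_global_attn.float() + 1.0
    return is_index_global_attn.float()

compiled = torch.compile(attention_branch, backend="inductor")

# The fallback suggested at the end of the log: swallow backend errors and run eager.
torch._dynamo.config.suppress_errors = True
out = compiled(torch.zeros(4, 1024, dtype=torch.bool))
```
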
Traceback (most recent call last): | |
File "torch_compile_debug/run_2023_03_08_23_32_30_111734-pid_3083040/minifier/minifier_launcher.py", line 55, in <module> | |
mod = make_fx(Repro(), tracing_mode='real')(*args) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/fx/experimental/proxy_tensor.py", line 714, in wrapped | |
t = dispatch_trace(wrap_key(func, args, fx_tracer), tracer=fx_tracer, concrete_args=tuple(phs)) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/fx/experimental/proxy_tensor.py", line 443, in dispatch_trace | |
graph = tracer.trace(root, concrete_args) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/fx/_symbolic_trace.py", line 778, in trace | |
(self.create_arg(fn(*args)),), | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/fx/experimental/proxy_tensor.py", line 459, in wrapped | |
out = f(*tensors) | |
File "<string>", line 1, in <lambda> | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/fx/_symbolic_trace.py", line 756, in module_call_wrapper | |
return self.call_module(mod, forward, args, kwargs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/fx/experimental/proxy_tensor.py", line 409, in call_module | |
return forward(*args, **kwargs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/fx/_symbolic_trace.py", line 749, in forward | |
return _orig_module_call(mod, *args, **kwargs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/nn/modules/module.py", line 1533, in _call_impl | |
return forward_call(*args, **kwargs) | |
File "torch_compile_debug/run_2023_03_08_23_32_30_111734-pid_3083040/minifier/minifier_launcher.py", line 48, in forward | |
_local_scalar_dense = torch.ops.aten._local_scalar_dense.default(div); div = None | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/_ops.py", line 284, in __call__ | |
return self._op(*args, **kwargs or {}) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/utils/_stats.py", line 20, in wrapper | |
return fn(*args, **kwargs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/fx/experimental/proxy_tensor.py", line 487, in __torch_dispatch__ | |
return self.inner_torch_dispatch(func, types, args, kwargs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/fx/experimental/proxy_tensor.py", line 512, in inner_torch_dispatch | |
out = proxy_call(self, func, args, kwargs) | |
File "/scratch/dberard/bisectdynamo/pytorch/torch/fx/experimental/proxy_tensor.py", line 282, in proxy_call | |
raise RuntimeError( | |
RuntimeError: It appears that you're trying to get value out of a tracing tensor with aten._local_scalar_dense.default - erroring out! It's likely that this is caused by data-dependent control flow or similar. It may be possible to trace this with dynamic shapes; try setting tracing_mode='symbolic' in your make_fx call. |
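
The last traceback is the minifier repro itself failing under make_fx(..., tracing_mode='real') on the same aten._local_scalar_dense call; the error suggests retrying with tracing_mode='symbolic'. Below is a small stand-in Repro (an assumption, not the generated minifier_launcher.py) that reproduces the 'real'-mode error.

```python
# Stand-in Repro (not the minifier script): tracing_mode='real' cannot extract a
# value from a traced tensor via aten._local_scalar_dense.
import torch
from torch.fx.experimental.proxy_tensor import make_fx

class Repro(torch.nn.Module):
    def forward(self, div):
        # mirrors the minified graph: a scalar pulled out of a traced tensor
        return torch.ops.aten._local_scalar_dense.default(div)

args = (torch.tensor(2, dtype=torch.int64),)

try:
    make_fx(Repro(), tracing_mode='real')(*args)       # raises the RuntimeError above
except RuntimeError as e:
    print(e)

# Retry suggested by the error message (behaviour depends on the real graph):
# make_fx(Repro(), tracing_mode='symbolic')(*args)
```
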