Created
May 15, 2025 03:50
-
-
Save mlazos/f6a8bf58a82d6b948a2a976bf431562b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
`--> TORCH_LOGS="+inductor" python test/inductor/test_cutlass_backend.py -k test_evt_fusions_basic_mul_shape0 | |
[INFO]:compile_threads set to 32 | |
[INFO]:Creating 'subprocess' pool with 32 workers | |
[INFO]:compile_threads set to 32 | |
> /data/users/mlazos/pytorch/test/inductor/test_cutlass_backend.py(129)run_evt_test() | |
-> M, N = shape | |
(Pdb) c | |
[DEBUG]:TRACED GRAPH | |
===== BEFORE PRE GRAD ===== | |
/data/users/mlazos/pytorch/torch/fx/_lazy_graph_module.py class GraphModule(torch.nn.Module): | |
def forward(self, L_a_: "f16[512, 512][512, 1]cuda:0", L_b_: "f16[512, 512][512, 1]cuda:0", L_extra_args_0_: "f16[512, 512][512, 1]cuda:0"): | |
l_a_ = L_a_ | |
l_b_ = L_b_ | |
l_extra_args_0_ = L_extra_args_0_ | |
# File: /data/users/mlazos/pytorch/test/inductor/test_cutlass_backend.py:1446 in forward, code: res = (a @ b).relu() # add extra activation to not hit addmm path | |
matmul: "f16[512, 512][512, 1]cuda:0" = l_a_ @ l_b_; l_a_ = l_b_ = None | |
res: "f16[512, 512][512, 1]cuda:0" = matmul.relu(); matmul = None | |
# File: /data/users/mlazos/pytorch/test/inductor/test_cutlass_backend.py:1447 in forward, code: return op(res, *extra_args) | |
mul: "f16[512, 512][512, 1]cuda:0" = torch.mul(res, l_extra_args_0_); res = l_extra_args_0_ = None | |
return (mul,) | |
[DEBUG]:FX cache status: use_cache=True, local=True, remote=False, aot_mode=False, force_disable_caches=False | |
[DEBUG]:FX graph cache hash details for key fhek3hdikpc6h5t4gjddbvlkwxvg3wutqtw4ztaixgfqrdlj2om6: | |
[hcxdcmudqakhu6jlr7udswn4bpnfbom3g64jo3ywtfh6actncst] gm: <lambda>() | |
def forward(self, arg0_1, arg1_1, arg2_1): | |
mm = torch.ops.aten.mm.default(arg0_1, arg1_1); arg0_1 = arg1_1 = None | |
relu = torch.ops.aten.relu.default(mm); mm = None | |
mul = torch.ops.aten.mul.Tensor(relu, arg2_1); relu = arg2_1 = None | |
return (mul,) | |
# To see more debug info, please use `graph_module.print_readable()` | |
[ehdekuicxsxcr7g7dpwr4zx2aq6hw3xqm64w4bhmzd4gqwcbn22] example_inputs[0]: TensorMetadata(dtype=torch.float16, shape=torch.Size([512, 512]), stride=(512, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None) | |
[ehdekuicxsxcr7g7dpwr4zx2aq6hw3xqm64w4bhmzd4gqwcbn22] example_inputs[1]: TensorMetadata(dtype=torch.float16, shape=torch.Size([512, 512]), stride=(512, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None) | |
[ehdekuicxsxcr7g7dpwr4zx2aq6hw3xqm64w4bhmzd4gqwcbn22] example_inputs[2]: TensorMetadata(dtype=torch.float16, shape=torch.Size([512, 512]), stride=(512, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None) | |
[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] cache_key_tag: | |
[lmglpn4zi7vob56n34r2j2rk7flv5xfgrcvmo7xcpirqsitygqx] fx_kwargs[boxed_forward_device_index]: BoxedDeviceIndex(value=None) | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[cpp_wrapper]: False | |
[xq2hdkbfkbcuye6rgtypayrkhqf4cntij2dsd24rei3lsknakkf] fx_kwargs[cudagraphs]: BoxedBool(value=False) | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[extern_node_serializer]: None | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_backward]: False | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] fx_kwargs[is_inference]: True | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[layout_opt]: None | |
[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] fx_kwargs[static_input_idxs]: [] | |
[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inputs_to_check[0]: 0 | |
[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inputs_to_check[1]: 1 | |
[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inputs_to_check[2]: 2 | |
[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True) | |
[7as26aeta7rzhgm2mxh4el36kupf55fr27327kzc2fsdiy3nexy] cuda_matmul_settings: (True, True, True) | |
[6c7fgf6wxwt2oi2lpcltxu5n7oy4xmnwtjpctzflwg2tzb4rmum] torch_version: <bytes> | |
[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'} | |
[q45kt4ppwjt56yoczjqm3llshxnzu5ycpand4urjhaxoks64nb3] system_info[version]: {'triton': '3.2.039ace39686dc8ae6b537d14ae140575242b5833746b519e6e329b355f5d9e364-83cd0eef733233d69c83b5cd7dc1e0aeae88e1b36011de7f2d8ee0629def3ec1-39ace39686dc8ae6b537d14ae140575242b5833746b519e6e329b355f5d9e364-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-2598a211b07d5b0b37b6353a297af274ab6539023b4ae93e99360f95a7dad109-417ffa3f51a8f5a8dd947662e07bf79c2b5d1f20c0ea8f3f94cdcd327947d669-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-ea8d8a4744679bb406b914485b1743669eb685f24949cd210f8fb72c103d2e46-747b5e566049ac66b1bbb28a97a3dbce27460dd1183d43b663b3ea5fedfdf436-3197988cbd61f02c381bc27502ff651fc401db3d92c1c7f93a78da08005065a4-6e4a7df0c1f6cb799f488ee1d6efd3df754fc18aac6e7209923bb022c36c7c4e-6bd038079b94067d628eea9264530fec03d4d7027ff834e89b9d6fbc31ae8bd9-5d15c5bebef8d7aa51b21fd187e5faa95eba4a213254355bc69e0648013599f7-26a8aeaf21e759191f173b578316e243ad970387442a1b238481c6a1b2eecdc4-170b6c22d072485ef44734b7a8c2ab95c771ce78c5a26e890f8d103acfd17cd5-72bc1771d8b160fbafcd5390d1ee6cb72440c10ad4834451465a9e93b42f5d1c-7b506c955ee5646862dae548968e6523d75c37ad4750c214d9ab0f6918ecc88a-89e4844782de5fdff4868ac2846514570a6e280a6b92e91af2e180904043c833-115ada51f797cd098ddc5e4b95e8369a885571b15639694b6801f59e46eab55e-0e48b5e1e95136642ccfe62dc3d0a739a2c20a7b5ee13e9c23c6cecd68cdeb70-2e4af7e3ca937a7dd4c7207eb6ee3f8f7ebc2f30912b52a7204eeac040bd2a0f-b3aeb8ae6cb1e94085254a3c33612134752c72444ec219965dd7fa52e010a762-c89868706a4edff5f861293d19345031a95b843b7b7a1f523b30ead360e3837e-34ca3a350389b7cd59c72f6cf94f68e319a0e4394a858d381fb59344a7cc052e', 'cuda': '12.8'} | |
[erzhzpy5wesaphmuilsn7nd74fslmrcwn36f375w2b2umoqepbt] system_info[hash]: f9aacda3e2c9b721cd2dd3eb6634e5a739f324a18bb912ce3b13e4c27cdc4996 | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_padding]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[can_inplace_pad_graph_input]: False | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[enable_auto_functionalized_v2]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[fx_graph_remote_cache]: None | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bundle_triton_into_fx_graph_cache]: True | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[non_blocking_remote_cache_write]: True | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_local_cache]: True | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[autotune_remote_cache]: None | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[bundled_autotune_remote_cache]: None | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_skip_cache_dynamic_shape_guards]: False | |
[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[unsafe_marked_cacheable_functions]: [] | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[sleep_sec_TESTING_ONLY]: None | |
[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[triton_kernel_default_layout_constraint]: needs_fixed_stride_order | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp_cache_precompile_headers]: True | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[online_softmax]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[scalar_asserts]: True | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[alignment_asserts]: True | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[allow_buffer_reuse]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_fast_math]: False | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bfloat16_atomic_adds_enabled]: True | |
[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[prologue_fusion]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[epilogue_fusion_first]: False | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pattern_matcher]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[is_predispatch]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True | |
[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[pre_grad_fusion_options]: {} | |
[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {} | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True | |
[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True} | |
[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[reorder_for_compute_comm_overlap]: False | |
[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms'] | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[reorder_prefetch_limit]: None | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_peak_memory]: True | |
[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default | |
[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300 | |
[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25 | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_experimental_benchmarker]: True | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[max_autotune]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: False | |
[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[autotune_num_choices_displayed]: 10 | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[graph_partition]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False | |
[xr6sxc5frovi6fix4qdrk5bgse4kjvletetln5bngecsyxeleh5] inductor_config[max_autotune_gemm_backends]: CUTLASS | |
[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON | |
[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_fallback_to_aten]: False | |
[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192 | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_in_subproc]: False | |
[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0 | |
[zslw6pp37dzmhi5lhweftlhhdttfjade3t5j3y3vfk3ouze7nhw] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 0.0 | |
[zslw6pp37dzmhi5lhweftlhhdttfjade3t5j3y3vfk3ouze7nhw] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 0.0 | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False | |
[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1 | |
[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: | |
[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm | |
[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT | |
[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1 | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[warn_mix_layout]: False | |
[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4 | |
[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] inductor_config[realize_opcount_threshold]: 30 | |
[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8 | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fallback_random]: False | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_unaligned_fallback_output]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False | |
[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[enabled_metric_tables]: | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[loop_ordering_after_fusion]: False | |
[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[score_fusion_memory_threshold]: 10 | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_epilogue_fusion]: False | |
[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1 | |
[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64 | |
[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[max_pointwise_cat_inputs]: 8 | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_pointwise_cat]: False | |
[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8 | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[compute_all_bounds]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernels]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False | |
[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1 | |
[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1 | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernel_foreach_dynamic_shapes]: False | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_index_asserts]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[emulate_precision_casts]: False | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[add_pre_grad_passes]: None | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[remove_pre_grad_passes]: None | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_static_cuda_launcher]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[strict_static_cuda_launcher]: False | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None | |
[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10 | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_padding_cpu]: True | |
[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[padding_alignment_bytes]: 128 | |
[dnnw5ks3yxrp7mwvihb2hh4tqx35ye637xt33x64kw4fvz2nyzg] inductor_config[padding_stride_threshold]: 1024 | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_outputs]: False | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[generate_intermediate_hooks]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False | |
[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth_with_do_bench_using_profiling]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[check_stack_no_cycles_TESTING_ONLY]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[enable_linear_binary_folding]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[annotate_training]: False | |
[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] inductor_config[cpp.threads]: -1 | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.dynamic_threads]: False | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None | |
[g7rrnbg5yonzux3cfj5ovre5lob3ayda7qcfpxjvtwmiz4uicii] inductor_config[cpp.min_chunk_size]: 4096 | |
[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++') | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None | |
[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten | |
[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16 | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_unsafe_math_opt_flag]: False | |
[ijs44lspkinjvhcs7uff7n3noc53jvsp4yfljjh22mafhb7khxe] inductor_config[cpp.enable_floating_point_contract_flag]: off | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_tiling_heuristics]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_grouped_gemm_template]: False | |
[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1 | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_cache_blocking]: None | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_thread_factors]: None | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_loop_tail_vec]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_concat_linear]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.use_decompose_tanh]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraphs]: False | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_support_input_mutation]: True | |
[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128 | |
[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50 | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_kernel]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False | |
[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2 | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.prefer_nd_tiling]: False | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_pointwise]: True | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.autotune_at_compile_time]: None | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.autotune_with_sample_inputs]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.tile_reductions]: False | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_reduction_fusion]: True | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.unique_kernel_names]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.unique_user_kernel_names]: False | |
[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.persistent_reductions]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cooperative_reductions]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cooperative_reductions]: False | |
[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0 | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True | |
[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256 | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False | |
[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16 | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.codegen_upcast_to_fp32]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.enable_persistent_tma_matmul]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_l1_cache]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.disallow_failing_autotune_kernels_TESTING_ONLY]: False | |
[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False | |
[6fxyf5ymh244xdypwkhtsbszab4nnfsgmul2kmyqmw422i5h54e] inductor_config[aot_inductor.compile_wrapper_opt_level]: O1 | |
[ngkkx5e6z7erl6da23zb2cmsctz4yvaqyameyg5hbqln4wrhh7x] inductor_config[aot_inductor.debug_intermediate_value_printer]: 0 | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[aot_inductor.filtered_kernel_names]: None | |
[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: | |
[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package_cpp_only]: False | |
[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.metadata]: {} | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[aot_inductor.raise_error_on_ignored_optimization]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.dump_aoti_minifier]: False | |
[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[aot_inductor.repro_level]: 2 | |
[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.presets]: {} | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.allow_stack_allocation]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_minimal_arrayref_interface]: False | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[aot_inductor.package_constants_in_so]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.precompile_headers]: False | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None | |
[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1 | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False | |
[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_max_profiling_configs]: 1 | |
[xaicuyqjstadzph6cgvxowlzizkts6kzmfupsnbyaorxh37cppz] inductor_config[cuda.cutlass_max_profiling_swizzle_options]: [1, 2, 4] | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cuda.cutlass_epilogue_fusion_enabled]: True | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cuda.cutlass_tma_only]: True | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cuda_cxx]: None | |
[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1 | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.generate_test_runner]: False | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_denylist_regex]: None | |
[ngkkx5e6z7erl6da23zb2cmsctz4yvaqyameyg5hbqln4wrhh7x] inductor_config[cuda.cutlass_instantiation_level]: 0 | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_presets]: None | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.cutlass_hash_with_compile_cmd]: False | |
[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: [] | |
[3w3j4h2iiu3addwyb5alaeecz3so7teb23hp4d5n3b46w5n73ur] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx942'] | |
[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2 | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True | |
[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.print_kernel_resource_usage]: False | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.generate_test_runner]: False | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.kBatch_sweep]: None | |
[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[rocm.split_k_threshold]: 16 | |
[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp | |
[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton | |
[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host | |
[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda | |
[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] inductor_config[halide.scheduler_cuda]: Anderson2021 | |
[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019 | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.scan_kernels]: False | |
[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[external_matmul]: [] | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.force_extern_kernel_in_multi_template]: False | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[test_configs.max_mm_configs]: None | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.runtime_triton_dtype_assert]: False | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.static_cpp_dtype_assert]: False | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[test_configs.autotune_choice_name_regex]: None | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[test_configs.autotune_choice_desc_regex]: None | |
[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.graphsafe_rng_func_ignores_fallback_random]: False | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_pre_pass: None | |
[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_post_pass: None | |
[DEBUG]:FX cache key generated: fhek3hdikpc6h5t4gjddbvlkwxvg3wutqtw4ztaixgfqrdlj2om6 | |
[INFO]:fx graph cache miss for key fhek3hdikpc6h5t4gjddbvlkwxvg3wutqtw4ztaixgfqrdlj2om6 | |
[DEBUG]:FX cache miss, compiling and saving to cache | |
[DEBUG]:TritonBundler.begin_compile is called | |
[INFO]:Step 3: torchinductor compiling FORWARDS graph 0 | |
[DEBUG]:TRACED GRAPH | |
===== AFTER POST GRAD ===== | |
/data/users/mlazos/pytorch/torch/fx/_lazy_graph_module.py class <lambda>(torch.nn.Module): | |
def forward(self, arg0_1: "f16[512, 512][512, 1]cuda:0", arg1_1: "f16[512, 512][512, 1]cuda:0", arg2_1: "f16[512, 512][512, 1]cuda:0"): | |
# File: /data/users/mlazos/pytorch/test/inductor/test_cutlass_backend.py:1446 in forward, code: res = (a @ b).relu() # add extra activation to not hit addmm path | |
mm: "f16[512, 512][512, 1]cuda:0" = torch.ops.aten.mm.default(arg0_1, arg1_1); arg0_1 = arg1_1 = None | |
relu: "f16[512, 512][512, 1]cuda:0" = torch.ops.aten.relu.default(mm); mm = None | |
# File: /data/users/mlazos/pytorch/test/inductor/test_cutlass_backend.py:1447 in forward, code: return op(res, *extra_args) | |
mul: "f16[512, 512][512, 1]cuda:0" = torch.ops.aten.mul.Tensor(relu, arg2_1); relu = arg2_1 = None | |
return (mul,) | |
[DEBUG]:lowering %arg0_1 : [num_users=1] = placeholder[target=arg0_1] | |
[DEBUG]:lowering %arg1_1 : [num_users=1] = placeholder[target=arg1_1] | |
[DEBUG]:lowering %arg2_1 : [num_users=1] = placeholder[target=arg2_1] | |
[DEBUG]:lowering %mm : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%arg0_1, %arg1_1), kwargs = {}) | |
[DEBUG]: via <function tuned_mm at 0x7f88dfb71760> | |
[INFO]:Tuned aten.mm: m=512, n=512, k=512, mat1_dtype=torch.float16, mat2_dtype=torch.float16, output_layout=FixedLayout('cuda:0', torch.float16, size=[512, 512], stride=[512, 1]) | |
> /data/users/mlazos/pytorch/torch/_inductor/kernel/mm.py(713)tuned_mm() | |
-> if is_nonzero and use_cutlass_template(layout, m, n, k): | |
(Pdb) c | |
[DEBUG]:cutlass_library not found in sys.path, trying to import from config.cuda.cutlass_dir | |
[INFO]:CUTLASS library generated a dict of 1 operation kinds in 1.22 seconds | |
[INFO]:Got cutlass configs: total number of ops: 32. Filtering took 0.04 seconds | |
[DEBUG]:*** EmitGemmConfigurationLibrary::emit(operation) | |
[DEBUG]:*** operation.procedural_name(): cutlass3x_sm90_tensorop_s64x128x16gemm_f16_f16_f32_void_f16_128x128x64_1x2x1_0_ttn_align8_stream_k_warpspecialized_cooperative_epi_tma | |
[DEBUG]:*** tile_shape: [128, 128, 64] | |
[DEBUG]:*** warp_count: [4, 1, 1] | |
[INFO]:"libcudart_static.a" not found under /usr/local/cuda-12.8/targets/x86_64-linux/lib/stubs | |
[DEBUG]:CUDA command: nvcc -t=0 -DCUTLASS_ENABLE_TENSOR_CORE_MMA=1 -DCUTLASS_ENABLE_SM90_EXTENDED_MMA_SHAPES=1 -DCUTE_SM90_EXTENDED_MMA_SHAPES_ENABLED -w -gencode=arch=compute_90a,code=[sm_90a,compute_90a] -O1 -std=c++17 --expt-relaxed-constexpr -DNDEBUG -Xcompiler=-fPIC -Xcompiler=-fno-strict-aliasing -Xcompiler -fvisibility=hidden -Xcompiler=-Wconversion -I/data/users/mlazos/pytorch/third_party/cutlass/include -I/data/users/mlazos/pytorch/third_party/cutlass/tools/library/include -I/data/users/mlazos/pytorch/third_party/cutlass/tools/library/src -I/data/users/mlazos/pytorch/third_party/cutlass/tools/util/include -L/usr/local/cuda-12.8/targets/x86_64-linux/lib -Xlinker -rpath=/usr/local/cuda-12.8/targets/x86_64-linux/lib -L/usr/local/cuda-12.8/targets/x86_64-linux/lib/stubs -Xlinker -rpath=/usr/local/cuda-12.8/targets/x86_64-linux/lib/stubs -lcuda -lcudart -shared -o dummy_output dummy_input | |
[DEBUG]:*** EmitGemmConfigurationLibrary::emit(operation) | |
[DEBUG]:*** operation.procedural_name(): cutlass3x_sm90_tensorop_s64x128x16gemm_f16_f16_f32_void_f16_128x128x64_1x2x1_0_ttn_align8_stream_k_warpspecialized_cooperative_epi_tma | |
[DEBUG]:*** tile_shape: [128, 128, 64] | |
[DEBUG]:*** warp_count: [4, 1, 1] | |
[DEBUG]:CUDA command: nvcc -t=0 -DCUTLASS_ENABLE_TENSOR_CORE_MMA=1 -DCUTLASS_ENABLE_SM90_EXTENDED_MMA_SHAPES=1 -DCUTE_SM90_EXTENDED_MMA_SHAPES_ENABLED -w -gencode=arch=compute_90a,code=[sm_90a,compute_90a] -O1 -std=c++17 --expt-relaxed-constexpr -DNDEBUG -Xcompiler=-fPIC -Xcompiler=-fno-strict-aliasing -Xcompiler -fvisibility=hidden -Xcompiler=-Wconversion -I/data/users/mlazos/pytorch/third_party/cutlass/include -I/data/users/mlazos/pytorch/third_party/cutlass/tools/library/include -I/data/users/mlazos/pytorch/third_party/cutlass/tools/library/src -I/data/users/mlazos/pytorch/third_party/cutlass/tools/util/include -L/usr/local/cuda-12.8/targets/x86_64-linux/lib -Xlinker -rpath=/usr/local/cuda-12.8/targets/x86_64-linux/lib -L/usr/local/cuda-12.8/targets/x86_64-linux/lib/stubs -Xlinker -rpath=/usr/local/cuda-12.8/targets/x86_64-linux/lib/stubs -lcuda -lcudart -shared -o dummy_output dummy_input | |
[DEBUG]:*** EmitGemmConfigurationLibrary::emit(operation) | |
[DEBUG]:*** operation.procedural_name(): cutlass3x_sm90_tensorop_s64x128x16gemm_f16_f16_f32_void_f16_128x128x64_1x2x1_0_ttn_align8_stream_k_warpspecialized_cooperative_epi_tma | |
[DEBUG]:*** tile_shape: [128, 128, 64] | |
[DEBUG]:*** warp_count: [4, 1, 1] | |
[DEBUG]:CUDA command: nvcc -t=0 -DCUTLASS_ENABLE_TENSOR_CORE_MMA=1 -DCUTLASS_ENABLE_SM90_EXTENDED_MMA_SHAPES=1 -DCUTE_SM90_EXTENDED_MMA_SHAPES_ENABLED -w -gencode=arch=compute_90a,code=[sm_90a,compute_90a] -O1 -std=c++17 --expt-relaxed-constexpr -DNDEBUG -Xcompiler=-fPIC -Xcompiler=-fno-strict-aliasing -Xcompiler -fvisibility=hidden -Xcompiler=-Wconversion -I/data/users/mlazos/pytorch/third_party/cutlass/include -I/data/users/mlazos/pytorch/third_party/cutlass/tools/library/include -I/data/users/mlazos/pytorch/third_party/cutlass/tools/library/src -I/data/users/mlazos/pytorch/third_party/cutlass/tools/util/include -L/usr/local/cuda-12.8/targets/x86_64-linux/lib -Xlinker -rpath=/usr/local/cuda-12.8/targets/x86_64-linux/lib -L/usr/local/cuda-12.8/targets/x86_64-linux/lib/stubs -Xlinker -rpath=/usr/local/cuda-12.8/targets/x86_64-linux/lib/stubs -lcuda -lcudart -shared -o dummy_output dummy_input | |
[DEBUG]:Added 1 Cutlass gemm configs. | |
[DEBUG]:Max autotune selects from 3 choices. | |
[DEBUG]:Starting precompilation | |
[DEBUG]:Found all 3 timings in cache, returning no_op | |
[DEBUG]:Precompilation elapsed time: 0.00s | |
[DEBUG]:Autotuning elapsed time: 0.00s | |
> /data/users/mlazos/pytorch/torch/_inductor/select_algorithm.py(1926)do_autotuning() | |
-> raise NoValidChoicesError |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment