// -----// IR Dump After ConvertTorchOnnxToTorch (convert-torch-onnx-to-torch) //----- //
func.func @torch_jit(%arg0: !torch.vtensor<[1,128,4,256],f32>) -> !torch.vtensor<[1,257,4,256],f32> attributes {torch.onnx_meta.ir_version = 7 : si64, torch.onnx_meta.opset_version = 21 : si64, torch.onnx_meta.producer_name = "pytorch", torch.onnx_meta.producer_version = "1.12.1"} {
  %0 = torch.vtensor.literal(dense_resource<__onnx_constant_not_found_possibly_due_to_being_elided__> : tensor<257x128x1x1xf32>) : !torch.vtensor<[257,128,1,1],f32>
  %1 = torch.vtensor.literal(dense_resource<__onnx_constant_not_found_possibly_due_to_being_elided___1> : tensor<257xf32>) : !torch.vtensor<[257],f32>
  %int0 = torch.constant.int 0
  %int0_0 = torch.constant.int 0
  %2 = torch.prim.ListConstruct %int0, %int0_0 : (!torch.int, !torch.int) -> !torch.list<int>
  %int1 = torch.constant.int 1
  %int1_1 = torch.constant.int 1
  %int1_2 = torch.constant.int 1

// -----// IR Dump After AutoInputConversionPipelinePass (iree-auto-input-conversion) //----- //
module {
  func.func @unaligned_k(%arg0: tensor<128x258xf32>, %arg1: tensor<258x256xf32>) -> tensor<128x256xf32> {
    %c0 = arith.constant 0 : index
    %cst = arith.constant 0.000000e+00 : f32
    %0 = tensor.empty() : tensor<128x256xf32>
    %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<128x256xf32>) -> tensor<128x256xf32>
    %2 = linalg.matmul ins(%arg0, %arg1 : tensor<128x258xf32>, tensor<258x256xf32>) outs(%1 : tensor<128x256xf32>) -> tensor<128x256xf32>
    return %2 : tensor<128x256xf32>
  }
}

// -----// IR Dump After AssignLegacyTargetDevicesPass (iree-hal-assign-legacy-target-devices) //----- //
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "znver4", cpu_features = "+prfchw,-cldemote,+avx,+aes,+sahf,+pclmul,-xop,+crc32,-amx-fp8,+xsaves,-avx512fp16,-usermsr,-sm4,-egpr,+sse4.1,+avx512ifma,+xsave,+sse4.2,-tsxldtrk,-sm3,-ptwrite,-widekl,-movrs,+invpcid,+64bit,+xsavec,-avx10.1-512,+avx512vpopcntdq,+cmov,-avx512vp2intersect,+avx512cd,+movbe,-avxvnniint8,-ccmp,-amx-int8,-kl,-avx10.1-256,+evex512,-avxvnni,-rtm,+adx,+avx2,-hreset,-movdiri,-serialize,-sha512,+vpclmulqdq,+avx512vl,-uintr,-cf,+clflushopt,-raoint,-cmpccxadd,+bmi,-amx-tile,+sse,-avx10.2-256,+gfni,-avxvnniint16,-amx-fp16,-zu,-ndd,+xsaveopt,+rdrnd,+avx512f,-amx-bf16,+avx512bf16,+avx512vnni,-push2pop2,+cx8,+avx512bw,+sse3,+pku,-nf,-amx-tf32,-amx-avx512,+fsgsbase,+clzero,+mwaitx,-lwp,+lzcnt,+sha,-movdir64b,-ppx,+wbnoinvd,-enqcmd,-amx-transpose,-avx10.2-512,-avxneconvert,-tbm,-pconfig,-am |

hal.executable public @prefill_bs4$async_dispatch_122 {
  hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {cpu = "", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 16 : i64, target_triple = "x86_64-unknown-unknown-eabi-elf"}>) {
    hal.executable.export public @prefill_bs4$async_dispatch_122_transpose_4x4xDx128_f16 ordinal(0) layout(#hal.pipeline.layout<constants = 9, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) {
    ^bb0(%arg0: !hal.device, %arg1: index):
      %x, %y, %z = flow.dispatch.workgroup_count_from_slice %arg1
      hal.return %x, %y, %z : index, index, index
    }
    builtin.module {
      func.func @prefill_bs4$async_dispatch_122_transpose_4x4xDx128_f16() {
        %c32_i64 = arith.constant 32 : i64

failed to translate executables
prefill_8b_tp8.mlir:9903:13: error: One or more operations with large vector sizes (8192 bytes) were found:
%3425 = torch.aten.transpose.int %3417#0, %int1_1244, %int2_1245 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
        ^
prefill_8b_tp8.mlir:9857:15: note: %67 = vector.transfer_read %extracted_slice_8[%c0, %c0, %c0, %c0, %c0, %c0], %cst_3, %66 {in_bounds = [true, true, true, true, true, true]} : tensor<4x1x?x1x1x128xf16>, vector<4x1x8x1x1x128xf16>
%3417:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%3393, %3401, %3409, %float0.000000e00, %true_1213, %none_1214, %none_1215) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>)
          ^
prefill_8b_tp8.mlir:9857:15: note: %69 = arith.extf %67 : vector<4x1x8x1

// -----// IR Dump After AutoInputConversionPipelinePass (iree-auto-input-conversion) //----- //
module {
  func.func @matmul_2048x512x1024_f32_f32() -> tensor<2048x512xf32> {
    %0 = util.unfoldable_constant dense<1.000000e+00> : tensor<2048x1024xf32>
    %1 = util.unfoldable_constant dense<4.000000e-01> : tensor<1024x512xf32>
    %cst = arith.constant 0.000000e+00 : f32
    %2 = tensor.empty() : tensor<2048x512xf32>
    %3 = linalg.fill ins(%cst : f32) outs(%2 : tensor<2048x512xf32>) -> tensor<2048x512xf32>
    %4 = linalg.matmul ins(%0, %1 : tensor<2048x1024xf32>, tensor<1024x512xf32>) outs(%3 : tensor<2048x512xf32>) -> tensor<2048x512xf32>
    return %4 : tensor<2048x512xf32>
  }
}

==2213870==ERROR: AddressSanitizer: heap-use-after-free on address 0x50e000021d2c at pc 0x7b499bfdc2d7 bp 0x7fff58a95a70 sp 0x7fff58a95a68
READ of size 4 at 0x50e000021d2c thread T0
    #0 0x7b499bfdc2d6 in mlir::Operation::getRegions() /home/nod/iree/third_party/llvm-project/mlir/include/mlir/IR/Operation.h:674:9
    #1 0x7b499bfdc2d6 in mlir::ForwardIterator::makeIterable(mlir::Operation&) /home/nod/iree/third_party/llvm-project/mlir/lib/IR/Visitors.cpp:18:16
    #2 0x7b499bd783f1 in void mlir::detail::walk<mlir::ForwardIterator>(mlir::Operation*, llvm::function_ref<void (mlir::Operation*)>, mlir::WalkOrder) /home/nod/iree/third_party/llvm-project/mlir/include/mlir/IR/Visitors.h:176:23
    #3 0x7b49a160111e in std::enable_if<!llvm::is_one_of<mlir::gpu::ThreadIdOp, mlir::Operation*, mlir::Region*, mlir::Block*>::value && std::is_same<void, void>::value, void>::type mlir::detail::walk<(mlir::WalkOrder)1, mlir::ForwardIterator, void replaceUnitMappingIdsHelper<mlir::gpu::ThreadIdOp, mlir::Operation>(mlir::Rewr

// -----// IR Dump After GPUGeneralizeNamedOpsPass (iree-codegen-gpu-generalize-named-ops) //----- //
func.func @dot_dispatch_0() {
  %cst = arith.constant 0.000000e+00 : f32
  %c0 = arith.constant 0 : index
  %c1024 = arith.constant 1024 : index
  %c1 = arith.constant 1 : index
  %0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>]>) binding(0) : !flow.dispatch.tensor<readonly:tensor<1024x1024xf32>>
  %1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>]>) binding(1) : !flow.dispatch.tensor<readonly:tensor<1024x1024xf32>>
  %2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>]>) binding(2) : !flow.dispatch.tensor<writeonly:tensor<1024x1024xf32>>
  %3 = flow.di

// -----// IR Dump After AutoInputConversionPipelinePass (iree-auto-input-conversion) //----- //
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
#translation = #iree_codegen.translation_info<CPUDefault>
module {
  func.func @quantized_matmul_neither_zp_0_dynamic(%arg0: tensor<256x256xi8>, %arg1: tensor<256x256xi8>, %arg2: i32, %arg3: i32, %arg4: tensor<256x256xi32>) -> tensor<256x256xi32> attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_, translation_info = #translation} {
    %0 = linalg.quantized_matmul ins(%arg0, %arg1, %arg2, %arg3 : tensor<256x256xi8>, tensor<256x256xi8>, i32, i32) outs(%arg4 : tensor<256x256xi32>) -> tensor<256x256xi32>
    return %0 : tensor<256x256xi32>
  }
}