Skip to content

Instantly share code, notes, and snippets.

@AmosLewis
Created August 8, 2024 22:49
Show Gist options
  • Save AmosLewis/35ed28904fd6e82de0c66546b18579df to your computer and use it in GitHub Desktop.
failed to translate executables
dpn68_vaiq.default.onnx.linalg.mlir:1243:12: error: One or more operations with large vector sizes (8192 bytes) were found:
%180 = linalg.generic {indexing_maps = [#map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%179 : tensor<1x64x56x56xf32>) outs(%107 : tensor<1x64x56x56xi8>) {
^
dpn68_vaiq.default.onnx.linalg.mlir:9:3: note: called from
func.func @main_graph(%arg0: tensor<1x3x224x224xf32>) -> tensor<1x1000xf32> {
^
<unknown>:0: note: %cst_3 = arith.constant dense<1.562500e-02> : vector<200704xf32>
dpn68_vaiq.default.onnx.linalg.mlir:1014:12: note: %5 = vector.transfer_read %3[%c0], %c0_i8 {in_bounds = [true]} : tensor<200704xi8>, vector<200704xi8>
%144 = linalg.generic {indexing_maps = [#map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%141 : tensor<1x64x56x56xi8>) outs(%115 : tensor<1x64x56x56xf32>) {
^
dpn68_vaiq.default.onnx.linalg.mlir:1016:15: note: %6 = arith.extsi %5 : vector<200704xi8> to vector<200704xi32>
%1072 = arith.extsi %in : i8 to i32
^
dpn68_vaiq.default.onnx.linalg.mlir:1017:15: note: %7 = arith.sitofp %6 : vector<200704xi32> to vector<200704xf32>
%1073 = arith.sitofp %1072 : i32 to f32
^
dpn68_vaiq.default.onnx.linalg.mlir:1018:15: note: %8 = arith.mulf %7, %cst_3 : vector<200704xf32>
%1074 = arith.mulf %1073, %cst_31 : f32
^
dpn68_vaiq.default.onnx.linalg.mlir:1018:15: note: %9 = vector.transfer_write %8, %4[%c0] {in_bounds = [true]} : vector<200704xf32>, tensor<200704xf32>
dpn68_vaiq.default.onnx.linalg.mlir:1243:12: error: failed to run translation of source executable to target executable for backend #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 16 : i64, target_triple = "x86_64-unknown-unknown-eabi-elf"}>
%180 = linalg.generic {indexing_maps = [#map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%179 : tensor<1x64x56x56xf32>) outs(%107 : tensor<1x64x56x56xi8>) {
^
dpn68_vaiq.default.onnx.linalg.mlir:9:3: note: called from
func.func @main_graph(%arg0: tensor<1x3x224x224xf32>) -> tensor<1x1000xf32> {
^
dpn68_vaiq.default.onnx.linalg.mlir:1243:12: note: see current operation:
"hal.executable.variant"() ({
"hal.executable.export"() ({
^bb0(%arg8: !hal.device):
%62 = "arith.constant"() <{value = 2 : index}> : () -> index
%63 = "arith.constant"() <{value = 8 : index}> : () -> index
%64 = "arith.constant"() <{value = 1 : index}> : () -> index
"hal.return"(%62, %63, %64) : (index, index, index) -> ()
}) {hal.interface.bindings = [#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>], layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>, ordinal = 0 : index, sym_name = "main_graph_dispatch_34_elementwise_64x56x56_f32xf32xi8"} : () -> ()
"builtin.module"() ({
"func.func"() <{function_type = () -> (), sym_name = "main_graph_dispatch_34_elementwise_64x56x56_f32xf32xi8"}> ({
%0 = "arith.constant"() <{value = dense<1.270000e+02> : vector<1x1x4xf32>}> : () -> vector<1x1x4xf32>
%1 = "arith.constant"() <{value = dense<-1.280000e+02> : vector<1x1x4xf32>}> : () -> vector<1x1x4xf32>
%2 = "arith.constant"() <{value = dense<0.000000e+00> : vector<1x1x4xf32>}> : () -> vector<1x1x4xf32>
%3 = "arith.constant"() <{value = dense<1.562500e-02> : vector<1x1x4xf32>}> : () -> vector<1x1x4xf32>
%4 = "arith.constant"() <{value = dense<1.562500e-02> : vector<200704xf32>}> : () -> vector<200704xf32>
%5 = "arith.constant"() <{value = 0 : i8}> : () -> i8
%6 = "arith.constant"() <{value = 4 : index}> : () -> index
%7 = "arith.constant"() <{value = 1 : index}> : () -> index
%8 = "arith.constant"() <{value = 28 : index}> : () -> index
%9 = "arith.constant"() <{value = 8 : index}> : () -> index
%10 = "arith.constant"() <{value = 0 : index}> : () -> index
%11 = "arith.constant"() <{value = 64 : index}> : () -> index
%12 = "arith.constant"() <{value = 56 : index}> : () -> index
%13 = "arith.constant"() <{value = 0.000000e+00 : f32}> : () -> f32
%14 = "arith.constant"() <{value = 2408448 : index}> : () -> index
%15 = "arith.constant"() <{value = 2207744 : index}> : () -> index
%16 = "arith.constant"() <{value = 802816 : index}> : () -> index
%17 = "hal.interface.binding.subspan"(%14) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> !flow.dispatch.tensor<readonly:tensor<1x80x56x56xf32>>
%18 = "hal.interface.binding.subspan"(%15) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> !flow.dispatch.tensor<readonly:tensor<200704xi8>>
%19 = "hal.interface.binding.subspan"(%16) {alignment = 64 : index, binding = 1 : index, layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> !flow.dispatch.tensor<writeonly:tensor<64x56x56xi8>>
%20 = "flow.dispatch.tensor.load"(%18) <{operandSegmentSizes = array<i32: 1, 0, 0, 0, 0>, static_offsets = array<i64: 0>, static_sizes = array<i64: 200704>, static_strides = array<i64: 1>}> : (!flow.dispatch.tensor<readonly:tensor<200704xi8>>) -> tensor<200704xi8>
%21 = "tensor.empty"() : () -> tensor<200704xf32>
%22 = "vector.transfer_read"(%20, %10, %5) <{in_bounds = [true], operandSegmentSizes = array<i32: 1, 1, 1, 0>, permutation_map = affine_map<(d0) -> (d0)>}> : (tensor<200704xi8>, index, i8) -> vector<200704xi8>
%23 = "arith.extsi"(%22) : (vector<200704xi8>) -> vector<200704xi32>
%24 = "arith.sitofp"(%23) : (vector<200704xi32>) -> vector<200704xf32>
%25 = "arith.mulf"(%24, %4) <{fastmath = #arith.fastmath<none>}> : (vector<200704xf32>, vector<200704xf32>) -> vector<200704xf32>
%26 = "vector.transfer_write"(%25, %21, %10) <{in_bounds = [true], operandSegmentSizes = array<i32: 1, 1, 1, 0>, permutation_map = affine_map<(d0) -> (d0)>}> : (vector<200704xf32>, tensor<200704xf32>, index) -> tensor<200704xf32>
%27 = "tensor.expand_shape"(%26) <{reassociation = [[0, 1, 2]], static_output_shape = array<i64: 64, 56, 56>}> : (tensor<200704xf32>) -> tensor<64x56x56xf32>
%28 = "hal.interface.workgroup.id"() {dimension = 0 : index} : () -> index
%29 = "hal.interface.workgroup.count"() {dimension = 0 : index} : () -> index
%30 = "hal.interface.workgroup.id"() {dimension = 1 : index} : () -> index
%31 = "hal.interface.workgroup.count"() {dimension = 1 : index} : () -> index
%32 = "affine.apply"(%30) <{map = affine_map<()[s0] -> (s0 * 8)>}> : (index) -> index
%33 = "affine.apply"(%31) <{map = affine_map<()[s0] -> (s0 * 8)>}> : (index) -> index
%34 = "affine.apply"(%28) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index
%35 = "affine.apply"(%29) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index
"scf.for"(%32, %11, %33) ({
^bb0(%arg0: index):
"scf.for"(%34, %12, %35) ({
^bb0(%arg1: index):
%36 = "flow.dispatch.tensor.load"(%19, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 0, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, -9223372036854775808, 0>, static_sizes = array<i64: 8, 28, 56>, static_strides = array<i64: 1, 1, 1>}> : (!flow.dispatch.tensor<writeonly:tensor<64x56x56xi8>>, index, index) -> tensor<8x28x56xi8>
%37 = "flow.dispatch.tensor.load"(%17, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 0, 2, 0, 0>, static_offsets = array<i64: 0, -9223372036854775808, -9223372036854775808, 0>, static_sizes = array<i64: 1, 8, 28, 56>, static_strides = array<i64: 1, 1, 1, 1>}> : (!flow.dispatch.tensor<readonly:tensor<1x80x56x56xf32>>, index, index) -> tensor<8x28x56xf32>
%38 = "scf.for"(%10, %9, %7, %36) ({
^bb0(%arg2: index, %arg3: tensor<8x28x56xi8>):
%39 = "scf.for"(%10, %8, %7, %arg3) ({
^bb0(%arg4: index, %arg5: tensor<8x28x56xi8>):
%40 = "scf.for"(%10, %12, %6, %arg5) ({
^bb0(%arg6: index, %arg7: tensor<8x28x56xi8>):
%41 = "arith.addi"(%arg2, %arg0) <{overflowFlags = #arith.overflow<none>}> : (index, index) -> index
%42 = "arith.addi"(%arg4, %arg1) <{overflowFlags = #arith.overflow<none>}> : (index, index) -> index
%43 = "vector.transfer_read"(%27, %41, %42, %arg6, %13) <{in_bounds = [true, true, true], operandSegmentSizes = array<i32: 1, 3, 1, 0>, permutation_map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>}> : (tensor<64x56x56xf32>, index, index, index, f32) -> vector<1x1x4xf32>
%44 = "vector.transfer_read"(%37, %arg2, %arg4, %arg6, %13) <{in_bounds = [true, true, true], operandSegmentSizes = array<i32: 1, 3, 1, 0>, permutation_map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>}> : (tensor<8x28x56xf32>, index, index, index, f32) -> vector<1x1x4xf32>
%45 = "arith.divf"(%44, %3) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32>
%46 = "math.roundeven"(%45) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>) -> vector<1x1x4xf32>
%47 = "arith.addf"(%46, %2) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32>
%48 = "arith.maximumf"(%47, %1) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32>
%49 = "arith.minimumf"(%48, %0) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32>
%50 = "arith.fptosi"(%49) : (vector<1x1x4xf32>) -> vector<1x1x4xi8>
%51 = "arith.extsi"(%50) : (vector<1x1x4xi8>) -> vector<1x1x4xi32>
%52 = "arith.sitofp"(%51) : (vector<1x1x4xi32>) -> vector<1x1x4xf32>
%53 = "arith.mulf"(%52, %3) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32>
%54 = "arith.addf"(%43, %53) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32>
%55 = "arith.divf"(%54, %3) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32>
%56 = "math.roundeven"(%55) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>) -> vector<1x1x4xf32>
%57 = "arith.addf"(%56, %2) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32>
%58 = "arith.maximumf"(%57, %1) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32>
%59 = "arith.minimumf"(%58, %0) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32>
%60 = "arith.fptosi"(%59) : (vector<1x1x4xf32>) -> vector<1x1x4xi8>
%61 = "vector.transfer_write"(%60, %arg7, %arg2, %arg4, %arg6) <{in_bounds = [true, true, true], operandSegmentSizes = array<i32: 1, 1, 3, 0>, permutation_map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>}> : (vector<1x1x4xi8>, tensor<8x28x56xi8>, index, index, index) -> tensor<8x28x56xi8>
"scf.yield"(%61) : (tensor<8x28x56xi8>) -> ()
}) : (index, index, index, tensor<8x28x56xi8>) -> tensor<8x28x56xi8>
"scf.yield"(%40) : (tensor<8x28x56xi8>) -> ()
}) : (index, index, index, tensor<8x28x56xi8>) -> tensor<8x28x56xi8>
"scf.yield"(%39) : (tensor<8x28x56xi8>) -> ()
}) : (index, index, index, tensor<8x28x56xi8>) -> tensor<8x28x56xi8>
"flow.dispatch.tensor.store"(%38, %19, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 1, 0, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, -9223372036854775808, 0>, static_sizes = array<i64: 8, 28, 56>, static_strides = array<i64: 1, 1, 1>}> : (tensor<8x28x56xi8>, !flow.dispatch.tensor<writeonly:tensor<64x56x56xi8>>, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"func.return"() : () -> ()
}) {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} : () -> ()
}) : () -> ()
"hal.executable.variant_end"() : () -> ()
}) {sym_name = "embedded_elf_x86_64", target = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 16 : i64, target_triple = "x86_64-unknown-unknown-eabi-elf"}>} : () -> ()
%180 = linalg.generic {indexing_maps = [#map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%179 : tensor<1x64x56x56xf32>) outs(%107 : tensor<1x64x56x56xi8>) {
^
dpn68_vaiq.default.onnx.linalg.mlir:1497:12: error: One or more operations with large vector sizes (8192 bytes) were found:
%215 = linalg.generic {indexing_maps = [#map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%214 : tensor<1x64x56x56xi8>) outs(%115 : tensor<1x64x56x56xf32>) {
^
dpn68_vaiq.default.onnx.linalg.mlir:9:3: note: called from
func.func @main_graph(%arg0: tensor<1x3x224x224xf32>) -> tensor<1x1000xf32> {
^
<unknown>:0: note: %cst_3 = arith.constant dense<1.562500e-02> : vector<200704xf32>
dpn68_vaiq.default.onnx.linalg.mlir:1263:12: note: %5 = vector.transfer_read %3[%c0], %c0_i8 {in_bounds = [true]} : tensor<200704xi8>, vector<200704xi8>
%182 = linalg.generic {indexing_maps = [#map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%180 : tensor<1x64x56x56xi8>) outs(%115 : tensor<1x64x56x56xf32>) {
^
dpn68_vaiq.default.onnx.linalg.mlir:1265:15: note: %6 = arith.extsi %5 : vector<200704xi8> to vector<200704xi32>
%1072 = arith.extsi %in : i8 to i32
^
dpn68_vaiq.default.onnx.linalg.mlir:1266:15: note: %7 = arith.sitofp %6 : vector<200704xi32> to vector<200704xf32>
%1073 = arith.sitofp %1072 : i32 to f32
^
dpn68_vaiq.default.onnx.linalg.mlir:1267:15: note: %8 = arith.mulf %7, %cst_3 : vector<200704xf32>
%1074 = arith.mulf %1073, %cst_31 : f32
^
dpn68_vaiq.default.onnx.linalg.mlir:1267:15: note: %9 = vector.transfer_write %8, %4[%c0] {in_bounds = [true]} : vector<200704xf32>, tensor<200704xf32>
dpn68_vaiq.default.onnx.linalg.mlir:1497:12: error: failed to run translation of source executable to target executable for backend #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 16 : i64, target_triple = "x86_64-unknown-unknown-eabi-elf"}>
%215 = linalg.generic {indexing_maps = [#map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%214 : tensor<1x64x56x56xi8>) outs(%115 : tensor<1x64x56x56xf32>) {
^
dpn68_vaiq.default.onnx.linalg.mlir:9:3: note: called from
func.func @main_graph(%arg0: tensor<1x3x224x224xf32>) -> tensor<1x1000xf32> {
^
dpn68_vaiq.default.onnx.linalg.mlir:1497:12: note: see current operation:
"hal.executable.variant"() ({
"hal.executable.export"() ({
^bb0(%arg8: !hal.device):
%65 = "arith.constant"() <{value = 2 : index}> : () -> index
%66 = "arith.constant"() <{value = 8 : index}> : () -> index
%67 = "arith.constant"() <{value = 1 : index}> : () -> index
"hal.return"(%65, %66, %67) : (index, index, index) -> ()
}) {hal.interface.bindings = [#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>], layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>, ordinal = 0 : index, sym_name = "main_graph_dispatch_47_elementwise_64x56x56_f32"} : () -> ()
"builtin.module"() ({
"func.func"() <{function_type = () -> (), sym_name = "main_graph_dispatch_47_elementwise_64x56x56_f32"}> ({
%0 = "arith.constant"() <{value = dense<1.270000e+02> : vector<1x1x4xf32>}> : () -> vector<1x1x4xf32>
%1 = "arith.constant"() <{value = dense<-1.280000e+02> : vector<1x1x4xf32>}> : () -> vector<1x1x4xf32>
%2 = "arith.constant"() <{value = dense<0.000000e+00> : vector<1x1x4xf32>}> : () -> vector<1x1x4xf32>
%3 = "arith.constant"() <{value = dense<1.562500e-02> : vector<1x1x4xf32>}> : () -> vector<1x1x4xf32>
%4 = "arith.constant"() <{value = dense<1.562500e-02> : vector<200704xf32>}> : () -> vector<200704xf32>
%5 = "arith.constant"() <{value = 0 : i8}> : () -> i8
%6 = "arith.constant"() <{value = 4 : index}> : () -> index
%7 = "arith.constant"() <{value = 1 : index}> : () -> index
%8 = "arith.constant"() <{value = 28 : index}> : () -> index
%9 = "arith.constant"() <{value = 8 : index}> : () -> index
%10 = "arith.constant"() <{value = 0 : index}> : () -> index
%11 = "arith.constant"() <{value = 64 : index}> : () -> index
%12 = "arith.constant"() <{value = 56 : index}> : () -> index
%13 = "arith.constant"() <{value = 0.000000e+00 : f32}> : () -> f32
%14 = "arith.constant"() <{value = 2007040 : index}> : () -> index
%15 = "arith.constant"() <{value = 802816 : index}> : () -> index
%16 = "arith.constant"() <{value = 1003520 : index}> : () -> index
%17 = "hal.interface.binding.subspan"(%14) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> !flow.dispatch.tensor<readonly:tensor<1x80x56x56xf32>>
%18 = "hal.interface.binding.subspan"(%15) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> !flow.dispatch.tensor<readonly:tensor<200704xi8>>
%19 = "hal.interface.binding.subspan"(%16) {alignment = 64 : index, binding = 1 : index, layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> !flow.dispatch.tensor<writeonly:tensor<64x56x56xf32>>
%20 = "flow.dispatch.tensor.load"(%18) <{operandSegmentSizes = array<i32: 1, 0, 0, 0, 0>, static_offsets = array<i64: 0>, static_sizes = array<i64: 200704>, static_strides = array<i64: 1>}> : (!flow.dispatch.tensor<readonly:tensor<200704xi8>>) -> tensor<200704xi8>
%21 = "tensor.empty"() : () -> tensor<200704xf32>
%22 = "vector.transfer_read"(%20, %10, %5) <{in_bounds = [true], operandSegmentSizes = array<i32: 1, 1, 1, 0>, permutation_map = affine_map<(d0) -> (d0)>}> : (tensor<200704xi8>, index, i8) -> vector<200704xi8>
%23 = "arith.extsi"(%22) : (vector<200704xi8>) -> vector<200704xi32>
%24 = "arith.sitofp"(%23) : (vector<200704xi32>) -> vector<200704xf32>
%25 = "arith.mulf"(%24, %4) <{fastmath = #arith.fastmath<none>}> : (vector<200704xf32>, vector<200704xf32>) -> vector<200704xf32>
%26 = "vector.transfer_write"(%25, %21, %10) <{in_bounds = [true], operandSegmentSizes = array<i32: 1, 1, 1, 0>, permutation_map = affine_map<(d0) -> (d0)>}> : (vector<200704xf32>, tensor<200704xf32>, index) -> tensor<200704xf32>
%27 = "tensor.expand_shape"(%26) <{reassociation = [[0, 1, 2]], static_output_shape = array<i64: 64, 56, 56>}> : (tensor<200704xf32>) -> tensor<64x56x56xf32>
%28 = "hal.interface.workgroup.id"() {dimension = 0 : index} : () -> index
%29 = "hal.interface.workgroup.count"() {dimension = 0 : index} : () -> index
%30 = "hal.interface.workgroup.id"() {dimension = 1 : index} : () -> index
%31 = "hal.interface.workgroup.count"() {dimension = 1 : index} : () -> index
%32 = "affine.apply"(%30) <{map = affine_map<()[s0] -> (s0 * 8)>}> : (index) -> index
%33 = "affine.apply"(%31) <{map = affine_map<()[s0] -> (s0 * 8)>}> : (index) -> index
%34 = "affine.apply"(%28) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index
%35 = "affine.apply"(%29) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index
"scf.for"(%32, %11, %33) ({
^bb0(%arg0: index):
"scf.for"(%34, %12, %35) ({
^bb0(%arg1: index):
%36 = "flow.dispatch.tensor.load"(%19, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 0, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, -9223372036854775808, 0>, static_sizes = array<i64: 8, 28, 56>, static_strides = array<i64: 1, 1, 1>}> : (!flow.dispatch.tensor<writeonly:tensor<64x56x56xf32>>, index, index) -> tensor<8x28x56xf32>
%37 = "flow.dispatch.tensor.load"(%17, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 0, 2, 0, 0>, static_offsets = array<i64: 0, -9223372036854775808, -9223372036854775808, 0>, static_sizes = array<i64: 1, 8, 28, 56>, static_strides = array<i64: 1, 1, 1, 1>}> : (!flow.dispatch.tensor<readonly:tensor<1x80x56x56xf32>>, index, index) -> tensor<8x28x56xf32>
%38 = "scf.for"(%10, %9, %7, %36) ({
^bb0(%arg2: index, %arg3: tensor<8x28x56xf32>):
%39 = "scf.for"(%10, %8, %7, %arg3) ({
^bb0(%arg4: index, %arg5: tensor<8x28x56xf32>):
%40 = "scf.for"(%10, %12, %6, %arg5) ({
^bb0(%arg6: index, %arg7: tensor<8x28x56xf32>):
%41 = "arith.addi"(%arg2, %arg0) <{overflowFlags = #arith.overflow<none>}> : (index, index) -> index
%42 = "arith.addi"(%arg4, %arg1) <{overflowFlags = #arith.overflow<none>}> : (index, index) -> index
%43 = "vector.transfer_read"(%27, %41, %42, %arg6, %13) <{in_bounds = [true, true, true], operandSegmentSizes = array<i32: 1, 3, 1, 0>, permutation_map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>}> : (tensor<64x56x56xf32>, index, index, index, f32) -> vector<1x1x4xf32>
%44 = "vector.transfer_read"(%37, %arg2, %arg4, %arg6, %13) <{in_bounds = [true, true, true], operandSegmentSizes = array<i32: 1, 3, 1, 0>, permutation_map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>}> : (tensor<8x28x56xf32>, index, index, index, f32) -> vector<1x1x4xf32>
%45 = "arith.divf"(%44, %3) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32>
%46 = "math.roundeven"(%45) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>) -> vector<1x1x4xf32>
%47 = "arith.addf"(%46, %2) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32>
%48 = "arith.maximumf"(%47, %1) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32>
%49 = "arith.minimumf"(%48, %0) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32>
%50 = "arith.fptosi"(%49) : (vector<1x1x4xf32>) -> vector<1x1x4xi8>
%51 = "arith.extsi"(%50) : (vector<1x1x4xi8>) -> vector<1x1x4xi32>
%52 = "arith.sitofp"(%51) : (vector<1x1x4xi32>) -> vector<1x1x4xf32>
%53 = "arith.mulf"(%52, %3) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32>
%54 = "arith.addf"(%43, %53) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32>
%55 = "arith.divf"(%54, %3) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32>
%56 = "math.roundeven"(%55) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>) -> vector<1x1x4xf32>
%57 = "arith.addf"(%56, %2) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32>
%58 = "arith.maximumf"(%57, %1) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32>
%59 = "arith.minimumf"(%58, %0) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32>
%60 = "arith.fptosi"(%59) : (vector<1x1x4xf32>) -> vector<1x1x4xi8>
%61 = "arith.extsi"(%60) : (vector<1x1x4xi8>) -> vector<1x1x4xi32>
%62 = "arith.sitofp"(%61) : (vector<1x1x4xi32>) -> vector<1x1x4xf32>
%63 = "arith.mulf"(%62, %3) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32>
%64 = "vector.transfer_write"(%63, %arg7, %arg2, %arg4, %arg6) <{in_bounds = [true, true, true], operandSegmentSizes = array<i32: 1, 1, 3, 0>, permutation_map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>}> : (vector<1x1x4xf32>, tensor<8x28x56xf32>, index, index, index) -> tensor<8x28x56xf32>
"scf.yield"(%64) : (tensor<8x28x56xf32>) -> ()
}) : (index, index, index, tensor<8x28x56xf32>) -> tensor<8x28x56xf32>
"scf.yield"(%40) : (tensor<8x28x56xf32>) -> ()
}) : (index, index, index, tensor<8x28x56xf32>) -> tensor<8x28x56xf32>
"scf.yield"(%39) : (tensor<8x28x56xf32>) -> ()
}) : (index, index, index, tensor<8x28x56xf32>) -> tensor<8x28x56xf32>
"flow.dispatch.tensor.store"(%38, %19, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 1, 0, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, -9223372036854775808, 0>, static_sizes = array<i64: 8, 28, 56>, static_strides = array<i64: 1, 1, 1>}> : (tensor<8x28x56xf32>, !flow.dispatch.tensor<writeonly:tensor<64x56x56xf32>>, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"func.return"() : () -> ()
}) {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} : () -> ()
}) : () -> ()
"hal.executable.variant_end"() : () -> ()
}) {sym_name = "embedded_elf_x86_64", target = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 16 : i64, target_triple = "x86_64-unknown-unknown-eabi-elf"}>} : () -> ()
%215 = linalg.generic {indexing_maps = [#map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%214 : tensor<1x64x56x56xi8>) outs(%115 : tensor<1x64x56x56xf32>) {
^
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment