Created
August 8, 2024 22:49
-
-
Save AmosLewis/35ed28904fd6e82de0c66546b18579df to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
failed to translate executables | |
failed to translate executables | |
dpn68_vaiq.default.onnx.linalg.mlir:1243:12: error: One or more operations with large vector sizes (8192 bytes) were found: | |
%180 = linalg.generic {indexing_maps = [#map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%179 : tensor<1x64x56x56xf32>) outs(%107 : tensor<1x64x56x56xi8>) { | |
^ | |
dpn68_vaiq.default.onnx.linalg.mlir:9:3: note: called from | |
func.func @main_graph(%arg0: tensor<1x3x224x224xf32>) -> tensor<1x1000xf32> { | |
^ | |
<unknown>:0: note: %cst_3 = arith.constant dense<1.562500e-02> : vector<200704xf32> | |
dpn68_vaiq.default.onnx.linalg.mlir:1014:12: note: %5 = vector.transfer_read %3[%c0], %c0_i8 {in_bounds = [true]} : tensor<200704xi8>, vector<200704xi8> | |
%144 = linalg.generic {indexing_maps = [#map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%141 : tensor<1x64x56x56xi8>) outs(%115 : tensor<1x64x56x56xf32>) { | |
^ | |
dpn68_vaiq.default.onnx.linalg.mlir:1016:15: note: %6 = arith.extsi %5 : vector<200704xi8> to vector<200704xi32> | |
%1072 = arith.extsi %in : i8 to i32 | |
^ | |
dpn68_vaiq.default.onnx.linalg.mlir:1017:15: note: %7 = arith.sitofp %6 : vector<200704xi32> to vector<200704xf32> | |
%1073 = arith.sitofp %1072 : i32 to f32 | |
^ | |
dpn68_vaiq.default.onnx.linalg.mlir:1018:15: note: %8 = arith.mulf %7, %cst_3 : vector<200704xf32> | |
%1074 = arith.mulf %1073, %cst_31 : f32 | |
^ | |
dpn68_vaiq.default.onnx.linalg.mlir:1018:15: note: %9 = vector.transfer_write %8, %4[%c0] {in_bounds = [true]} : vector<200704xf32>, tensor<200704xf32> | |
dpn68_vaiq.default.onnx.linalg.mlir:1243:12: error: failed to run translation of source executable to target executable for backend #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 16 : i64, target_triple = "x86_64-unknown-unknown-eabi-elf"}> | |
%180 = linalg.generic {indexing_maps = [#map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%179 : tensor<1x64x56x56xf32>) outs(%107 : tensor<1x64x56x56xi8>) { | |
^ | |
dpn68_vaiq.default.onnx.linalg.mlir:9:3: note: called from | |
func.func @main_graph(%arg0: tensor<1x3x224x224xf32>) -> tensor<1x1000xf32> { | |
^ | |
dpn68_vaiq.default.onnx.linalg.mlir:1243:12: note: see current operation: | |
"hal.executable.variant"() ({ | |
"hal.executable.export"() ({ | |
^bb0(%arg8: !hal.device): | |
%62 = "arith.constant"() <{value = 2 : index}> : () -> index | |
%63 = "arith.constant"() <{value = 8 : index}> : () -> index | |
%64 = "arith.constant"() <{value = 1 : index}> : () -> index | |
"hal.return"(%62, %63, %64) : (index, index, index) -> () | |
}) {hal.interface.bindings = [#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>], layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>, ordinal = 0 : index, sym_name = "main_graph_dispatch_34_elementwise_64x56x56_f32xf32xi8"} : () -> () | |
"builtin.module"() ({ | |
"func.func"() <{function_type = () -> (), sym_name = "main_graph_dispatch_34_elementwise_64x56x56_f32xf32xi8"}> ({ | |
%0 = "arith.constant"() <{value = dense<1.270000e+02> : vector<1x1x4xf32>}> : () -> vector<1x1x4xf32> | |
%1 = "arith.constant"() <{value = dense<-1.280000e+02> : vector<1x1x4xf32>}> : () -> vector<1x1x4xf32> | |
%2 = "arith.constant"() <{value = dense<0.000000e+00> : vector<1x1x4xf32>}> : () -> vector<1x1x4xf32> | |
%3 = "arith.constant"() <{value = dense<1.562500e-02> : vector<1x1x4xf32>}> : () -> vector<1x1x4xf32> | |
%4 = "arith.constant"() <{value = dense<1.562500e-02> : vector<200704xf32>}> : () -> vector<200704xf32> | |
%5 = "arith.constant"() <{value = 0 : i8}> : () -> i8 | |
%6 = "arith.constant"() <{value = 4 : index}> : () -> index | |
%7 = "arith.constant"() <{value = 1 : index}> : () -> index | |
%8 = "arith.constant"() <{value = 28 : index}> : () -> index | |
%9 = "arith.constant"() <{value = 8 : index}> : () -> index | |
%10 = "arith.constant"() <{value = 0 : index}> : () -> index | |
%11 = "arith.constant"() <{value = 64 : index}> : () -> index | |
%12 = "arith.constant"() <{value = 56 : index}> : () -> index | |
%13 = "arith.constant"() <{value = 0.000000e+00 : f32}> : () -> f32 | |
%14 = "arith.constant"() <{value = 2408448 : index}> : () -> index | |
%15 = "arith.constant"() <{value = 2207744 : index}> : () -> index | |
%16 = "arith.constant"() <{value = 802816 : index}> : () -> index | |
%17 = "hal.interface.binding.subspan"(%14) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> !flow.dispatch.tensor<readonly:tensor<1x80x56x56xf32>> | |
%18 = "hal.interface.binding.subspan"(%15) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> !flow.dispatch.tensor<readonly:tensor<200704xi8>> | |
%19 = "hal.interface.binding.subspan"(%16) {alignment = 64 : index, binding = 1 : index, layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> !flow.dispatch.tensor<writeonly:tensor<64x56x56xi8>> | |
%20 = "flow.dispatch.tensor.load"(%18) <{operandSegmentSizes = array<i32: 1, 0, 0, 0, 0>, static_offsets = array<i64: 0>, static_sizes = array<i64: 200704>, static_strides = array<i64: 1>}> : (!flow.dispatch.tensor<readonly:tensor<200704xi8>>) -> tensor<200704xi8> | |
%21 = "tensor.empty"() : () -> tensor<200704xf32> | |
%22 = "vector.transfer_read"(%20, %10, %5) <{in_bounds = [true], operandSegmentSizes = array<i32: 1, 1, 1, 0>, permutation_map = affine_map<(d0) -> (d0)>}> : (tensor<200704xi8>, index, i8) -> vector<200704xi8> | |
%23 = "arith.extsi"(%22) : (vector<200704xi8>) -> vector<200704xi32> | |
%24 = "arith.sitofp"(%23) : (vector<200704xi32>) -> vector<200704xf32> | |
%25 = "arith.mulf"(%24, %4) <{fastmath = #arith.fastmath<none>}> : (vector<200704xf32>, vector<200704xf32>) -> vector<200704xf32> | |
%26 = "vector.transfer_write"(%25, %21, %10) <{in_bounds = [true], operandSegmentSizes = array<i32: 1, 1, 1, 0>, permutation_map = affine_map<(d0) -> (d0)>}> : (vector<200704xf32>, tensor<200704xf32>, index) -> tensor<200704xf32> | |
%27 = "tensor.expand_shape"(%26) <{reassociation = [[0, 1, 2]], static_output_shape = array<i64: 64, 56, 56>}> : (tensor<200704xf32>) -> tensor<64x56x56xf32> | |
%28 = "hal.interface.workgroup.id"() {dimension = 0 : index} : () -> index | |
%29 = "hal.interface.workgroup.count"() {dimension = 0 : index} : () -> index | |
%30 = "hal.interface.workgroup.id"() {dimension = 1 : index} : () -> index | |
%31 = "hal.interface.workgroup.count"() {dimension = 1 : index} : () -> index | |
%32 = "affine.apply"(%30) <{map = affine_map<()[s0] -> (s0 * 8)>}> : (index) -> index | |
%33 = "affine.apply"(%31) <{map = affine_map<()[s0] -> (s0 * 8)>}> : (index) -> index | |
%34 = "affine.apply"(%28) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index | |
%35 = "affine.apply"(%29) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index | |
"scf.for"(%32, %11, %33) ({ | |
^bb0(%arg0: index): | |
"scf.for"(%34, %12, %35) ({ | |
^bb0(%arg1: index): | |
%36 = "flow.dispatch.tensor.load"(%19, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 0, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, -9223372036854775808, 0>, static_sizes = array<i64: 8, 28, 56>, static_strides = array<i64: 1, 1, 1>}> : (!flow.dispatch.tensor<writeonly:tensor<64x56x56xi8>>, index, index) -> tensor<8x28x56xi8> | |
%37 = "flow.dispatch.tensor.load"(%17, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 0, 2, 0, 0>, static_offsets = array<i64: 0, -9223372036854775808, -9223372036854775808, 0>, static_sizes = array<i64: 1, 8, 28, 56>, static_strides = array<i64: 1, 1, 1, 1>}> : (!flow.dispatch.tensor<readonly:tensor<1x80x56x56xf32>>, index, index) -> tensor<8x28x56xf32> | |
%38 = "scf.for"(%10, %9, %7, %36) ({ | |
^bb0(%arg2: index, %arg3: tensor<8x28x56xi8>): | |
%39 = "scf.for"(%10, %8, %7, %arg3) ({ | |
^bb0(%arg4: index, %arg5: tensor<8x28x56xi8>): | |
%40 = "scf.for"(%10, %12, %6, %arg5) ({ | |
^bb0(%arg6: index, %arg7: tensor<8x28x56xi8>): | |
%41 = "arith.addi"(%arg2, %arg0) <{overflowFlags = #arith.overflow<none>}> : (index, index) -> index | |
%42 = "arith.addi"(%arg4, %arg1) <{overflowFlags = #arith.overflow<none>}> : (index, index) -> index | |
%43 = "vector.transfer_read"(%27, %41, %42, %arg6, %13) <{in_bounds = [true, true, true], operandSegmentSizes = array<i32: 1, 3, 1, 0>, permutation_map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>}> : (tensor<64x56x56xf32>, index, index, index, f32) -> vector<1x1x4xf32> | |
%44 = "vector.transfer_read"(%37, %arg2, %arg4, %arg6, %13) <{in_bounds = [true, true, true], operandSegmentSizes = array<i32: 1, 3, 1, 0>, permutation_map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>}> : (tensor<8x28x56xf32>, index, index, index, f32) -> vector<1x1x4xf32> | |
%45 = "arith.divf"(%44, %3) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32> | |
%46 = "math.roundeven"(%45) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>) -> vector<1x1x4xf32> | |
%47 = "arith.addf"(%46, %2) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32> | |
%48 = "arith.maximumf"(%47, %1) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32> | |
%49 = "arith.minimumf"(%48, %0) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32> | |
%50 = "arith.fptosi"(%49) : (vector<1x1x4xf32>) -> vector<1x1x4xi8> | |
%51 = "arith.extsi"(%50) : (vector<1x1x4xi8>) -> vector<1x1x4xi32> | |
%52 = "arith.sitofp"(%51) : (vector<1x1x4xi32>) -> vector<1x1x4xf32> | |
%53 = "arith.mulf"(%52, %3) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32> | |
%54 = "arith.addf"(%43, %53) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32> | |
%55 = "arith.divf"(%54, %3) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32> | |
%56 = "math.roundeven"(%55) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>) -> vector<1x1x4xf32> | |
%57 = "arith.addf"(%56, %2) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32> | |
%58 = "arith.maximumf"(%57, %1) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32> | |
%59 = "arith.minimumf"(%58, %0) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32> | |
%60 = "arith.fptosi"(%59) : (vector<1x1x4xf32>) -> vector<1x1x4xi8> | |
%61 = "vector.transfer_write"(%60, %arg7, %arg2, %arg4, %arg6) <{in_bounds = [true, true, true], operandSegmentSizes = array<i32: 1, 1, 3, 0>, permutation_map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>}> : (vector<1x1x4xi8>, tensor<8x28x56xi8>, index, index, index) -> tensor<8x28x56xi8> | |
"scf.yield"(%61) : (tensor<8x28x56xi8>) -> () | |
}) : (index, index, index, tensor<8x28x56xi8>) -> tensor<8x28x56xi8> | |
"scf.yield"(%40) : (tensor<8x28x56xi8>) -> () | |
}) : (index, index, index, tensor<8x28x56xi8>) -> tensor<8x28x56xi8> | |
"scf.yield"(%39) : (tensor<8x28x56xi8>) -> () | |
}) : (index, index, index, tensor<8x28x56xi8>) -> tensor<8x28x56xi8> | |
"flow.dispatch.tensor.store"(%38, %19, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 1, 0, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, -9223372036854775808, 0>, static_sizes = array<i64: 8, 28, 56>, static_strides = array<i64: 1, 1, 1>}> : (tensor<8x28x56xi8>, !flow.dispatch.tensor<writeonly:tensor<64x56x56xi8>>, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"func.return"() : () -> () | |
}) {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} : () -> () | |
}) : () -> () | |
"hal.executable.variant_end"() : () -> () | |
}) {sym_name = "embedded_elf_x86_64", target = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 16 : i64, target_triple = "x86_64-unknown-unknown-eabi-elf"}>} : () -> () | |
%180 = linalg.generic {indexing_maps = [#map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%179 : tensor<1x64x56x56xf32>) outs(%107 : tensor<1x64x56x56xi8>) { | |
^ | |
dpn68_vaiq.default.onnx.linalg.mlir:1497:12: error: One or more operations with large vector sizes (8192 bytes) were found: | |
%215 = linalg.generic {indexing_maps = [#map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%214 : tensor<1x64x56x56xi8>) outs(%115 : tensor<1x64x56x56xf32>) { | |
^ | |
dpn68_vaiq.default.onnx.linalg.mlir:9:3: note: called from | |
func.func @main_graph(%arg0: tensor<1x3x224x224xf32>) -> tensor<1x1000xf32> { | |
^ | |
<unknown>:0: note: %cst_3 = arith.constant dense<1.562500e-02> : vector<200704xf32> | |
dpn68_vaiq.default.onnx.linalg.mlir:1263:12: note: %5 = vector.transfer_read %3[%c0], %c0_i8 {in_bounds = [true]} : tensor<200704xi8>, vector<200704xi8> | |
%182 = linalg.generic {indexing_maps = [#map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%180 : tensor<1x64x56x56xi8>) outs(%115 : tensor<1x64x56x56xf32>) { | |
^ | |
dpn68_vaiq.default.onnx.linalg.mlir:1265:15: note: %6 = arith.extsi %5 : vector<200704xi8> to vector<200704xi32> | |
%1072 = arith.extsi %in : i8 to i32 | |
^ | |
dpn68_vaiq.default.onnx.linalg.mlir:1266:15: note: %7 = arith.sitofp %6 : vector<200704xi32> to vector<200704xf32> | |
%1073 = arith.sitofp %1072 : i32 to f32 | |
^ | |
dpn68_vaiq.default.onnx.linalg.mlir:1267:15: note: %8 = arith.mulf %7, %cst_3 : vector<200704xf32> | |
%1074 = arith.mulf %1073, %cst_31 : f32 | |
^ | |
dpn68_vaiq.default.onnx.linalg.mlir:1267:15: note: %9 = vector.transfer_write %8, %4[%c0] {in_bounds = [true]} : vector<200704xf32>, tensor<200704xf32> | |
dpn68_vaiq.default.onnx.linalg.mlir:1497:12: error: failed to run translation of source executable to target executable for backend #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 16 : i64, target_triple = "x86_64-unknown-unknown-eabi-elf"}> | |
%215 = linalg.generic {indexing_maps = [#map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%214 : tensor<1x64x56x56xi8>) outs(%115 : tensor<1x64x56x56xf32>) { | |
^ | |
dpn68_vaiq.default.onnx.linalg.mlir:9:3: note: called from | |
func.func @main_graph(%arg0: tensor<1x3x224x224xf32>) -> tensor<1x1000xf32> { | |
^ | |
dpn68_vaiq.default.onnx.linalg.mlir:1497:12: note: see current operation: | |
"hal.executable.variant"() ({ | |
"hal.executable.export"() ({ | |
^bb0(%arg8: !hal.device): | |
%65 = "arith.constant"() <{value = 2 : index}> : () -> index | |
%66 = "arith.constant"() <{value = 8 : index}> : () -> index | |
%67 = "arith.constant"() <{value = 1 : index}> : () -> index | |
"hal.return"(%65, %66, %67) : (index, index, index) -> () | |
}) {hal.interface.bindings = [#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>], layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>, ordinal = 0 : index, sym_name = "main_graph_dispatch_47_elementwise_64x56x56_f32"} : () -> () | |
"builtin.module"() ({ | |
"func.func"() <{function_type = () -> (), sym_name = "main_graph_dispatch_47_elementwise_64x56x56_f32"}> ({ | |
%0 = "arith.constant"() <{value = dense<1.270000e+02> : vector<1x1x4xf32>}> : () -> vector<1x1x4xf32> | |
%1 = "arith.constant"() <{value = dense<-1.280000e+02> : vector<1x1x4xf32>}> : () -> vector<1x1x4xf32> | |
%2 = "arith.constant"() <{value = dense<0.000000e+00> : vector<1x1x4xf32>}> : () -> vector<1x1x4xf32> | |
%3 = "arith.constant"() <{value = dense<1.562500e-02> : vector<1x1x4xf32>}> : () -> vector<1x1x4xf32> | |
%4 = "arith.constant"() <{value = dense<1.562500e-02> : vector<200704xf32>}> : () -> vector<200704xf32> | |
%5 = "arith.constant"() <{value = 0 : i8}> : () -> i8 | |
%6 = "arith.constant"() <{value = 4 : index}> : () -> index | |
%7 = "arith.constant"() <{value = 1 : index}> : () -> index | |
%8 = "arith.constant"() <{value = 28 : index}> : () -> index | |
%9 = "arith.constant"() <{value = 8 : index}> : () -> index | |
%10 = "arith.constant"() <{value = 0 : index}> : () -> index | |
%11 = "arith.constant"() <{value = 64 : index}> : () -> index | |
%12 = "arith.constant"() <{value = 56 : index}> : () -> index | |
%13 = "arith.constant"() <{value = 0.000000e+00 : f32}> : () -> f32 | |
%14 = "arith.constant"() <{value = 2007040 : index}> : () -> index | |
%15 = "arith.constant"() <{value = 802816 : index}> : () -> index | |
%16 = "arith.constant"() <{value = 1003520 : index}> : () -> index | |
%17 = "hal.interface.binding.subspan"(%14) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> !flow.dispatch.tensor<readonly:tensor<1x80x56x56xf32>> | |
%18 = "hal.interface.binding.subspan"(%15) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> !flow.dispatch.tensor<readonly:tensor<200704xi8>> | |
%19 = "hal.interface.binding.subspan"(%16) {alignment = 64 : index, binding = 1 : index, layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> !flow.dispatch.tensor<writeonly:tensor<64x56x56xf32>> | |
%20 = "flow.dispatch.tensor.load"(%18) <{operandSegmentSizes = array<i32: 1, 0, 0, 0, 0>, static_offsets = array<i64: 0>, static_sizes = array<i64: 200704>, static_strides = array<i64: 1>}> : (!flow.dispatch.tensor<readonly:tensor<200704xi8>>) -> tensor<200704xi8> | |
%21 = "tensor.empty"() : () -> tensor<200704xf32> | |
%22 = "vector.transfer_read"(%20, %10, %5) <{in_bounds = [true], operandSegmentSizes = array<i32: 1, 1, 1, 0>, permutation_map = affine_map<(d0) -> (d0)>}> : (tensor<200704xi8>, index, i8) -> vector<200704xi8> | |
%23 = "arith.extsi"(%22) : (vector<200704xi8>) -> vector<200704xi32> | |
%24 = "arith.sitofp"(%23) : (vector<200704xi32>) -> vector<200704xf32> | |
%25 = "arith.mulf"(%24, %4) <{fastmath = #arith.fastmath<none>}> : (vector<200704xf32>, vector<200704xf32>) -> vector<200704xf32> | |
%26 = "vector.transfer_write"(%25, %21, %10) <{in_bounds = [true], operandSegmentSizes = array<i32: 1, 1, 1, 0>, permutation_map = affine_map<(d0) -> (d0)>}> : (vector<200704xf32>, tensor<200704xf32>, index) -> tensor<200704xf32> | |
%27 = "tensor.expand_shape"(%26) <{reassociation = [[0, 1, 2]], static_output_shape = array<i64: 64, 56, 56>}> : (tensor<200704xf32>) -> tensor<64x56x56xf32> | |
%28 = "hal.interface.workgroup.id"() {dimension = 0 : index} : () -> index | |
%29 = "hal.interface.workgroup.count"() {dimension = 0 : index} : () -> index | |
%30 = "hal.interface.workgroup.id"() {dimension = 1 : index} : () -> index | |
%31 = "hal.interface.workgroup.count"() {dimension = 1 : index} : () -> index | |
%32 = "affine.apply"(%30) <{map = affine_map<()[s0] -> (s0 * 8)>}> : (index) -> index | |
%33 = "affine.apply"(%31) <{map = affine_map<()[s0] -> (s0 * 8)>}> : (index) -> index | |
%34 = "affine.apply"(%28) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index | |
%35 = "affine.apply"(%29) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index | |
"scf.for"(%32, %11, %33) ({ | |
^bb0(%arg0: index): | |
"scf.for"(%34, %12, %35) ({ | |
^bb0(%arg1: index): | |
%36 = "flow.dispatch.tensor.load"(%19, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 0, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, -9223372036854775808, 0>, static_sizes = array<i64: 8, 28, 56>, static_strides = array<i64: 1, 1, 1>}> : (!flow.dispatch.tensor<writeonly:tensor<64x56x56xf32>>, index, index) -> tensor<8x28x56xf32> | |
%37 = "flow.dispatch.tensor.load"(%17, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 0, 2, 0, 0>, static_offsets = array<i64: 0, -9223372036854775808, -9223372036854775808, 0>, static_sizes = array<i64: 1, 8, 28, 56>, static_strides = array<i64: 1, 1, 1, 1>}> : (!flow.dispatch.tensor<readonly:tensor<1x80x56x56xf32>>, index, index) -> tensor<8x28x56xf32> | |
%38 = "scf.for"(%10, %9, %7, %36) ({ | |
^bb0(%arg2: index, %arg3: tensor<8x28x56xf32>): | |
%39 = "scf.for"(%10, %8, %7, %arg3) ({ | |
^bb0(%arg4: index, %arg5: tensor<8x28x56xf32>): | |
%40 = "scf.for"(%10, %12, %6, %arg5) ({ | |
^bb0(%arg6: index, %arg7: tensor<8x28x56xf32>): | |
%41 = "arith.addi"(%arg2, %arg0) <{overflowFlags = #arith.overflow<none>}> : (index, index) -> index | |
%42 = "arith.addi"(%arg4, %arg1) <{overflowFlags = #arith.overflow<none>}> : (index, index) -> index | |
%43 = "vector.transfer_read"(%27, %41, %42, %arg6, %13) <{in_bounds = [true, true, true], operandSegmentSizes = array<i32: 1, 3, 1, 0>, permutation_map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>}> : (tensor<64x56x56xf32>, index, index, index, f32) -> vector<1x1x4xf32> | |
%44 = "vector.transfer_read"(%37, %arg2, %arg4, %arg6, %13) <{in_bounds = [true, true, true], operandSegmentSizes = array<i32: 1, 3, 1, 0>, permutation_map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>}> : (tensor<8x28x56xf32>, index, index, index, f32) -> vector<1x1x4xf32> | |
%45 = "arith.divf"(%44, %3) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32> | |
%46 = "math.roundeven"(%45) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>) -> vector<1x1x4xf32> | |
%47 = "arith.addf"(%46, %2) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32> | |
%48 = "arith.maximumf"(%47, %1) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32> | |
%49 = "arith.minimumf"(%48, %0) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32> | |
%50 = "arith.fptosi"(%49) : (vector<1x1x4xf32>) -> vector<1x1x4xi8> | |
%51 = "arith.extsi"(%50) : (vector<1x1x4xi8>) -> vector<1x1x4xi32> | |
%52 = "arith.sitofp"(%51) : (vector<1x1x4xi32>) -> vector<1x1x4xf32> | |
%53 = "arith.mulf"(%52, %3) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32> | |
%54 = "arith.addf"(%43, %53) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32> | |
%55 = "arith.divf"(%54, %3) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32> | |
%56 = "math.roundeven"(%55) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>) -> vector<1x1x4xf32> | |
%57 = "arith.addf"(%56, %2) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32> | |
%58 = "arith.maximumf"(%57, %1) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32> | |
%59 = "arith.minimumf"(%58, %0) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32> | |
%60 = "arith.fptosi"(%59) : (vector<1x1x4xf32>) -> vector<1x1x4xi8> | |
%61 = "arith.extsi"(%60) : (vector<1x1x4xi8>) -> vector<1x1x4xi32> | |
%62 = "arith.sitofp"(%61) : (vector<1x1x4xi32>) -> vector<1x1x4xf32> | |
%63 = "arith.mulf"(%62, %3) <{fastmath = #arith.fastmath<none>}> : (vector<1x1x4xf32>, vector<1x1x4xf32>) -> vector<1x1x4xf32> | |
%64 = "vector.transfer_write"(%63, %arg7, %arg2, %arg4, %arg6) <{in_bounds = [true, true, true], operandSegmentSizes = array<i32: 1, 1, 3, 0>, permutation_map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>}> : (vector<1x1x4xf32>, tensor<8x28x56xf32>, index, index, index) -> tensor<8x28x56xf32> | |
"scf.yield"(%64) : (tensor<8x28x56xf32>) -> () | |
}) : (index, index, index, tensor<8x28x56xf32>) -> tensor<8x28x56xf32> | |
"scf.yield"(%40) : (tensor<8x28x56xf32>) -> () | |
}) : (index, index, index, tensor<8x28x56xf32>) -> tensor<8x28x56xf32> | |
"scf.yield"(%39) : (tensor<8x28x56xf32>) -> () | |
}) : (index, index, index, tensor<8x28x56xf32>) -> tensor<8x28x56xf32> | |
"flow.dispatch.tensor.store"(%38, %19, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 1, 0, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, -9223372036854775808, 0>, static_sizes = array<i64: 8, 28, 56>, static_strides = array<i64: 1, 1, 1>}> : (tensor<8x28x56xf32>, !flow.dispatch.tensor<writeonly:tensor<64x56x56xf32>>, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"func.return"() : () -> () | |
}) {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} : () -> () | |
}) : () -> () | |
"hal.executable.variant_end"() : () -> () | |
}) {sym_name = "embedded_elf_x86_64", target = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 16 : i64, target_triple = "x86_64-unknown-unknown-eabi-elf"}>} : () -> () | |
%215 = linalg.generic {indexing_maps = [#map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%214 : tensor<1x64x56x56xi8>) outs(%115 : tensor<1x64x56x56xf32>) { | |
^ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment