Skip to content

Instantly share code, notes, and snippets.

@AmosLewis
Created June 12, 2024 03:35
Show Gist options
  • Save AmosLewis/87a575233f9fbec9d9fa7fc0279480bf to your computer and use it in GitHub Desktop.
Save AmosLewis/87a575233f9fbec9d9fa7fc0279480bf to your computer and use it in GitHub Desktop.
failed to translate executables
failed to translate executables
ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:979:12: error: 'func.func' op exceeded stack allocation limit of 32768 bytes for function. Got 401408 bytes
%106 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%105 : tensor<1x56x56x512xf32>) outs(%98 : tensor<1x56x56x512xi8>) {
^
ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:24:3: note: called from
func.func @torch_jit(%arg0: tensor<1x3x224x224xf32>) -> tensor<1x1000xf32> {
^
ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:979:12: note: see current operation:
"func.func"() <{function_type = () -> (), sym_name = "torch_jit_dispatch_13_quantized_batch_matmul_56x56x512x128_i8xi8xi32xi32xi32"}> ({
%0 = "arith.constant"() <{value = 3.750000e+00 : f32}> : () -> f32
%1 = "arith.constant"() <{value = 2.000000e+00 : f32}> : () -> f32
%2 = "arith.constant"() <{value = 8.000000e-01 : f32}> : () -> f32
%3 = "arith.constant"() <{value = 0.0821908935 : f32}> : () -> f32
%4 = "arith.constant"() <{value = -0.583389878 : f32}> : () -> f32
%5 = "arith.constant"() <{value = 1.62705934 : f32}> : () -> f32
%6 = "arith.constant"() <{value = -2.0606916 : f32}> : () -> f32
%7 = "arith.constant"() <{value = 0.0572442785 : f32}> : () -> f32
%8 = "arith.constant"() <{value = -0.0883462652 : f32}> : () -> f32
%9 = "arith.constant"() <{value = 0.448369086 : f32}> : () -> f32
%10 = "arith.constant"() <{value = -3.276070e-01 : f32}> : () -> f32
%11 = "arith.constant"() <{value = 0.0739796459 : f32}> : () -> f32
%12 = "arith.constant"() <{value = -0.131808966 : f32}> : () -> f32
%13 = "arith.constant"() <{value = 0.519230127 : f32}> : () -> f32
%14 = "arith.constant"() <{value = -0.463513821 : f32}> : () -> f32
%15 = "arith.constant"() <{value = -1.71048032E-5 : f32}> : () -> f32
%16 = "arith.constant"() <{value = 2.53447099E-4 : f32}> : () -> f32
%17 = "arith.constant"() <{value = -0.00141373626 : f32}> : () -> f32
%18 = "arith.constant"() <{value = 0.00351961935 : f32}> : () -> f32
%19 = "arith.constant"() <{value = -0.00330093061 : f32}> : () -> f32
%20 = "arith.constant"() <{value = 0.0370645523 : f32}> : () -> f32
%21 = "arith.constant"() <{value = 0.118407398 : f32}> : () -> f32
%22 = "arith.constant"() <{value = -0.364721417 : f32}> : () -> f32
%23 = "arith.constant"() <{value = 1.12750685 : f32}> : () -> f32
%24 = "arith.constant"() <{value = 0.0258146804 : f32}> : () -> f32
%25 = "arith.constant"() <{value = 0.209741712 : f32}> : () -> f32
%26 = "arith.constant"() <{value = -0.523018539 : f32}> : () -> f32
%27 = "arith.constant"() <{value = 1.12837911 : f32}> : () -> f32
%28 = "arith.constant"() <{value = 128 : index}> : () -> index
%29 = "arith.constant"() <{value = 64 : index}> : () -> index
%30 = "arith.constant"() <{value = 1 : index}> : () -> index
%31 = "arith.constant"() <{value = 28 : index}> : () -> index
%32 = "arith.constant"() <{value = 0 : index}> : () -> index
%33 = "arith.constant"() <{value = 86213696 : index}> : () -> index
%34 = "arith.constant"() <{value = 2420992 : index}> : () -> index
%35 = "arith.constant"() <{value = 2019584 : index}> : () -> index
%36 = "arith.constant"() <{value = 5.000000e-01 : f32}> : () -> f32
%37 = "arith.constant"() <{value = 1.000000e+00 : f32}> : () -> f32
%38 = "arith.constant"() <{value = 1.41421354 : f32}> : () -> f32
%39 = "arith.constant"() <{value = 1.270000e+02 : f32}> : () -> f32
%40 = "arith.constant"() <{value = -1.280000e+02 : f32}> : () -> f32
%41 = "arith.constant"() <{value = 0.000000e+00 : f32}> : () -> f32
%42 = "arith.constant"() <{value = 1.250000e-01 : f32}> : () -> f32
%43 = "arith.constant"() <{value = 9.765625E-4 : f32}> : () -> f32
%44 = "arith.constant"() <{value = 0 : i32}> : () -> i32
%45 = "arith.constant"() <{value = 56 : index}> : () -> index
%46 = "arith.constant"() <{value = 512 : index}> : () -> index
%47 = "memref.alloca"() <{alignment = 64 : i64, operandSegmentSizes = array<i32: 0, 0>}> : () -> memref<28x56x64xi32>
%48 = "hal.interface.binding.subspan"(%35) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x56x128xi8, strided<[7168, 128, 1], offset: 2019584>>
"memref.assume_alignment"(%48) <{alignment = 64 : i32}> : (memref<56x56x128xi8, strided<[7168, 128, 1], offset: 2019584>>) -> ()
%49 = "hal.interface.binding.subspan"(%34) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2420992>>
"memref.assume_alignment"(%49) <{alignment = 64 : i32}> : (memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2420992>>) -> ()
%50 = "hal.interface.binding.subspan"(%33) {alignment = 64 : index, binding = 1 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<512xf32, strided<[1], offset: 21553424>>
"memref.assume_alignment"(%50) <{alignment = 64 : i32}> : (memref<512xf32, strided<[1], offset: 21553424>>) -> ()
%51 = "hal.interface.binding.subspan"(%32) {alignment = 64 : index, binding = 2 : index, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x56x512xi8>
"memref.assume_alignment"(%51) <{alignment = 64 : i32}> : (memref<56x56x512xi8>) -> ()
%52 = "hal.interface.workgroup.id"() {dimension = 0 : index} : () -> index
%53 = "hal.interface.workgroup.count"() {dimension = 0 : index} : () -> index
%54 = "hal.interface.workgroup.id"() {dimension = 1 : index} : () -> index
%55 = "hal.interface.workgroup.count"() {dimension = 1 : index} : () -> index
%56 = "affine.apply"(%54) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index
%57 = "affine.apply"(%55) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index
"scf.for"(%56, %45, %57) ({
^bb0(%arg0: index):
%58 = "affine.apply"(%52) <{map = affine_map<()[s0] -> (s0 * 64)>}> : (index) -> index
%59 = "affine.apply"(%53) <{map = affine_map<()[s0] -> (s0 * 64)>}> : (index) -> index
"scf.for"(%58, %46, %59) ({
^bb0(%arg1: index):
%60 = "memref.subview"(%51, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, -9223372036854775808>, static_sizes = array<i64: 28, 56, 64>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x56x512xi8>, index, index) -> memref<28x56x64xi8, strided<[28672, 512, 1], offset: ?>>
%61 = "memref.subview"(%50, %arg1) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808>, static_sizes = array<i64: 64>, static_strides = array<i64: 1>}> : (memref<512xf32, strided<[1], offset: 21553424>>, index) -> memref<64xf32, strided<[1], offset: ?>>
%62 = "memref.subview"(%48, %arg0) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, 0>, static_sizes = array<i64: 28, 56, 128>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x56x128xi8, strided<[7168, 128, 1], offset: 2019584>>, index) -> memref<28x56x128xi8, strided<[7168, 128, 1], offset: ?>>
%63 = "memref.subview"(%49, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, -9223372036854775808>, static_sizes = array<i64: 28, 128, 64>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2420992>>, index, index) -> memref<28x128x64xi8, strided<[65536, 512, 1], offset: ?>>
"scf.for"(%32, %31, %30) ({
^bb0(%arg9: index):
"scf.for"(%32, %45, %30) ({
^bb0(%arg10: index):
"scf.for"(%32, %29, %30) ({
^bb0(%arg11: index):
"memref.store"(%44, %47, %arg9, %arg10, %arg11) <{nontemporal = false}> : (i32, memref<28x56x64xi32>, index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.for"(%32, %31, %30) ({
^bb0(%arg5: index):
"scf.for"(%32, %45, %30) ({
^bb0(%arg6: index):
"scf.for"(%32, %29, %30) ({
^bb0(%arg7: index):
"scf.for"(%32, %28, %30) ({
^bb0(%arg8: index):
%129 = "memref.load"(%62, %arg5, %arg6, %arg8) <{nontemporal = false}> : (memref<28x56x128xi8, strided<[7168, 128, 1], offset: ?>>, index, index, index) -> i8
%130 = "memref.load"(%63, %arg5, %arg8, %arg7) <{nontemporal = false}> : (memref<28x128x64xi8, strided<[65536, 512, 1], offset: ?>>, index, index, index) -> i8
%131 = "memref.load"(%47, %arg5, %arg6, %arg7) <{nontemporal = false}> : (memref<28x56x64xi32>, index, index, index) -> i32
%132 = "arith.extsi"(%129) : (i8) -> i32
%133 = "arith.extsi"(%130) : (i8) -> i32
%134 = "arith.muli"(%132, %133) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
%135 = "arith.addi"(%131, %134) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
"memref.store"(%135, %47, %arg5, %arg6, %arg7) <{nontemporal = false}> : (i32, memref<28x56x64xi32>, index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.for"(%32, %31, %30) ({
^bb0(%arg2: index):
"scf.for"(%32, %45, %30) ({
^bb0(%arg3: index):
"scf.for"(%32, %29, %30) ({
^bb0(%arg4: index):
%64 = "memref.load"(%61, %arg4) <{nontemporal = false}> : (memref<64xf32, strided<[1], offset: ?>>, index) -> f32
%65 = "memref.load"(%47, %arg2, %arg3, %arg4) <{nontemporal = false}> : (memref<28x56x64xi32>, index, index, index) -> i32
%66 = "arith.sitofp"(%65) : (i32) -> f32
%67 = "arith.mulf"(%66, %43) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%68 = "arith.addf"(%64, %67) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%69 = "arith.divf"(%68, %42) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%70 = "math.round"(%69) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32
%71 = "arith.addf"(%70, %41) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%72 = "arith.cmpf"(%71, %40) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1
%73 = "arith.cmpf"(%71, %39) <{fastmath = #arith.fastmath<none>, predicate = 9 : i64}> : (f32, f32) -> i1
%74 = "arith.select"(%72, %40, %71) : (i1, f32, f32) -> f32
%75 = "arith.select"(%73, %39, %74) : (i1, f32, f32) -> f32
%76 = "arith.fptosi"(%75) : (f32) -> i8
%77 = "arith.extsi"(%76) : (i8) -> i32
%78 = "arith.sitofp"(%77) : (i32) -> f32
%79 = "arith.mulf"(%78, %42) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%80 = "arith.divf"(%79, %38) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%81 = "arith.cmpf"(%80, %41) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1
%82 = "arith.negf"(%80) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32
%83 = "arith.select"(%81, %82, %80) : (i1, f32, f32) -> f32
%84 = "arith.cmpf"(%83, %2) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1
%85 = "arith.select"(%84, %27, %23) : (i1, f32, f32) -> f32
%86 = "arith.select"(%84, %14, %10) : (i1, f32, f32) -> f32
%87 = "arith.select"(%84, %26, %22) : (i1, f32, f32) -> f32
%88 = "arith.select"(%84, %13, %9) : (i1, f32, f32) -> f32
%89 = "arith.select"(%84, %25, %21) : (i1, f32, f32) -> f32
%90 = "arith.select"(%84, %12, %8) : (i1, f32, f32) -> f32
%91 = "arith.select"(%84, %24, %20) : (i1, f32, f32) -> f32
%92 = "arith.select"(%84, %11, %7) : (i1, f32, f32) -> f32
%93 = "arith.cmpf"(%83, %1) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1
%94 = "arith.select"(%93, %41, %19) : (i1, f32, f32) -> f32
%95 = "arith.select"(%93, %85, %18) : (i1, f32, f32) -> f32
%96 = "arith.select"(%93, %86, %6) : (i1, f32, f32) -> f32
%97 = "arith.select"(%93, %87, %17) : (i1, f32, f32) -> f32
%98 = "arith.select"(%93, %88, %5) : (i1, f32, f32) -> f32
%99 = "arith.select"(%93, %89, %16) : (i1, f32, f32) -> f32
%100 = "arith.select"(%93, %90, %4) : (i1, f32, f32) -> f32
%101 = "arith.select"(%93, %91, %15) : (i1, f32, f32) -> f32
%102 = "arith.select"(%93, %92, %3) : (i1, f32, f32) -> f32
%103 = "arith.select"(%93, %41, %37) : (i1, f32, f32) -> f32
%104 = "arith.cmpf"(%83, %0) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1
%105 = "math.fma"(%83, %101, %99) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%106 = "math.fma"(%83, %105, %97) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%107 = "math.fma"(%83, %106, %95) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%108 = "math.fma"(%83, %107, %94) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%109 = "math.fma"(%83, %102, %100) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%110 = "math.fma"(%83, %109, %98) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%111 = "math.fma"(%83, %110, %96) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%112 = "math.fma"(%83, %111, %37) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%113 = "arith.divf"(%108, %112) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%114 = "arith.addf"(%103, %113) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%115 = "arith.select"(%104, %114, %37) : (i1, f32, f32) -> f32
%116 = "arith.negf"(%115) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32
%117 = "arith.select"(%81, %116, %115) : (i1, f32, f32) -> f32
%118 = "arith.addf"(%117, %37) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%119 = "arith.mulf"(%79, %118) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%120 = "arith.mulf"(%119, %36) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%121 = "arith.divf"(%120, %42) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%122 = "math.round"(%121) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32
%123 = "arith.addf"(%122, %41) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%124 = "arith.cmpf"(%123, %40) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1
%125 = "arith.cmpf"(%123, %39) <{fastmath = #arith.fastmath<none>, predicate = 9 : i64}> : (f32, f32) -> i1
%126 = "arith.select"(%124, %40, %123) : (i1, f32, f32) -> f32
%127 = "arith.select"(%125, %39, %126) : (i1, f32, f32) -> f32
%128 = "arith.fptosi"(%127) : (f32) -> i8
"memref.store"(%128, %60, %arg2, %arg3, %arg4) <{nontemporal = false}> : (i8, memref<28x56x64xi8, strided<[28672, 512, 1], offset: ?>>, index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"func.return"() : () -> ()
}) {translation_info = #iree_codegen.translation_info<CPUDefault>} : () -> ()
%106 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%105 : tensor<1x56x56x512xf32>) outs(%98 : tensor<1x56x56x512xi8>) {
^
ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:979:12: error: failed to run translation of source executable to target executable for backend #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 16 : i64, target_triple = "x86_64-unknown-unknown-eabi-elf"}>
%106 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%105 : tensor<1x56x56x512xf32>) outs(%98 : tensor<1x56x56x512xi8>) {
^
ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:24:3: note: called from
func.func @torch_jit(%arg0: tensor<1x3x224x224xf32>) -> tensor<1x1000xf32> {
^
ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:979:12: note: see current operation:
"hal.executable.variant"() ({
"hal.executable.export"() ({
^bb0(%arg12: !hal.device):
%136 = "arith.constant"() <{value = 8 : index}> : () -> index
%137 = "arith.constant"() <{value = 2 : index}> : () -> index
%138 = "arith.constant"() <{value = 1 : index}> : () -> index
"hal.return"(%136, %137, %138) : (index, index, index) -> ()
}) {hal.interface.bindings = [#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>, #hal.interface.binding<0, 2>], layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>, ordinal = 0 : index, sym_name = "torch_jit_dispatch_13_quantized_batch_matmul_56x56x512x128_i8xi8xi32xi32xi32"} : () -> ()
"builtin.module"() ({
"func.func"() <{function_type = () -> (), sym_name = "torch_jit_dispatch_13_quantized_batch_matmul_56x56x512x128_i8xi8xi32xi32xi32"}> ({
%0 = "arith.constant"() <{value = 3.750000e+00 : f32}> : () -> f32
%1 = "arith.constant"() <{value = 2.000000e+00 : f32}> : () -> f32
%2 = "arith.constant"() <{value = 8.000000e-01 : f32}> : () -> f32
%3 = "arith.constant"() <{value = 0.0821908935 : f32}> : () -> f32
%4 = "arith.constant"() <{value = -0.583389878 : f32}> : () -> f32
%5 = "arith.constant"() <{value = 1.62705934 : f32}> : () -> f32
%6 = "arith.constant"() <{value = -2.0606916 : f32}> : () -> f32
%7 = "arith.constant"() <{value = 0.0572442785 : f32}> : () -> f32
%8 = "arith.constant"() <{value = -0.0883462652 : f32}> : () -> f32
%9 = "arith.constant"() <{value = 0.448369086 : f32}> : () -> f32
%10 = "arith.constant"() <{value = -3.276070e-01 : f32}> : () -> f32
%11 = "arith.constant"() <{value = 0.0739796459 : f32}> : () -> f32
%12 = "arith.constant"() <{value = -0.131808966 : f32}> : () -> f32
%13 = "arith.constant"() <{value = 0.519230127 : f32}> : () -> f32
%14 = "arith.constant"() <{value = -0.463513821 : f32}> : () -> f32
%15 = "arith.constant"() <{value = -1.71048032E-5 : f32}> : () -> f32
%16 = "arith.constant"() <{value = 2.53447099E-4 : f32}> : () -> f32
%17 = "arith.constant"() <{value = -0.00141373626 : f32}> : () -> f32
%18 = "arith.constant"() <{value = 0.00351961935 : f32}> : () -> f32
%19 = "arith.constant"() <{value = -0.00330093061 : f32}> : () -> f32
%20 = "arith.constant"() <{value = 0.0370645523 : f32}> : () -> f32
%21 = "arith.constant"() <{value = 0.118407398 : f32}> : () -> f32
%22 = "arith.constant"() <{value = -0.364721417 : f32}> : () -> f32
%23 = "arith.constant"() <{value = 1.12750685 : f32}> : () -> f32
%24 = "arith.constant"() <{value = 0.0258146804 : f32}> : () -> f32
%25 = "arith.constant"() <{value = 0.209741712 : f32}> : () -> f32
%26 = "arith.constant"() <{value = -0.523018539 : f32}> : () -> f32
%27 = "arith.constant"() <{value = 1.12837911 : f32}> : () -> f32
%28 = "arith.constant"() <{value = 128 : index}> : () -> index
%29 = "arith.constant"() <{value = 64 : index}> : () -> index
%30 = "arith.constant"() <{value = 1 : index}> : () -> index
%31 = "arith.constant"() <{value = 28 : index}> : () -> index
%32 = "arith.constant"() <{value = 0 : index}> : () -> index
%33 = "arith.constant"() <{value = 86213696 : index}> : () -> index
%34 = "arith.constant"() <{value = 2420992 : index}> : () -> index
%35 = "arith.constant"() <{value = 2019584 : index}> : () -> index
%36 = "arith.constant"() <{value = 5.000000e-01 : f32}> : () -> f32
%37 = "arith.constant"() <{value = 1.000000e+00 : f32}> : () -> f32
%38 = "arith.constant"() <{value = 1.41421354 : f32}> : () -> f32
%39 = "arith.constant"() <{value = 1.270000e+02 : f32}> : () -> f32
%40 = "arith.constant"() <{value = -1.280000e+02 : f32}> : () -> f32
%41 = "arith.constant"() <{value = 0.000000e+00 : f32}> : () -> f32
%42 = "arith.constant"() <{value = 1.250000e-01 : f32}> : () -> f32
%43 = "arith.constant"() <{value = 9.765625E-4 : f32}> : () -> f32
%44 = "arith.constant"() <{value = 0 : i32}> : () -> i32
%45 = "arith.constant"() <{value = 56 : index}> : () -> index
%46 = "arith.constant"() <{value = 512 : index}> : () -> index
%47 = "memref.alloca"() <{alignment = 64 : i64, operandSegmentSizes = array<i32: 0, 0>}> : () -> memref<28x56x64xi32>
%48 = "hal.interface.binding.subspan"(%35) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x56x128xi8, strided<[7168, 128, 1], offset: 2019584>>
"memref.assume_alignment"(%48) <{alignment = 64 : i32}> : (memref<56x56x128xi8, strided<[7168, 128, 1], offset: 2019584>>) -> ()
%49 = "hal.interface.binding.subspan"(%34) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2420992>>
"memref.assume_alignment"(%49) <{alignment = 64 : i32}> : (memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2420992>>) -> ()
%50 = "hal.interface.binding.subspan"(%33) {alignment = 64 : index, binding = 1 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<512xf32, strided<[1], offset: 21553424>>
"memref.assume_alignment"(%50) <{alignment = 64 : i32}> : (memref<512xf32, strided<[1], offset: 21553424>>) -> ()
%51 = "hal.interface.binding.subspan"(%32) {alignment = 64 : index, binding = 2 : index, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x56x512xi8>
"memref.assume_alignment"(%51) <{alignment = 64 : i32}> : (memref<56x56x512xi8>) -> ()
%52 = "hal.interface.workgroup.id"() {dimension = 0 : index} : () -> index
%53 = "hal.interface.workgroup.count"() {dimension = 0 : index} : () -> index
%54 = "hal.interface.workgroup.id"() {dimension = 1 : index} : () -> index
%55 = "hal.interface.workgroup.count"() {dimension = 1 : index} : () -> index
%56 = "affine.apply"(%54) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index
%57 = "affine.apply"(%55) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index
"scf.for"(%56, %45, %57) ({
^bb0(%arg0: index):
%58 = "affine.apply"(%52) <{map = affine_map<()[s0] -> (s0 * 64)>}> : (index) -> index
%59 = "affine.apply"(%53) <{map = affine_map<()[s0] -> (s0 * 64)>}> : (index) -> index
"scf.for"(%58, %46, %59) ({
^bb0(%arg1: index):
%60 = "memref.subview"(%51, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, -9223372036854775808>, static_sizes = array<i64: 28, 56, 64>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x56x512xi8>, index, index) -> memref<28x56x64xi8, strided<[28672, 512, 1], offset: ?>>
%61 = "memref.subview"(%50, %arg1) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808>, static_sizes = array<i64: 64>, static_strides = array<i64: 1>}> : (memref<512xf32, strided<[1], offset: 21553424>>, index) -> memref<64xf32, strided<[1], offset: ?>>
%62 = "memref.subview"(%48, %arg0) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, 0>, static_sizes = array<i64: 28, 56, 128>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x56x128xi8, strided<[7168, 128, 1], offset: 2019584>>, index) -> memref<28x56x128xi8, strided<[7168, 128, 1], offset: ?>>
%63 = "memref.subview"(%49, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, -9223372036854775808>, static_sizes = array<i64: 28, 128, 64>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2420992>>, index, index) -> memref<28x128x64xi8, strided<[65536, 512, 1], offset: ?>>
"scf.for"(%32, %31, %30) ({
^bb0(%arg9: index):
"scf.for"(%32, %45, %30) ({
^bb0(%arg10: index):
"scf.for"(%32, %29, %30) ({
^bb0(%arg11: index):
"memref.store"(%44, %47, %arg9, %arg10, %arg11) <{nontemporal = false}> : (i32, memref<28x56x64xi32>, index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.for"(%32, %31, %30) ({
^bb0(%arg5: index):
"scf.for"(%32, %45, %30) ({
^bb0(%arg6: index):
"scf.for"(%32, %29, %30) ({
^bb0(%arg7: index):
"scf.for"(%32, %28, %30) ({
^bb0(%arg8: index):
%129 = "memref.load"(%62, %arg5, %arg6, %arg8) <{nontemporal = false}> : (memref<28x56x128xi8, strided<[7168, 128, 1], offset: ?>>, index, index, index) -> i8
%130 = "memref.load"(%63, %arg5, %arg8, %arg7) <{nontemporal = false}> : (memref<28x128x64xi8, strided<[65536, 512, 1], offset: ?>>, index, index, index) -> i8
%131 = "memref.load"(%47, %arg5, %arg6, %arg7) <{nontemporal = false}> : (memref<28x56x64xi32>, index, index, index) -> i32
%132 = "arith.extsi"(%129) : (i8) -> i32
%133 = "arith.extsi"(%130) : (i8) -> i32
%134 = "arith.muli"(%132, %133) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
%135 = "arith.addi"(%131, %134) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
"memref.store"(%135, %47, %arg5, %arg6, %arg7) <{nontemporal = false}> : (i32, memref<28x56x64xi32>, index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.for"(%32, %31, %30) ({
^bb0(%arg2: index):
"scf.for"(%32, %45, %30) ({
^bb0(%arg3: index):
"scf.for"(%32, %29, %30) ({
^bb0(%arg4: index):
%64 = "memref.load"(%61, %arg4) <{nontemporal = false}> : (memref<64xf32, strided<[1], offset: ?>>, index) -> f32
%65 = "memref.load"(%47, %arg2, %arg3, %arg4) <{nontemporal = false}> : (memref<28x56x64xi32>, index, index, index) -> i32
%66 = "arith.sitofp"(%65) : (i32) -> f32
%67 = "arith.mulf"(%66, %43) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%68 = "arith.addf"(%64, %67) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%69 = "arith.divf"(%68, %42) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%70 = "math.round"(%69) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32
%71 = "arith.addf"(%70, %41) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%72 = "arith.cmpf"(%71, %40) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1
%73 = "arith.cmpf"(%71, %39) <{fastmath = #arith.fastmath<none>, predicate = 9 : i64}> : (f32, f32) -> i1
%74 = "arith.select"(%72, %40, %71) : (i1, f32, f32) -> f32
%75 = "arith.select"(%73, %39, %74) : (i1, f32, f32) -> f32
%76 = "arith.fptosi"(%75) : (f32) -> i8
%77 = "arith.extsi"(%76) : (i8) -> i32
%78 = "arith.sitofp"(%77) : (i32) -> f32
%79 = "arith.mulf"(%78, %42) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%80 = "arith.divf"(%79, %38) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%81 = "arith.cmpf"(%80, %41) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1
%82 = "arith.negf"(%80) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32
%83 = "arith.select"(%81, %82, %80) : (i1, f32, f32) -> f32
%84 = "arith.cmpf"(%83, %2) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1
%85 = "arith.select"(%84, %27, %23) : (i1, f32, f32) -> f32
%86 = "arith.select"(%84, %14, %10) : (i1, f32, f32) -> f32
%87 = "arith.select"(%84, %26, %22) : (i1, f32, f32) -> f32
%88 = "arith.select"(%84, %13, %9) : (i1, f32, f32) -> f32
%89 = "arith.select"(%84, %25, %21) : (i1, f32, f32) -> f32
%90 = "arith.select"(%84, %12, %8) : (i1, f32, f32) -> f32
%91 = "arith.select"(%84, %24, %20) : (i1, f32, f32) -> f32
%92 = "arith.select"(%84, %11, %7) : (i1, f32, f32) -> f32
%93 = "arith.cmpf"(%83, %1) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1
%94 = "arith.select"(%93, %41, %19) : (i1, f32, f32) -> f32
%95 = "arith.select"(%93, %85, %18) : (i1, f32, f32) -> f32
%96 = "arith.select"(%93, %86, %6) : (i1, f32, f32) -> f32
%97 = "arith.select"(%93, %87, %17) : (i1, f32, f32) -> f32
%98 = "arith.select"(%93, %88, %5) : (i1, f32, f32) -> f32
%99 = "arith.select"(%93, %89, %16) : (i1, f32, f32) -> f32
%100 = "arith.select"(%93, %90, %4) : (i1, f32, f32) -> f32
%101 = "arith.select"(%93, %91, %15) : (i1, f32, f32) -> f32
%102 = "arith.select"(%93, %92, %3) : (i1, f32, f32) -> f32
%103 = "arith.select"(%93, %41, %37) : (i1, f32, f32) -> f32
%104 = "arith.cmpf"(%83, %0) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1
%105 = "math.fma"(%83, %101, %99) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%106 = "math.fma"(%83, %105, %97) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%107 = "math.fma"(%83, %106, %95) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%108 = "math.fma"(%83, %107, %94) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%109 = "math.fma"(%83, %102, %100) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%110 = "math.fma"(%83, %109, %98) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%111 = "math.fma"(%83, %110, %96) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%112 = "math.fma"(%83, %111, %37) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%113 = "arith.divf"(%108, %112) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%114 = "arith.addf"(%103, %113) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%115 = "arith.select"(%104, %114, %37) : (i1, f32, f32) -> f32
%116 = "arith.negf"(%115) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32
%117 = "arith.select"(%81, %116, %115) : (i1, f32, f32) -> f32
%118 = "arith.addf"(%117, %37) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%119 = "arith.mulf"(%79, %118) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%120 = "arith.mulf"(%119, %36) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%121 = "arith.divf"(%120, %42) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%122 = "math.round"(%121) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32
%123 = "arith.addf"(%122, %41) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%124 = "arith.cmpf"(%123, %40) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1
%125 = "arith.cmpf"(%123, %39) <{fastmath = #arith.fastmath<none>, predicate = 9 : i64}> : (f32, f32) -> i1
%126 = "arith.select"(%124, %40, %123) : (i1, f32, f32) -> f32
%127 = "arith.select"(%125, %39, %126) : (i1, f32, f32) -> f32
%128 = "arith.fptosi"(%127) : (f32) -> i8
"memref.store"(%128, %60, %arg2, %arg3, %arg4) <{nontemporal = false}> : (i8, memref<28x56x64xi8, strided<[28672, 512, 1], offset: ?>>, index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"func.return"() : () -> ()
}) {translation_info = #iree_codegen.translation_info<CPUDefault>} : () -> ()
}) : () -> ()
"hal.executable.variant_end"() : () -> ()
}) {sym_name = "embedded_elf_x86_64", target = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 16 : i64, target_triple = "x86_64-unknown-unknown-eabi-elf"}>} : () -> ()
%106 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%105 : tensor<1x56x56x512xf32>) outs(%98 : tensor<1x56x56x512xi8>) {
^
ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:1434:12: error: 'func.func' op exceeded stack allocation limit of 32768 bytes for function. Got 401408 bytes
%174 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%173 : tensor<1x56x56x512xf32>) outs(%98 : tensor<1x56x56x512xi8>) {
^
ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:24:3: note: called from
func.func @torch_jit(%arg0: tensor<1x3x224x224xf32>) -> tensor<1x1000xf32> {
^
ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:1434:12: note: see current operation:
"func.func"() <{function_type = () -> (), sym_name = "torch_jit_dispatch_24_quantized_batch_matmul_56x56x512x128_i8xi8xi32xi32xi32"}> ({
%0 = "arith.constant"() <{value = 3.750000e+00 : f32}> : () -> f32
%1 = "arith.constant"() <{value = 2.000000e+00 : f32}> : () -> f32
%2 = "arith.constant"() <{value = 8.000000e-01 : f32}> : () -> f32
%3 = "arith.constant"() <{value = 0.0821908935 : f32}> : () -> f32
%4 = "arith.constant"() <{value = -0.583389878 : f32}> : () -> f32
%5 = "arith.constant"() <{value = 1.62705934 : f32}> : () -> f32
%6 = "arith.constant"() <{value = -2.0606916 : f32}> : () -> f32
%7 = "arith.constant"() <{value = 0.0572442785 : f32}> : () -> f32
%8 = "arith.constant"() <{value = -0.0883462652 : f32}> : () -> f32
%9 = "arith.constant"() <{value = 0.448369086 : f32}> : () -> f32
%10 = "arith.constant"() <{value = -3.276070e-01 : f32}> : () -> f32
%11 = "arith.constant"() <{value = 0.0739796459 : f32}> : () -> f32
%12 = "arith.constant"() <{value = -0.131808966 : f32}> : () -> f32
%13 = "arith.constant"() <{value = 0.519230127 : f32}> : () -> f32
%14 = "arith.constant"() <{value = -0.463513821 : f32}> : () -> f32
%15 = "arith.constant"() <{value = -1.71048032E-5 : f32}> : () -> f32
%16 = "arith.constant"() <{value = 2.53447099E-4 : f32}> : () -> f32
%17 = "arith.constant"() <{value = -0.00141373626 : f32}> : () -> f32
%18 = "arith.constant"() <{value = 0.00351961935 : f32}> : () -> f32
%19 = "arith.constant"() <{value = -0.00330093061 : f32}> : () -> f32
%20 = "arith.constant"() <{value = 0.0370645523 : f32}> : () -> f32
%21 = "arith.constant"() <{value = 0.118407398 : f32}> : () -> f32
%22 = "arith.constant"() <{value = -0.364721417 : f32}> : () -> f32
%23 = "arith.constant"() <{value = 1.12750685 : f32}> : () -> f32
%24 = "arith.constant"() <{value = 0.0258146804 : f32}> : () -> f32
%25 = "arith.constant"() <{value = 0.209741712 : f32}> : () -> f32
%26 = "arith.constant"() <{value = -0.523018539 : f32}> : () -> f32
%27 = "arith.constant"() <{value = 1.12837911 : f32}> : () -> f32
%28 = "arith.constant"() <{value = 128 : index}> : () -> index
%29 = "arith.constant"() <{value = 64 : index}> : () -> index
%30 = "arith.constant"() <{value = 1 : index}> : () -> index
%31 = "arith.constant"() <{value = 28 : index}> : () -> index
%32 = "arith.constant"() <{value = 0 : index}> : () -> index
%33 = "arith.constant"() <{value = 802816 : index}> : () -> index
%34 = "arith.constant"() <{value = 86217280 : index}> : () -> index
%35 = "arith.constant"() <{value = 2408448 : index}> : () -> index
%36 = "arith.constant"() <{value = 401408 : index}> : () -> index
%37 = "arith.constant"() <{value = 3.125000e-02 : f32}> : () -> f32
%38 = "arith.constant"() <{value = 5.000000e-01 : f32}> : () -> f32
%39 = "arith.constant"() <{value = 1.000000e+00 : f32}> : () -> f32
%40 = "arith.constant"() <{value = 1.41421354 : f32}> : () -> f32
%41 = "arith.constant"() <{value = 1.270000e+02 : f32}> : () -> f32
%42 = "arith.constant"() <{value = -1.280000e+02 : f32}> : () -> f32
%43 = "arith.constant"() <{value = 0.000000e+00 : f32}> : () -> f32
%44 = "arith.constant"() <{value = 6.250000e-02 : f32}> : () -> f32
%45 = "arith.constant"() <{value = 4.8828125E-4 : f32}> : () -> f32
%46 = "arith.constant"() <{value = 0 : i32}> : () -> i32
%47 = "arith.constant"() <{value = 56 : index}> : () -> index
%48 = "arith.constant"() <{value = 512 : index}> : () -> index
%49 = "memref.alloca"() <{alignment = 64 : i64, operandSegmentSizes = array<i32: 0, 0>}> : () -> memref<28x56x64xi32>
%50 = "hal.interface.binding.subspan"(%36) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x56x128xi8, strided<[7168, 128, 1], offset: 401408>>
"memref.assume_alignment"(%50) <{alignment = 64 : i32}> : (memref<56x56x128xi8, strided<[7168, 128, 1], offset: 401408>>) -> ()
%51 = "hal.interface.binding.subspan"(%35) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2408448>>
"memref.assume_alignment"(%51) <{alignment = 64 : i32}> : (memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2408448>>) -> ()
%52 = "hal.interface.binding.subspan"(%34) {alignment = 64 : index, binding = 1 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<512xf32, strided<[1], offset: 21554320>>
"memref.assume_alignment"(%52) <{alignment = 64 : i32}> : (memref<512xf32, strided<[1], offset: 21554320>>) -> ()
%53 = "hal.interface.binding.subspan"(%33) {alignment = 64 : index, binding = 2 : index, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x56x512xi8, strided<[28672, 512, 1], offset: 802816>>
"memref.assume_alignment"(%53) <{alignment = 64 : i32}> : (memref<56x56x512xi8, strided<[28672, 512, 1], offset: 802816>>) -> ()
%54 = "hal.interface.workgroup.id"() {dimension = 0 : index} : () -> index
%55 = "hal.interface.workgroup.count"() {dimension = 0 : index} : () -> index
%56 = "hal.interface.workgroup.id"() {dimension = 1 : index} : () -> index
%57 = "hal.interface.workgroup.count"() {dimension = 1 : index} : () -> index
%58 = "affine.apply"(%56) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index
%59 = "affine.apply"(%57) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index
"scf.for"(%58, %47, %59) ({
^bb0(%arg0: index):
%60 = "affine.apply"(%54) <{map = affine_map<()[s0] -> (s0 * 64)>}> : (index) -> index
%61 = "affine.apply"(%55) <{map = affine_map<()[s0] -> (s0 * 64)>}> : (index) -> index
"scf.for"(%60, %48, %61) ({
^bb0(%arg1: index):
%62 = "memref.subview"(%53, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, -9223372036854775808>, static_sizes = array<i64: 28, 56, 64>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x56x512xi8, strided<[28672, 512, 1], offset: 802816>>, index, index) -> memref<28x56x64xi8, strided<[28672, 512, 1], offset: ?>>
%63 = "memref.subview"(%52, %arg1) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808>, static_sizes = array<i64: 64>, static_strides = array<i64: 1>}> : (memref<512xf32, strided<[1], offset: 21554320>>, index) -> memref<64xf32, strided<[1], offset: ?>>
%64 = "memref.subview"(%50, %arg0) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, 0>, static_sizes = array<i64: 28, 56, 128>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x56x128xi8, strided<[7168, 128, 1], offset: 401408>>, index) -> memref<28x56x128xi8, strided<[7168, 128, 1], offset: ?>>
%65 = "memref.subview"(%51, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, -9223372036854775808>, static_sizes = array<i64: 28, 128, 64>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2408448>>, index, index) -> memref<28x128x64xi8, strided<[65536, 512, 1], offset: ?>>
"scf.for"(%32, %31, %30) ({
^bb0(%arg9: index):
"scf.for"(%32, %47, %30) ({
^bb0(%arg10: index):
"scf.for"(%32, %29, %30) ({
^bb0(%arg11: index):
"memref.store"(%46, %49, %arg9, %arg10, %arg11) <{nontemporal = false}> : (i32, memref<28x56x64xi32>, index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.for"(%32, %31, %30) ({
^bb0(%arg5: index):
"scf.for"(%32, %47, %30) ({
^bb0(%arg6: index):
"scf.for"(%32, %29, %30) ({
^bb0(%arg7: index):
"scf.for"(%32, %28, %30) ({
^bb0(%arg8: index):
%131 = "memref.load"(%64, %arg5, %arg6, %arg8) <{nontemporal = false}> : (memref<28x56x128xi8, strided<[7168, 128, 1], offset: ?>>, index, index, index) -> i8
%132 = "memref.load"(%65, %arg5, %arg8, %arg7) <{nontemporal = false}> : (memref<28x128x64xi8, strided<[65536, 512, 1], offset: ?>>, index, index, index) -> i8
%133 = "memref.load"(%49, %arg5, %arg6, %arg7) <{nontemporal = false}> : (memref<28x56x64xi32>, index, index, index) -> i32
%134 = "arith.extsi"(%131) : (i8) -> i32
%135 = "arith.extsi"(%132) : (i8) -> i32
%136 = "arith.muli"(%134, %135) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
%137 = "arith.addi"(%133, %136) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
"memref.store"(%137, %49, %arg5, %arg6, %arg7) <{nontemporal = false}> : (i32, memref<28x56x64xi32>, index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.for"(%32, %31, %30) ({
^bb0(%arg2: index):
"scf.for"(%32, %47, %30) ({
^bb0(%arg3: index):
"scf.for"(%32, %29, %30) ({
^bb0(%arg4: index):
%66 = "memref.load"(%63, %arg4) <{nontemporal = false}> : (memref<64xf32, strided<[1], offset: ?>>, index) -> f32
%67 = "memref.load"(%49, %arg2, %arg3, %arg4) <{nontemporal = false}> : (memref<28x56x64xi32>, index, index, index) -> i32
%68 = "arith.sitofp"(%67) : (i32) -> f32
%69 = "arith.mulf"(%68, %45) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%70 = "arith.addf"(%66, %69) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%71 = "arith.divf"(%70, %44) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%72 = "math.round"(%71) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32
%73 = "arith.addf"(%72, %43) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%74 = "arith.cmpf"(%73, %42) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1
%75 = "arith.cmpf"(%73, %41) <{fastmath = #arith.fastmath<none>, predicate = 9 : i64}> : (f32, f32) -> i1
%76 = "arith.select"(%74, %42, %73) : (i1, f32, f32) -> f32
%77 = "arith.select"(%75, %41, %76) : (i1, f32, f32) -> f32
%78 = "arith.fptosi"(%77) : (f32) -> i8
%79 = "arith.extsi"(%78) : (i8) -> i32
%80 = "arith.sitofp"(%79) : (i32) -> f32
%81 = "arith.mulf"(%80, %44) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%82 = "arith.divf"(%81, %40) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%83 = "arith.cmpf"(%82, %43) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1
%84 = "arith.negf"(%82) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32
%85 = "arith.select"(%83, %84, %82) : (i1, f32, f32) -> f32
%86 = "arith.cmpf"(%85, %2) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1
%87 = "arith.select"(%86, %27, %23) : (i1, f32, f32) -> f32
%88 = "arith.select"(%86, %14, %10) : (i1, f32, f32) -> f32
%89 = "arith.select"(%86, %26, %22) : (i1, f32, f32) -> f32
%90 = "arith.select"(%86, %13, %9) : (i1, f32, f32) -> f32
%91 = "arith.select"(%86, %25, %21) : (i1, f32, f32) -> f32
%92 = "arith.select"(%86, %12, %8) : (i1, f32, f32) -> f32
%93 = "arith.select"(%86, %24, %20) : (i1, f32, f32) -> f32
%94 = "arith.select"(%86, %11, %7) : (i1, f32, f32) -> f32
%95 = "arith.cmpf"(%85, %1) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1
%96 = "arith.select"(%95, %43, %19) : (i1, f32, f32) -> f32
%97 = "arith.select"(%95, %87, %18) : (i1, f32, f32) -> f32
%98 = "arith.select"(%95, %88, %6) : (i1, f32, f32) -> f32
%99 = "arith.select"(%95, %89, %17) : (i1, f32, f32) -> f32
%100 = "arith.select"(%95, %90, %5) : (i1, f32, f32) -> f32
%101 = "arith.select"(%95, %91, %16) : (i1, f32, f32) -> f32
%102 = "arith.select"(%95, %92, %4) : (i1, f32, f32) -> f32
%103 = "arith.select"(%95, %93, %15) : (i1, f32, f32) -> f32
%104 = "arith.select"(%95, %94, %3) : (i1, f32, f32) -> f32
%105 = "arith.select"(%95, %43, %39) : (i1, f32, f32) -> f32
%106 = "arith.cmpf"(%85, %0) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1
%107 = "math.fma"(%85, %103, %101) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%108 = "math.fma"(%85, %107, %99) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%109 = "math.fma"(%85, %108, %97) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%110 = "math.fma"(%85, %109, %96) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%111 = "math.fma"(%85, %104, %102) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%112 = "math.fma"(%85, %111, %100) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%113 = "math.fma"(%85, %112, %98) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%114 = "math.fma"(%85, %113, %39) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%115 = "arith.divf"(%110, %114) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%116 = "arith.addf"(%105, %115) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%117 = "arith.select"(%106, %116, %39) : (i1, f32, f32) -> f32
%118 = "arith.negf"(%117) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32
%119 = "arith.select"(%83, %118, %117) : (i1, f32, f32) -> f32
%120 = "arith.addf"(%119, %39) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%121 = "arith.mulf"(%81, %120) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%122 = "arith.mulf"(%121, %38) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%123 = "arith.divf"(%122, %37) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%124 = "math.round"(%123) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32
%125 = "arith.addf"(%124, %43) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%126 = "arith.cmpf"(%125, %42) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1
%127 = "arith.cmpf"(%125, %41) <{fastmath = #arith.fastmath<none>, predicate = 9 : i64}> : (f32, f32) -> i1
%128 = "arith.select"(%126, %42, %125) : (i1, f32, f32) -> f32
%129 = "arith.select"(%127, %41, %128) : (i1, f32, f32) -> f32
%130 = "arith.fptosi"(%129) : (f32) -> i8
"memref.store"(%130, %62, %arg2, %arg3, %arg4) <{nontemporal = false}> : (i8, memref<28x56x64xi8, strided<[28672, 512, 1], offset: ?>>, index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"func.return"() : () -> ()
}) {translation_info = #iree_codegen.translation_info<CPUDefault>} : () -> ()
%174 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%173 : tensor<1x56x56x512xf32>) outs(%98 : tensor<1x56x56x512xi8>) {
^
ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:1434:12: error: failed to run translation of source executable to target executable for backend #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 16 : i64, target_triple = "x86_64-unknown-unknown-eabi-elf"}>
%174 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%173 : tensor<1x56x56x512xf32>) outs(%98 : tensor<1x56x56x512xi8>) {
^
ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:24:3: note: called from
func.func @torch_jit(%arg0: tensor<1x3x224x224xf32>) -> tensor<1x1000xf32> {
^
ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:1434:12: note: see current operation:
"hal.executable.variant"() ({
"hal.executable.export"() ({
^bb0(%arg12: !hal.device):
%138 = "arith.constant"() <{value = 8 : index}> : () -> index
%139 = "arith.constant"() <{value = 2 : index}> : () -> index
%140 = "arith.constant"() <{value = 1 : index}> : () -> index
"hal.return"(%138, %139, %140) : (index, index, index) -> ()
}) {hal.interface.bindings = [#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>, #hal.interface.binding<0, 2>], layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>, ordinal = 0 : index, sym_name = "torch_jit_dispatch_24_quantized_batch_matmul_56x56x512x128_i8xi8xi32xi32xi32"} : () -> ()
"builtin.module"() ({
"func.func"() <{function_type = () -> (), sym_name = "torch_jit_dispatch_24_quantized_batch_matmul_56x56x512x128_i8xi8xi32xi32xi32"}> ({
%0 = "arith.constant"() <{value = 3.750000e+00 : f32}> : () -> f32
%1 = "arith.constant"() <{value = 2.000000e+00 : f32}> : () -> f32
%2 = "arith.constant"() <{value = 8.000000e-01 : f32}> : () -> f32
%3 = "arith.constant"() <{value = 0.0821908935 : f32}> : () -> f32
%4 = "arith.constant"() <{value = -0.583389878 : f32}> : () -> f32
%5 = "arith.constant"() <{value = 1.62705934 : f32}> : () -> f32
%6 = "arith.constant"() <{value = -2.0606916 : f32}> : () -> f32
%7 = "arith.constant"() <{value = 0.0572442785 : f32}> : () -> f32
%8 = "arith.constant"() <{value = -0.0883462652 : f32}> : () -> f32
%9 = "arith.constant"() <{value = 0.448369086 : f32}> : () -> f32
%10 = "arith.constant"() <{value = -3.276070e-01 : f32}> : () -> f32
%11 = "arith.constant"() <{value = 0.0739796459 : f32}> : () -> f32
%12 = "arith.constant"() <{value = -0.131808966 : f32}> : () -> f32
%13 = "arith.constant"() <{value = 0.519230127 : f32}> : () -> f32
%14 = "arith.constant"() <{value = -0.463513821 : f32}> : () -> f32
%15 = "arith.constant"() <{value = -1.71048032E-5 : f32}> : () -> f32
%16 = "arith.constant"() <{value = 2.53447099E-4 : f32}> : () -> f32
%17 = "arith.constant"() <{value = -0.00141373626 : f32}> : () -> f32
%18 = "arith.constant"() <{value = 0.00351961935 : f32}> : () -> f32
%19 = "arith.constant"() <{value = -0.00330093061 : f32}> : () -> f32
%20 = "arith.constant"() <{value = 0.0370645523 : f32}> : () -> f32
%21 = "arith.constant"() <{value = 0.118407398 : f32}> : () -> f32
%22 = "arith.constant"() <{value = -0.364721417 : f32}> : () -> f32
%23 = "arith.constant"() <{value = 1.12750685 : f32}> : () -> f32
%24 = "arith.constant"() <{value = 0.0258146804 : f32}> : () -> f32
%25 = "arith.constant"() <{value = 0.209741712 : f32}> : () -> f32
%26 = "arith.constant"() <{value = -0.523018539 : f32}> : () -> f32
%27 = "arith.constant"() <{value = 1.12837911 : f32}> : () -> f32
%28 = "arith.constant"() <{value = 128 : index}> : () -> index
%29 = "arith.constant"() <{value = 64 : index}> : () -> index
%30 = "arith.constant"() <{value = 1 : index}> : () -> index
%31 = "arith.constant"() <{value = 28 : index}> : () -> index
%32 = "arith.constant"() <{value = 0 : index}> : () -> index
%33 = "arith.constant"() <{value = 802816 : index}> : () -> index
%34 = "arith.constant"() <{value = 86217280 : index}> : () -> index
%35 = "arith.constant"() <{value = 2408448 : index}> : () -> index
%36 = "arith.constant"() <{value = 401408 : index}> : () -> index
%37 = "arith.constant"() <{value = 3.125000e-02 : f32}> : () -> f32
%38 = "arith.constant"() <{value = 5.000000e-01 : f32}> : () -> f32
%39 = "arith.constant"() <{value = 1.000000e+00 : f32}> : () -> f32
%40 = "arith.constant"() <{value = 1.41421354 : f32}> : () -> f32
%41 = "arith.constant"() <{value = 1.270000e+02 : f32}> : () -> f32
%42 = "arith.constant"() <{value = -1.280000e+02 : f32}> : () -> f32
%43 = "arith.constant"() <{value = 0.000000e+00 : f32}> : () -> f32
%44 = "arith.constant"() <{value = 6.250000e-02 : f32}> : () -> f32
%45 = "arith.constant"() <{value = 4.8828125E-4 : f32}> : () -> f32
%46 = "arith.constant"() <{value = 0 : i32}> : () -> i32
%47 = "arith.constant"() <{value = 56 : index}> : () -> index
%48 = "arith.constant"() <{value = 512 : index}> : () -> index
%49 = "memref.alloca"() <{alignment = 64 : i64, operandSegmentSizes = array<i32: 0, 0>}> : () -> memref<28x56x64xi32>
%50 = "hal.interface.binding.subspan"(%36) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x56x128xi8, strided<[7168, 128, 1], offset: 401408>>
"memref.assume_alignment"(%50) <{alignment = 64 : i32}> : (memref<56x56x128xi8, strided<[7168, 128, 1], offset: 401408>>) -> ()
%51 = "hal.interface.binding.subspan"(%35) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2408448>>
"memref.assume_alignment"(%51) <{alignment = 64 : i32}> : (memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2408448>>) -> ()
%52 = "hal.interface.binding.subspan"(%34) {alignment = 64 : index, binding = 1 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<512xf32, strided<[1], offset: 21554320>>
"memref.assume_alignment"(%52) <{alignment = 64 : i32}> : (memref<512xf32, strided<[1], offset: 21554320>>) -> ()
%53 = "hal.interface.binding.subspan"(%33) {alignment = 64 : index, binding = 2 : index, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x56x512xi8, strided<[28672, 512, 1], offset: 802816>>
"memref.assume_alignment"(%53) <{alignment = 64 : i32}> : (memref<56x56x512xi8, strided<[28672, 512, 1], offset: 802816>>) -> ()
%54 = "hal.interface.workgroup.id"() {dimension = 0 : index} : () -> index
%55 = "hal.interface.workgroup.count"() {dimension = 0 : index} : () -> index
%56 = "hal.interface.workgroup.id"() {dimension = 1 : index} : () -> index
%57 = "hal.interface.workgroup.count"() {dimension = 1 : index} : () -> index
%58 = "affine.apply"(%56) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index
%59 = "affine.apply"(%57) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index
"scf.for"(%58, %47, %59) ({
^bb0(%arg0: index):
%60 = "affine.apply"(%54) <{map = affine_map<()[s0] -> (s0 * 64)>}> : (index) -> index
%61 = "affine.apply"(%55) <{map = affine_map<()[s0] -> (s0 * 64)>}> : (index) -> index
"scf.for"(%60, %48, %61) ({
^bb0(%arg1: index):
%62 = "memref.subview"(%53, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, -9223372036854775808>, static_sizes = array<i64: 28, 56, 64>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x56x512xi8, strided<[28672, 512, 1], offset: 802816>>, index, index) -> memref<28x56x64xi8, strided<[28672, 512, 1], offset: ?>>
%63 = "memref.subview"(%52, %arg1) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808>, static_sizes = array<i64: 64>, static_strides = array<i64: 1>}> : (memref<512xf32, strided<[1], offset: 21554320>>, index) -> memref<64xf32, strided<[1], offset: ?>>
%64 = "memref.subview"(%50, %arg0) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, 0>, static_sizes = array<i64: 28, 56, 128>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x56x128xi8, strided<[7168, 128, 1], offset: 401408>>, index) -> memref<28x56x128xi8, strided<[7168, 128, 1], offset: ?>>
%65 = "memref.subview"(%51, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, -9223372036854775808>, static_sizes = array<i64: 28, 128, 64>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2408448>>, index, index) -> memref<28x128x64xi8, strided<[65536, 512, 1], offset: ?>>
"scf.for"(%32, %31, %30) ({
^bb0(%arg9: index):
"scf.for"(%32, %47, %30) ({
^bb0(%arg10: index):
"scf.for"(%32, %29, %30) ({
^bb0(%arg11: index):
"memref.store"(%46, %49, %arg9, %arg10, %arg11) <{nontemporal = false}> : (i32, memref<28x56x64xi32>, index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.for"(%32, %31, %30) ({
^bb0(%arg5: index):
"scf.for"(%32, %47, %30) ({
^bb0(%arg6: index):
"scf.for"(%32, %29, %30) ({
^bb0(%arg7: index):
"scf.for"(%32, %28, %30) ({
^bb0(%arg8: index):
%131 = "memref.load"(%64, %arg5, %arg6, %arg8) <{nontemporal = false}> : (memref<28x56x128xi8, strided<[7168, 128, 1], offset: ?>>, index, index, index) -> i8
%132 = "memref.load"(%65, %arg5, %arg8, %arg7) <{nontemporal = false}> : (memref<28x128x64xi8, strided<[65536, 512, 1], offset: ?>>, index, index, index) -> i8
%133 = "memref.load"(%49, %arg5, %arg6, %arg7) <{nontemporal = false}> : (memref<28x56x64xi32>, index, index, index) -> i32
%134 = "arith.extsi"(%131) : (i8) -> i32
%135 = "arith.extsi"(%132) : (i8) -> i32
%136 = "arith.muli"(%134, %135) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
%137 = "arith.addi"(%133, %136) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32
"memref.store"(%137, %49, %arg5, %arg6, %arg7) <{nontemporal = false}> : (i32, memref<28x56x64xi32>, index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.for"(%32, %31, %30) ({
^bb0(%arg2: index):
"scf.for"(%32, %47, %30) ({
^bb0(%arg3: index):
"scf.for"(%32, %29, %30) ({
^bb0(%arg4: index):
%66 = "memref.load"(%63, %arg4) <{nontemporal = false}> : (memref<64xf32, strided<[1], offset: ?>>, index) -> f32
%67 = "memref.load"(%49, %arg2, %arg3, %arg4) <{nontemporal = false}> : (memref<28x56x64xi32>, index, index, index) -> i32
%68 = "arith.sitofp"(%67) : (i32) -> f32
%69 = "arith.mulf"(%68, %45) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%70 = "arith.addf"(%66, %69) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%71 = "arith.divf"(%70, %44) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%72 = "math.round"(%71) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32
%73 = "arith.addf"(%72, %43) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%74 = "arith.cmpf"(%73, %42) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1
%75 = "arith.cmpf"(%73, %41) <{fastmath = #arith.fastmath<none>, predicate = 9 : i64}> : (f32, f32) -> i1
%76 = "arith.select"(%74, %42, %73) : (i1, f32, f32) -> f32
%77 = "arith.select"(%75, %41, %76) : (i1, f32, f32) -> f32
%78 = "arith.fptosi"(%77) : (f32) -> i8
%79 = "arith.extsi"(%78) : (i8) -> i32
%80 = "arith.sitofp"(%79) : (i32) -> f32
%81 = "arith.mulf"(%80, %44) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%82 = "arith.divf"(%81, %40) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%83 = "arith.cmpf"(%82, %43) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1
%84 = "arith.negf"(%82) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32
%85 = "arith.select"(%83, %84, %82) : (i1, f32, f32) -> f32
%86 = "arith.cmpf"(%85, %2) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1
%87 = "arith.select"(%86, %27, %23) : (i1, f32, f32) -> f32
%88 = "arith.select"(%86, %14, %10) : (i1, f32, f32) -> f32
%89 = "arith.select"(%86, %26, %22) : (i1, f32, f32) -> f32
%90 = "arith.select"(%86, %13, %9) : (i1, f32, f32) -> f32
%91 = "arith.select"(%86, %25, %21) : (i1, f32, f32) -> f32
%92 = "arith.select"(%86, %12, %8) : (i1, f32, f32) -> f32
%93 = "arith.select"(%86, %24, %20) : (i1, f32, f32) -> f32
%94 = "arith.select"(%86, %11, %7) : (i1, f32, f32) -> f32
%95 = "arith.cmpf"(%85, %1) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1
%96 = "arith.select"(%95, %43, %19) : (i1, f32, f32) -> f32
%97 = "arith.select"(%95, %87, %18) : (i1, f32, f32) -> f32
%98 = "arith.select"(%95, %88, %6) : (i1, f32, f32) -> f32
%99 = "arith.select"(%95, %89, %17) : (i1, f32, f32) -> f32
%100 = "arith.select"(%95, %90, %5) : (i1, f32, f32) -> f32
%101 = "arith.select"(%95, %91, %16) : (i1, f32, f32) -> f32
%102 = "arith.select"(%95, %92, %4) : (i1, f32, f32) -> f32
%103 = "arith.select"(%95, %93, %15) : (i1, f32, f32) -> f32
%104 = "arith.select"(%95, %94, %3) : (i1, f32, f32) -> f32
%105 = "arith.select"(%95, %43, %39) : (i1, f32, f32) -> f32
%106 = "arith.cmpf"(%85, %0) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1
%107 = "math.fma"(%85, %103, %101) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%108 = "math.fma"(%85, %107, %99) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%109 = "math.fma"(%85, %108, %97) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%110 = "math.fma"(%85, %109, %96) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%111 = "math.fma"(%85, %104, %102) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%112 = "math.fma"(%85, %111, %100) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%113 = "math.fma"(%85, %112, %98) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%114 = "math.fma"(%85, %113, %39) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32
%115 = "arith.divf"(%110, %114) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%116 = "arith.addf"(%105, %115) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%117 = "arith.select"(%106, %116, %39) : (i1, f32, f32) -> f32
%118 = "arith.negf"(%117) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32
%119 = "arith.select"(%83, %118, %117) : (i1, f32, f32) -> f32
%120 = "arith.addf"(%119, %39) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%121 = "arith.mulf"(%81, %120) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%122 = "arith.mulf"(%121, %38) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%123 = "arith.divf"(%122, %37) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%124 = "math.round"(%123) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32
%125 = "arith.addf"(%124, %43) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
%126 = "arith.cmpf"(%125, %42) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1
%127 = "arith.cmpf"(%125, %41) <{fastmath = #arith.fastmath<none>, predicate = 9 : i64}> : (f32, f32) -> i1
%128 = "arith.select"(%126, %42, %125) : (i1, f32, f32) -> f32
%129 = "arith.select"(%127, %41, %128) : (i1, f32, f32) -> f32
%130 = "arith.fptosi"(%129) : (f32) -> i8
"memref.store"(%130, %62, %arg2, %arg3, %arg4) <{nontemporal = false}> : (i8, memref<28x56x64xi8, strided<[28672, 512, 1], offset: ?>>, index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"scf.yield"() : () -> ()
}) : (index, index, index) -> ()
"func.return"() : () -> ()
}) {translation_info = #iree_codegen.translation_info<CPUDefault>} : () -> ()
}) : () -> ()
"hal.executable.variant_end"() : () -> ()
}) {sym_name = "embedded_elf_x86_64", target = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 16 : i64, target_triple = "x86_64-unknown-unknown-eabi-elf"}>} : () -> ()
%174 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%173 : tensor<1x56x56x512xf32>) outs(%98 : tensor<1x56x56x512xi8>) {
^
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment