Created
June 12, 2024 03:35
-
-
Save AmosLewis/87a575233f9fbec9d9fa7fc0279480bf to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
failed to translate executables | |
failed to translate executables | |
ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:979:12: error: 'func.func' op exceeded stack allocation limit of 32768 bytes for function. Got 401408 bytes
    %106 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%105 : tensor<1x56x56x512xf32>) outs(%98 : tensor<1x56x56x512xi8>) {
           ^
ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:24:3: note: called from
  func.func @torch_jit(%arg0: tensor<1x3x224x224xf32>) -> tensor<1x1000xf32> {
  ^
ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:979:12: note: see current operation:
"func.func"() <{function_type = () -> (), sym_name = "torch_jit_dispatch_13_quantized_batch_matmul_56x56x512x128_i8xi8xi32xi32xi32"}> ({ | |
%0 = "arith.constant"() <{value = 3.750000e+00 : f32}> : () -> f32 | |
%1 = "arith.constant"() <{value = 2.000000e+00 : f32}> : () -> f32 | |
%2 = "arith.constant"() <{value = 8.000000e-01 : f32}> : () -> f32 | |
%3 = "arith.constant"() <{value = 0.0821908935 : f32}> : () -> f32 | |
%4 = "arith.constant"() <{value = -0.583389878 : f32}> : () -> f32 | |
%5 = "arith.constant"() <{value = 1.62705934 : f32}> : () -> f32 | |
%6 = "arith.constant"() <{value = -2.0606916 : f32}> : () -> f32 | |
%7 = "arith.constant"() <{value = 0.0572442785 : f32}> : () -> f32 | |
%8 = "arith.constant"() <{value = -0.0883462652 : f32}> : () -> f32 | |
%9 = "arith.constant"() <{value = 0.448369086 : f32}> : () -> f32 | |
%10 = "arith.constant"() <{value = -3.276070e-01 : f32}> : () -> f32 | |
%11 = "arith.constant"() <{value = 0.0739796459 : f32}> : () -> f32 | |
%12 = "arith.constant"() <{value = -0.131808966 : f32}> : () -> f32 | |
%13 = "arith.constant"() <{value = 0.519230127 : f32}> : () -> f32 | |
%14 = "arith.constant"() <{value = -0.463513821 : f32}> : () -> f32 | |
%15 = "arith.constant"() <{value = -1.71048032E-5 : f32}> : () -> f32 | |
%16 = "arith.constant"() <{value = 2.53447099E-4 : f32}> : () -> f32 | |
%17 = "arith.constant"() <{value = -0.00141373626 : f32}> : () -> f32 | |
%18 = "arith.constant"() <{value = 0.00351961935 : f32}> : () -> f32 | |
%19 = "arith.constant"() <{value = -0.00330093061 : f32}> : () -> f32 | |
%20 = "arith.constant"() <{value = 0.0370645523 : f32}> : () -> f32 | |
%21 = "arith.constant"() <{value = 0.118407398 : f32}> : () -> f32 | |
%22 = "arith.constant"() <{value = -0.364721417 : f32}> : () -> f32 | |
%23 = "arith.constant"() <{value = 1.12750685 : f32}> : () -> f32 | |
%24 = "arith.constant"() <{value = 0.0258146804 : f32}> : () -> f32 | |
%25 = "arith.constant"() <{value = 0.209741712 : f32}> : () -> f32 | |
%26 = "arith.constant"() <{value = -0.523018539 : f32}> : () -> f32 | |
%27 = "arith.constant"() <{value = 1.12837911 : f32}> : () -> f32 | |
%28 = "arith.constant"() <{value = 128 : index}> : () -> index | |
%29 = "arith.constant"() <{value = 64 : index}> : () -> index | |
%30 = "arith.constant"() <{value = 1 : index}> : () -> index | |
%31 = "arith.constant"() <{value = 28 : index}> : () -> index | |
%32 = "arith.constant"() <{value = 0 : index}> : () -> index | |
%33 = "arith.constant"() <{value = 86213696 : index}> : () -> index | |
%34 = "arith.constant"() <{value = 2420992 : index}> : () -> index | |
%35 = "arith.constant"() <{value = 2019584 : index}> : () -> index | |
%36 = "arith.constant"() <{value = 5.000000e-01 : f32}> : () -> f32 | |
%37 = "arith.constant"() <{value = 1.000000e+00 : f32}> : () -> f32 | |
%38 = "arith.constant"() <{value = 1.41421354 : f32}> : () -> f32 | |
%39 = "arith.constant"() <{value = 1.270000e+02 : f32}> : () -> f32 | |
%40 = "arith.constant"() <{value = -1.280000e+02 : f32}> : () -> f32 | |
%41 = "arith.constant"() <{value = 0.000000e+00 : f32}> : () -> f32 | |
%42 = "arith.constant"() <{value = 1.250000e-01 : f32}> : () -> f32 | |
%43 = "arith.constant"() <{value = 9.765625E-4 : f32}> : () -> f32 | |
%44 = "arith.constant"() <{value = 0 : i32}> : () -> i32 | |
%45 = "arith.constant"() <{value = 56 : index}> : () -> index | |
%46 = "arith.constant"() <{value = 512 : index}> : () -> index | |
%47 = "memref.alloca"() <{alignment = 64 : i64, operandSegmentSizes = array<i32: 0, 0>}> : () -> memref<28x56x64xi32> | |
%48 = "hal.interface.binding.subspan"(%35) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x56x128xi8, strided<[7168, 128, 1], offset: 2019584>> | |
"memref.assume_alignment"(%48) <{alignment = 64 : i32}> : (memref<56x56x128xi8, strided<[7168, 128, 1], offset: 2019584>>) -> () | |
%49 = "hal.interface.binding.subspan"(%34) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2420992>> | |
"memref.assume_alignment"(%49) <{alignment = 64 : i32}> : (memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2420992>>) -> () | |
%50 = "hal.interface.binding.subspan"(%33) {alignment = 64 : index, binding = 1 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<512xf32, strided<[1], offset: 21553424>> | |
"memref.assume_alignment"(%50) <{alignment = 64 : i32}> : (memref<512xf32, strided<[1], offset: 21553424>>) -> () | |
%51 = "hal.interface.binding.subspan"(%32) {alignment = 64 : index, binding = 2 : index, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x56x512xi8> | |
"memref.assume_alignment"(%51) <{alignment = 64 : i32}> : (memref<56x56x512xi8>) -> () | |
%52 = "hal.interface.workgroup.id"() {dimension = 0 : index} : () -> index | |
%53 = "hal.interface.workgroup.count"() {dimension = 0 : index} : () -> index | |
%54 = "hal.interface.workgroup.id"() {dimension = 1 : index} : () -> index | |
%55 = "hal.interface.workgroup.count"() {dimension = 1 : index} : () -> index | |
%56 = "affine.apply"(%54) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index | |
%57 = "affine.apply"(%55) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index | |
"scf.for"(%56, %45, %57) ({ | |
^bb0(%arg0: index): | |
%58 = "affine.apply"(%52) <{map = affine_map<()[s0] -> (s0 * 64)>}> : (index) -> index | |
%59 = "affine.apply"(%53) <{map = affine_map<()[s0] -> (s0 * 64)>}> : (index) -> index | |
"scf.for"(%58, %46, %59) ({ | |
^bb0(%arg1: index): | |
%60 = "memref.subview"(%51, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, -9223372036854775808>, static_sizes = array<i64: 28, 56, 64>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x56x512xi8>, index, index) -> memref<28x56x64xi8, strided<[28672, 512, 1], offset: ?>> | |
%61 = "memref.subview"(%50, %arg1) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808>, static_sizes = array<i64: 64>, static_strides = array<i64: 1>}> : (memref<512xf32, strided<[1], offset: 21553424>>, index) -> memref<64xf32, strided<[1], offset: ?>> | |
%62 = "memref.subview"(%48, %arg0) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, 0>, static_sizes = array<i64: 28, 56, 128>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x56x128xi8, strided<[7168, 128, 1], offset: 2019584>>, index) -> memref<28x56x128xi8, strided<[7168, 128, 1], offset: ?>> | |
%63 = "memref.subview"(%49, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, -9223372036854775808>, static_sizes = array<i64: 28, 128, 64>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2420992>>, index, index) -> memref<28x128x64xi8, strided<[65536, 512, 1], offset: ?>> | |
"scf.for"(%32, %31, %30) ({ | |
^bb0(%arg9: index): | |
"scf.for"(%32, %45, %30) ({ | |
^bb0(%arg10: index): | |
"scf.for"(%32, %29, %30) ({ | |
^bb0(%arg11: index): | |
"memref.store"(%44, %47, %arg9, %arg10, %arg11) <{nontemporal = false}> : (i32, memref<28x56x64xi32>, index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.for"(%32, %31, %30) ({ | |
^bb0(%arg5: index): | |
"scf.for"(%32, %45, %30) ({ | |
^bb0(%arg6: index): | |
"scf.for"(%32, %29, %30) ({ | |
^bb0(%arg7: index): | |
"scf.for"(%32, %28, %30) ({ | |
^bb0(%arg8: index): | |
%129 = "memref.load"(%62, %arg5, %arg6, %arg8) <{nontemporal = false}> : (memref<28x56x128xi8, strided<[7168, 128, 1], offset: ?>>, index, index, index) -> i8 | |
%130 = "memref.load"(%63, %arg5, %arg8, %arg7) <{nontemporal = false}> : (memref<28x128x64xi8, strided<[65536, 512, 1], offset: ?>>, index, index, index) -> i8 | |
%131 = "memref.load"(%47, %arg5, %arg6, %arg7) <{nontemporal = false}> : (memref<28x56x64xi32>, index, index, index) -> i32 | |
%132 = "arith.extsi"(%129) : (i8) -> i32 | |
%133 = "arith.extsi"(%130) : (i8) -> i32 | |
%134 = "arith.muli"(%132, %133) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32 | |
%135 = "arith.addi"(%131, %134) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32 | |
"memref.store"(%135, %47, %arg5, %arg6, %arg7) <{nontemporal = false}> : (i32, memref<28x56x64xi32>, index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.for"(%32, %31, %30) ({ | |
^bb0(%arg2: index): | |
"scf.for"(%32, %45, %30) ({ | |
^bb0(%arg3: index): | |
"scf.for"(%32, %29, %30) ({ | |
^bb0(%arg4: index): | |
%64 = "memref.load"(%61, %arg4) <{nontemporal = false}> : (memref<64xf32, strided<[1], offset: ?>>, index) -> f32 | |
%65 = "memref.load"(%47, %arg2, %arg3, %arg4) <{nontemporal = false}> : (memref<28x56x64xi32>, index, index, index) -> i32 | |
%66 = "arith.sitofp"(%65) : (i32) -> f32 | |
%67 = "arith.mulf"(%66, %43) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%68 = "arith.addf"(%64, %67) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%69 = "arith.divf"(%68, %42) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%70 = "math.round"(%69) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
%71 = "arith.addf"(%70, %41) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%72 = "arith.cmpf"(%71, %40) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1 | |
%73 = "arith.cmpf"(%71, %39) <{fastmath = #arith.fastmath<none>, predicate = 9 : i64}> : (f32, f32) -> i1 | |
%74 = "arith.select"(%72, %40, %71) : (i1, f32, f32) -> f32 | |
%75 = "arith.select"(%73, %39, %74) : (i1, f32, f32) -> f32 | |
%76 = "arith.fptosi"(%75) : (f32) -> i8 | |
%77 = "arith.extsi"(%76) : (i8) -> i32 | |
%78 = "arith.sitofp"(%77) : (i32) -> f32 | |
%79 = "arith.mulf"(%78, %42) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%80 = "arith.divf"(%79, %38) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%81 = "arith.cmpf"(%80, %41) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1 | |
%82 = "arith.negf"(%80) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
%83 = "arith.select"(%81, %82, %80) : (i1, f32, f32) -> f32 | |
%84 = "arith.cmpf"(%83, %2) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1 | |
%85 = "arith.select"(%84, %27, %23) : (i1, f32, f32) -> f32 | |
%86 = "arith.select"(%84, %14, %10) : (i1, f32, f32) -> f32 | |
%87 = "arith.select"(%84, %26, %22) : (i1, f32, f32) -> f32 | |
%88 = "arith.select"(%84, %13, %9) : (i1, f32, f32) -> f32 | |
%89 = "arith.select"(%84, %25, %21) : (i1, f32, f32) -> f32 | |
%90 = "arith.select"(%84, %12, %8) : (i1, f32, f32) -> f32 | |
%91 = "arith.select"(%84, %24, %20) : (i1, f32, f32) -> f32 | |
%92 = "arith.select"(%84, %11, %7) : (i1, f32, f32) -> f32 | |
%93 = "arith.cmpf"(%83, %1) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1 | |
%94 = "arith.select"(%93, %41, %19) : (i1, f32, f32) -> f32 | |
%95 = "arith.select"(%93, %85, %18) : (i1, f32, f32) -> f32 | |
%96 = "arith.select"(%93, %86, %6) : (i1, f32, f32) -> f32 | |
%97 = "arith.select"(%93, %87, %17) : (i1, f32, f32) -> f32 | |
%98 = "arith.select"(%93, %88, %5) : (i1, f32, f32) -> f32 | |
%99 = "arith.select"(%93, %89, %16) : (i1, f32, f32) -> f32 | |
%100 = "arith.select"(%93, %90, %4) : (i1, f32, f32) -> f32 | |
%101 = "arith.select"(%93, %91, %15) : (i1, f32, f32) -> f32 | |
%102 = "arith.select"(%93, %92, %3) : (i1, f32, f32) -> f32 | |
%103 = "arith.select"(%93, %41, %37) : (i1, f32, f32) -> f32 | |
%104 = "arith.cmpf"(%83, %0) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1 | |
%105 = "math.fma"(%83, %101, %99) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%106 = "math.fma"(%83, %105, %97) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%107 = "math.fma"(%83, %106, %95) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%108 = "math.fma"(%83, %107, %94) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%109 = "math.fma"(%83, %102, %100) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%110 = "math.fma"(%83, %109, %98) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%111 = "math.fma"(%83, %110, %96) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%112 = "math.fma"(%83, %111, %37) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%113 = "arith.divf"(%108, %112) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%114 = "arith.addf"(%103, %113) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%115 = "arith.select"(%104, %114, %37) : (i1, f32, f32) -> f32 | |
%116 = "arith.negf"(%115) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
%117 = "arith.select"(%81, %116, %115) : (i1, f32, f32) -> f32 | |
%118 = "arith.addf"(%117, %37) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%119 = "arith.mulf"(%79, %118) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%120 = "arith.mulf"(%119, %36) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%121 = "arith.divf"(%120, %42) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%122 = "math.round"(%121) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
%123 = "arith.addf"(%122, %41) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%124 = "arith.cmpf"(%123, %40) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1 | |
%125 = "arith.cmpf"(%123, %39) <{fastmath = #arith.fastmath<none>, predicate = 9 : i64}> : (f32, f32) -> i1 | |
%126 = "arith.select"(%124, %40, %123) : (i1, f32, f32) -> f32 | |
%127 = "arith.select"(%125, %39, %126) : (i1, f32, f32) -> f32 | |
%128 = "arith.fptosi"(%127) : (f32) -> i8 | |
"memref.store"(%128, %60, %arg2, %arg3, %arg4) <{nontemporal = false}> : (i8, memref<28x56x64xi8, strided<[28672, 512, 1], offset: ?>>, index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"func.return"() : () -> () | |
}) {translation_info = #iree_codegen.translation_info<CPUDefault>} : () -> () | |
    %106 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%105 : tensor<1x56x56x512xf32>) outs(%98 : tensor<1x56x56x512xi8>) {
           ^
ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:979:12: error: failed to run translation of source executable to target executable for backend #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 16 : i64, target_triple = "x86_64-unknown-unknown-eabi-elf"}>
    %106 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%105 : tensor<1x56x56x512xf32>) outs(%98 : tensor<1x56x56x512xi8>) {
           ^
ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:24:3: note: called from
  func.func @torch_jit(%arg0: tensor<1x3x224x224xf32>) -> tensor<1x1000xf32> {
  ^
ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:979:12: note: see current operation:
"hal.executable.variant"() ({ | |
"hal.executable.export"() ({ | |
^bb0(%arg12: !hal.device): | |
%136 = "arith.constant"() <{value = 8 : index}> : () -> index | |
%137 = "arith.constant"() <{value = 2 : index}> : () -> index | |
%138 = "arith.constant"() <{value = 1 : index}> : () -> index | |
"hal.return"(%136, %137, %138) : (index, index, index) -> () | |
}) {hal.interface.bindings = [#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>, #hal.interface.binding<0, 2>], layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>, ordinal = 0 : index, sym_name = "torch_jit_dispatch_13_quantized_batch_matmul_56x56x512x128_i8xi8xi32xi32xi32"} : () -> () | |
"builtin.module"() ({ | |
"func.func"() <{function_type = () -> (), sym_name = "torch_jit_dispatch_13_quantized_batch_matmul_56x56x512x128_i8xi8xi32xi32xi32"}> ({ | |
%0 = "arith.constant"() <{value = 3.750000e+00 : f32}> : () -> f32 | |
%1 = "arith.constant"() <{value = 2.000000e+00 : f32}> : () -> f32 | |
%2 = "arith.constant"() <{value = 8.000000e-01 : f32}> : () -> f32 | |
%3 = "arith.constant"() <{value = 0.0821908935 : f32}> : () -> f32 | |
%4 = "arith.constant"() <{value = -0.583389878 : f32}> : () -> f32 | |
%5 = "arith.constant"() <{value = 1.62705934 : f32}> : () -> f32 | |
%6 = "arith.constant"() <{value = -2.0606916 : f32}> : () -> f32 | |
%7 = "arith.constant"() <{value = 0.0572442785 : f32}> : () -> f32 | |
%8 = "arith.constant"() <{value = -0.0883462652 : f32}> : () -> f32 | |
%9 = "arith.constant"() <{value = 0.448369086 : f32}> : () -> f32 | |
%10 = "arith.constant"() <{value = -3.276070e-01 : f32}> : () -> f32 | |
%11 = "arith.constant"() <{value = 0.0739796459 : f32}> : () -> f32 | |
%12 = "arith.constant"() <{value = -0.131808966 : f32}> : () -> f32 | |
%13 = "arith.constant"() <{value = 0.519230127 : f32}> : () -> f32 | |
%14 = "arith.constant"() <{value = -0.463513821 : f32}> : () -> f32 | |
%15 = "arith.constant"() <{value = -1.71048032E-5 : f32}> : () -> f32 | |
%16 = "arith.constant"() <{value = 2.53447099E-4 : f32}> : () -> f32 | |
%17 = "arith.constant"() <{value = -0.00141373626 : f32}> : () -> f32 | |
%18 = "arith.constant"() <{value = 0.00351961935 : f32}> : () -> f32 | |
%19 = "arith.constant"() <{value = -0.00330093061 : f32}> : () -> f32 | |
%20 = "arith.constant"() <{value = 0.0370645523 : f32}> : () -> f32 | |
%21 = "arith.constant"() <{value = 0.118407398 : f32}> : () -> f32 | |
%22 = "arith.constant"() <{value = -0.364721417 : f32}> : () -> f32 | |
%23 = "arith.constant"() <{value = 1.12750685 : f32}> : () -> f32 | |
%24 = "arith.constant"() <{value = 0.0258146804 : f32}> : () -> f32 | |
%25 = "arith.constant"() <{value = 0.209741712 : f32}> : () -> f32 | |
%26 = "arith.constant"() <{value = -0.523018539 : f32}> : () -> f32 | |
%27 = "arith.constant"() <{value = 1.12837911 : f32}> : () -> f32 | |
%28 = "arith.constant"() <{value = 128 : index}> : () -> index | |
%29 = "arith.constant"() <{value = 64 : index}> : () -> index | |
%30 = "arith.constant"() <{value = 1 : index}> : () -> index | |
%31 = "arith.constant"() <{value = 28 : index}> : () -> index | |
%32 = "arith.constant"() <{value = 0 : index}> : () -> index | |
%33 = "arith.constant"() <{value = 86213696 : index}> : () -> index | |
%34 = "arith.constant"() <{value = 2420992 : index}> : () -> index | |
%35 = "arith.constant"() <{value = 2019584 : index}> : () -> index | |
%36 = "arith.constant"() <{value = 5.000000e-01 : f32}> : () -> f32 | |
%37 = "arith.constant"() <{value = 1.000000e+00 : f32}> : () -> f32 | |
%38 = "arith.constant"() <{value = 1.41421354 : f32}> : () -> f32 | |
%39 = "arith.constant"() <{value = 1.270000e+02 : f32}> : () -> f32 | |
%40 = "arith.constant"() <{value = -1.280000e+02 : f32}> : () -> f32 | |
%41 = "arith.constant"() <{value = 0.000000e+00 : f32}> : () -> f32 | |
%42 = "arith.constant"() <{value = 1.250000e-01 : f32}> : () -> f32 | |
%43 = "arith.constant"() <{value = 9.765625E-4 : f32}> : () -> f32 | |
%44 = "arith.constant"() <{value = 0 : i32}> : () -> i32 | |
%45 = "arith.constant"() <{value = 56 : index}> : () -> index | |
%46 = "arith.constant"() <{value = 512 : index}> : () -> index | |
%47 = "memref.alloca"() <{alignment = 64 : i64, operandSegmentSizes = array<i32: 0, 0>}> : () -> memref<28x56x64xi32> | |
%48 = "hal.interface.binding.subspan"(%35) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x56x128xi8, strided<[7168, 128, 1], offset: 2019584>> | |
"memref.assume_alignment"(%48) <{alignment = 64 : i32}> : (memref<56x56x128xi8, strided<[7168, 128, 1], offset: 2019584>>) -> () | |
%49 = "hal.interface.binding.subspan"(%34) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2420992>> | |
"memref.assume_alignment"(%49) <{alignment = 64 : i32}> : (memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2420992>>) -> () | |
%50 = "hal.interface.binding.subspan"(%33) {alignment = 64 : index, binding = 1 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<512xf32, strided<[1], offset: 21553424>> | |
"memref.assume_alignment"(%50) <{alignment = 64 : i32}> : (memref<512xf32, strided<[1], offset: 21553424>>) -> () | |
%51 = "hal.interface.binding.subspan"(%32) {alignment = 64 : index, binding = 2 : index, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x56x512xi8> | |
"memref.assume_alignment"(%51) <{alignment = 64 : i32}> : (memref<56x56x512xi8>) -> () | |
%52 = "hal.interface.workgroup.id"() {dimension = 0 : index} : () -> index | |
%53 = "hal.interface.workgroup.count"() {dimension = 0 : index} : () -> index | |
%54 = "hal.interface.workgroup.id"() {dimension = 1 : index} : () -> index | |
%55 = "hal.interface.workgroup.count"() {dimension = 1 : index} : () -> index | |
%56 = "affine.apply"(%54) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index | |
%57 = "affine.apply"(%55) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index | |
"scf.for"(%56, %45, %57) ({ | |
^bb0(%arg0: index): | |
%58 = "affine.apply"(%52) <{map = affine_map<()[s0] -> (s0 * 64)>}> : (index) -> index | |
%59 = "affine.apply"(%53) <{map = affine_map<()[s0] -> (s0 * 64)>}> : (index) -> index | |
"scf.for"(%58, %46, %59) ({ | |
^bb0(%arg1: index): | |
%60 = "memref.subview"(%51, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, -9223372036854775808>, static_sizes = array<i64: 28, 56, 64>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x56x512xi8>, index, index) -> memref<28x56x64xi8, strided<[28672, 512, 1], offset: ?>> | |
%61 = "memref.subview"(%50, %arg1) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808>, static_sizes = array<i64: 64>, static_strides = array<i64: 1>}> : (memref<512xf32, strided<[1], offset: 21553424>>, index) -> memref<64xf32, strided<[1], offset: ?>> | |
%62 = "memref.subview"(%48, %arg0) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, 0>, static_sizes = array<i64: 28, 56, 128>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x56x128xi8, strided<[7168, 128, 1], offset: 2019584>>, index) -> memref<28x56x128xi8, strided<[7168, 128, 1], offset: ?>> | |
%63 = "memref.subview"(%49, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, -9223372036854775808>, static_sizes = array<i64: 28, 128, 64>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2420992>>, index, index) -> memref<28x128x64xi8, strided<[65536, 512, 1], offset: ?>> | |
"scf.for"(%32, %31, %30) ({ | |
^bb0(%arg9: index): | |
"scf.for"(%32, %45, %30) ({ | |
^bb0(%arg10: index): | |
"scf.for"(%32, %29, %30) ({ | |
^bb0(%arg11: index): | |
"memref.store"(%44, %47, %arg9, %arg10, %arg11) <{nontemporal = false}> : (i32, memref<28x56x64xi32>, index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.for"(%32, %31, %30) ({ | |
^bb0(%arg5: index): | |
"scf.for"(%32, %45, %30) ({ | |
^bb0(%arg6: index): | |
"scf.for"(%32, %29, %30) ({ | |
^bb0(%arg7: index): | |
"scf.for"(%32, %28, %30) ({ | |
^bb0(%arg8: index): | |
%129 = "memref.load"(%62, %arg5, %arg6, %arg8) <{nontemporal = false}> : (memref<28x56x128xi8, strided<[7168, 128, 1], offset: ?>>, index, index, index) -> i8 | |
%130 = "memref.load"(%63, %arg5, %arg8, %arg7) <{nontemporal = false}> : (memref<28x128x64xi8, strided<[65536, 512, 1], offset: ?>>, index, index, index) -> i8 | |
%131 = "memref.load"(%47, %arg5, %arg6, %arg7) <{nontemporal = false}> : (memref<28x56x64xi32>, index, index, index) -> i32 | |
%132 = "arith.extsi"(%129) : (i8) -> i32 | |
%133 = "arith.extsi"(%130) : (i8) -> i32 | |
%134 = "arith.muli"(%132, %133) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32 | |
%135 = "arith.addi"(%131, %134) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32 | |
"memref.store"(%135, %47, %arg5, %arg6, %arg7) <{nontemporal = false}> : (i32, memref<28x56x64xi32>, index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.for"(%32, %31, %30) ({ | |
^bb0(%arg2: index): | |
"scf.for"(%32, %45, %30) ({ | |
^bb0(%arg3: index): | |
"scf.for"(%32, %29, %30) ({ | |
^bb0(%arg4: index): | |
%64 = "memref.load"(%61, %arg4) <{nontemporal = false}> : (memref<64xf32, strided<[1], offset: ?>>, index) -> f32 | |
%65 = "memref.load"(%47, %arg2, %arg3, %arg4) <{nontemporal = false}> : (memref<28x56x64xi32>, index, index, index) -> i32 | |
%66 = "arith.sitofp"(%65) : (i32) -> f32 | |
%67 = "arith.mulf"(%66, %43) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%68 = "arith.addf"(%64, %67) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%69 = "arith.divf"(%68, %42) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%70 = "math.round"(%69) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
%71 = "arith.addf"(%70, %41) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%72 = "arith.cmpf"(%71, %40) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1 | |
%73 = "arith.cmpf"(%71, %39) <{fastmath = #arith.fastmath<none>, predicate = 9 : i64}> : (f32, f32) -> i1 | |
%74 = "arith.select"(%72, %40, %71) : (i1, f32, f32) -> f32 | |
%75 = "arith.select"(%73, %39, %74) : (i1, f32, f32) -> f32 | |
%76 = "arith.fptosi"(%75) : (f32) -> i8 | |
%77 = "arith.extsi"(%76) : (i8) -> i32 | |
%78 = "arith.sitofp"(%77) : (i32) -> f32 | |
%79 = "arith.mulf"(%78, %42) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%80 = "arith.divf"(%79, %38) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%81 = "arith.cmpf"(%80, %41) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1 | |
%82 = "arith.negf"(%80) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
%83 = "arith.select"(%81, %82, %80) : (i1, f32, f32) -> f32 | |
%84 = "arith.cmpf"(%83, %2) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1 | |
%85 = "arith.select"(%84, %27, %23) : (i1, f32, f32) -> f32 | |
%86 = "arith.select"(%84, %14, %10) : (i1, f32, f32) -> f32 | |
%87 = "arith.select"(%84, %26, %22) : (i1, f32, f32) -> f32 | |
%88 = "arith.select"(%84, %13, %9) : (i1, f32, f32) -> f32 | |
%89 = "arith.select"(%84, %25, %21) : (i1, f32, f32) -> f32 | |
%90 = "arith.select"(%84, %12, %8) : (i1, f32, f32) -> f32 | |
%91 = "arith.select"(%84, %24, %20) : (i1, f32, f32) -> f32 | |
%92 = "arith.select"(%84, %11, %7) : (i1, f32, f32) -> f32 | |
%93 = "arith.cmpf"(%83, %1) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1 | |
%94 = "arith.select"(%93, %41, %19) : (i1, f32, f32) -> f32 | |
%95 = "arith.select"(%93, %85, %18) : (i1, f32, f32) -> f32 | |
%96 = "arith.select"(%93, %86, %6) : (i1, f32, f32) -> f32 | |
%97 = "arith.select"(%93, %87, %17) : (i1, f32, f32) -> f32 | |
%98 = "arith.select"(%93, %88, %5) : (i1, f32, f32) -> f32 | |
%99 = "arith.select"(%93, %89, %16) : (i1, f32, f32) -> f32 | |
%100 = "arith.select"(%93, %90, %4) : (i1, f32, f32) -> f32 | |
%101 = "arith.select"(%93, %91, %15) : (i1, f32, f32) -> f32 | |
%102 = "arith.select"(%93, %92, %3) : (i1, f32, f32) -> f32 | |
%103 = "arith.select"(%93, %41, %37) : (i1, f32, f32) -> f32 | |
%104 = "arith.cmpf"(%83, %0) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1 | |
%105 = "math.fma"(%83, %101, %99) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%106 = "math.fma"(%83, %105, %97) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%107 = "math.fma"(%83, %106, %95) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%108 = "math.fma"(%83, %107, %94) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%109 = "math.fma"(%83, %102, %100) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%110 = "math.fma"(%83, %109, %98) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%111 = "math.fma"(%83, %110, %96) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%112 = "math.fma"(%83, %111, %37) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%113 = "arith.divf"(%108, %112) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%114 = "arith.addf"(%103, %113) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%115 = "arith.select"(%104, %114, %37) : (i1, f32, f32) -> f32 | |
%116 = "arith.negf"(%115) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
%117 = "arith.select"(%81, %116, %115) : (i1, f32, f32) -> f32 | |
%118 = "arith.addf"(%117, %37) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%119 = "arith.mulf"(%79, %118) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%120 = "arith.mulf"(%119, %36) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%121 = "arith.divf"(%120, %42) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%122 = "math.round"(%121) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
%123 = "arith.addf"(%122, %41) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%124 = "arith.cmpf"(%123, %40) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1 | |
%125 = "arith.cmpf"(%123, %39) <{fastmath = #arith.fastmath<none>, predicate = 9 : i64}> : (f32, f32) -> i1 | |
%126 = "arith.select"(%124, %40, %123) : (i1, f32, f32) -> f32 | |
%127 = "arith.select"(%125, %39, %126) : (i1, f32, f32) -> f32 | |
%128 = "arith.fptosi"(%127) : (f32) -> i8 | |
"memref.store"(%128, %60, %arg2, %arg3, %arg4) <{nontemporal = false}> : (i8, memref<28x56x64xi8, strided<[28672, 512, 1], offset: ?>>, index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"func.return"() : () -> () | |
}) {translation_info = #iree_codegen.translation_info<CPUDefault>} : () -> () | |
}) : () -> () | |
"hal.executable.variant_end"() : () -> () | |
}) {sym_name = "embedded_elf_x86_64", target = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 16 : i64, target_triple = "x86_64-unknown-unknown-eabi-elf"}>} : () -> () | |
%106 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%105 : tensor<1x56x56x512xf32>) outs(%98 : tensor<1x56x56x512xi8>) { | |
^ | |
ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:1434:12: error: 'func.func' op exceeded stack allocation limit of 32768 bytes for function. Got 401408 bytes | |
%174 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%173 : tensor<1x56x56x512xf32>) outs(%98 : tensor<1x56x56x512xi8>) { | |
^ | |
ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:24:3: note: called from | |
func.func @torch_jit(%arg0: tensor<1x3x224x224xf32>) -> tensor<1x1000xf32> { | |
^ | |
ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:1434:12: note: see current operation: | |
"func.func"() <{function_type = () -> (), sym_name = "torch_jit_dispatch_24_quantized_batch_matmul_56x56x512x128_i8xi8xi32xi32xi32"}> ({ | |
%0 = "arith.constant"() <{value = 3.750000e+00 : f32}> : () -> f32 | |
%1 = "arith.constant"() <{value = 2.000000e+00 : f32}> : () -> f32 | |
%2 = "arith.constant"() <{value = 8.000000e-01 : f32}> : () -> f32 | |
%3 = "arith.constant"() <{value = 0.0821908935 : f32}> : () -> f32 | |
%4 = "arith.constant"() <{value = -0.583389878 : f32}> : () -> f32 | |
%5 = "arith.constant"() <{value = 1.62705934 : f32}> : () -> f32 | |
%6 = "arith.constant"() <{value = -2.0606916 : f32}> : () -> f32 | |
%7 = "arith.constant"() <{value = 0.0572442785 : f32}> : () -> f32 | |
%8 = "arith.constant"() <{value = -0.0883462652 : f32}> : () -> f32 | |
%9 = "arith.constant"() <{value = 0.448369086 : f32}> : () -> f32 | |
%10 = "arith.constant"() <{value = -3.276070e-01 : f32}> : () -> f32 | |
%11 = "arith.constant"() <{value = 0.0739796459 : f32}> : () -> f32 | |
%12 = "arith.constant"() <{value = -0.131808966 : f32}> : () -> f32 | |
%13 = "arith.constant"() <{value = 0.519230127 : f32}> : () -> f32 | |
%14 = "arith.constant"() <{value = -0.463513821 : f32}> : () -> f32 | |
%15 = "arith.constant"() <{value = -1.71048032E-5 : f32}> : () -> f32 | |
%16 = "arith.constant"() <{value = 2.53447099E-4 : f32}> : () -> f32 | |
%17 = "arith.constant"() <{value = -0.00141373626 : f32}> : () -> f32 | |
%18 = "arith.constant"() <{value = 0.00351961935 : f32}> : () -> f32 | |
%19 = "arith.constant"() <{value = -0.00330093061 : f32}> : () -> f32 | |
%20 = "arith.constant"() <{value = 0.0370645523 : f32}> : () -> f32 | |
%21 = "arith.constant"() <{value = 0.118407398 : f32}> : () -> f32 | |
%22 = "arith.constant"() <{value = -0.364721417 : f32}> : () -> f32 | |
%23 = "arith.constant"() <{value = 1.12750685 : f32}> : () -> f32 | |
%24 = "arith.constant"() <{value = 0.0258146804 : f32}> : () -> f32 | |
%25 = "arith.constant"() <{value = 0.209741712 : f32}> : () -> f32 | |
%26 = "arith.constant"() <{value = -0.523018539 : f32}> : () -> f32 | |
%27 = "arith.constant"() <{value = 1.12837911 : f32}> : () -> f32 | |
%28 = "arith.constant"() <{value = 128 : index}> : () -> index | |
%29 = "arith.constant"() <{value = 64 : index}> : () -> index | |
%30 = "arith.constant"() <{value = 1 : index}> : () -> index | |
%31 = "arith.constant"() <{value = 28 : index}> : () -> index | |
%32 = "arith.constant"() <{value = 0 : index}> : () -> index | |
%33 = "arith.constant"() <{value = 802816 : index}> : () -> index | |
%34 = "arith.constant"() <{value = 86217280 : index}> : () -> index | |
%35 = "arith.constant"() <{value = 2408448 : index}> : () -> index | |
%36 = "arith.constant"() <{value = 401408 : index}> : () -> index | |
%37 = "arith.constant"() <{value = 3.125000e-02 : f32}> : () -> f32 | |
%38 = "arith.constant"() <{value = 5.000000e-01 : f32}> : () -> f32 | |
%39 = "arith.constant"() <{value = 1.000000e+00 : f32}> : () -> f32 | |
%40 = "arith.constant"() <{value = 1.41421354 : f32}> : () -> f32 | |
%41 = "arith.constant"() <{value = 1.270000e+02 : f32}> : () -> f32 | |
%42 = "arith.constant"() <{value = -1.280000e+02 : f32}> : () -> f32 | |
%43 = "arith.constant"() <{value = 0.000000e+00 : f32}> : () -> f32 | |
%44 = "arith.constant"() <{value = 6.250000e-02 : f32}> : () -> f32 | |
%45 = "arith.constant"() <{value = 4.8828125E-4 : f32}> : () -> f32 | |
%46 = "arith.constant"() <{value = 0 : i32}> : () -> i32 | |
%47 = "arith.constant"() <{value = 56 : index}> : () -> index | |
%48 = "arith.constant"() <{value = 512 : index}> : () -> index | |
%49 = "memref.alloca"() <{alignment = 64 : i64, operandSegmentSizes = array<i32: 0, 0>}> : () -> memref<28x56x64xi32> | |
%50 = "hal.interface.binding.subspan"(%36) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x56x128xi8, strided<[7168, 128, 1], offset: 401408>> | |
"memref.assume_alignment"(%50) <{alignment = 64 : i32}> : (memref<56x56x128xi8, strided<[7168, 128, 1], offset: 401408>>) -> () | |
%51 = "hal.interface.binding.subspan"(%35) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2408448>> | |
"memref.assume_alignment"(%51) <{alignment = 64 : i32}> : (memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2408448>>) -> () | |
%52 = "hal.interface.binding.subspan"(%34) {alignment = 64 : index, binding = 1 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<512xf32, strided<[1], offset: 21554320>> | |
"memref.assume_alignment"(%52) <{alignment = 64 : i32}> : (memref<512xf32, strided<[1], offset: 21554320>>) -> () | |
%53 = "hal.interface.binding.subspan"(%33) {alignment = 64 : index, binding = 2 : index, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x56x512xi8, strided<[28672, 512, 1], offset: 802816>> | |
"memref.assume_alignment"(%53) <{alignment = 64 : i32}> : (memref<56x56x512xi8, strided<[28672, 512, 1], offset: 802816>>) -> () | |
%54 = "hal.interface.workgroup.id"() {dimension = 0 : index} : () -> index | |
%55 = "hal.interface.workgroup.count"() {dimension = 0 : index} : () -> index | |
%56 = "hal.interface.workgroup.id"() {dimension = 1 : index} : () -> index | |
%57 = "hal.interface.workgroup.count"() {dimension = 1 : index} : () -> index | |
%58 = "affine.apply"(%56) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index | |
%59 = "affine.apply"(%57) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index | |
"scf.for"(%58, %47, %59) ({ | |
^bb0(%arg0: index): | |
%60 = "affine.apply"(%54) <{map = affine_map<()[s0] -> (s0 * 64)>}> : (index) -> index | |
%61 = "affine.apply"(%55) <{map = affine_map<()[s0] -> (s0 * 64)>}> : (index) -> index | |
"scf.for"(%60, %48, %61) ({ | |
^bb0(%arg1: index): | |
%62 = "memref.subview"(%53, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, -9223372036854775808>, static_sizes = array<i64: 28, 56, 64>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x56x512xi8, strided<[28672, 512, 1], offset: 802816>>, index, index) -> memref<28x56x64xi8, strided<[28672, 512, 1], offset: ?>> | |
%63 = "memref.subview"(%52, %arg1) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808>, static_sizes = array<i64: 64>, static_strides = array<i64: 1>}> : (memref<512xf32, strided<[1], offset: 21554320>>, index) -> memref<64xf32, strided<[1], offset: ?>> | |
%64 = "memref.subview"(%50, %arg0) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, 0>, static_sizes = array<i64: 28, 56, 128>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x56x128xi8, strided<[7168, 128, 1], offset: 401408>>, index) -> memref<28x56x128xi8, strided<[7168, 128, 1], offset: ?>> | |
%65 = "memref.subview"(%51, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, -9223372036854775808>, static_sizes = array<i64: 28, 128, 64>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2408448>>, index, index) -> memref<28x128x64xi8, strided<[65536, 512, 1], offset: ?>> | |
"scf.for"(%32, %31, %30) ({ | |
^bb0(%arg9: index): | |
"scf.for"(%32, %47, %30) ({ | |
^bb0(%arg10: index): | |
"scf.for"(%32, %29, %30) ({ | |
^bb0(%arg11: index): | |
"memref.store"(%46, %49, %arg9, %arg10, %arg11) <{nontemporal = false}> : (i32, memref<28x56x64xi32>, index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.for"(%32, %31, %30) ({ | |
^bb0(%arg5: index): | |
"scf.for"(%32, %47, %30) ({ | |
^bb0(%arg6: index): | |
"scf.for"(%32, %29, %30) ({ | |
^bb0(%arg7: index): | |
"scf.for"(%32, %28, %30) ({ | |
^bb0(%arg8: index): | |
%131 = "memref.load"(%64, %arg5, %arg6, %arg8) <{nontemporal = false}> : (memref<28x56x128xi8, strided<[7168, 128, 1], offset: ?>>, index, index, index) -> i8 | |
%132 = "memref.load"(%65, %arg5, %arg8, %arg7) <{nontemporal = false}> : (memref<28x128x64xi8, strided<[65536, 512, 1], offset: ?>>, index, index, index) -> i8 | |
%133 = "memref.load"(%49, %arg5, %arg6, %arg7) <{nontemporal = false}> : (memref<28x56x64xi32>, index, index, index) -> i32 | |
%134 = "arith.extsi"(%131) : (i8) -> i32 | |
%135 = "arith.extsi"(%132) : (i8) -> i32 | |
%136 = "arith.muli"(%134, %135) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32 | |
%137 = "arith.addi"(%133, %136) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32 | |
"memref.store"(%137, %49, %arg5, %arg6, %arg7) <{nontemporal = false}> : (i32, memref<28x56x64xi32>, index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.for"(%32, %31, %30) ({ | |
^bb0(%arg2: index): | |
"scf.for"(%32, %47, %30) ({ | |
^bb0(%arg3: index): | |
"scf.for"(%32, %29, %30) ({ | |
^bb0(%arg4: index): | |
%66 = "memref.load"(%63, %arg4) <{nontemporal = false}> : (memref<64xf32, strided<[1], offset: ?>>, index) -> f32 | |
%67 = "memref.load"(%49, %arg2, %arg3, %arg4) <{nontemporal = false}> : (memref<28x56x64xi32>, index, index, index) -> i32 | |
%68 = "arith.sitofp"(%67) : (i32) -> f32 | |
%69 = "arith.mulf"(%68, %45) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%70 = "arith.addf"(%66, %69) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%71 = "arith.divf"(%70, %44) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%72 = "math.round"(%71) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
%73 = "arith.addf"(%72, %43) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%74 = "arith.cmpf"(%73, %42) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1 | |
%75 = "arith.cmpf"(%73, %41) <{fastmath = #arith.fastmath<none>, predicate = 9 : i64}> : (f32, f32) -> i1 | |
%76 = "arith.select"(%74, %42, %73) : (i1, f32, f32) -> f32 | |
%77 = "arith.select"(%75, %41, %76) : (i1, f32, f32) -> f32 | |
%78 = "arith.fptosi"(%77) : (f32) -> i8 | |
%79 = "arith.extsi"(%78) : (i8) -> i32 | |
%80 = "arith.sitofp"(%79) : (i32) -> f32 | |
%81 = "arith.mulf"(%80, %44) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%82 = "arith.divf"(%81, %40) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%83 = "arith.cmpf"(%82, %43) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1 | |
%84 = "arith.negf"(%82) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
%85 = "arith.select"(%83, %84, %82) : (i1, f32, f32) -> f32 | |
%86 = "arith.cmpf"(%85, %2) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1 | |
%87 = "arith.select"(%86, %27, %23) : (i1, f32, f32) -> f32 | |
%88 = "arith.select"(%86, %14, %10) : (i1, f32, f32) -> f32 | |
%89 = "arith.select"(%86, %26, %22) : (i1, f32, f32) -> f32 | |
%90 = "arith.select"(%86, %13, %9) : (i1, f32, f32) -> f32 | |
%91 = "arith.select"(%86, %25, %21) : (i1, f32, f32) -> f32 | |
%92 = "arith.select"(%86, %12, %8) : (i1, f32, f32) -> f32 | |
%93 = "arith.select"(%86, %24, %20) : (i1, f32, f32) -> f32 | |
%94 = "arith.select"(%86, %11, %7) : (i1, f32, f32) -> f32 | |
%95 = "arith.cmpf"(%85, %1) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1 | |
%96 = "arith.select"(%95, %43, %19) : (i1, f32, f32) -> f32 | |
%97 = "arith.select"(%95, %87, %18) : (i1, f32, f32) -> f32 | |
%98 = "arith.select"(%95, %88, %6) : (i1, f32, f32) -> f32 | |
%99 = "arith.select"(%95, %89, %17) : (i1, f32, f32) -> f32 | |
%100 = "arith.select"(%95, %90, %5) : (i1, f32, f32) -> f32 | |
%101 = "arith.select"(%95, %91, %16) : (i1, f32, f32) -> f32 | |
%102 = "arith.select"(%95, %92, %4) : (i1, f32, f32) -> f32 | |
%103 = "arith.select"(%95, %93, %15) : (i1, f32, f32) -> f32 | |
%104 = "arith.select"(%95, %94, %3) : (i1, f32, f32) -> f32 | |
%105 = "arith.select"(%95, %43, %39) : (i1, f32, f32) -> f32 | |
%106 = "arith.cmpf"(%85, %0) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1 | |
%107 = "math.fma"(%85, %103, %101) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%108 = "math.fma"(%85, %107, %99) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%109 = "math.fma"(%85, %108, %97) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%110 = "math.fma"(%85, %109, %96) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%111 = "math.fma"(%85, %104, %102) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%112 = "math.fma"(%85, %111, %100) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%113 = "math.fma"(%85, %112, %98) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%114 = "math.fma"(%85, %113, %39) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%115 = "arith.divf"(%110, %114) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%116 = "arith.addf"(%105, %115) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%117 = "arith.select"(%106, %116, %39) : (i1, f32, f32) -> f32 | |
%118 = "arith.negf"(%117) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
%119 = "arith.select"(%83, %118, %117) : (i1, f32, f32) -> f32 | |
%120 = "arith.addf"(%119, %39) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%121 = "arith.mulf"(%81, %120) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%122 = "arith.mulf"(%121, %38) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%123 = "arith.divf"(%122, %37) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%124 = "math.round"(%123) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
%125 = "arith.addf"(%124, %43) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%126 = "arith.cmpf"(%125, %42) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1 | |
%127 = "arith.cmpf"(%125, %41) <{fastmath = #arith.fastmath<none>, predicate = 9 : i64}> : (f32, f32) -> i1 | |
%128 = "arith.select"(%126, %42, %125) : (i1, f32, f32) -> f32 | |
%129 = "arith.select"(%127, %41, %128) : (i1, f32, f32) -> f32 | |
%130 = "arith.fptosi"(%129) : (f32) -> i8 | |
"memref.store"(%130, %62, %arg2, %arg3, %arg4) <{nontemporal = false}> : (i8, memref<28x56x64xi8, strided<[28672, 512, 1], offset: ?>>, index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"func.return"() : () -> () | |
}) {translation_info = #iree_codegen.translation_info<CPUDefault>} : () -> () | |
%174 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%173 : tensor<1x56x56x512xf32>) outs(%98 : tensor<1x56x56x512xi8>) { | |
^ | |
ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:1434:12: error: failed to run translation of source executable to target executable for backend #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 16 : i64, target_triple = "x86_64-unknown-unknown-eabi-elf"}> | |
%174 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%173 : tensor<1x56x56x512xf32>) outs(%98 : tensor<1x56x56x512xi8>) { | |
^ | |
ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:24:3: note: called from | |
func.func @torch_jit(%arg0: tensor<1x3x224x224xf32>) -> tensor<1x1000xf32> { | |
^ | |
ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:1434:12: note: see current operation: | |
"hal.executable.variant"() ({ | |
"hal.executable.export"() ({ | |
^bb0(%arg12: !hal.device): | |
%138 = "arith.constant"() <{value = 8 : index}> : () -> index | |
%139 = "arith.constant"() <{value = 2 : index}> : () -> index | |
%140 = "arith.constant"() <{value = 1 : index}> : () -> index | |
"hal.return"(%138, %139, %140) : (index, index, index) -> () | |
}) {hal.interface.bindings = [#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>, #hal.interface.binding<0, 2>], layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>, ordinal = 0 : index, sym_name = "torch_jit_dispatch_24_quantized_batch_matmul_56x56x512x128_i8xi8xi32xi32xi32"} : () -> () | |
"builtin.module"() ({ | |
"func.func"() <{function_type = () -> (), sym_name = "torch_jit_dispatch_24_quantized_batch_matmul_56x56x512x128_i8xi8xi32xi32xi32"}> ({ | |
%0 = "arith.constant"() <{value = 3.750000e+00 : f32}> : () -> f32 | |
%1 = "arith.constant"() <{value = 2.000000e+00 : f32}> : () -> f32 | |
%2 = "arith.constant"() <{value = 8.000000e-01 : f32}> : () -> f32 | |
%3 = "arith.constant"() <{value = 0.0821908935 : f32}> : () -> f32 | |
%4 = "arith.constant"() <{value = -0.583389878 : f32}> : () -> f32 | |
%5 = "arith.constant"() <{value = 1.62705934 : f32}> : () -> f32 | |
%6 = "arith.constant"() <{value = -2.0606916 : f32}> : () -> f32 | |
%7 = "arith.constant"() <{value = 0.0572442785 : f32}> : () -> f32 | |
%8 = "arith.constant"() <{value = -0.0883462652 : f32}> : () -> f32 | |
%9 = "arith.constant"() <{value = 0.448369086 : f32}> : () -> f32 | |
%10 = "arith.constant"() <{value = -3.276070e-01 : f32}> : () -> f32 | |
%11 = "arith.constant"() <{value = 0.0739796459 : f32}> : () -> f32 | |
%12 = "arith.constant"() <{value = -0.131808966 : f32}> : () -> f32 | |
%13 = "arith.constant"() <{value = 0.519230127 : f32}> : () -> f32 | |
%14 = "arith.constant"() <{value = -0.463513821 : f32}> : () -> f32 | |
%15 = "arith.constant"() <{value = -1.71048032E-5 : f32}> : () -> f32 | |
%16 = "arith.constant"() <{value = 2.53447099E-4 : f32}> : () -> f32 | |
%17 = "arith.constant"() <{value = -0.00141373626 : f32}> : () -> f32 | |
%18 = "arith.constant"() <{value = 0.00351961935 : f32}> : () -> f32 | |
%19 = "arith.constant"() <{value = -0.00330093061 : f32}> : () -> f32 | |
%20 = "arith.constant"() <{value = 0.0370645523 : f32}> : () -> f32 | |
%21 = "arith.constant"() <{value = 0.118407398 : f32}> : () -> f32 | |
%22 = "arith.constant"() <{value = -0.364721417 : f32}> : () -> f32 | |
%23 = "arith.constant"() <{value = 1.12750685 : f32}> : () -> f32 | |
%24 = "arith.constant"() <{value = 0.0258146804 : f32}> : () -> f32 | |
%25 = "arith.constant"() <{value = 0.209741712 : f32}> : () -> f32 | |
%26 = "arith.constant"() <{value = -0.523018539 : f32}> : () -> f32 | |
%27 = "arith.constant"() <{value = 1.12837911 : f32}> : () -> f32 | |
%28 = "arith.constant"() <{value = 128 : index}> : () -> index | |
%29 = "arith.constant"() <{value = 64 : index}> : () -> index | |
%30 = "arith.constant"() <{value = 1 : index}> : () -> index | |
%31 = "arith.constant"() <{value = 28 : index}> : () -> index | |
%32 = "arith.constant"() <{value = 0 : index}> : () -> index | |
%33 = "arith.constant"() <{value = 802816 : index}> : () -> index | |
%34 = "arith.constant"() <{value = 86217280 : index}> : () -> index | |
%35 = "arith.constant"() <{value = 2408448 : index}> : () -> index | |
%36 = "arith.constant"() <{value = 401408 : index}> : () -> index | |
%37 = "arith.constant"() <{value = 3.125000e-02 : f32}> : () -> f32 | |
%38 = "arith.constant"() <{value = 5.000000e-01 : f32}> : () -> f32 | |
%39 = "arith.constant"() <{value = 1.000000e+00 : f32}> : () -> f32 | |
%40 = "arith.constant"() <{value = 1.41421354 : f32}> : () -> f32 | |
%41 = "arith.constant"() <{value = 1.270000e+02 : f32}> : () -> f32 | |
%42 = "arith.constant"() <{value = -1.280000e+02 : f32}> : () -> f32 | |
%43 = "arith.constant"() <{value = 0.000000e+00 : f32}> : () -> f32 | |
%44 = "arith.constant"() <{value = 6.250000e-02 : f32}> : () -> f32 | |
%45 = "arith.constant"() <{value = 4.8828125E-4 : f32}> : () -> f32 | |
%46 = "arith.constant"() <{value = 0 : i32}> : () -> i32 | |
%47 = "arith.constant"() <{value = 56 : index}> : () -> index | |
%48 = "arith.constant"() <{value = 512 : index}> : () -> index | |
%49 = "memref.alloca"() <{alignment = 64 : i64, operandSegmentSizes = array<i32: 0, 0>}> : () -> memref<28x56x64xi32> | |
%50 = "hal.interface.binding.subspan"(%36) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x56x128xi8, strided<[7168, 128, 1], offset: 401408>> | |
"memref.assume_alignment"(%50) <{alignment = 64 : i32}> : (memref<56x56x128xi8, strided<[7168, 128, 1], offset: 401408>>) -> () | |
%51 = "hal.interface.binding.subspan"(%35) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2408448>> | |
"memref.assume_alignment"(%51) <{alignment = 64 : i32}> : (memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2408448>>) -> () | |
%52 = "hal.interface.binding.subspan"(%34) {alignment = 64 : index, binding = 1 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<512xf32, strided<[1], offset: 21554320>> | |
"memref.assume_alignment"(%52) <{alignment = 64 : i32}> : (memref<512xf32, strided<[1], offset: 21554320>>) -> () | |
%53 = "hal.interface.binding.subspan"(%33) {alignment = 64 : index, binding = 2 : index, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x56x512xi8, strided<[28672, 512, 1], offset: 802816>> | |
"memref.assume_alignment"(%53) <{alignment = 64 : i32}> : (memref<56x56x512xi8, strided<[28672, 512, 1], offset: 802816>>) -> () | |
%54 = "hal.interface.workgroup.id"() {dimension = 0 : index} : () -> index | |
%55 = "hal.interface.workgroup.count"() {dimension = 0 : index} : () -> index | |
%56 = "hal.interface.workgroup.id"() {dimension = 1 : index} : () -> index | |
%57 = "hal.interface.workgroup.count"() {dimension = 1 : index} : () -> index | |
%58 = "affine.apply"(%56) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index | |
%59 = "affine.apply"(%57) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index | |
"scf.for"(%58, %47, %59) ({ | |
^bb0(%arg0: index): | |
%60 = "affine.apply"(%54) <{map = affine_map<()[s0] -> (s0 * 64)>}> : (index) -> index | |
%61 = "affine.apply"(%55) <{map = affine_map<()[s0] -> (s0 * 64)>}> : (index) -> index | |
"scf.for"(%60, %48, %61) ({ | |
^bb0(%arg1: index): | |
%62 = "memref.subview"(%53, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, -9223372036854775808>, static_sizes = array<i64: 28, 56, 64>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x56x512xi8, strided<[28672, 512, 1], offset: 802816>>, index, index) -> memref<28x56x64xi8, strided<[28672, 512, 1], offset: ?>> | |
%63 = "memref.subview"(%52, %arg1) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808>, static_sizes = array<i64: 64>, static_strides = array<i64: 1>}> : (memref<512xf32, strided<[1], offset: 21554320>>, index) -> memref<64xf32, strided<[1], offset: ?>> | |
%64 = "memref.subview"(%50, %arg0) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, 0>, static_sizes = array<i64: 28, 56, 128>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x56x128xi8, strided<[7168, 128, 1], offset: 401408>>, index) -> memref<28x56x128xi8, strided<[7168, 128, 1], offset: ?>> | |
%65 = "memref.subview"(%51, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, -9223372036854775808>, static_sizes = array<i64: 28, 128, 64>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2408448>>, index, index) -> memref<28x128x64xi8, strided<[65536, 512, 1], offset: ?>> | |
"scf.for"(%32, %31, %30) ({ | |
^bb0(%arg9: index): | |
"scf.for"(%32, %47, %30) ({ | |
^bb0(%arg10: index): | |
"scf.for"(%32, %29, %30) ({ | |
^bb0(%arg11: index): | |
"memref.store"(%46, %49, %arg9, %arg10, %arg11) <{nontemporal = false}> : (i32, memref<28x56x64xi32>, index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.for"(%32, %31, %30) ({ | |
^bb0(%arg5: index): | |
"scf.for"(%32, %47, %30) ({ | |
^bb0(%arg6: index): | |
"scf.for"(%32, %29, %30) ({ | |
^bb0(%arg7: index): | |
"scf.for"(%32, %28, %30) ({ | |
^bb0(%arg8: index): | |
%131 = "memref.load"(%64, %arg5, %arg6, %arg8) <{nontemporal = false}> : (memref<28x56x128xi8, strided<[7168, 128, 1], offset: ?>>, index, index, index) -> i8 | |
%132 = "memref.load"(%65, %arg5, %arg8, %arg7) <{nontemporal = false}> : (memref<28x128x64xi8, strided<[65536, 512, 1], offset: ?>>, index, index, index) -> i8 | |
%133 = "memref.load"(%49, %arg5, %arg6, %arg7) <{nontemporal = false}> : (memref<28x56x64xi32>, index, index, index) -> i32 | |
%134 = "arith.extsi"(%131) : (i8) -> i32 | |
%135 = "arith.extsi"(%132) : (i8) -> i32 | |
%136 = "arith.muli"(%134, %135) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32 | |
%137 = "arith.addi"(%133, %136) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32 | |
"memref.store"(%137, %49, %arg5, %arg6, %arg7) <{nontemporal = false}> : (i32, memref<28x56x64xi32>, index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.for"(%32, %31, %30) ({ | |
^bb0(%arg2: index): | |
"scf.for"(%32, %47, %30) ({ | |
^bb0(%arg3: index): | |
"scf.for"(%32, %29, %30) ({ | |
^bb0(%arg4: index): | |
%66 = "memref.load"(%63, %arg4) <{nontemporal = false}> : (memref<64xf32, strided<[1], offset: ?>>, index) -> f32 | |
%67 = "memref.load"(%49, %arg2, %arg3, %arg4) <{nontemporal = false}> : (memref<28x56x64xi32>, index, index, index) -> i32 | |
%68 = "arith.sitofp"(%67) : (i32) -> f32 | |
%69 = "arith.mulf"(%68, %45) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%70 = "arith.addf"(%66, %69) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%71 = "arith.divf"(%70, %44) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%72 = "math.round"(%71) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
%73 = "arith.addf"(%72, %43) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%74 = "arith.cmpf"(%73, %42) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1 | |
%75 = "arith.cmpf"(%73, %41) <{fastmath = #arith.fastmath<none>, predicate = 9 : i64}> : (f32, f32) -> i1 | |
%76 = "arith.select"(%74, %42, %73) : (i1, f32, f32) -> f32 | |
%77 = "arith.select"(%75, %41, %76) : (i1, f32, f32) -> f32 | |
%78 = "arith.fptosi"(%77) : (f32) -> i8 | |
%79 = "arith.extsi"(%78) : (i8) -> i32 | |
%80 = "arith.sitofp"(%79) : (i32) -> f32 | |
%81 = "arith.mulf"(%80, %44) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%82 = "arith.divf"(%81, %40) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%83 = "arith.cmpf"(%82, %43) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1 | |
%84 = "arith.negf"(%82) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
%85 = "arith.select"(%83, %84, %82) : (i1, f32, f32) -> f32 | |
%86 = "arith.cmpf"(%85, %2) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1 | |
%87 = "arith.select"(%86, %27, %23) : (i1, f32, f32) -> f32 | |
%88 = "arith.select"(%86, %14, %10) : (i1, f32, f32) -> f32 | |
%89 = "arith.select"(%86, %26, %22) : (i1, f32, f32) -> f32 | |
%90 = "arith.select"(%86, %13, %9) : (i1, f32, f32) -> f32 | |
%91 = "arith.select"(%86, %25, %21) : (i1, f32, f32) -> f32 | |
%92 = "arith.select"(%86, %12, %8) : (i1, f32, f32) -> f32 | |
%93 = "arith.select"(%86, %24, %20) : (i1, f32, f32) -> f32 | |
%94 = "arith.select"(%86, %11, %7) : (i1, f32, f32) -> f32 | |
%95 = "arith.cmpf"(%85, %1) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1 | |
%96 = "arith.select"(%95, %43, %19) : (i1, f32, f32) -> f32 | |
%97 = "arith.select"(%95, %87, %18) : (i1, f32, f32) -> f32 | |
%98 = "arith.select"(%95, %88, %6) : (i1, f32, f32) -> f32 | |
%99 = "arith.select"(%95, %89, %17) : (i1, f32, f32) -> f32 | |
%100 = "arith.select"(%95, %90, %5) : (i1, f32, f32) -> f32 | |
%101 = "arith.select"(%95, %91, %16) : (i1, f32, f32) -> f32 | |
%102 = "arith.select"(%95, %92, %4) : (i1, f32, f32) -> f32 | |
%103 = "arith.select"(%95, %93, %15) : (i1, f32, f32) -> f32 | |
%104 = "arith.select"(%95, %94, %3) : (i1, f32, f32) -> f32 | |
%105 = "arith.select"(%95, %43, %39) : (i1, f32, f32) -> f32 | |
%106 = "arith.cmpf"(%85, %0) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1 | |
%107 = "math.fma"(%85, %103, %101) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%108 = "math.fma"(%85, %107, %99) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%109 = "math.fma"(%85, %108, %97) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%110 = "math.fma"(%85, %109, %96) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%111 = "math.fma"(%85, %104, %102) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%112 = "math.fma"(%85, %111, %100) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%113 = "math.fma"(%85, %112, %98) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%114 = "math.fma"(%85, %113, %39) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
%115 = "arith.divf"(%110, %114) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%116 = "arith.addf"(%105, %115) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%117 = "arith.select"(%106, %116, %39) : (i1, f32, f32) -> f32 | |
%118 = "arith.negf"(%117) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
%119 = "arith.select"(%83, %118, %117) : (i1, f32, f32) -> f32 | |
%120 = "arith.addf"(%119, %39) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%121 = "arith.mulf"(%81, %120) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%122 = "arith.mulf"(%121, %38) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%123 = "arith.divf"(%122, %37) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%124 = "math.round"(%123) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
%125 = "arith.addf"(%124, %43) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
%126 = "arith.cmpf"(%125, %42) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1 | |
%127 = "arith.cmpf"(%125, %41) <{fastmath = #arith.fastmath<none>, predicate = 9 : i64}> : (f32, f32) -> i1 | |
%128 = "arith.select"(%126, %42, %125) : (i1, f32, f32) -> f32 | |
%129 = "arith.select"(%127, %41, %128) : (i1, f32, f32) -> f32 | |
%130 = "arith.fptosi"(%129) : (f32) -> i8 | |
"memref.store"(%130, %62, %arg2, %arg3, %arg4) <{nontemporal = false}> : (i8, memref<28x56x64xi8, strided<[28672, 512, 1], offset: ?>>, index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"scf.yield"() : () -> () | |
}) : (index, index, index) -> () | |
"func.return"() : () -> () | |
}) {translation_info = #iree_codegen.translation_info<CPUDefault>} : () -> () | |
}) : () -> () | |
"hal.executable.variant_end"() : () -> () | |
}) {sym_name = "embedded_elf_x86_64", target = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 16 : i64, target_triple = "x86_64-unknown-unknown-eabi-elf"}>} : () -> () | |
%174 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%173 : tensor<1x56x56x512xf32>) outs(%98 : tensor<1x56x56x512xi8>) { | |
^ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment