Created June 12, 2024 03:35
-
-
Save AmosLewis/87a575233f9fbec9d9fa7fc0279480bf to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| failed to translate executables | |
| failed to translate executables | |
| ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:979:12: error: 'func.func' op exceeded stack allocation limit of 32768 bytes for function. Got 401408 bytes | |
| %106 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%105 : tensor<1x56x56x512xf32>) outs(%98 : tensor<1x56x56x512xi8>) { | |
| ^ | |
| ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:24:3: note: called from | |
| func.func @torch_jit(%arg0: tensor<1x3x224x224xf32>) -> tensor<1x1000xf32> { | |
| ^ | |
| ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:979:12: note: see current operation: | |
| "func.func"() <{function_type = () -> (), sym_name = "torch_jit_dispatch_13_quantized_batch_matmul_56x56x512x128_i8xi8xi32xi32xi32"}> ({ | |
| %0 = "arith.constant"() <{value = 3.750000e+00 : f32}> : () -> f32 | |
| %1 = "arith.constant"() <{value = 2.000000e+00 : f32}> : () -> f32 | |
| %2 = "arith.constant"() <{value = 8.000000e-01 : f32}> : () -> f32 | |
| %3 = "arith.constant"() <{value = 0.0821908935 : f32}> : () -> f32 | |
| %4 = "arith.constant"() <{value = -0.583389878 : f32}> : () -> f32 | |
| %5 = "arith.constant"() <{value = 1.62705934 : f32}> : () -> f32 | |
| %6 = "arith.constant"() <{value = -2.0606916 : f32}> : () -> f32 | |
| %7 = "arith.constant"() <{value = 0.0572442785 : f32}> : () -> f32 | |
| %8 = "arith.constant"() <{value = -0.0883462652 : f32}> : () -> f32 | |
| %9 = "arith.constant"() <{value = 0.448369086 : f32}> : () -> f32 | |
| %10 = "arith.constant"() <{value = -3.276070e-01 : f32}> : () -> f32 | |
| %11 = "arith.constant"() <{value = 0.0739796459 : f32}> : () -> f32 | |
| %12 = "arith.constant"() <{value = -0.131808966 : f32}> : () -> f32 | |
| %13 = "arith.constant"() <{value = 0.519230127 : f32}> : () -> f32 | |
| %14 = "arith.constant"() <{value = -0.463513821 : f32}> : () -> f32 | |
| %15 = "arith.constant"() <{value = -1.71048032E-5 : f32}> : () -> f32 | |
| %16 = "arith.constant"() <{value = 2.53447099E-4 : f32}> : () -> f32 | |
| %17 = "arith.constant"() <{value = -0.00141373626 : f32}> : () -> f32 | |
| %18 = "arith.constant"() <{value = 0.00351961935 : f32}> : () -> f32 | |
| %19 = "arith.constant"() <{value = -0.00330093061 : f32}> : () -> f32 | |
| %20 = "arith.constant"() <{value = 0.0370645523 : f32}> : () -> f32 | |
| %21 = "arith.constant"() <{value = 0.118407398 : f32}> : () -> f32 | |
| %22 = "arith.constant"() <{value = -0.364721417 : f32}> : () -> f32 | |
| %23 = "arith.constant"() <{value = 1.12750685 : f32}> : () -> f32 | |
| %24 = "arith.constant"() <{value = 0.0258146804 : f32}> : () -> f32 | |
| %25 = "arith.constant"() <{value = 0.209741712 : f32}> : () -> f32 | |
| %26 = "arith.constant"() <{value = -0.523018539 : f32}> : () -> f32 | |
| %27 = "arith.constant"() <{value = 1.12837911 : f32}> : () -> f32 | |
| %28 = "arith.constant"() <{value = 128 : index}> : () -> index | |
| %29 = "arith.constant"() <{value = 64 : index}> : () -> index | |
| %30 = "arith.constant"() <{value = 1 : index}> : () -> index | |
| %31 = "arith.constant"() <{value = 28 : index}> : () -> index | |
| %32 = "arith.constant"() <{value = 0 : index}> : () -> index | |
| %33 = "arith.constant"() <{value = 86213696 : index}> : () -> index | |
| %34 = "arith.constant"() <{value = 2420992 : index}> : () -> index | |
| %35 = "arith.constant"() <{value = 2019584 : index}> : () -> index | |
| %36 = "arith.constant"() <{value = 5.000000e-01 : f32}> : () -> f32 | |
| %37 = "arith.constant"() <{value = 1.000000e+00 : f32}> : () -> f32 | |
| %38 = "arith.constant"() <{value = 1.41421354 : f32}> : () -> f32 | |
| %39 = "arith.constant"() <{value = 1.270000e+02 : f32}> : () -> f32 | |
| %40 = "arith.constant"() <{value = -1.280000e+02 : f32}> : () -> f32 | |
| %41 = "arith.constant"() <{value = 0.000000e+00 : f32}> : () -> f32 | |
| %42 = "arith.constant"() <{value = 1.250000e-01 : f32}> : () -> f32 | |
| %43 = "arith.constant"() <{value = 9.765625E-4 : f32}> : () -> f32 | |
| %44 = "arith.constant"() <{value = 0 : i32}> : () -> i32 | |
| %45 = "arith.constant"() <{value = 56 : index}> : () -> index | |
| %46 = "arith.constant"() <{value = 512 : index}> : () -> index | |
| %47 = "memref.alloca"() <{alignment = 64 : i64, operandSegmentSizes = array<i32: 0, 0>}> : () -> memref<28x56x64xi32> | |
| %48 = "hal.interface.binding.subspan"(%35) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x56x128xi8, strided<[7168, 128, 1], offset: 2019584>> | |
| "memref.assume_alignment"(%48) <{alignment = 64 : i32}> : (memref<56x56x128xi8, strided<[7168, 128, 1], offset: 2019584>>) -> () | |
| %49 = "hal.interface.binding.subspan"(%34) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2420992>> | |
| "memref.assume_alignment"(%49) <{alignment = 64 : i32}> : (memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2420992>>) -> () | |
| %50 = "hal.interface.binding.subspan"(%33) {alignment = 64 : index, binding = 1 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<512xf32, strided<[1], offset: 21553424>> | |
| "memref.assume_alignment"(%50) <{alignment = 64 : i32}> : (memref<512xf32, strided<[1], offset: 21553424>>) -> () | |
| %51 = "hal.interface.binding.subspan"(%32) {alignment = 64 : index, binding = 2 : index, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x56x512xi8> | |
| "memref.assume_alignment"(%51) <{alignment = 64 : i32}> : (memref<56x56x512xi8>) -> () | |
| %52 = "hal.interface.workgroup.id"() {dimension = 0 : index} : () -> index | |
| %53 = "hal.interface.workgroup.count"() {dimension = 0 : index} : () -> index | |
| %54 = "hal.interface.workgroup.id"() {dimension = 1 : index} : () -> index | |
| %55 = "hal.interface.workgroup.count"() {dimension = 1 : index} : () -> index | |
| %56 = "affine.apply"(%54) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index | |
| %57 = "affine.apply"(%55) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index | |
| "scf.for"(%56, %45, %57) ({ | |
| ^bb0(%arg0: index): | |
| %58 = "affine.apply"(%52) <{map = affine_map<()[s0] -> (s0 * 64)>}> : (index) -> index | |
| %59 = "affine.apply"(%53) <{map = affine_map<()[s0] -> (s0 * 64)>}> : (index) -> index | |
| "scf.for"(%58, %46, %59) ({ | |
| ^bb0(%arg1: index): | |
| %60 = "memref.subview"(%51, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, -9223372036854775808>, static_sizes = array<i64: 28, 56, 64>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x56x512xi8>, index, index) -> memref<28x56x64xi8, strided<[28672, 512, 1], offset: ?>> | |
| %61 = "memref.subview"(%50, %arg1) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808>, static_sizes = array<i64: 64>, static_strides = array<i64: 1>}> : (memref<512xf32, strided<[1], offset: 21553424>>, index) -> memref<64xf32, strided<[1], offset: ?>> | |
| %62 = "memref.subview"(%48, %arg0) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, 0>, static_sizes = array<i64: 28, 56, 128>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x56x128xi8, strided<[7168, 128, 1], offset: 2019584>>, index) -> memref<28x56x128xi8, strided<[7168, 128, 1], offset: ?>> | |
| %63 = "memref.subview"(%49, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, -9223372036854775808>, static_sizes = array<i64: 28, 128, 64>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2420992>>, index, index) -> memref<28x128x64xi8, strided<[65536, 512, 1], offset: ?>> | |
| "scf.for"(%32, %31, %30) ({ | |
| ^bb0(%arg9: index): | |
| "scf.for"(%32, %45, %30) ({ | |
| ^bb0(%arg10: index): | |
| "scf.for"(%32, %29, %30) ({ | |
| ^bb0(%arg11: index): | |
| "memref.store"(%44, %47, %arg9, %arg10, %arg11) <{nontemporal = false}> : (i32, memref<28x56x64xi32>, index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.for"(%32, %31, %30) ({ | |
| ^bb0(%arg5: index): | |
| "scf.for"(%32, %45, %30) ({ | |
| ^bb0(%arg6: index): | |
| "scf.for"(%32, %29, %30) ({ | |
| ^bb0(%arg7: index): | |
| "scf.for"(%32, %28, %30) ({ | |
| ^bb0(%arg8: index): | |
| %129 = "memref.load"(%62, %arg5, %arg6, %arg8) <{nontemporal = false}> : (memref<28x56x128xi8, strided<[7168, 128, 1], offset: ?>>, index, index, index) -> i8 | |
| %130 = "memref.load"(%63, %arg5, %arg8, %arg7) <{nontemporal = false}> : (memref<28x128x64xi8, strided<[65536, 512, 1], offset: ?>>, index, index, index) -> i8 | |
| %131 = "memref.load"(%47, %arg5, %arg6, %arg7) <{nontemporal = false}> : (memref<28x56x64xi32>, index, index, index) -> i32 | |
| %132 = "arith.extsi"(%129) : (i8) -> i32 | |
| %133 = "arith.extsi"(%130) : (i8) -> i32 | |
| %134 = "arith.muli"(%132, %133) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32 | |
| %135 = "arith.addi"(%131, %134) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32 | |
| "memref.store"(%135, %47, %arg5, %arg6, %arg7) <{nontemporal = false}> : (i32, memref<28x56x64xi32>, index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.for"(%32, %31, %30) ({ | |
| ^bb0(%arg2: index): | |
| "scf.for"(%32, %45, %30) ({ | |
| ^bb0(%arg3: index): | |
| "scf.for"(%32, %29, %30) ({ | |
| ^bb0(%arg4: index): | |
| %64 = "memref.load"(%61, %arg4) <{nontemporal = false}> : (memref<64xf32, strided<[1], offset: ?>>, index) -> f32 | |
| %65 = "memref.load"(%47, %arg2, %arg3, %arg4) <{nontemporal = false}> : (memref<28x56x64xi32>, index, index, index) -> i32 | |
| %66 = "arith.sitofp"(%65) : (i32) -> f32 | |
| %67 = "arith.mulf"(%66, %43) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %68 = "arith.addf"(%64, %67) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %69 = "arith.divf"(%68, %42) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %70 = "math.round"(%69) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
| %71 = "arith.addf"(%70, %41) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %72 = "arith.cmpf"(%71, %40) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1 | |
| %73 = "arith.cmpf"(%71, %39) <{fastmath = #arith.fastmath<none>, predicate = 9 : i64}> : (f32, f32) -> i1 | |
| %74 = "arith.select"(%72, %40, %71) : (i1, f32, f32) -> f32 | |
| %75 = "arith.select"(%73, %39, %74) : (i1, f32, f32) -> f32 | |
| %76 = "arith.fptosi"(%75) : (f32) -> i8 | |
| %77 = "arith.extsi"(%76) : (i8) -> i32 | |
| %78 = "arith.sitofp"(%77) : (i32) -> f32 | |
| %79 = "arith.mulf"(%78, %42) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %80 = "arith.divf"(%79, %38) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %81 = "arith.cmpf"(%80, %41) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1 | |
| %82 = "arith.negf"(%80) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
| %83 = "arith.select"(%81, %82, %80) : (i1, f32, f32) -> f32 | |
| %84 = "arith.cmpf"(%83, %2) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1 | |
| %85 = "arith.select"(%84, %27, %23) : (i1, f32, f32) -> f32 | |
| %86 = "arith.select"(%84, %14, %10) : (i1, f32, f32) -> f32 | |
| %87 = "arith.select"(%84, %26, %22) : (i1, f32, f32) -> f32 | |
| %88 = "arith.select"(%84, %13, %9) : (i1, f32, f32) -> f32 | |
| %89 = "arith.select"(%84, %25, %21) : (i1, f32, f32) -> f32 | |
| %90 = "arith.select"(%84, %12, %8) : (i1, f32, f32) -> f32 | |
| %91 = "arith.select"(%84, %24, %20) : (i1, f32, f32) -> f32 | |
| %92 = "arith.select"(%84, %11, %7) : (i1, f32, f32) -> f32 | |
| %93 = "arith.cmpf"(%83, %1) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1 | |
| %94 = "arith.select"(%93, %41, %19) : (i1, f32, f32) -> f32 | |
| %95 = "arith.select"(%93, %85, %18) : (i1, f32, f32) -> f32 | |
| %96 = "arith.select"(%93, %86, %6) : (i1, f32, f32) -> f32 | |
| %97 = "arith.select"(%93, %87, %17) : (i1, f32, f32) -> f32 | |
| %98 = "arith.select"(%93, %88, %5) : (i1, f32, f32) -> f32 | |
| %99 = "arith.select"(%93, %89, %16) : (i1, f32, f32) -> f32 | |
| %100 = "arith.select"(%93, %90, %4) : (i1, f32, f32) -> f32 | |
| %101 = "arith.select"(%93, %91, %15) : (i1, f32, f32) -> f32 | |
| %102 = "arith.select"(%93, %92, %3) : (i1, f32, f32) -> f32 | |
| %103 = "arith.select"(%93, %41, %37) : (i1, f32, f32) -> f32 | |
| %104 = "arith.cmpf"(%83, %0) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1 | |
| %105 = "math.fma"(%83, %101, %99) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %106 = "math.fma"(%83, %105, %97) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %107 = "math.fma"(%83, %106, %95) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %108 = "math.fma"(%83, %107, %94) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %109 = "math.fma"(%83, %102, %100) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %110 = "math.fma"(%83, %109, %98) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %111 = "math.fma"(%83, %110, %96) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %112 = "math.fma"(%83, %111, %37) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %113 = "arith.divf"(%108, %112) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %114 = "arith.addf"(%103, %113) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %115 = "arith.select"(%104, %114, %37) : (i1, f32, f32) -> f32 | |
| %116 = "arith.negf"(%115) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
| %117 = "arith.select"(%81, %116, %115) : (i1, f32, f32) -> f32 | |
| %118 = "arith.addf"(%117, %37) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %119 = "arith.mulf"(%79, %118) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %120 = "arith.mulf"(%119, %36) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %121 = "arith.divf"(%120, %42) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %122 = "math.round"(%121) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
| %123 = "arith.addf"(%122, %41) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %124 = "arith.cmpf"(%123, %40) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1 | |
| %125 = "arith.cmpf"(%123, %39) <{fastmath = #arith.fastmath<none>, predicate = 9 : i64}> : (f32, f32) -> i1 | |
| %126 = "arith.select"(%124, %40, %123) : (i1, f32, f32) -> f32 | |
| %127 = "arith.select"(%125, %39, %126) : (i1, f32, f32) -> f32 | |
| %128 = "arith.fptosi"(%127) : (f32) -> i8 | |
| "memref.store"(%128, %60, %arg2, %arg3, %arg4) <{nontemporal = false}> : (i8, memref<28x56x64xi8, strided<[28672, 512, 1], offset: ?>>, index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "func.return"() : () -> () | |
| }) {translation_info = #iree_codegen.translation_info<CPUDefault>} : () -> () | |
| %106 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%105 : tensor<1x56x56x512xf32>) outs(%98 : tensor<1x56x56x512xi8>) { | |
| ^ | |
| ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:979:12: error: failed to run translation of source executable to target executable for backend #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 16 : i64, target_triple = "x86_64-unknown-unknown-eabi-elf"}> | |
| %106 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%105 : tensor<1x56x56x512xf32>) outs(%98 : tensor<1x56x56x512xi8>) { | |
| ^ | |
| ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:24:3: note: called from | |
| func.func @torch_jit(%arg0: tensor<1x3x224x224xf32>) -> tensor<1x1000xf32> { | |
| ^ | |
| ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:979:12: note: see current operation: | |
| "hal.executable.variant"() ({ | |
| "hal.executable.export"() ({ | |
| ^bb0(%arg12: !hal.device): | |
| %136 = "arith.constant"() <{value = 8 : index}> : () -> index | |
| %137 = "arith.constant"() <{value = 2 : index}> : () -> index | |
| %138 = "arith.constant"() <{value = 1 : index}> : () -> index | |
| "hal.return"(%136, %137, %138) : (index, index, index) -> () | |
| }) {hal.interface.bindings = [#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>, #hal.interface.binding<0, 2>], layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>, ordinal = 0 : index, sym_name = "torch_jit_dispatch_13_quantized_batch_matmul_56x56x512x128_i8xi8xi32xi32xi32"} : () -> () | |
| "builtin.module"() ({ | |
| "func.func"() <{function_type = () -> (), sym_name = "torch_jit_dispatch_13_quantized_batch_matmul_56x56x512x128_i8xi8xi32xi32xi32"}> ({ | |
| %0 = "arith.constant"() <{value = 3.750000e+00 : f32}> : () -> f32 | |
| %1 = "arith.constant"() <{value = 2.000000e+00 : f32}> : () -> f32 | |
| %2 = "arith.constant"() <{value = 8.000000e-01 : f32}> : () -> f32 | |
| %3 = "arith.constant"() <{value = 0.0821908935 : f32}> : () -> f32 | |
| %4 = "arith.constant"() <{value = -0.583389878 : f32}> : () -> f32 | |
| %5 = "arith.constant"() <{value = 1.62705934 : f32}> : () -> f32 | |
| %6 = "arith.constant"() <{value = -2.0606916 : f32}> : () -> f32 | |
| %7 = "arith.constant"() <{value = 0.0572442785 : f32}> : () -> f32 | |
| %8 = "arith.constant"() <{value = -0.0883462652 : f32}> : () -> f32 | |
| %9 = "arith.constant"() <{value = 0.448369086 : f32}> : () -> f32 | |
| %10 = "arith.constant"() <{value = -3.276070e-01 : f32}> : () -> f32 | |
| %11 = "arith.constant"() <{value = 0.0739796459 : f32}> : () -> f32 | |
| %12 = "arith.constant"() <{value = -0.131808966 : f32}> : () -> f32 | |
| %13 = "arith.constant"() <{value = 0.519230127 : f32}> : () -> f32 | |
| %14 = "arith.constant"() <{value = -0.463513821 : f32}> : () -> f32 | |
| %15 = "arith.constant"() <{value = -1.71048032E-5 : f32}> : () -> f32 | |
| %16 = "arith.constant"() <{value = 2.53447099E-4 : f32}> : () -> f32 | |
| %17 = "arith.constant"() <{value = -0.00141373626 : f32}> : () -> f32 | |
| %18 = "arith.constant"() <{value = 0.00351961935 : f32}> : () -> f32 | |
| %19 = "arith.constant"() <{value = -0.00330093061 : f32}> : () -> f32 | |
| %20 = "arith.constant"() <{value = 0.0370645523 : f32}> : () -> f32 | |
| %21 = "arith.constant"() <{value = 0.118407398 : f32}> : () -> f32 | |
| %22 = "arith.constant"() <{value = -0.364721417 : f32}> : () -> f32 | |
| %23 = "arith.constant"() <{value = 1.12750685 : f32}> : () -> f32 | |
| %24 = "arith.constant"() <{value = 0.0258146804 : f32}> : () -> f32 | |
| %25 = "arith.constant"() <{value = 0.209741712 : f32}> : () -> f32 | |
| %26 = "arith.constant"() <{value = -0.523018539 : f32}> : () -> f32 | |
| %27 = "arith.constant"() <{value = 1.12837911 : f32}> : () -> f32 | |
| %28 = "arith.constant"() <{value = 128 : index}> : () -> index | |
| %29 = "arith.constant"() <{value = 64 : index}> : () -> index | |
| %30 = "arith.constant"() <{value = 1 : index}> : () -> index | |
| %31 = "arith.constant"() <{value = 28 : index}> : () -> index | |
| %32 = "arith.constant"() <{value = 0 : index}> : () -> index | |
| %33 = "arith.constant"() <{value = 86213696 : index}> : () -> index | |
| %34 = "arith.constant"() <{value = 2420992 : index}> : () -> index | |
| %35 = "arith.constant"() <{value = 2019584 : index}> : () -> index | |
| %36 = "arith.constant"() <{value = 5.000000e-01 : f32}> : () -> f32 | |
| %37 = "arith.constant"() <{value = 1.000000e+00 : f32}> : () -> f32 | |
| %38 = "arith.constant"() <{value = 1.41421354 : f32}> : () -> f32 | |
| %39 = "arith.constant"() <{value = 1.270000e+02 : f32}> : () -> f32 | |
| %40 = "arith.constant"() <{value = -1.280000e+02 : f32}> : () -> f32 | |
| %41 = "arith.constant"() <{value = 0.000000e+00 : f32}> : () -> f32 | |
| %42 = "arith.constant"() <{value = 1.250000e-01 : f32}> : () -> f32 | |
| %43 = "arith.constant"() <{value = 9.765625E-4 : f32}> : () -> f32 | |
| %44 = "arith.constant"() <{value = 0 : i32}> : () -> i32 | |
| %45 = "arith.constant"() <{value = 56 : index}> : () -> index | |
| %46 = "arith.constant"() <{value = 512 : index}> : () -> index | |
| %47 = "memref.alloca"() <{alignment = 64 : i64, operandSegmentSizes = array<i32: 0, 0>}> : () -> memref<28x56x64xi32> | |
| %48 = "hal.interface.binding.subspan"(%35) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x56x128xi8, strided<[7168, 128, 1], offset: 2019584>> | |
| "memref.assume_alignment"(%48) <{alignment = 64 : i32}> : (memref<56x56x128xi8, strided<[7168, 128, 1], offset: 2019584>>) -> () | |
| %49 = "hal.interface.binding.subspan"(%34) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2420992>> | |
| "memref.assume_alignment"(%49) <{alignment = 64 : i32}> : (memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2420992>>) -> () | |
| %50 = "hal.interface.binding.subspan"(%33) {alignment = 64 : index, binding = 1 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<512xf32, strided<[1], offset: 21553424>> | |
| "memref.assume_alignment"(%50) <{alignment = 64 : i32}> : (memref<512xf32, strided<[1], offset: 21553424>>) -> () | |
| %51 = "hal.interface.binding.subspan"(%32) {alignment = 64 : index, binding = 2 : index, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x56x512xi8> | |
| "memref.assume_alignment"(%51) <{alignment = 64 : i32}> : (memref<56x56x512xi8>) -> () | |
| %52 = "hal.interface.workgroup.id"() {dimension = 0 : index} : () -> index | |
| %53 = "hal.interface.workgroup.count"() {dimension = 0 : index} : () -> index | |
| %54 = "hal.interface.workgroup.id"() {dimension = 1 : index} : () -> index | |
| %55 = "hal.interface.workgroup.count"() {dimension = 1 : index} : () -> index | |
| %56 = "affine.apply"(%54) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index | |
| %57 = "affine.apply"(%55) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index | |
| "scf.for"(%56, %45, %57) ({ | |
| ^bb0(%arg0: index): | |
| %58 = "affine.apply"(%52) <{map = affine_map<()[s0] -> (s0 * 64)>}> : (index) -> index | |
| %59 = "affine.apply"(%53) <{map = affine_map<()[s0] -> (s0 * 64)>}> : (index) -> index | |
| "scf.for"(%58, %46, %59) ({ | |
| ^bb0(%arg1: index): | |
| %60 = "memref.subview"(%51, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, -9223372036854775808>, static_sizes = array<i64: 28, 56, 64>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x56x512xi8>, index, index) -> memref<28x56x64xi8, strided<[28672, 512, 1], offset: ?>> | |
| %61 = "memref.subview"(%50, %arg1) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808>, static_sizes = array<i64: 64>, static_strides = array<i64: 1>}> : (memref<512xf32, strided<[1], offset: 21553424>>, index) -> memref<64xf32, strided<[1], offset: ?>> | |
| %62 = "memref.subview"(%48, %arg0) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, 0>, static_sizes = array<i64: 28, 56, 128>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x56x128xi8, strided<[7168, 128, 1], offset: 2019584>>, index) -> memref<28x56x128xi8, strided<[7168, 128, 1], offset: ?>> | |
| %63 = "memref.subview"(%49, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, -9223372036854775808>, static_sizes = array<i64: 28, 128, 64>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2420992>>, index, index) -> memref<28x128x64xi8, strided<[65536, 512, 1], offset: ?>> | |
| "scf.for"(%32, %31, %30) ({ | |
| ^bb0(%arg9: index): | |
| "scf.for"(%32, %45, %30) ({ | |
| ^bb0(%arg10: index): | |
| "scf.for"(%32, %29, %30) ({ | |
| ^bb0(%arg11: index): | |
| "memref.store"(%44, %47, %arg9, %arg10, %arg11) <{nontemporal = false}> : (i32, memref<28x56x64xi32>, index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.for"(%32, %31, %30) ({ | |
| ^bb0(%arg5: index): | |
| "scf.for"(%32, %45, %30) ({ | |
| ^bb0(%arg6: index): | |
| "scf.for"(%32, %29, %30) ({ | |
| ^bb0(%arg7: index): | |
| "scf.for"(%32, %28, %30) ({ | |
| ^bb0(%arg8: index): | |
| %129 = "memref.load"(%62, %arg5, %arg6, %arg8) <{nontemporal = false}> : (memref<28x56x128xi8, strided<[7168, 128, 1], offset: ?>>, index, index, index) -> i8 | |
| %130 = "memref.load"(%63, %arg5, %arg8, %arg7) <{nontemporal = false}> : (memref<28x128x64xi8, strided<[65536, 512, 1], offset: ?>>, index, index, index) -> i8 | |
| %131 = "memref.load"(%47, %arg5, %arg6, %arg7) <{nontemporal = false}> : (memref<28x56x64xi32>, index, index, index) -> i32 | |
| %132 = "arith.extsi"(%129) : (i8) -> i32 | |
| %133 = "arith.extsi"(%130) : (i8) -> i32 | |
| %134 = "arith.muli"(%132, %133) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32 | |
| %135 = "arith.addi"(%131, %134) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32 | |
| "memref.store"(%135, %47, %arg5, %arg6, %arg7) <{nontemporal = false}> : (i32, memref<28x56x64xi32>, index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.for"(%32, %31, %30) ({ | |
| ^bb0(%arg2: index): | |
| "scf.for"(%32, %45, %30) ({ | |
| ^bb0(%arg3: index): | |
| "scf.for"(%32, %29, %30) ({ | |
| ^bb0(%arg4: index): | |
| %64 = "memref.load"(%61, %arg4) <{nontemporal = false}> : (memref<64xf32, strided<[1], offset: ?>>, index) -> f32 | |
| %65 = "memref.load"(%47, %arg2, %arg3, %arg4) <{nontemporal = false}> : (memref<28x56x64xi32>, index, index, index) -> i32 | |
| %66 = "arith.sitofp"(%65) : (i32) -> f32 | |
| %67 = "arith.mulf"(%66, %43) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %68 = "arith.addf"(%64, %67) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %69 = "arith.divf"(%68, %42) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %70 = "math.round"(%69) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
| %71 = "arith.addf"(%70, %41) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %72 = "arith.cmpf"(%71, %40) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1 | |
| %73 = "arith.cmpf"(%71, %39) <{fastmath = #arith.fastmath<none>, predicate = 9 : i64}> : (f32, f32) -> i1 | |
| %74 = "arith.select"(%72, %40, %71) : (i1, f32, f32) -> f32 | |
| %75 = "arith.select"(%73, %39, %74) : (i1, f32, f32) -> f32 | |
| %76 = "arith.fptosi"(%75) : (f32) -> i8 | |
| %77 = "arith.extsi"(%76) : (i8) -> i32 | |
| %78 = "arith.sitofp"(%77) : (i32) -> f32 | |
| %79 = "arith.mulf"(%78, %42) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %80 = "arith.divf"(%79, %38) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %81 = "arith.cmpf"(%80, %41) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1 | |
| %82 = "arith.negf"(%80) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
| %83 = "arith.select"(%81, %82, %80) : (i1, f32, f32) -> f32 | |
| %84 = "arith.cmpf"(%83, %2) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1 | |
| %85 = "arith.select"(%84, %27, %23) : (i1, f32, f32) -> f32 | |
| %86 = "arith.select"(%84, %14, %10) : (i1, f32, f32) -> f32 | |
| %87 = "arith.select"(%84, %26, %22) : (i1, f32, f32) -> f32 | |
| %88 = "arith.select"(%84, %13, %9) : (i1, f32, f32) -> f32 | |
| %89 = "arith.select"(%84, %25, %21) : (i1, f32, f32) -> f32 | |
| %90 = "arith.select"(%84, %12, %8) : (i1, f32, f32) -> f32 | |
| %91 = "arith.select"(%84, %24, %20) : (i1, f32, f32) -> f32 | |
| %92 = "arith.select"(%84, %11, %7) : (i1, f32, f32) -> f32 | |
| %93 = "arith.cmpf"(%83, %1) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1 | |
| %94 = "arith.select"(%93, %41, %19) : (i1, f32, f32) -> f32 | |
| %95 = "arith.select"(%93, %85, %18) : (i1, f32, f32) -> f32 | |
| %96 = "arith.select"(%93, %86, %6) : (i1, f32, f32) -> f32 | |
| %97 = "arith.select"(%93, %87, %17) : (i1, f32, f32) -> f32 | |
| %98 = "arith.select"(%93, %88, %5) : (i1, f32, f32) -> f32 | |
| %99 = "arith.select"(%93, %89, %16) : (i1, f32, f32) -> f32 | |
| %100 = "arith.select"(%93, %90, %4) : (i1, f32, f32) -> f32 | |
| %101 = "arith.select"(%93, %91, %15) : (i1, f32, f32) -> f32 | |
| %102 = "arith.select"(%93, %92, %3) : (i1, f32, f32) -> f32 | |
| %103 = "arith.select"(%93, %41, %37) : (i1, f32, f32) -> f32 | |
| %104 = "arith.cmpf"(%83, %0) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1 | |
| %105 = "math.fma"(%83, %101, %99) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %106 = "math.fma"(%83, %105, %97) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %107 = "math.fma"(%83, %106, %95) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %108 = "math.fma"(%83, %107, %94) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %109 = "math.fma"(%83, %102, %100) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %110 = "math.fma"(%83, %109, %98) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %111 = "math.fma"(%83, %110, %96) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %112 = "math.fma"(%83, %111, %37) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %113 = "arith.divf"(%108, %112) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %114 = "arith.addf"(%103, %113) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %115 = "arith.select"(%104, %114, %37) : (i1, f32, f32) -> f32 | |
| %116 = "arith.negf"(%115) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
| %117 = "arith.select"(%81, %116, %115) : (i1, f32, f32) -> f32 | |
| %118 = "arith.addf"(%117, %37) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %119 = "arith.mulf"(%79, %118) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %120 = "arith.mulf"(%119, %36) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %121 = "arith.divf"(%120, %42) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %122 = "math.round"(%121) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
| %123 = "arith.addf"(%122, %41) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %124 = "arith.cmpf"(%123, %40) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1 | |
| %125 = "arith.cmpf"(%123, %39) <{fastmath = #arith.fastmath<none>, predicate = 9 : i64}> : (f32, f32) -> i1 | |
| %126 = "arith.select"(%124, %40, %123) : (i1, f32, f32) -> f32 | |
| %127 = "arith.select"(%125, %39, %126) : (i1, f32, f32) -> f32 | |
| %128 = "arith.fptosi"(%127) : (f32) -> i8 | |
| "memref.store"(%128, %60, %arg2, %arg3, %arg4) <{nontemporal = false}> : (i8, memref<28x56x64xi8, strided<[28672, 512, 1], offset: ?>>, index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "func.return"() : () -> () | |
| }) {translation_info = #iree_codegen.translation_info<CPUDefault>} : () -> () | |
| }) : () -> () | |
| "hal.executable.variant_end"() : () -> () | |
| }) {sym_name = "embedded_elf_x86_64", target = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 16 : i64, target_triple = "x86_64-unknown-unknown-eabi-elf"}>} : () -> () | |
| %106 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%105 : tensor<1x56x56x512xf32>) outs(%98 : tensor<1x56x56x512xi8>) { | |
| ^ | |
| ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:1434:12: error: 'func.func' op exceeded stack allocation limit of 32768 bytes for function. Got 401408 bytes | |
| %174 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%173 : tensor<1x56x56x512xf32>) outs(%98 : tensor<1x56x56x512xi8>) { | |
| ^ | |
| ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:24:3: note: called from | |
| func.func @torch_jit(%arg0: tensor<1x3x224x224xf32>) -> tensor<1x1000xf32> { | |
| ^ | |
| ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:1434:12: note: see current operation: | |
| "func.func"() <{function_type = () -> (), sym_name = "torch_jit_dispatch_24_quantized_batch_matmul_56x56x512x128_i8xi8xi32xi32xi32"}> ({ | |
| %0 = "arith.constant"() <{value = 3.750000e+00 : f32}> : () -> f32 | |
| %1 = "arith.constant"() <{value = 2.000000e+00 : f32}> : () -> f32 | |
| %2 = "arith.constant"() <{value = 8.000000e-01 : f32}> : () -> f32 | |
| %3 = "arith.constant"() <{value = 0.0821908935 : f32}> : () -> f32 | |
| %4 = "arith.constant"() <{value = -0.583389878 : f32}> : () -> f32 | |
| %5 = "arith.constant"() <{value = 1.62705934 : f32}> : () -> f32 | |
| %6 = "arith.constant"() <{value = -2.0606916 : f32}> : () -> f32 | |
| %7 = "arith.constant"() <{value = 0.0572442785 : f32}> : () -> f32 | |
| %8 = "arith.constant"() <{value = -0.0883462652 : f32}> : () -> f32 | |
| %9 = "arith.constant"() <{value = 0.448369086 : f32}> : () -> f32 | |
| %10 = "arith.constant"() <{value = -3.276070e-01 : f32}> : () -> f32 | |
| %11 = "arith.constant"() <{value = 0.0739796459 : f32}> : () -> f32 | |
| %12 = "arith.constant"() <{value = -0.131808966 : f32}> : () -> f32 | |
| %13 = "arith.constant"() <{value = 0.519230127 : f32}> : () -> f32 | |
| %14 = "arith.constant"() <{value = -0.463513821 : f32}> : () -> f32 | |
| %15 = "arith.constant"() <{value = -1.71048032E-5 : f32}> : () -> f32 | |
| %16 = "arith.constant"() <{value = 2.53447099E-4 : f32}> : () -> f32 | |
| %17 = "arith.constant"() <{value = -0.00141373626 : f32}> : () -> f32 | |
| %18 = "arith.constant"() <{value = 0.00351961935 : f32}> : () -> f32 | |
| %19 = "arith.constant"() <{value = -0.00330093061 : f32}> : () -> f32 | |
| %20 = "arith.constant"() <{value = 0.0370645523 : f32}> : () -> f32 | |
| %21 = "arith.constant"() <{value = 0.118407398 : f32}> : () -> f32 | |
| %22 = "arith.constant"() <{value = -0.364721417 : f32}> : () -> f32 | |
| %23 = "arith.constant"() <{value = 1.12750685 : f32}> : () -> f32 | |
| %24 = "arith.constant"() <{value = 0.0258146804 : f32}> : () -> f32 | |
| %25 = "arith.constant"() <{value = 0.209741712 : f32}> : () -> f32 | |
| %26 = "arith.constant"() <{value = -0.523018539 : f32}> : () -> f32 | |
| %27 = "arith.constant"() <{value = 1.12837911 : f32}> : () -> f32 | |
| %28 = "arith.constant"() <{value = 128 : index}> : () -> index | |
| %29 = "arith.constant"() <{value = 64 : index}> : () -> index | |
| %30 = "arith.constant"() <{value = 1 : index}> : () -> index | |
| %31 = "arith.constant"() <{value = 28 : index}> : () -> index | |
| %32 = "arith.constant"() <{value = 0 : index}> : () -> index | |
| %33 = "arith.constant"() <{value = 802816 : index}> : () -> index | |
| %34 = "arith.constant"() <{value = 86217280 : index}> : () -> index | |
| %35 = "arith.constant"() <{value = 2408448 : index}> : () -> index | |
| %36 = "arith.constant"() <{value = 401408 : index}> : () -> index | |
| %37 = "arith.constant"() <{value = 3.125000e-02 : f32}> : () -> f32 | |
| %38 = "arith.constant"() <{value = 5.000000e-01 : f32}> : () -> f32 | |
| %39 = "arith.constant"() <{value = 1.000000e+00 : f32}> : () -> f32 | |
| %40 = "arith.constant"() <{value = 1.41421354 : f32}> : () -> f32 | |
| %41 = "arith.constant"() <{value = 1.270000e+02 : f32}> : () -> f32 | |
| %42 = "arith.constant"() <{value = -1.280000e+02 : f32}> : () -> f32 | |
| %43 = "arith.constant"() <{value = 0.000000e+00 : f32}> : () -> f32 | |
| %44 = "arith.constant"() <{value = 6.250000e-02 : f32}> : () -> f32 | |
| %45 = "arith.constant"() <{value = 4.8828125E-4 : f32}> : () -> f32 | |
| %46 = "arith.constant"() <{value = 0 : i32}> : () -> i32 | |
| %47 = "arith.constant"() <{value = 56 : index}> : () -> index | |
| %48 = "arith.constant"() <{value = 512 : index}> : () -> index | |
| %49 = "memref.alloca"() <{alignment = 64 : i64, operandSegmentSizes = array<i32: 0, 0>}> : () -> memref<28x56x64xi32> | |
| %50 = "hal.interface.binding.subspan"(%36) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x56x128xi8, strided<[7168, 128, 1], offset: 401408>> | |
| "memref.assume_alignment"(%50) <{alignment = 64 : i32}> : (memref<56x56x128xi8, strided<[7168, 128, 1], offset: 401408>>) -> () | |
| %51 = "hal.interface.binding.subspan"(%35) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2408448>> | |
| "memref.assume_alignment"(%51) <{alignment = 64 : i32}> : (memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2408448>>) -> () | |
| %52 = "hal.interface.binding.subspan"(%34) {alignment = 64 : index, binding = 1 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<512xf32, strided<[1], offset: 21554320>> | |
| "memref.assume_alignment"(%52) <{alignment = 64 : i32}> : (memref<512xf32, strided<[1], offset: 21554320>>) -> () | |
| %53 = "hal.interface.binding.subspan"(%33) {alignment = 64 : index, binding = 2 : index, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x56x512xi8, strided<[28672, 512, 1], offset: 802816>> | |
| "memref.assume_alignment"(%53) <{alignment = 64 : i32}> : (memref<56x56x512xi8, strided<[28672, 512, 1], offset: 802816>>) -> () | |
| %54 = "hal.interface.workgroup.id"() {dimension = 0 : index} : () -> index | |
| %55 = "hal.interface.workgroup.count"() {dimension = 0 : index} : () -> index | |
| %56 = "hal.interface.workgroup.id"() {dimension = 1 : index} : () -> index | |
| %57 = "hal.interface.workgroup.count"() {dimension = 1 : index} : () -> index | |
| %58 = "affine.apply"(%56) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index | |
| %59 = "affine.apply"(%57) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index | |
| "scf.for"(%58, %47, %59) ({ | |
| ^bb0(%arg0: index): | |
| %60 = "affine.apply"(%54) <{map = affine_map<()[s0] -> (s0 * 64)>}> : (index) -> index | |
| %61 = "affine.apply"(%55) <{map = affine_map<()[s0] -> (s0 * 64)>}> : (index) -> index | |
| "scf.for"(%60, %48, %61) ({ | |
| ^bb0(%arg1: index): | |
| %62 = "memref.subview"(%53, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, -9223372036854775808>, static_sizes = array<i64: 28, 56, 64>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x56x512xi8, strided<[28672, 512, 1], offset: 802816>>, index, index) -> memref<28x56x64xi8, strided<[28672, 512, 1], offset: ?>> | |
| %63 = "memref.subview"(%52, %arg1) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808>, static_sizes = array<i64: 64>, static_strides = array<i64: 1>}> : (memref<512xf32, strided<[1], offset: 21554320>>, index) -> memref<64xf32, strided<[1], offset: ?>> | |
| %64 = "memref.subview"(%50, %arg0) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, 0>, static_sizes = array<i64: 28, 56, 128>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x56x128xi8, strided<[7168, 128, 1], offset: 401408>>, index) -> memref<28x56x128xi8, strided<[7168, 128, 1], offset: ?>> | |
| %65 = "memref.subview"(%51, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, -9223372036854775808>, static_sizes = array<i64: 28, 128, 64>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2408448>>, index, index) -> memref<28x128x64xi8, strided<[65536, 512, 1], offset: ?>> | |
| "scf.for"(%32, %31, %30) ({ | |
| ^bb0(%arg9: index): | |
| "scf.for"(%32, %47, %30) ({ | |
| ^bb0(%arg10: index): | |
| "scf.for"(%32, %29, %30) ({ | |
| ^bb0(%arg11: index): | |
| "memref.store"(%46, %49, %arg9, %arg10, %arg11) <{nontemporal = false}> : (i32, memref<28x56x64xi32>, index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.for"(%32, %31, %30) ({ | |
| ^bb0(%arg5: index): | |
| "scf.for"(%32, %47, %30) ({ | |
| ^bb0(%arg6: index): | |
| "scf.for"(%32, %29, %30) ({ | |
| ^bb0(%arg7: index): | |
| "scf.for"(%32, %28, %30) ({ | |
| ^bb0(%arg8: index): | |
| %131 = "memref.load"(%64, %arg5, %arg6, %arg8) <{nontemporal = false}> : (memref<28x56x128xi8, strided<[7168, 128, 1], offset: ?>>, index, index, index) -> i8 | |
| %132 = "memref.load"(%65, %arg5, %arg8, %arg7) <{nontemporal = false}> : (memref<28x128x64xi8, strided<[65536, 512, 1], offset: ?>>, index, index, index) -> i8 | |
| %133 = "memref.load"(%49, %arg5, %arg6, %arg7) <{nontemporal = false}> : (memref<28x56x64xi32>, index, index, index) -> i32 | |
| %134 = "arith.extsi"(%131) : (i8) -> i32 | |
| %135 = "arith.extsi"(%132) : (i8) -> i32 | |
| %136 = "arith.muli"(%134, %135) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32 | |
| %137 = "arith.addi"(%133, %136) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32 | |
| "memref.store"(%137, %49, %arg5, %arg6, %arg7) <{nontemporal = false}> : (i32, memref<28x56x64xi32>, index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.for"(%32, %31, %30) ({ | |
| ^bb0(%arg2: index): | |
| "scf.for"(%32, %47, %30) ({ | |
| ^bb0(%arg3: index): | |
| "scf.for"(%32, %29, %30) ({ | |
| ^bb0(%arg4: index): | |
| %66 = "memref.load"(%63, %arg4) <{nontemporal = false}> : (memref<64xf32, strided<[1], offset: ?>>, index) -> f32 | |
| %67 = "memref.load"(%49, %arg2, %arg3, %arg4) <{nontemporal = false}> : (memref<28x56x64xi32>, index, index, index) -> i32 | |
| %68 = "arith.sitofp"(%67) : (i32) -> f32 | |
| %69 = "arith.mulf"(%68, %45) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %70 = "arith.addf"(%66, %69) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %71 = "arith.divf"(%70, %44) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %72 = "math.round"(%71) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
| %73 = "arith.addf"(%72, %43) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %74 = "arith.cmpf"(%73, %42) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1 | |
| %75 = "arith.cmpf"(%73, %41) <{fastmath = #arith.fastmath<none>, predicate = 9 : i64}> : (f32, f32) -> i1 | |
| %76 = "arith.select"(%74, %42, %73) : (i1, f32, f32) -> f32 | |
| %77 = "arith.select"(%75, %41, %76) : (i1, f32, f32) -> f32 | |
| %78 = "arith.fptosi"(%77) : (f32) -> i8 | |
| %79 = "arith.extsi"(%78) : (i8) -> i32 | |
| %80 = "arith.sitofp"(%79) : (i32) -> f32 | |
| %81 = "arith.mulf"(%80, %44) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %82 = "arith.divf"(%81, %40) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %83 = "arith.cmpf"(%82, %43) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1 | |
| %84 = "arith.negf"(%82) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
| %85 = "arith.select"(%83, %84, %82) : (i1, f32, f32) -> f32 | |
| %86 = "arith.cmpf"(%85, %2) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1 | |
| %87 = "arith.select"(%86, %27, %23) : (i1, f32, f32) -> f32 | |
| %88 = "arith.select"(%86, %14, %10) : (i1, f32, f32) -> f32 | |
| %89 = "arith.select"(%86, %26, %22) : (i1, f32, f32) -> f32 | |
| %90 = "arith.select"(%86, %13, %9) : (i1, f32, f32) -> f32 | |
| %91 = "arith.select"(%86, %25, %21) : (i1, f32, f32) -> f32 | |
| %92 = "arith.select"(%86, %12, %8) : (i1, f32, f32) -> f32 | |
| %93 = "arith.select"(%86, %24, %20) : (i1, f32, f32) -> f32 | |
| %94 = "arith.select"(%86, %11, %7) : (i1, f32, f32) -> f32 | |
| %95 = "arith.cmpf"(%85, %1) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1 | |
| %96 = "arith.select"(%95, %43, %19) : (i1, f32, f32) -> f32 | |
| %97 = "arith.select"(%95, %87, %18) : (i1, f32, f32) -> f32 | |
| %98 = "arith.select"(%95, %88, %6) : (i1, f32, f32) -> f32 | |
| %99 = "arith.select"(%95, %89, %17) : (i1, f32, f32) -> f32 | |
| %100 = "arith.select"(%95, %90, %5) : (i1, f32, f32) -> f32 | |
| %101 = "arith.select"(%95, %91, %16) : (i1, f32, f32) -> f32 | |
| %102 = "arith.select"(%95, %92, %4) : (i1, f32, f32) -> f32 | |
| %103 = "arith.select"(%95, %93, %15) : (i1, f32, f32) -> f32 | |
| %104 = "arith.select"(%95, %94, %3) : (i1, f32, f32) -> f32 | |
| %105 = "arith.select"(%95, %43, %39) : (i1, f32, f32) -> f32 | |
| %106 = "arith.cmpf"(%85, %0) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1 | |
| %107 = "math.fma"(%85, %103, %101) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %108 = "math.fma"(%85, %107, %99) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %109 = "math.fma"(%85, %108, %97) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %110 = "math.fma"(%85, %109, %96) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %111 = "math.fma"(%85, %104, %102) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %112 = "math.fma"(%85, %111, %100) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %113 = "math.fma"(%85, %112, %98) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %114 = "math.fma"(%85, %113, %39) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %115 = "arith.divf"(%110, %114) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %116 = "arith.addf"(%105, %115) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %117 = "arith.select"(%106, %116, %39) : (i1, f32, f32) -> f32 | |
| %118 = "arith.negf"(%117) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
| %119 = "arith.select"(%83, %118, %117) : (i1, f32, f32) -> f32 | |
| %120 = "arith.addf"(%119, %39) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %121 = "arith.mulf"(%81, %120) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %122 = "arith.mulf"(%121, %38) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %123 = "arith.divf"(%122, %37) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %124 = "math.round"(%123) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
| %125 = "arith.addf"(%124, %43) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %126 = "arith.cmpf"(%125, %42) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1 | |
| %127 = "arith.cmpf"(%125, %41) <{fastmath = #arith.fastmath<none>, predicate = 9 : i64}> : (f32, f32) -> i1 | |
| %128 = "arith.select"(%126, %42, %125) : (i1, f32, f32) -> f32 | |
| %129 = "arith.select"(%127, %41, %128) : (i1, f32, f32) -> f32 | |
| %130 = "arith.fptosi"(%129) : (f32) -> i8 | |
| "memref.store"(%130, %62, %arg2, %arg3, %arg4) <{nontemporal = false}> : (i8, memref<28x56x64xi8, strided<[28672, 512, 1], offset: ?>>, index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "func.return"() : () -> () | |
| }) {translation_info = #iree_codegen.translation_info<CPUDefault>} : () -> () | |
| %174 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%173 : tensor<1x56x56x512xf32>) outs(%98 : tensor<1x56x56x512xi8>) { | |
| ^ | |
| ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:1434:12: error: failed to run translation of source executable to target executable for backend #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 16 : i64, target_triple = "x86_64-unknown-unknown-eabi-elf"}> | |
| %174 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%173 : tensor<1x56x56x512xf32>) outs(%98 : tensor<1x56x56x512xi8>) { | |
| ^ | |
| ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:24:3: note: called from | |
| func.func @torch_jit(%arg0: tensor<1x3x224x224xf32>) -> tensor<1x1000xf32> { | |
| ^ | |
| ConvNeXt_vaiq_int8.default.onnx.linalg.mlir:1434:12: note: see current operation: | |
| "hal.executable.variant"() ({ | |
| "hal.executable.export"() ({ | |
| ^bb0(%arg12: !hal.device): | |
| %138 = "arith.constant"() <{value = 8 : index}> : () -> index | |
| %139 = "arith.constant"() <{value = 2 : index}> : () -> index | |
| %140 = "arith.constant"() <{value = 1 : index}> : () -> index | |
| "hal.return"(%138, %139, %140) : (index, index, index) -> () | |
| }) {hal.interface.bindings = [#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>, #hal.interface.binding<0, 2>], layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>, ordinal = 0 : index, sym_name = "torch_jit_dispatch_24_quantized_batch_matmul_56x56x512x128_i8xi8xi32xi32xi32"} : () -> () | |
| "builtin.module"() ({ | |
| "func.func"() <{function_type = () -> (), sym_name = "torch_jit_dispatch_24_quantized_batch_matmul_56x56x512x128_i8xi8xi32xi32xi32"}> ({ | |
| %0 = "arith.constant"() <{value = 3.750000e+00 : f32}> : () -> f32 | |
| %1 = "arith.constant"() <{value = 2.000000e+00 : f32}> : () -> f32 | |
| %2 = "arith.constant"() <{value = 8.000000e-01 : f32}> : () -> f32 | |
| %3 = "arith.constant"() <{value = 0.0821908935 : f32}> : () -> f32 | |
| %4 = "arith.constant"() <{value = -0.583389878 : f32}> : () -> f32 | |
| %5 = "arith.constant"() <{value = 1.62705934 : f32}> : () -> f32 | |
| %6 = "arith.constant"() <{value = -2.0606916 : f32}> : () -> f32 | |
| %7 = "arith.constant"() <{value = 0.0572442785 : f32}> : () -> f32 | |
| %8 = "arith.constant"() <{value = -0.0883462652 : f32}> : () -> f32 | |
| %9 = "arith.constant"() <{value = 0.448369086 : f32}> : () -> f32 | |
| %10 = "arith.constant"() <{value = -3.276070e-01 : f32}> : () -> f32 | |
| %11 = "arith.constant"() <{value = 0.0739796459 : f32}> : () -> f32 | |
| %12 = "arith.constant"() <{value = -0.131808966 : f32}> : () -> f32 | |
| %13 = "arith.constant"() <{value = 0.519230127 : f32}> : () -> f32 | |
| %14 = "arith.constant"() <{value = -0.463513821 : f32}> : () -> f32 | |
| %15 = "arith.constant"() <{value = -1.71048032E-5 : f32}> : () -> f32 | |
| %16 = "arith.constant"() <{value = 2.53447099E-4 : f32}> : () -> f32 | |
| %17 = "arith.constant"() <{value = -0.00141373626 : f32}> : () -> f32 | |
| %18 = "arith.constant"() <{value = 0.00351961935 : f32}> : () -> f32 | |
| %19 = "arith.constant"() <{value = -0.00330093061 : f32}> : () -> f32 | |
| %20 = "arith.constant"() <{value = 0.0370645523 : f32}> : () -> f32 | |
| %21 = "arith.constant"() <{value = 0.118407398 : f32}> : () -> f32 | |
| %22 = "arith.constant"() <{value = -0.364721417 : f32}> : () -> f32 | |
| %23 = "arith.constant"() <{value = 1.12750685 : f32}> : () -> f32 | |
| %24 = "arith.constant"() <{value = 0.0258146804 : f32}> : () -> f32 | |
| %25 = "arith.constant"() <{value = 0.209741712 : f32}> : () -> f32 | |
| %26 = "arith.constant"() <{value = -0.523018539 : f32}> : () -> f32 | |
| %27 = "arith.constant"() <{value = 1.12837911 : f32}> : () -> f32 | |
| %28 = "arith.constant"() <{value = 128 : index}> : () -> index | |
| %29 = "arith.constant"() <{value = 64 : index}> : () -> index | |
| %30 = "arith.constant"() <{value = 1 : index}> : () -> index | |
| %31 = "arith.constant"() <{value = 28 : index}> : () -> index | |
| %32 = "arith.constant"() <{value = 0 : index}> : () -> index | |
| %33 = "arith.constant"() <{value = 802816 : index}> : () -> index | |
| %34 = "arith.constant"() <{value = 86217280 : index}> : () -> index | |
| %35 = "arith.constant"() <{value = 2408448 : index}> : () -> index | |
| %36 = "arith.constant"() <{value = 401408 : index}> : () -> index | |
| %37 = "arith.constant"() <{value = 3.125000e-02 : f32}> : () -> f32 | |
| %38 = "arith.constant"() <{value = 5.000000e-01 : f32}> : () -> f32 | |
| %39 = "arith.constant"() <{value = 1.000000e+00 : f32}> : () -> f32 | |
| %40 = "arith.constant"() <{value = 1.41421354 : f32}> : () -> f32 | |
| %41 = "arith.constant"() <{value = 1.270000e+02 : f32}> : () -> f32 | |
| %42 = "arith.constant"() <{value = -1.280000e+02 : f32}> : () -> f32 | |
| %43 = "arith.constant"() <{value = 0.000000e+00 : f32}> : () -> f32 | |
| %44 = "arith.constant"() <{value = 6.250000e-02 : f32}> : () -> f32 | |
| %45 = "arith.constant"() <{value = 4.8828125E-4 : f32}> : () -> f32 | |
| %46 = "arith.constant"() <{value = 0 : i32}> : () -> i32 | |
| %47 = "arith.constant"() <{value = 56 : index}> : () -> index | |
| %48 = "arith.constant"() <{value = 512 : index}> : () -> index | |
| %49 = "memref.alloca"() <{alignment = 64 : i64, operandSegmentSizes = array<i32: 0, 0>}> : () -> memref<28x56x64xi32> | |
| %50 = "hal.interface.binding.subspan"(%36) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x56x128xi8, strided<[7168, 128, 1], offset: 401408>> | |
| "memref.assume_alignment"(%50) <{alignment = 64 : i32}> : (memref<56x56x128xi8, strided<[7168, 128, 1], offset: 401408>>) -> () | |
| %51 = "hal.interface.binding.subspan"(%35) {alignment = 64 : index, binding = 0 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2408448>> | |
| "memref.assume_alignment"(%51) <{alignment = 64 : i32}> : (memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2408448>>) -> () | |
| %52 = "hal.interface.binding.subspan"(%34) {alignment = 64 : index, binding = 1 : index, descriptor_flags = 1 : i32, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<512xf32, strided<[1], offset: 21554320>> | |
| "memref.assume_alignment"(%52) <{alignment = 64 : i32}> : (memref<512xf32, strided<[1], offset: 21554320>>) -> () | |
| %53 = "hal.interface.binding.subspan"(%33) {alignment = 64 : index, binding = 2 : index, descriptor_type = #hal.descriptor_type<storage_buffer>, operandSegmentSizes = array<i32: 1, 0>, set = 0 : index} : (index) -> memref<56x56x512xi8, strided<[28672, 512, 1], offset: 802816>> | |
| "memref.assume_alignment"(%53) <{alignment = 64 : i32}> : (memref<56x56x512xi8, strided<[28672, 512, 1], offset: 802816>>) -> () | |
| %54 = "hal.interface.workgroup.id"() {dimension = 0 : index} : () -> index | |
| %55 = "hal.interface.workgroup.count"() {dimension = 0 : index} : () -> index | |
| %56 = "hal.interface.workgroup.id"() {dimension = 1 : index} : () -> index | |
| %57 = "hal.interface.workgroup.count"() {dimension = 1 : index} : () -> index | |
| %58 = "affine.apply"(%56) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index | |
| %59 = "affine.apply"(%57) <{map = affine_map<()[s0] -> (s0 * 28)>}> : (index) -> index | |
| "scf.for"(%58, %47, %59) ({ | |
| ^bb0(%arg0: index): | |
| %60 = "affine.apply"(%54) <{map = affine_map<()[s0] -> (s0 * 64)>}> : (index) -> index | |
| %61 = "affine.apply"(%55) <{map = affine_map<()[s0] -> (s0 * 64)>}> : (index) -> index | |
| "scf.for"(%60, %48, %61) ({ | |
| ^bb0(%arg1: index): | |
| %62 = "memref.subview"(%53, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, -9223372036854775808>, static_sizes = array<i64: 28, 56, 64>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x56x512xi8, strided<[28672, 512, 1], offset: 802816>>, index, index) -> memref<28x56x64xi8, strided<[28672, 512, 1], offset: ?>> | |
| %63 = "memref.subview"(%52, %arg1) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808>, static_sizes = array<i64: 64>, static_strides = array<i64: 1>}> : (memref<512xf32, strided<[1], offset: 21554320>>, index) -> memref<64xf32, strided<[1], offset: ?>> | |
| %64 = "memref.subview"(%50, %arg0) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, 0>, static_sizes = array<i64: 28, 56, 128>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x56x128xi8, strided<[7168, 128, 1], offset: 401408>>, index) -> memref<28x56x128xi8, strided<[7168, 128, 1], offset: ?>> | |
| %65 = "memref.subview"(%51, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 2, 0, 0>, static_offsets = array<i64: -9223372036854775808, 0, -9223372036854775808>, static_sizes = array<i64: 28, 128, 64>, static_strides = array<i64: 1, 1, 1>}> : (memref<56x128x512xi8, strided<[65536, 512, 1], offset: 2408448>>, index, index) -> memref<28x128x64xi8, strided<[65536, 512, 1], offset: ?>> | |
| "scf.for"(%32, %31, %30) ({ | |
| ^bb0(%arg9: index): | |
| "scf.for"(%32, %47, %30) ({ | |
| ^bb0(%arg10: index): | |
| "scf.for"(%32, %29, %30) ({ | |
| ^bb0(%arg11: index): | |
| "memref.store"(%46, %49, %arg9, %arg10, %arg11) <{nontemporal = false}> : (i32, memref<28x56x64xi32>, index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.for"(%32, %31, %30) ({ | |
| ^bb0(%arg5: index): | |
| "scf.for"(%32, %47, %30) ({ | |
| ^bb0(%arg6: index): | |
| "scf.for"(%32, %29, %30) ({ | |
| ^bb0(%arg7: index): | |
| "scf.for"(%32, %28, %30) ({ | |
| ^bb0(%arg8: index): | |
| %131 = "memref.load"(%64, %arg5, %arg6, %arg8) <{nontemporal = false}> : (memref<28x56x128xi8, strided<[7168, 128, 1], offset: ?>>, index, index, index) -> i8 | |
| %132 = "memref.load"(%65, %arg5, %arg8, %arg7) <{nontemporal = false}> : (memref<28x128x64xi8, strided<[65536, 512, 1], offset: ?>>, index, index, index) -> i8 | |
| %133 = "memref.load"(%49, %arg5, %arg6, %arg7) <{nontemporal = false}> : (memref<28x56x64xi32>, index, index, index) -> i32 | |
| %134 = "arith.extsi"(%131) : (i8) -> i32 | |
| %135 = "arith.extsi"(%132) : (i8) -> i32 | |
| %136 = "arith.muli"(%134, %135) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32 | |
| %137 = "arith.addi"(%133, %136) <{overflowFlags = #arith.overflow<none>}> : (i32, i32) -> i32 | |
| "memref.store"(%137, %49, %arg5, %arg6, %arg7) <{nontemporal = false}> : (i32, memref<28x56x64xi32>, index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.for"(%32, %31, %30) ({ | |
| ^bb0(%arg2: index): | |
| "scf.for"(%32, %47, %30) ({ | |
| ^bb0(%arg3: index): | |
| "scf.for"(%32, %29, %30) ({ | |
| ^bb0(%arg4: index): | |
| %66 = "memref.load"(%63, %arg4) <{nontemporal = false}> : (memref<64xf32, strided<[1], offset: ?>>, index) -> f32 | |
| %67 = "memref.load"(%49, %arg2, %arg3, %arg4) <{nontemporal = false}> : (memref<28x56x64xi32>, index, index, index) -> i32 | |
| %68 = "arith.sitofp"(%67) : (i32) -> f32 | |
| %69 = "arith.mulf"(%68, %45) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %70 = "arith.addf"(%66, %69) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %71 = "arith.divf"(%70, %44) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %72 = "math.round"(%71) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
| %73 = "arith.addf"(%72, %43) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %74 = "arith.cmpf"(%73, %42) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1 | |
| %75 = "arith.cmpf"(%73, %41) <{fastmath = #arith.fastmath<none>, predicate = 9 : i64}> : (f32, f32) -> i1 | |
| %76 = "arith.select"(%74, %42, %73) : (i1, f32, f32) -> f32 | |
| %77 = "arith.select"(%75, %41, %76) : (i1, f32, f32) -> f32 | |
| %78 = "arith.fptosi"(%77) : (f32) -> i8 | |
| %79 = "arith.extsi"(%78) : (i8) -> i32 | |
| %80 = "arith.sitofp"(%79) : (i32) -> f32 | |
| %81 = "arith.mulf"(%80, %44) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %82 = "arith.divf"(%81, %40) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %83 = "arith.cmpf"(%82, %43) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1 | |
| %84 = "arith.negf"(%82) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
| %85 = "arith.select"(%83, %84, %82) : (i1, f32, f32) -> f32 | |
| %86 = "arith.cmpf"(%85, %2) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1 | |
| %87 = "arith.select"(%86, %27, %23) : (i1, f32, f32) -> f32 | |
| %88 = "arith.select"(%86, %14, %10) : (i1, f32, f32) -> f32 | |
| %89 = "arith.select"(%86, %26, %22) : (i1, f32, f32) -> f32 | |
| %90 = "arith.select"(%86, %13, %9) : (i1, f32, f32) -> f32 | |
| %91 = "arith.select"(%86, %25, %21) : (i1, f32, f32) -> f32 | |
| %92 = "arith.select"(%86, %12, %8) : (i1, f32, f32) -> f32 | |
| %93 = "arith.select"(%86, %24, %20) : (i1, f32, f32) -> f32 | |
| %94 = "arith.select"(%86, %11, %7) : (i1, f32, f32) -> f32 | |
| %95 = "arith.cmpf"(%85, %1) <{fastmath = #arith.fastmath<none>, predicate = 4 : i64}> : (f32, f32) -> i1 | |
| %96 = "arith.select"(%95, %43, %19) : (i1, f32, f32) -> f32 | |
| %97 = "arith.select"(%95, %87, %18) : (i1, f32, f32) -> f32 | |
| %98 = "arith.select"(%95, %88, %6) : (i1, f32, f32) -> f32 | |
| %99 = "arith.select"(%95, %89, %17) : (i1, f32, f32) -> f32 | |
| %100 = "arith.select"(%95, %90, %5) : (i1, f32, f32) -> f32 | |
| %101 = "arith.select"(%95, %91, %16) : (i1, f32, f32) -> f32 | |
| %102 = "arith.select"(%95, %92, %4) : (i1, f32, f32) -> f32 | |
| %103 = "arith.select"(%95, %93, %15) : (i1, f32, f32) -> f32 | |
| %104 = "arith.select"(%95, %94, %3) : (i1, f32, f32) -> f32 | |
| %105 = "arith.select"(%95, %43, %39) : (i1, f32, f32) -> f32 | |
| %106 = "arith.cmpf"(%85, %0) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1 | |
| %107 = "math.fma"(%85, %103, %101) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %108 = "math.fma"(%85, %107, %99) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %109 = "math.fma"(%85, %108, %97) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %110 = "math.fma"(%85, %109, %96) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %111 = "math.fma"(%85, %104, %102) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %112 = "math.fma"(%85, %111, %100) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %113 = "math.fma"(%85, %112, %98) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %114 = "math.fma"(%85, %113, %39) <{fastmath = #arith.fastmath<none>}> : (f32, f32, f32) -> f32 | |
| %115 = "arith.divf"(%110, %114) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %116 = "arith.addf"(%105, %115) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %117 = "arith.select"(%106, %116, %39) : (i1, f32, f32) -> f32 | |
| %118 = "arith.negf"(%117) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
| %119 = "arith.select"(%83, %118, %117) : (i1, f32, f32) -> f32 | |
| %120 = "arith.addf"(%119, %39) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %121 = "arith.mulf"(%81, %120) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %122 = "arith.mulf"(%121, %38) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %123 = "arith.divf"(%122, %37) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %124 = "math.round"(%123) <{fastmath = #arith.fastmath<none>}> : (f32) -> f32 | |
| %125 = "arith.addf"(%124, %43) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32 | |
| %126 = "arith.cmpf"(%125, %42) <{fastmath = #arith.fastmath<none>, predicate = 11 : i64}> : (f32, f32) -> i1 | |
| %127 = "arith.cmpf"(%125, %41) <{fastmath = #arith.fastmath<none>, predicate = 9 : i64}> : (f32, f32) -> i1 | |
| %128 = "arith.select"(%126, %42, %125) : (i1, f32, f32) -> f32 | |
| %129 = "arith.select"(%127, %41, %128) : (i1, f32, f32) -> f32 | |
| %130 = "arith.fptosi"(%129) : (f32) -> i8 | |
| "memref.store"(%130, %62, %arg2, %arg3, %arg4) <{nontemporal = false}> : (i8, memref<28x56x64xi8, strided<[28672, 512, 1], offset: ?>>, index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "scf.yield"() : () -> () | |
| }) : (index, index, index) -> () | |
| "func.return"() : () -> () | |
| }) {translation_info = #iree_codegen.translation_info<CPUDefault>} : () -> () | |
| }) : () -> () | |
| "hal.executable.variant_end"() : () -> () | |
| }) {sym_name = "embedded_elf_x86_64", target = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 16 : i64, target_triple = "x86_64-unknown-unknown-eabi-elf"}>} : () -> () | |
| %174 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%173 : tensor<1x56x56x512xf32>) outs(%98 : tensor<1x56x56x512xi8>) { | |
| ^ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment