Created
May 14, 2025 22:05
-
-
Save pashu123/93b6076aa80d9ceebbc63b8b3a210ae0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
hal.executable public @decode_bs4$async_dispatch_28 { | |
hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb", {abi = "hip", iree.gpu.target = #iree_gpu.target<arch = "gfx942", features = "", wgp = <compute = fp64|fp32|fp16|int64|int32|int16|int8, storage = b64|b32|b16|b8, subgroup = shuffle|arithmetic, dot = dp4xi8toi32, mma = [<MFMA_F32_16x16x16_BF16>, <MFMA_F32_32x32x8_BF16>, <MFMA_F32_16x16x32_F8E5M2FNUZ>, <MFMA_F32_16x16x32_F8E5M2FNUZ_F8E4M3FNUZ>, <MFMA_F32_16x16x32_F8E4M3FNUZ>, <MFMA_F32_16x16x32_F8E4M3FNUZ_F8E5M2FNUZ>, <MFMA_F32_32x32x16_F8E5M2FNUZ>, <MFMA_F32_32x32x16_F8E5M2FNUZ_F8E4M3FNUZ>, <MFMA_F32_32x32x16_F8E4M3FNUZ>, <MFMA_F32_32x32x16_F8E4M3FNUZ_F8E5M2FNUZ>, <MFMA_I32_16x16x32_I8>, <MFMA_I32_32x32x16_I8>, <MFMA_F64_16x16x4_F64>, <MFMA_F32_16x16x4_F32>, <MFMA_F32_16x16x16_F16>, <MFMA_F32_32x32x8_F16>], subgroup_size_choices = [64], max_workgroup_sizes = [1024, 1024, 1024], max_thread_count_per_workgroup = 1024, max_workgroup_memory_bytes = 65536, max_workgroup_counts = [2147483647, 2147483647, 2147483647], max_load_instruction_bits = 128, simds_per_wgp = 4, vgpr_space_bits = 16384>>, ukernels = "none"}>) { | |
// Export declaration for the dispatch entry point
// @decode_bs4$async_dispatch_28_elementwise_4x4096_bf16xf32xf32xf32xf32xf8E4M3FNUZ,
// registered at ordinal(0) of this variant.
// The pipeline layout declares 9 constants and 6 storage_buffer bindings:
// binding 0 is "ReadOnly|Indirect", bindings 1-2 are ReadOnly, and
// bindings 3-5 are Indirect; the layout itself carries flags = Indirect.
// The count region takes a !hal.device argument and returns three index values.
hal.executable.export public @decode_bs4$async_dispatch_28_elementwise_4x4096_bf16xf32xf32xf32xf32xf8E4M3FNUZ ordinal(0) layout(#hal.pipeline.layout<constants = 9, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) count(%arg0: !hal.device) -> (index, index, index) { | |
// The three results come from workgroup_count_from_slice, which is given no
// operands here. NOTE(review): presumably a placeholder resolved by a later
// compiler pass into concrete workgroup counts — confirm against the IREE pipeline.
%x, %y, %z = iree_tensor_ext.dispatch.workgroup_count_from_slice | |
// Return the three values unchanged as the region's (index, index, index) result.
hal.return %x, %y, %z : index, index, index | |
} | |
builtin.module { | |
func.func @decode_bs4$async_dispatch_28_elementwise_4x4096_bf16xf32xf32xf32xf32xf8E4M3FNUZ() { | |
%c32_i64 = arith.constant 32 : i64 | |
%cst = arith.constant 2.400000e+02 : f32 | |
%cst_0 = arith.constant -2.400000e+02 : f32 | |
%cst_1 = arith.constant 9.99999974E-6 : f32 | |
%cst_2 = arith.constant 4.096000e+03 : f32 | |
%cst_3 = arith.constant 0.000000e+00 : f32 | |
%c2_i64 = arith.constant 2 : i64 | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.constant.load layout(<constants = 9, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32 | |
%1 = hal.interface.constant.load layout(<constants = 9, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32 | |
%2 = hal.interface.constant.load layout(<constants = 9, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(2) : i32 | |
%3 = hal.interface.constant.load layout(<constants = 9, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(3) : i32 | |
%4 = hal.interface.constant.load layout(<constants = 9, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(4) : i32 | |
%5 = hal.interface.constant.load layout(<constants = 9, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(5) : i32 | |
%6 = hal.interface.constant.load layout(<constants = 9, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(6) : i32 | |
%7 = hal.interface.constant.load layout(<constants = 9, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(7) : i32 | |
%8 = hal.interface.constant.load layout(<constants = 9, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(8) : i32 | |
%9 = arith.index_castui %0 : i32 to index | |
%10 = arith.index_castui %1 : i32 to index | |
%11 = arith.index_castui %2 : i32 to index | |
%12 = arith.extui %3 : i32 to i64 | |
%13 = arith.extui %4 : i32 to i64 | |
%14 = arith.shli %13, %c32_i64 : i64 | |
%15 = arith.ori %12, %14 : i64 | |
%16 = arith.index_castui %15 {stream.alignment = 128 : index, stream.values = [1268794240 : index, 1310745984 : index, 1486915328 : index, 1528867072 : index, 1705036416 : index, 1746988160 : index, 1923157504 : index, 1965109248 : index, 2141278592 : index, 2183230336 : index, 2359399680 : index, 2401351424 : index, 2577520768 : index, 2619472512 : index, 2795641856 : index, 2837593600 : index, 3013762944 : index, 3055714688 : index, 3231884032 : index, 3273835776 : index, 3450005120 : index, 3491956864 : index, 3668126208 : index, 3710077952 : index, 3886247296 : index, 3928199040 : index, 4104368384 : index, 4146320128 : index, 4322489472 : index, 4364441216 : index, 4540610560 : index, 4582562304 : index, 4758731648 : index, 4800683392 : index, 4976852736 : index, 5018804480 : index, 5194973824 : index, 5236925568 : index, 5413094912 : index, 5455046656 : index, 5631216000 : index, 5673167744 : index, 5849337088 : index, 5891288832 : index, 6067458176 : index, 6109409920 : index, 6285579264 : index, 6327531008 : index, 6503700352 : index, 6545652096 : index, 6721821440 : index, 6763773184 : index, 6939942528 : index, 6981894272 : index, 7158063616 : index, 7200015360 : index, 7376184704 : index, 7418136448 : index, 7594305792 : index, 7636257536 : index, 7812426880 : index, 7854378624 : index]} : i64 to index | |
%17 = arith.extui %5 : i32 to i64 | |
%18 = arith.extui %6 : i32 to i64 | |
%19 = arith.shli %18, %c32_i64 : i64 | |
%20 = arith.ori %17, %19 : i64 | |
%21 = arith.index_castui %20 {stream.alignment = 128 : index, stream.values = [1268802432 : index, 1310754176 : index, 1486923520 : index, 1528875264 : index, 1705044608 : index, 1746996352 : index, 1923165696 : index, 1965117440 : index, 2141286784 : index, 2183238528 : index, 2359407872 : index, 2401359616 : index, 2577528960 : index, 2619480704 : index, 2795650048 : index, 2837601792 : index, 3013771136 : index, 3055722880 : index, 3231892224 : index, 3273843968 : index, 3450013312 : index, 3491965056 : index, 3668134400 : index, 3710086144 : index, 3886255488 : index, 3928207232 : index, 4104376576 : index, 4146328320 : index, 4322497664 : index, 4364449408 : index, 4540618752 : index, 4582570496 : index, 4758739840 : index, 4800691584 : index, 4976860928 : index, 5018812672 : index, 5194982016 : index, 5236933760 : index, 5413103104 : index, 5455054848 : index, 5631224192 : index, 5673175936 : index, 5849345280 : index, 5891297024 : index, 6067466368 : index, 6109418112 : index, 6285587456 : index, 6327539200 : index, 6503708544 : index, 6545660288 : index, 6721829632 : index, 6763781376 : index, 6939950720 : index, 6981902464 : index, 7158071808 : index, 7200023552 : index, 7376192896 : index, 7418144640 : index, 7594313984 : index, 7636265728 : index, 7812435072 : index, 7854386816 : index]} : i64 to index | |
%22 = arith.index_castui %7 : i32 to index | |
%23 = arith.index_castui %8 : i32 to index | |
%24:7 = util.assume.int | |
%9[<umin = 114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 
245888, umax = 245888, udiv = 245888>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>], | |
%10[<umin = 180352, umax = 180352, udiv = 180352>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 106624, umax = 106624, udiv = 106624>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 106624, umax = 106624, udiv = 106624>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 106624, umax = 106624, udiv = 106624>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 106624, umax = 106624, udiv = 106624>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 106624, umax = 106624, udiv = 106624>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 106624, umax = 106624, udiv = 106624>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 106624, umax = 106624, udiv = 106624>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 106624, umax = 106624, udiv = 106624>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 106624, umax = 106624, udiv = 106624>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 106624, umax = 106624, udiv = 106624>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 106624, umax = 106624, udiv = 106624>, 
<umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 106624, umax = 106624, udiv = 106624>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 106624, umax = 106624, udiv = 106624>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 106624, umax = 106624, udiv = 106624>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 106624, umax = 106624, udiv = 106624>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 114816, umax = 114816, udiv = 114816>], | |
%11[<umin = 1056960, umax = 1056960, udiv = 1056960>, <umin = 1057024, umax = 1057024, udiv = 1057024>, <umin = 1057216, umax = 1057216, udiv = 1057216>, <umin = 1057280, umax = 1057280, udiv = 1057280>, <umin = 1057472, umax = 1057472, udiv = 1057472>, <umin = 1057536, umax = 1057536, udiv = 1057536>, <umin = 1057728, umax = 1057728, udiv = 1057728>, <umin = 1057792, umax = 1057792, udiv = 1057792>, <umin = 1057984, umax = 1057984, udiv = 1057984>, <umin = 1058048, umax = 1058048, udiv = 1058048>, <umin = 1058240, umax = 1058240, udiv = 1058240>, <umin = 1058304, umax = 1058304, udiv = 1058304>, <umin = 1058496, umax = 1058496, udiv = 1058496>, <umin = 1058560, umax = 1058560, udiv = 1058560>, <umin = 1058752, umax = 1058752, udiv = 1058752>, <umin = 1058816, umax = 1058816, udiv = 1058816>, <umin = 1059008, umax = 1059008, udiv = 1059008>, <umin = 1059072, umax = 1059072, udiv = 1059072>, <umin = 1059264, umax = 1059264, udiv = 1059264>, <umin = 1059328, umax = 1059328, udiv = 1059328>, <umin = 1059520, umax = 1059520, udiv = 1059520>, <umin = 1059584, umax = 1059584, udiv = 1059584>, <umin = 1059776, umax = 1059776, udiv = 1059776>, <umin = 1059840, umax = 1059840, udiv = 1059840>, <umin = 1060032, umax = 1060032, udiv = 1060032>, <umin = 1060096, umax = 1060096, udiv = 1060096>, <umin = 1060288, umax = 1060288, udiv = 1060288>, <umin = 1060352, umax = 1060352, udiv = 1060352>, <umin = 1060544, umax = 1060544, udiv = 1060544>, <umin = 1060608, umax = 1060608, udiv = 1060608>, <umin = 1060800, umax = 1060800, udiv = 1060800>, <umin = 1060864, umax = 1060864, udiv = 1060864>, <umin = 1061056, umax = 1061056, udiv = 1061056>, <umin = 1061120, umax = 1061120, udiv = 1061120>, <umin = 1061312, umax = 1061312, udiv = 1061312>, <umin = 1061376, umax = 1061376, udiv = 1061376>, <umin = 1061568, umax = 1061568, udiv = 1061568>, <umin = 1061632, umax = 1061632, udiv = 1061632>, <umin = 1061824, umax = 1061824, udiv = 1061824>, <umin = 1061888, umax = 1061888, udiv = 
1061888>, <umin = 1062080, umax = 1062080, udiv = 1062080>, <umin = 1062144, umax = 1062144, udiv = 1062144>, <umin = 1062336, umax = 1062336, udiv = 1062336>, <umin = 1062400, umax = 1062400, udiv = 1062400>, <umin = 1062592, umax = 1062592, udiv = 1062592>, <umin = 1062656, umax = 1062656, udiv = 1062656>, <umin = 1062848, umax = 1062848, udiv = 1062848>, <umin = 1062912, umax = 1062912, udiv = 1062912>, <umin = 1063104, umax = 1063104, udiv = 1063104>, <umin = 1063168, umax = 1063168, udiv = 1063168>, <umin = 1063360, umax = 1063360, udiv = 1063360>, <umin = 1063424, umax = 1063424, udiv = 1063424>, <umin = 1063616, umax = 1063616, udiv = 1063616>, <umin = 1063680, umax = 1063680, udiv = 1063680>, <umin = 1063872, umax = 1063872, udiv = 1063872>, <umin = 1063936, umax = 1063936, udiv = 1063936>, <umin = 1064128, umax = 1064128, udiv = 1064128>, <umin = 1064192, umax = 1064192, udiv = 1064192>, <umin = 1064384, umax = 1064384, udiv = 1064384>, <umin = 1064448, umax = 1064448, udiv = 1064448>, <umin = 1064640, umax = 1064640, udiv = 1064640>, <umin = 1064704, umax = 1064704, udiv = 1064704>], | |
%16[<umin = 1268794240, umax = 1268794240, udiv = 1268794240>, <umin = 1310745984, umax = 1310745984, udiv = 1310745984>, <umin = 1486915328, umax = 1486915328, udiv = 1486915328>, <umin = 1528867072, umax = 1528867072, udiv = 1528867072>, <umin = 1705036416, umax = 1705036416, udiv = 1705036416>, <umin = 1746988160, umax = 1746988160, udiv = 1746988160>, <umin = 1923157504, umax = 1923157504, udiv = 1923157504>, <umin = 1965109248, umax = 1965109248, udiv = 1965109248>, <umin = 2141278592, umax = 2141278592, udiv = 2141278592>, <umin = 2183230336, umax = 2183230336, udiv = 2183230336>, <umin = 2359399680, umax = 2359399680, udiv = 2359399680>, <umin = 2401351424, umax = 2401351424, udiv = 2401351424>, <umin = 2577520768, umax = 2577520768, udiv = 2577520768>, <umin = 2619472512, umax = 2619472512, udiv = 2619472512>, <umin = 2795641856, umax = 2795641856, udiv = 2795641856>, <umin = 2837593600, umax = 2837593600, udiv = 2837593600>, <umin = 3013762944, umax = 3013762944, udiv = 3013762944>, <umin = 3055714688, umax = 3055714688, udiv = 3055714688>, <umin = 3231884032, umax = 3231884032, udiv = 3231884032>, <umin = 3273835776, umax = 3273835776, udiv = 3273835776>, <umin = 3450005120, umax = 3450005120, udiv = 3450005120>, <umin = 3491956864, umax = 3491956864, udiv = 3491956864>, <umin = 3668126208, umax = 3668126208, udiv = 3668126208>, <umin = 3710077952, umax = 3710077952, udiv = 3710077952>, <umin = 3886247296, umax = 3886247296, udiv = 3886247296>, <umin = 3928199040, umax = 3928199040, udiv = 3928199040>, <umin = 4104368384, umax = 4104368384, udiv = 4104368384>, <umin = 4146320128, umax = 4146320128, udiv = 4146320128>, <umin = 4322489472, umax = 4322489472, udiv = 4322489472>, <umin = 4364441216, umax = 4364441216, udiv = 4364441216>, <umin = 4540610560, umax = 4540610560, udiv = 4540610560>, <umin = 4582562304, umax = 4582562304, udiv = 4582562304>, <umin = 4758731648, umax = 4758731648, udiv = 4758731648>, <umin = 4800683392, umax = 4800683392, udiv = 
4800683392>, <umin = 4976852736, umax = 4976852736, udiv = 4976852736>, <umin = 5018804480, umax = 5018804480, udiv = 5018804480>, <umin = 5194973824, umax = 5194973824, udiv = 5194973824>, <umin = 5236925568, umax = 5236925568, udiv = 5236925568>, <umin = 5413094912, umax = 5413094912, udiv = 5413094912>, <umin = 5455046656, umax = 5455046656, udiv = 5455046656>, <umin = 5631216000, umax = 5631216000, udiv = 5631216000>, <umin = 5673167744, umax = 5673167744, udiv = 5673167744>, <umin = 5849337088, umax = 5849337088, udiv = 5849337088>, <umin = 5891288832, umax = 5891288832, udiv = 5891288832>, <umin = 6067458176, umax = 6067458176, udiv = 6067458176>, <umin = 6109409920, umax = 6109409920, udiv = 6109409920>, <umin = 6285579264, umax = 6285579264, udiv = 6285579264>, <umin = 6327531008, umax = 6327531008, udiv = 6327531008>, <umin = 6503700352, umax = 6503700352, udiv = 6503700352>, <umin = 6545652096, umax = 6545652096, udiv = 6545652096>, <umin = 6721821440, umax = 6721821440, udiv = 6721821440>, <umin = 6763773184, umax = 6763773184, udiv = 6763773184>, <umin = 6939942528, umax = 6939942528, udiv = 6939942528>, <umin = 6981894272, umax = 6981894272, udiv = 6981894272>, <umin = 7158063616, umax = 7158063616, udiv = 7158063616>, <umin = 7200015360, umax = 7200015360, udiv = 7200015360>, <umin = 7376184704, umax = 7376184704, udiv = 7376184704>, <umin = 7418136448, umax = 7418136448, udiv = 7418136448>, <umin = 7594305792, umax = 7594305792, udiv = 7594305792>, <umin = 7636257536, umax = 7636257536, udiv = 7636257536>, <umin = 7812426880, umax = 7812426880, udiv = 7812426880>, <umin = 7854378624, umax = 7854378624, udiv = 7854378624>], | |
%21[<umin = 1268802432, umax = 1268802432, udiv = 1268802432>, <umin = 1310754176, umax = 1310754176, udiv = 1310754176>, <umin = 1486923520, umax = 1486923520, udiv = 1486923520>, <umin = 1528875264, umax = 1528875264, udiv = 1528875264>, <umin = 1705044608, umax = 1705044608, udiv = 1705044608>, <umin = 1746996352, umax = 1746996352, udiv = 1746996352>, <umin = 1923165696, umax = 1923165696, udiv = 1923165696>, <umin = 1965117440, umax = 1965117440, udiv = 1965117440>, <umin = 2141286784, umax = 2141286784, udiv = 2141286784>, <umin = 2183238528, umax = 2183238528, udiv = 2183238528>, <umin = 2359407872, umax = 2359407872, udiv = 2359407872>, <umin = 2401359616, umax = 2401359616, udiv = 2401359616>, <umin = 2577528960, umax = 2577528960, udiv = 2577528960>, <umin = 2619480704, umax = 2619480704, udiv = 2619480704>, <umin = 2795650048, umax = 2795650048, udiv = 2795650048>, <umin = 2837601792, umax = 2837601792, udiv = 2837601792>, <umin = 3013771136, umax = 3013771136, udiv = 3013771136>, <umin = 3055722880, umax = 3055722880, udiv = 3055722880>, <umin = 3231892224, umax = 3231892224, udiv = 3231892224>, <umin = 3273843968, umax = 3273843968, udiv = 3273843968>, <umin = 3450013312, umax = 3450013312, udiv = 3450013312>, <umin = 3491965056, umax = 3491965056, udiv = 3491965056>, <umin = 3668134400, umax = 3668134400, udiv = 3668134400>, <umin = 3710086144, umax = 3710086144, udiv = 3710086144>, <umin = 3886255488, umax = 3886255488, udiv = 3886255488>, <umin = 3928207232, umax = 3928207232, udiv = 3928207232>, <umin = 4104376576, umax = 4104376576, udiv = 4104376576>, <umin = 4146328320, umax = 4146328320, udiv = 4146328320>, <umin = 4322497664, umax = 4322497664, udiv = 4322497664>, <umin = 4364449408, umax = 4364449408, udiv = 4364449408>, <umin = 4540618752, umax = 4540618752, udiv = 4540618752>, <umin = 4582570496, umax = 4582570496, udiv = 4582570496>, <umin = 4758739840, umax = 4758739840, udiv = 4758739840>, <umin = 4800691584, umax = 4800691584, udiv = 
4800691584>, <umin = 4976860928, umax = 4976860928, udiv = 4976860928>, <umin = 5018812672, umax = 5018812672, udiv = 5018812672>, <umin = 5194982016, umax = 5194982016, udiv = 5194982016>, <umin = 5236933760, umax = 5236933760, udiv = 5236933760>, <umin = 5413103104, umax = 5413103104, udiv = 5413103104>, <umin = 5455054848, umax = 5455054848, udiv = 5455054848>, <umin = 5631224192, umax = 5631224192, udiv = 5631224192>, <umin = 5673175936, umax = 5673175936, udiv = 5673175936>, <umin = 5849345280, umax = 5849345280, udiv = 5849345280>, <umin = 5891297024, umax = 5891297024, udiv = 5891297024>, <umin = 6067466368, umax = 6067466368, udiv = 6067466368>, <umin = 6109418112, umax = 6109418112, udiv = 6109418112>, <umin = 6285587456, umax = 6285587456, udiv = 6285587456>, <umin = 6327539200, umax = 6327539200, udiv = 6327539200>, <umin = 6503708544, umax = 6503708544, udiv = 6503708544>, <umin = 6545660288, umax = 6545660288, udiv = 6545660288>, <umin = 6721829632, umax = 6721829632, udiv = 6721829632>, <umin = 6763781376, umax = 6763781376, udiv = 6763781376>, <umin = 6939950720, umax = 6939950720, udiv = 6939950720>, <umin = 6981902464, umax = 6981902464, udiv = 6981902464>, <umin = 7158071808, umax = 7158071808, udiv = 7158071808>, <umin = 7200023552, umax = 7200023552, udiv = 7200023552>, <umin = 7376192896, umax = 7376192896, udiv = 7376192896>, <umin = 7418144640, umax = 7418144640, udiv = 7418144640>, <umin = 7594313984, umax = 7594313984, udiv = 7594313984>, <umin = 7636265728, umax = 7636265728, udiv = 7636265728>, <umin = 7812435072, umax = 7812435072, udiv = 7812435072>, <umin = 7854386816, umax = 7854386816, udiv = 7854386816>], | |
%22[<umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 
114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 114816, umax = 114816, udiv = 114816>, <umin = 49280, umax = 49280, udiv = 49280>, <umin = 180352, umax = 180352, udiv = 180352>], | |
%23[<umin = 245888, umax = 245888, udiv = 245888>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 311424, umax = 311424, udiv = 311424>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 311424, umax = 311424, udiv = 311424>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 311424, umax = 311424, udiv = 311424>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 311424, umax = 311424, udiv = 311424>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 311424, umax = 311424, udiv = 311424>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 311424, umax = 311424, udiv = 311424>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 311424, umax = 311424, udiv = 311424>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 311424, umax = 311424, udiv = 311424>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 311424, umax = 311424, udiv = 311424>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 311424, umax = 311424, udiv = 311424>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 311424, umax 
= 311424, udiv = 311424>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 311424, umax = 311424, udiv = 311424>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 311424, umax = 311424, udiv = 311424>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 311424, umax = 311424, udiv = 311424>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 311424, umax = 311424, udiv = 311424>, <umin = 180352, umax = 180352, udiv = 180352>, <umin = 245888, umax = 245888, udiv = 245888>, <umin = 245888, umax = 245888, udiv = 245888>] | |
: index, index, index, index, index, index, index | |
%25 = hal.interface.binding.subspan layout(<constants = 9, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%24#0) flags("ReadOnly|Indirect") : !iree_tensor_ext.dispatch.tensor<readonly:tensor<4x4096xf32>> | |
%26 = hal.interface.binding.subspan layout(<constants = 9, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%24#1) flags("ReadOnly|Indirect") : !iree_tensor_ext.dispatch.tensor<readonly:tensor<4x4096xf32>> | |
%27 = hal.interface.binding.subspan layout(<constants = 9, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%24#2) flags(ReadOnly) : !iree_tensor_ext.dispatch.tensor<readonly:tensor<f32>> | |
%28 = hal.interface.binding.subspan layout(<constants = 9, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%24#3) flags(ReadOnly) : !iree_tensor_ext.dispatch.tensor<readonly:tensor<4096xbf16>> | |
%29 = hal.interface.binding.subspan layout(<constants = 9, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%24#4) flags(ReadOnly) : !iree_tensor_ext.dispatch.tensor<readonly:tensor<f32>> | |
%30 = hal.interface.binding.subspan layout(<constants = 9, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(3) alignment(64) offset(%24#5) flags(Indirect) : !iree_tensor_ext.dispatch.tensor<writeonly:tensor<4x4096xf32>> | |
%31 = hal.interface.binding.subspan layout(<constants = 9, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(4) alignment(64) offset(%24#6) flags(Indirect) : !iree_tensor_ext.dispatch.tensor<writeonly:tensor<4x4096xf32>> | |
%32 = hal.interface.binding.subspan layout(<constants = 9, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, ReadOnly>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(5) alignment(64) offset(%c0) flags(Indirect) : !iree_tensor_ext.dispatch.tensor<writeonly:tensor<4x4096xf8E4M3FNUZ>> | |
%33 = iree_tensor_ext.dispatch.tensor.load %25, offsets = [0, 0], sizes = [4, 4096], strides = [1, 1] : !iree_tensor_ext.dispatch.tensor<readonly:tensor<4x4096xf32>> -> tensor<4x4096xf32> | |
%34 = iree_tensor_ext.dispatch.tensor.load %26, offsets = [0, 0], sizes = [4, 4096], strides = [1, 1] : !iree_tensor_ext.dispatch.tensor<readonly:tensor<4x4096xf32>> -> tensor<4x4096xf32> | |
%35 = iree_tensor_ext.dispatch.tensor.load %27, offsets = [], sizes = [], strides = [] : !iree_tensor_ext.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32> | |
%36 = iree_tensor_ext.dispatch.tensor.load %28, offsets = [0], sizes = [4096], strides = [1] : !iree_tensor_ext.dispatch.tensor<readonly:tensor<4096xbf16>> -> tensor<4096xbf16> | |
%37 = iree_tensor_ext.dispatch.tensor.load %29, offsets = [], sizes = [], strides = [] : !iree_tensor_ext.dispatch.tensor<readonly:tensor<f32>> -> tensor<f32> | |
%38 = tensor.empty() : tensor<4x4096xf8E4M3FNUZ> | |
%39 = tensor.empty() : tensor<4xf32> | |
%40 = tensor.empty() : tensor<4x4096xf32> | |
%41 = linalg.fill ins(%cst_3 : f32) outs(%39 : tensor<4xf32>) -> tensor<4xf32> | |
%42 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%33, %34, %35 : tensor<4x4096xf32>, tensor<4x4096xf32>, tensor<f32>) outs(%40 : tensor<4x4096xf32>) { | |
^bb0(%in: f32, %in_4: f32, %in_5: f32, %out: f32): | |
%45 = arith.mulf %in_4, %in_5 : f32 | |
%46 = arith.addf %in, %45 : f32 | |
linalg.yield %46 : f32 | |
} -> tensor<4x4096xf32> | |
%43 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%42 : tensor<4x4096xf32>) outs(%41 : tensor<4xf32>) { | |
^bb0(%in: f32, %out: f32): | |
%45 = math.fpowi %in, %c2_i64 : f32, i64 | |
%46 = arith.addf %45, %out : f32 | |
linalg.yield %46 : f32 | |
} -> tensor<4xf32> | |
%44:2 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%36, %42, %43, %37 : tensor<4096xbf16>, tensor<4x4096xf32>, tensor<4xf32>, tensor<f32>) outs(%40, %38 : tensor<4x4096xf32>, tensor<4x4096xf8E4M3FNUZ>) { | |
^bb0(%in: bf16, %in_4: f32, %in_5: f32, %in_6: f32, %out: f32, %out_7: f8E4M3FNUZ): | |
%45 = arith.divf %in_5, %cst_2 : f32 | |
%46 = arith.addf %45, %cst_1 : f32 | |
%47 = math.rsqrt %46 : f32 | |
%48 = arith.mulf %in_4, %47 : f32 | |
%49 = arith.extf %in : bf16 to f32 | |
%50 = arith.mulf %49, %48 : f32 | |
%51 = arith.divf %50, %in_6 : f32 | |
%52 = arith.cmpf ult, %51, %cst_0 : f32 | |
%53 = arith.select %52, %cst_0, %51 : f32 | |
%54 = arith.cmpf ugt, %53, %cst : f32 | |
%55 = arith.select %54, %cst, %53 : f32 | |
%56 = arith.truncf %55 : f32 to f8E4M3FNUZ | |
linalg.yield %50, %56 : f32, f8E4M3FNUZ | |
} -> (tensor<4x4096xf32>, tensor<4x4096xf8E4M3FNUZ>) | |
iree_tensor_ext.dispatch.tensor.store %42, %30, offsets = [0, 0], sizes = [4, 4096], strides = [1, 1] : tensor<4x4096xf32> -> !iree_tensor_ext.dispatch.tensor<writeonly:tensor<4x4096xf32>> | |
iree_tensor_ext.dispatch.tensor.store %44#0, %31, offsets = [0, 0], sizes = [4, 4096], strides = [1, 1] : tensor<4x4096xf32> -> !iree_tensor_ext.dispatch.tensor<writeonly:tensor<4x4096xf32>> | |
iree_tensor_ext.dispatch.tensor.store %44#1, %32, offsets = [0, 0], sizes = [4, 4096], strides = [1, 1] : tensor<4x4096xf8E4M3FNUZ> -> !iree_tensor_ext.dispatch.tensor<writeonly:tensor<4x4096xf8E4M3FNUZ>> | |
return | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment