Created
February 19, 2025 18:08
-
-
Save pashu123/cb2d7f4880d26acf400130eaaea6528f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
hal.executable public @encode_prompts$async_dispatch_178 { | |
hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb", {abi = "hip", iree.gpu.target = #iree_gpu.target<arch = "gfx942", features = "", wgp = <compute = fp64|fp32|fp16|int64|int32|int16|int8, storage = b64|b32|b16|b8, subgroup = shuffle|arithmetic, dot = dp4xi8toi32, mma = [<MFMA_F32_16x16x4_F32>, <MFMA_F32_16x16x16_F16>, <MFMA_F32_32x32x8_F16>, <MFMA_F64_16x16x4_F64>, <MFMA_F32_16x16x16_BF16>, <MFMA_F32_32x32x8_BF16>, <MFMA_F32_16x16x32_F8E5M2FNUZ>, <MFMA_F32_16x16x32_F8E5M2FNUZ_F8E4M3FNUZ>, <MFMA_F32_16x16x32_F8E4M3FNUZ>, <MFMA_F32_16x16x32_F8E4M3FNUZ_F8E5M2FNUZ>, <MFMA_F32_32x32x16_F8E5M2FNUZ>, <MFMA_F32_32x32x16_F8E5M2FNUZ_F8E4M3FNUZ>, <MFMA_F32_32x32x16_F8E4M3FNUZ>, <MFMA_F32_32x32x16_F8E4M3FNUZ_F8E5M2FNUZ>, <MFMA_I32_16x16x32_I8>, <MFMA_I32_32x32x16_I8>], subgroup_size_choices = [64], max_workgroup_sizes = [1024, 1024, 1024], max_thread_count_per_workgroup = 1024, max_workgroup_memory_bytes = 65536, max_workgroup_counts = [2147483647, 2147483647, 2147483647], max_load_instruction_bits = 128, simds_per_wgp = 4, vgpr_space_bits = 16384>>, ukernels = "none"}>) { | |
// Export record for the dispatch entry point (ordinal 0).
// The pipeline layout declares 2 push constants and 2 storage-buffer bindings:
// binding 0 is flagged "ReadOnly|Indirect", binding 1 is flagged Indirect.
// The region takes the HAL device and returns the x/y/z workgroup counts
// produced by flow.dispatch.workgroup_count_from_slice.
hal.executable.export public @encode_prompts$async_dispatch_178_generic_64x1280_f32
    ordinal(0)
    layout(#hal.pipeline.layout<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) {
^bb0(%device: !hal.device):
  %wg_x, %wg_y, %wg_z = flow.dispatch.workgroup_count_from_slice
  hal.return %wg_x, %wg_y, %wg_z : index, index, index
}
builtin.module { | |
func.func @encode_prompts$async_dispatch_178_generic_64x1280_f32() { | |
// 1280.0: divisor applied to each row sum in the final linalg.generic (%13).
%cst = arith.constant 1.280000e+03 : f32 | |
// 0.0: initial value written into the reduction accumulator by linalg.fill (%11).
%cst_0 = arith.constant 0.000000e+00 : f32 | |
// Load the two i32 push constants (ordinals 0 and 1) declared by the pipeline layout.
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32 | |
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32 | |
// Reinterpret both constants as unsigned and widen to index; after passing
// through util.assume.int they are used as the subspan offsets for
// binding 0 (%2 -> %4#0) and binding 1 (%3 -> %4#1).
%2 = arith.index_castui %0 : i32 to index | |
%3 = arith.index_castui %1 : i32 to index | |
%4:2 = util.assume.int | |
%2[<umin = 688128, umax = 688128, udiv = 688128>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 
98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, 
<umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 589824, umax = 589824, udiv = 589824>, <umin = 753664, umax = 
753664, udiv = 753664>], | |
%3[<umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 
262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 262144, umax = 262144, udiv = 262144>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 425984, umax = 425984, udiv = 425984>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 
98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>, <umin = 98304, umax = 98304, udiv = 98304>] | |
: index, index | |
// Input view: binding 0 at offset %4#0 (first result of the util.assume.int
// above), 64-aligned, exposed as a read-only 64x1280 f16 dispatch tensor.
%5 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> | |
// Output view: binding 1 at offset %4#1, 64-aligned, exposed as a write-only
// 64-element f32 dispatch tensor.
%6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%4#1) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<64xf32>> | |
// Load the whole 64x1280 input (zero offsets, unit strides) as a tensor value.
%7 = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [64, 1280], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<64x1280xf16>> -> tensor<64x1280xf16> | |
// %8: 64xf32 destination, used both as the fill target for the reduction
// accumulator (%11) and as the outs operand of the final divide (%13).
%8 = tensor.empty() : tensor<64xf32> | |
// %9: 64x1280xf32 destination for the f16 -> f32 widened copy (%10).
%9 = tensor.empty() : tensor<64x1280xf32> | |
// Element-wise widening: both dimensions are parallel and both indexing maps
// are the identity, so every f16 element of %7 is extended to f32 and written
// to the same position of the 64x1280 result. The incoming output element is
// not read by the body.
%10 = linalg.generic {
    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
    iterator_types = ["parallel", "parallel"]
  } ins(%7 : tensor<64x1280xf16>) outs(%9 : tensor<64x1280xf32>) {
^bb0(%half_elem: f16, %dst_unused: f32):
  %widened = arith.extf %half_elem : f16 to f32
  linalg.yield %widened : f32
} -> tensor<64x1280xf32>
// Zero-initialise the 64-element accumulator with %cst_0.
%11 = linalg.fill ins(%cst_0 : f32) outs(%8 : tensor<64xf32>) -> tensor<64xf32>
// Row-wise sum: d0 is parallel and d1 is a reduction, and the output map drops
// d1, so each of the 64 outputs accumulates the 1280 f32 values of its row.
%12 = linalg.generic {
    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>],
    iterator_types = ["parallel", "reduction"]
  } ins(%10 : tensor<64x1280xf32>) outs(%11 : tensor<64xf32>) {
^bb0(%elem: f32, %acc: f32):
  %partial = arith.addf %elem, %acc : f32
  linalg.yield %partial : f32
} -> tensor<64xf32>
// Scale each row sum by 1/1280: divides every element of %12 by %cst
// (1280.0), turning the per-row sums into per-row means. The outs operand %8
// only provides the result tensor; its element value is not read by the body.
%13 = linalg.generic {
    indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
    iterator_types = ["parallel"]
  } ins(%12 : tensor<64xf32>) outs(%8 : tensor<64xf32>) {
^bb0(%row_sum: f32, %dst_unused: f32):
  %row_mean = arith.divf %row_sum, %cst : f32
  linalg.yield %row_mean : f32
} -> tensor<64xf32>
// Write all 64 f32 results (%13) to the write-only output binding view %6
// (offset 0, unit stride), then end the dispatch function.
flow.dispatch.tensor.store %13, %6, offsets = [0], sizes = [64], strides = [1] : tensor<64xf32> -> !flow.dispatch.tensor<writeonly:tensor<64xf32>> | |
return | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment