Created
January 30, 2025 18:22
-
-
Save AmosLewis/3e8a2400d7a1b3da41e815a73e3daf73 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Executable holding the `__builtin_fill_i64` dispatch: it fills a 1-D i64
// tensor with a value taken from a push constant and stores the result into
// the single storage-buffer binding at an offset taken from another push
// constant.
hal.executable public @__builtin_fill_i64 {
  // Single variant, targeting the "rocm" / "rocm-hsaco-fb" backend for gfx942.
  hal.executable.variant public @rocm_hsaco_fb target(<"rocm", "rocm-hsaco-fb", {abi = "hip", iree.gpu.target = #iree_gpu.target<arch = "gfx942", features = "", wgp = <compute = fp64|fp32|fp16|int64|int32|int16|int8, storage = b64|b32|b16|b8, subgroup = shuffle|arithmetic, dot = dp4xi8toi32, mma = [<MFMA_F32_16x16x4_F32>, <MFMA_F32_16x16x16_F16>, <MFMA_F32_32x32x8_F16>, <MFMA_F64_16x16x4_F64>, <MFMA_F32_16x16x16_BF16>, <MFMA_F32_32x32x8_BF16>, <MFMA_F32_16x16x32_F8E5M2FNUZ>, <MFMA_F32_16x16x32_F8E5M2FNUZ_F8E4M3FNUZ>, <MFMA_F32_16x16x32_F8E4M3FNUZ>, <MFMA_F32_16x16x32_F8E4M3FNUZ_F8E5M2FNUZ>, <MFMA_F32_32x32x16_F8E5M2FNUZ>, <MFMA_F32_32x32x16_F8E5M2FNUZ_F8E4M3FNUZ>, <MFMA_F32_32x32x16_F8E4M3FNUZ>, <MFMA_F32_32x32x16_F8E4M3FNUZ_F8E5M2FNUZ>, <MFMA_I32_16x16x32_I8>, <MFMA_I32_32x32x16_I8>], subgroup_size_choices = [64], max_workgroup_sizes = [1024, 1024, 1024], max_thread_count_per_workgroup = 1024, max_workgroup_memory_bytes = 65536, max_workgroup_counts = [2147483647, 2147483647, 2147483647], max_load_instruction_bits = 128, simds_per_wgp = 4, vgpr_space_bits = 16384>>, ukernels = "none"}>) {
    // Entry point (ordinal 0). The pipeline layout declares two push constants
    // and one indirect storage-buffer binding. The workgroup count is derived
    // from the single workload operand %arg1.
    hal.executable.export public @__builtin_fill_i64 ordinal(0) layout(#hal.pipeline.layout<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) {
    ^bb0(%arg0: !hal.device, %arg1: index):
      %x, %y, %z = flow.dispatch.workgroup_count_from_slice %arg1
      hal.return %x, %y, %z : index, index, index
    }
    builtin.module {
      func.func @__builtin_fill_i64() {
        %c1 = arith.constant 1 : index
        // Push constant 0 becomes the binding offset (as index); push constant 1
        // becomes the fill value (zero-extended to i64).
        %0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
        %1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
        %2 = arith.index_castui %0 : i32 to index
        %3 = arith.extui %1 : i32 to i64
        // Range/divisibility assumptions for the offset (%2) and the fill value
        // (%3). Both lists carry 63 entries, laid out four per row so that entry
        // i of one list can be compared with entry i of the other.
        // NOTE(review): presumably one entry per call site of this dispatch;
        // confirm against the pass that produced this IR.
        %4:2 = util.assume.int
            %2[<umin = 1104, umax = 1104, udiv = 1104>, <umin = 1096, umax = 1096, udiv = 1096>, <umin = 1104, umax = 1104, udiv = 1104>, <umin = 1096, umax = 1096, udiv = 1096>,
               <umin = 1104, umax = 1104, udiv = 1104>, <umin = 1096, umax = 1096, udiv = 1096>, <umin = 1104, umax = 1104, udiv = 1104>, <umin = 1096, umax = 1096, udiv = 1096>,
               <umin = 1104, umax = 1104, udiv = 1104>, <umin = 1096, umax = 1096, udiv = 1096>, <umin = 1104, umax = 1104, udiv = 1104>, <umin = 1096, umax = 1096, udiv = 1096>,
               <umin = 1104, umax = 1104, udiv = 1104>, <umin = 1096, umax = 1096, udiv = 1096>, <umin = 1104, umax = 1104, udiv = 1104>, <umin = 1096, umax = 1096, udiv = 1096>,
               <umin = 1104, umax = 1104, udiv = 1104>, <umin = 1096, umax = 1096, udiv = 1096>, <umin = 1104, umax = 1104, udiv = 1104>, <umin = 1096, umax = 1096, udiv = 1096>,
               <umin = 1104, umax = 1104, udiv = 1104>, <umin = 1096, umax = 1096, udiv = 1096>, <umin = 1104, umax = 1104, udiv = 1104>, <umin = 1096, umax = 1096, udiv = 1096>,
               <umin = 1104, umax = 1104, udiv = 1104>, <umin = 1096, umax = 1096, udiv = 1096>, <umin = 1104, umax = 1104, udiv = 1104>, <umin = 1096, umax = 1096, udiv = 1096>,
               <umin = 1104, umax = 1104, udiv = 1104>, <umin = 1096, umax = 1096, udiv = 1096>, <umin = 1104, umax = 1104, udiv = 1104>, <umin = 1096, umax = 1096, udiv = 1096>,
               <umin = 1104, umax = 1104, udiv = 1104>, <umin = 1096, umax = 1096, udiv = 1096>, <umin = 1104, umax = 1104, udiv = 1104>, <umin = 1096, umax = 1096, udiv = 1096>,
               <umin = 1104, umax = 1104, udiv = 1104>, <umin = 1096, umax = 1096, udiv = 1096>, <umin = 1104, umax = 1104, udiv = 1104>, <umin = 1096, umax = 1096, udiv = 1096>,
               <umin = 1104, umax = 1104, udiv = 1104>, <umin = 1096, umax = 1096, udiv = 1096>, <umin = 1104, umax = 1104, udiv = 1104>, <umin = 1096, umax = 1096, udiv = 1096>,
               <umin = 1104, umax = 1104, udiv = 1104>, <umin = 1096, umax = 1096, udiv = 1096>, <umin = 1104, umax = 1104, udiv = 1104>, <umin = 1096, umax = 1096, udiv = 1096>,
               <umin = 1104, umax = 1104, udiv = 1104>, <umin = 1096, umax = 1096, udiv = 1096>, <umin = 1104, umax = 1104, udiv = 1104>, <umin = 1096, umax = 1096, udiv = 1096>,
               <umin = 1104, umax = 1104, udiv = 1104>, <umin = 1096, umax = 1096, udiv = 1096>, <umin = 1104, umax = 1104, udiv = 1104>, <umin = 1096, umax = 1096, udiv = 1096>,
               <umin = 1104, umax = 1104, udiv = 1104>, <umin = 1096, umax = 1096, udiv = 1096>, <umin = 1104, umax = 1104, udiv = 1104>, <umin = 1096, umax = 1096, udiv = 1096>,
               <umin = 1104, umax = 1104, udiv = 1104>, <umin = 1160, umax = 1160, udiv = 1160>, <umin = 1168, umax = 1168, udiv = 1168>],
            %3[<umin = 1, umax = 1>, <umin = 1, umax = 1>, <umin = 1, umax = 1>, <umin = 2, umax = 2, udiv = 2>,
               <umin = 1, umax = 1>, <umin = 3, umax = 3, udiv = 3>, <umin = 1, umax = 1>, <umin = 4, umax = 4, udiv = 4>,
               <umin = 1, umax = 1>, <umin = 5, umax = 5, udiv = 5>, <umin = 1, umax = 1>, <umin = 6, umax = 6, udiv = 6>,
               <umin = 1, umax = 1>, <umin = 7, umax = 7, udiv = 7>, <umin = 1, umax = 1>, <umin = 8, umax = 8, udiv = 8>,
               <umin = 1, umax = 1>, <umin = 9, umax = 9, udiv = 9>, <umin = 1, umax = 1>, <umin = 10, umax = 10, udiv = 10>,
               <umin = 1, umax = 1>, <umin = 11, umax = 11, udiv = 11>, <umin = 1, umax = 1>, <umin = 12, umax = 12, udiv = 12>,
               <umin = 1, umax = 1>, <umin = 13, umax = 13, udiv = 13>, <umin = 1, umax = 1>, <umin = 14, umax = 14, udiv = 14>,
               <umin = 1, umax = 1>, <umin = 15, umax = 15, udiv = 15>, <umin = 1, umax = 1>, <umin = 16, umax = 16, udiv = 16>,
               <umin = 1, umax = 1>, <umin = 17, umax = 17, udiv = 17>, <umin = 1, umax = 1>, <umin = 18, umax = 18, udiv = 18>,
               <umin = 1, umax = 1>, <umin = 19, umax = 19, udiv = 19>, <umin = 1, umax = 1>, <umin = 20, umax = 20, udiv = 20>,
               <umin = 1, umax = 1>, <umin = 21, umax = 21, udiv = 21>, <umin = 1, umax = 1>, <umin = 22, umax = 22, udiv = 22>,
               <umin = 1, umax = 1>, <umin = 23, umax = 23, udiv = 23>, <umin = 1, umax = 1>, <umin = 24, umax = 24, udiv = 24>,
               <umin = 1, umax = 1>, <umin = 25, umax = 25, udiv = 25>, <umin = 1, umax = 1>, <umin = 26, umax = 26, udiv = 26>,
               <umin = 1, umax = 1>, <umin = 27, umax = 27, udiv = 27>, <umin = 1, umax = 1>, <umin = 28, umax = 28, udiv = 28>,
               <umin = 1, umax = 1>, <umin = 29, umax = 29, udiv = 29>, <umin = 1, umax = 1>, <umin = 30, umax = 30, udiv = 30>,
               <umin = 1, umax = 1>, <umin = 31, umax = 31, udiv = 31>, <umin = 1, umax = 1>]
            : index, i64
        // Workload ordinal 0 is tied to the constant %c1 and is used below as the
        // dynamic extent of the output tensor.
        // NOTE(review): the export region computes the workgroup count from
        // %arg1 while the body uses the constant 1 here; confirm this is the
        // intended result of constant propagation and not a lost operand.
        %5 = flow.dispatch.workload.ordinal %c1, 0 : index
        // Write-only view of binding 0, starting at the assumed offset %4#0.
        %6 = hal.interface.binding.subspan layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%4#0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<?xi64>>{%5}
        // Fill a fresh tensor of %5 elements with the value %4#1, then store the
        // whole tensor into the binding.
        %7 = tensor.empty(%5) : tensor<?xi64>
        %8 = linalg.fill ins(%4#1 : i64) outs(%7 : tensor<?xi64>) -> tensor<?xi64>
        flow.dispatch.tensor.store %8, %6, offsets = [0], sizes = [%5], strides = [1] : tensor<?xi64> -> !flow.dispatch.tensor<writeonly:tensor<?xi64>>{%5}
        return
      }
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment