@pashu123 · Created September 25, 2024 15:57
module {
  func.func @_reduce_sum_2x3xi32_dim0_dispatch_0_generic_3x2_i32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} {
    %c0 = arith.constant 0 : index
    %0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<i32>>
    %1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<2x3xi32>>
    %2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<3xi32>>
    %3 = flow.dispatch.tensor.load %0, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:tensor<i32>> -> tensor<i32>
    %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [2, 3], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x3xi32>> -> tensor<2x3xi32>
    %5 = tensor.empty() : tensor<3xi32>
    %extracted = tensor.extract %3[] : tensor<i32>
    %6 = linalg.fill {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[1], [1], [0], [0]]>} ins(%extracted : i32) outs(%5 : tensor<3xi32>) -> tensor<3xi32>
    %7 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%4 : tensor<2x3xi32>) outs(%6 : tensor<3xi32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[1, 0], [1, 0], [0, 1], [0, 0]]>} {
    ^bb0(%in: i32, %out: i32):
      %8 = arith.addi %out, %in : i32
      linalg.yield %8 : i32
    } -> tensor<3xi32>
    flow.dispatch.tensor.store %7, %2, offsets = [0], sizes = [3], strides = [1] : tensor<3xi32> -> !flow.dispatch.tensor<writeonly:tensor<3xi32>>
    return
  }
}
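
For reference, here is a minimal NumPy sketch of what this dispatch appears to compute: the scalar i32 from binding 0 seeds every element of the 3-element result via linalg.fill, and the linalg.generic (parallel over d0, reduction over d1, with the input indexed as (d1, d0)) then accumulates the 2x3 input from binding 1 along dimension 0. The function name reduce_sum_dim0 and the example values below are illustrative, not part of the gist, and this is a semantic sketch only, not IREE output.

# Semantic sketch (assumptions noted above), not generated by IREE.
import numpy as np

def reduce_sum_dim0(init_scalar, x):
    # Seed every element of the 3-element output with the scalar (the fill),
    # then accumulate the 2x3 input over its rows (the reduction dimension).
    out = np.full((3,), init_scalar, dtype=np.int32)
    out += x.sum(axis=0, dtype=np.int32)
    return out

# Example (made-up values): init = 0, x = [[1, 2, 3], [4, 5, 6]] -> [5 7 9]
print(reduce_sum_dim0(np.int32(0), np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32)))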