Skip to content

Instantly share code, notes, and snippets.

@pashu123
Created October 16, 2024 14:56
Show Gist options
  • Save pashu123/8edc0cb1896d666f62e4f72829f94995 to your computer and use it in GitHub Desktop.
Save pashu123/8edc0cb1896d666f62e4f72829f94995 to your computer and use it in GitHub Desktop.
// Tensor-level form of the scatter dispatch: loads three bindings, runs an
// iree_linalg_ext.scatter inside a 2-iteration scf.forall, and stores the
// forall result back to binding 2.
func.func @scatter_dispatch_0_scatter_2x2xi32_dispatch_tensor_store() attributes {translation_info = #iree_codegen.translation_info<CPUDefault>} {
%c0 = arith.constant 0 : index
// All three binding offsets are %c0, each annotated with the range umin = 0, umax = 0.
%0:3 = util.assume.int
%c0<umin = 0, umax = 0>,
%c0<umin = 0, umax = 0>,
%c0<umin = 0, umax = 0>
: index, index, index
// Bindings 0 and 1 are read-only (tensor<2xi32> and tensor<2x2xi32>);
// binding 2 is the read-write tensor<2x2xi32> destination.
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%0#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<2xi32>>
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%0#1) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<2x2xi32>>
%3 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%0#2) flags(Indirect) : !flow.dispatch.tensor<readwrite:tensor<2x2xi32>>
// Load the full extent of every binding; %6 is the current content of the
// read-write destination and seeds the forall's shared output below.
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [2], strides = [1] : !flow.dispatch.tensor<readonly:tensor<2xi32>> -> tensor<2xi32>
%5 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [2, 2], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x2xi32>> -> tensor<2x2xi32>
%6 = flow.dispatch.tensor.load %3, offsets = [0, 0], sizes = [2, 2], strides = [1, 1] : !flow.dispatch.tensor<readwrite:tensor<2x2xi32>> -> tensor<2x2xi32>
// Two iterations mapped to workgroup dimension x; %arg1 is the shared output, initialized from %6.
%7 = scf.forall (%arg0) in (2) shared_outs(%arg1 = %6) -> (tensor<2x2xi32>) {
// Per-iteration inputs: element %arg0 of %4 and row %arg0 of %5.
%extracted_slice = tensor.extract_slice %4[%arg0] [1] [1] : tensor<2xi32> to tensor<1xi32>
%extracted_slice_0 = tensor.extract_slice %5[%arg0, 0] [1, 2] [1, 1] : tensor<2x2xi32> to tensor<1x2xi32>
// The scatter's output operand is the whole shared output %arg1, not a slice of it.
// NOTE(review): by iree_linalg_ext.scatter operand order the first input is presumably
// the updates and the second the indices — confirm against the op definition.
%8 = iree_linalg_ext.scatter {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[1]]>} dimension_map = [0, 1] unique_indices(true) ins(%extracted_slice, %extracted_slice_0 : tensor<1xi32>, tensor<1x2xi32>) outs(%arg1 : tensor<2x2xi32>) {
^bb0(%arg2: i32, %arg3: i32):
// Yields the first region argument unchanged; %arg3 is unused.
iree_linalg_ext.yield %arg2 : i32
} -> tensor<2x2xi32>
scf.forall.in_parallel {
// Every iteration inserts its full 2x2 result at offset [0, 0], i.e. over the
// entire shared output rather than a per-iteration slice.
tensor.parallel_insert_slice %8 into %arg1[0, 0] [2, 2] [1, 1] : tensor<2x2xi32> into tensor<2x2xi32>
}
} {mapping = [#iree_codegen.workgroup_mapping<x>]}
// Write the forall result back over the whole of binding 2.
flow.dispatch.tensor.store %7, %3, offsets = [0, 0], sizes = [2, 2], strides = [1, 1] : tensor<2x2xi32> -> !flow.dispatch.tensor<readwrite:tensor<2x2xi32>>
return
}
// -----// IR Dump After IREEComprehensiveBufferizePass (iree-codegen-iree-comprehensive-bufferize) //----- //
// Bufferized (memref) form of the scatter dispatch: the scatter writes into a
// function-local alloca, which is then copied into binding 2 inside the forall.
func.func @scatter_dispatch_0_scatter_2x2xi32_dispatch_tensor_store() attributes {translation_info = #iree_codegen.translation_info<CPUDefault>} {
%c0 = arith.constant 0 : index
// Temporary 2x2xi32 buffer, allocated once outside the forall and used as the scatter output below.
// No op in this function writes to it before the scatter does.
%alloca = memref.alloca() {alignment = 64 : i64} : memref<2x2xi32>
// All three binding offsets are %c0, each annotated with the range umin = 0, umax = 0.
%0:3 = util.assume.int
%c0<umin = 0, umax = 0>,
%c0<umin = 0, umax = 0>,
%c0<umin = 0, umax = 0>
: index, index, index
// Bindings 0, 1 and 2 as strided memrefs with a dynamic offset. Each subspan declares
// alignment(64) but is followed by a memref.assume_alignment of 1.
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%0#0) flags("ReadOnly|Indirect") : memref<2xi32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %1, 1 : memref<2xi32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%0#1) flags("ReadOnly|Indirect") : memref<2x2xi32, strided<[2, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %2, 1 : memref<2x2xi32, strided<[2, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%3 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%0#2) flags(Indirect) : memref<2x2xi32, strided<[2, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
memref.assume_alignment %3, 1 : memref<2x2xi32, strided<[2, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
// Two iterations mapped to workgroup dimension x; both use the same %alloca.
scf.forall (%arg0) in (2) {
// Per-iteration inputs: element %arg0 of binding 0 and row %arg0 of binding 1.
%subview = memref.subview %1[%arg0] [1] [1] : memref<2xi32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1xi32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>
%subview_0 = memref.subview %2[%arg0, 0] [1, 2] [1, 1] : memref<2x2xi32, strided<[2, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<1x2xi32, strided<[2, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
// The scatter's output is %alloca rather than the binding-2 buffer %3.
// NOTE(review): %alloca is never initialized from %3 in the visible IR, so any element the
// scatter does not write would presumably be copied out below with undefined contents — confirm.
iree_linalg_ext.scatter {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[1]]>} dimension_map = [0, 1] unique_indices(true) ins(%subview, %subview_0 : memref<1xi32, strided<[1], offset: ?>, #hal.descriptor_type<storage_buffer>>, memref<1x2xi32, strided<[2, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>) outs(%alloca : memref<2x2xi32>) {
^bb0(%arg1: i32, %arg2: i32):
// Yields the first region argument unchanged; %arg2 is unused.
iree_linalg_ext.yield %arg1 : i32
}
// Full-extent view of binding 2 (offset [0, 0], size [2, 2]).
%subview_1 = memref.subview %3[0, 0] [2, 2] [1, 1] : memref<2x2xi32, strided<[2, 1], offset: ?>, #hal.descriptor_type<storage_buffer>> to memref<2x2xi32, strided<[2, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>
// Element-wise identity copy of the entire %alloca into binding 2, performed by every iteration.
linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%alloca : memref<2x2xi32>) outs(%subview_1 : memref<2x2xi32, strided<[2, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>) {
^bb0(%in: i32, %out: i32):
linalg.yield %in : i32
}
} {mapping = [#iree_codegen.workgroup_mapping<x>]}
// Identity copy whose input and output are both %3 (binding 2 copied onto itself).
linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%3 : memref<2x2xi32, strided<[2, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>) outs(%3 : memref<2x2xi32, strided<[2, 1], offset: ?>, #hal.descriptor_type<storage_buffer>>) {
^bb0(%in: i32, %out: i32):
linalg.yield %in : i32
}
return
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment