Skip to content

Instantly share code, notes, and snippets.

@pashu123
Created August 19, 2024 10:56
Show Gist options
  • Save pashu123/e417e05bce68fe87094565c1eda0927d to your computer and use it in GitHub Desktop.
Save pashu123/e417e05bce68fe87094565c1eda0927d to your computer and use it in GitHub Desktop.
// Indexing maps for the 3-D linalg.generic in the first dispatch region:
// #map indexes the 2-D input with (d1, d2) only, so the same input element is
// read for every value of d0; #map1 is the identity map used for the output.
#map = affine_map<(d0, d1, d2) -> (d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
// Maps listed, in this order, as `user_indexing_maps` in every
// #iree_encoding.encoding attribute in the module, over a 4-D iteration space.
// Matched against the encoded tensor types: operand 0 (?x?x3200) uses
// (d0, d1, d3), operand 1 (?x8640x3200) uses (d0, d2, d3), and operand 2
// (?x?x8640) uses (d0, d1, d2).
#map2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
#map3 = affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>
#map4 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
// Module holding a single public entry point, @matmul_broad.
module {
// @matmul_broad: takes two HAL buffer views and returns one. Per the
// iree.abi.declaration string, the inputs are tensor<?x?x3200xf32> and
// tensor<8640x3200xf16>, and the output is tensor<?x?x8640xf32>.
// The body (1) broadcasts the 2-D f16 input along a new leading dynamic dim,
// (2) attaches matmul encodings to both operands, (3) runs a zero-initialized
// linalg.batch_matmul_transpose_b on the encoded tensors, and (4) strips the
// encoding and exports the result.
util.func public @matmul_broad(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_broad(%input0: tensor<?x?x3200xf32>, %input1: tensor<8640x3200xf16>) -> (%output0: tensor<?x?x8640xf32>)"}} {
// Zero constant used to fill the matmul accumulator.
%cst = arith.constant 0.000000e+00 : f32
// %0 / %1: the two dynamic leading dimensions of the first input, read from
// its buffer view.
%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
// Import both buffer views as tensors; only input0 carries dynamic dims.
%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?x3200xf32>{%0, %1}
%3 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<8640x3200xf16>
// Dispatch region 1: broadcast %3 (8640x3200) to ?x8640x3200, where the
// leading dim has size %0. The generic's body just yields the input element,
// and the input map (#map) ignores d0.
%4 = flow.dispatch.region -> (tensor<?x8640x3200xf16>{%0}) {
%9 = tensor.empty(%0) : tensor<?x8640x3200xf16>
%10 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%3 : tensor<8640x3200xf16>) outs(%9 : tensor<?x8640x3200xf16>) {
^bb0(%in: f16, %out: f16):
linalg.yield %in : f16
} -> tensor<?x8640x3200xf16>
flow.return %10 : tensor<?x8640x3200xf16>
}
// Tag the broadcasted f16 tensor as operand 1 of a matmul with element types
// [f32, f16, f32].
%5 = iree_encoding.set_encoding %4 : tensor<?x8640x3200xf16> -> tensor<?x8640x3200xf16, #iree_encoding.encoding<operand_index = 1 : index, op_type = matmul, element_types = [f32, f16, f32], original_type = tensor<?x8640x3200xf16>, user_indexing_maps = [#map2, #map3, #map4], round_dims_to = array<i64: 32, 32, 32>>>
// Tag the f32 input as operand 0 of the same matmul.
%6 = iree_encoding.set_encoding %2 : tensor<?x?x3200xf32> -> tensor<?x?x3200xf32, #iree_encoding.encoding<operand_index = 0 : index, op_type = matmul, element_types = [f32, f16, f32], original_type = tensor<?x?x3200xf32>, user_indexing_maps = [#map2, #map3, #map4], round_dims_to = array<i64: 32, 32, 32>>>
// Dispatch region 2: the encoded batch matmul. Note %9 and %10 here are
// region-local values, distinct from those defined in dispatch region 1.
%7 = flow.dispatch.region -> (tensor<?x?x8640xf32>{%0, %1}) {
// Accumulator: empty tensor carrying the operand-2 (result) encoding, sized
// by %0 x %1, then filled with %cst (0.0).
%9 = tensor.empty(%0, %1) : tensor<?x?x8640xf32, #iree_encoding.encoding<operand_index = 2 : index, op_type = matmul, element_types = [f32, f16, f32], original_type = tensor<?x?x8640xf32>, user_indexing_maps = [#map2, #map3, #map4], round_dims_to = array<i64: 32, 32, 32>>>
%10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<?x?x8640xf32, #iree_encoding.encoding<operand_index = 2 : index, op_type = matmul, element_types = [f32, f16, f32], original_type = tensor<?x?x8640xf32>, user_indexing_maps = [#map2, #map3, #map4], round_dims_to = array<i64: 32, 32, 32>>>) -> tensor<?x?x8640xf32, #iree_encoding.encoding<operand_index = 2 : index, op_type = matmul, element_types = [f32, f16, f32], original_type = tensor<?x?x8640xf32>, user_indexing_maps = [#map2, #map3, #map4], round_dims_to = array<i64: 32, 32, 32>>>
// Mixed-precision batch matmul: encoded f32 LHS (%6) and encoded f16 RHS
// (%5), written into the zero-filled f32 accumulator (%10).
%11 = linalg.batch_matmul_transpose_b ins(%6, %5 : tensor<?x?x3200xf32, #iree_encoding.encoding<operand_index = 0 : index, op_type = matmul, element_types = [f32, f16, f32], original_type = tensor<?x?x3200xf32>, user_indexing_maps = [#map2, #map3, #map4], round_dims_to = array<i64: 32, 32, 32>>>, tensor<?x8640x3200xf16, #iree_encoding.encoding<operand_index = 1 : index, op_type = matmul, element_types = [f32, f16, f32], original_type = tensor<?x8640x3200xf16>, user_indexing_maps = [#map2, #map3, #map4], round_dims_to = array<i64: 32, 32, 32>>>) outs(%10 : tensor<?x?x8640xf32, #iree_encoding.encoding<operand_index = 2 : index, op_type = matmul, element_types = [f32, f16, f32], original_type = tensor<?x?x8640xf32>, user_indexing_maps = [#map2, #map3, #map4], round_dims_to = array<i64: 32, 32, 32>>>) -> tensor<?x?x8640xf32, #iree_encoding.encoding<operand_index = 2 : index, op_type = matmul, element_types = [f32, f16, f32], original_type = tensor<?x?x8640xf32>, user_indexing_maps = [#map2, #map3, #map4], round_dims_to = array<i64: 32, 32, 32>>>
// Drop the encoding to get back a plain tensor<?x?x8640xf32>.
%12 = iree_encoding.unset_encoding %11 : tensor<?x?x8640xf32, #iree_encoding.encoding<operand_index = 2 : index, op_type = matmul, element_types = [f32, f16, f32], original_type = tensor<?x?x8640xf32>, user_indexing_maps = [#map2, #map3, #map4], round_dims_to = array<i64: 32, 32, 32>>> -> tensor<?x?x8640xf32>
// Slice at offset 0 with unit strides and sizes %0 x %1 x 8640; source and
// result types are identical.
// NOTE(review): presumably this trims any padding introduced by the encoding
// (round_dims_to) back to the original extents — confirm.
%extracted_slice = tensor.extract_slice %12[0, 0, 0] [%0, %1, 8640] [1, 1, 1] : tensor<?x?x8640xf32> to tensor<?x?x8640xf32>
flow.return %extracted_slice : tensor<?x?x8640xf32>
}
// Export the result as "output0" with dynamic dims %0, %1 and return it.
%8 = hal.tensor.export %7 "output0" : tensor<?x?x8640xf32>{%0, %1} -> !hal.buffer_view
util.return %8 : !hal.buffer_view
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment