pashu123’s gists

pashu123 / new_ir.mlir

Created June 27, 2024 16:17

	util.func public @matmul_broad(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_broad(%input0: tensor<?x?x3200xf32>, %input1: tensor<8640x3200xf16>) -> (%output0: tensor<?x?x8640xf32>)"}} {
	%cst = arith.constant 0.000000e+00 : f16
	%c1 = arith.constant 1 : index
	%c0 = arith.constant 0 : index
	%cst_0 = arith.constant 0.000000e+00 : f32
	%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
	%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
	%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?x3200xf32>{%0, %1}
	%3 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<8640x3200xf16>
	%4 = tensor.empty(%0) : tensor<?x8640x3200xf16>

pashu123 / new_ir.mlir

Created June 27, 2024 16:25

	%46 = linalg.batch_mmt4d {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[1, 1, 1, 0, 0, 0, 0], [1, 1, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 0, 16, 16, 0], [0, 0, 0, 1, 0, 0, 1], [0, 0, 0, 0, 0, 0, 0]]>} ins(%41, %42 : tensor<?x?x3200x16x1xf32>, tensor<?x540x3200x16x1xf16>) outs(%45 : tensor<?x?x540x16x16xf32>) -> tensor<?x?x540x16x16xf32>
	util.func public @matmul_broad(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_broad(%input0: tensor<?x?x3200xf32>, %input1: tensor<8640x3200xf16>) -> (%output0: tensor<?x?x8640xf32>)"}} {
	%cst = arith.constant 0.000000e+00 : f16
	%c1 = arith.constant 1 : index
	%c0 = arith.constant 0 : index
	%cst_0 = arith.constant 0.000000e+00 : f32
	%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
	%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index
	%2 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<?x?x

pashu123 / ir.mlir

Created June 28, 2024 22:11

	#matmul_config = #iree_codegen.lowering_config<tile_sizes = [[1, 1, 0, 0, 0, 0], [1, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [1, 1, 0, 16, 16, 0], [0, 0, 1, 0, 0, 1], [0, 0, 0, 0, 0, 0]]>
	#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "znver4", cpu_features = "+avx512f", native_vector_size = 64 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>
	func.func @mmt4d_bias_relu_fusion_dispatch_0_generic_DxDx16x16_f32() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
	%c0 = arith.constant 0 : index
	%c32_i64 = arith.constant 32 : i64
	%cst = arith.constant 0.000000e+00 : f32
	%0 = hal.interface.constant.load[0] : i32
	%1 = hal.interface.constant.load[1] : i32
	%2 = hal.interface.constant.load[2] : i32
	%3 = hal.interface.constant.load[3] : i32

Created August 9, 2024 18:42

	// Matmul-accumulate on 2-D f16 tensors whose dimensions are all dynamic
	// (`?x?`). The `%lhs` and `%rhs` operands feed a single `linalg.matmul`,
	// with `%acc` supplied as its `outs` (init/accumulator) operand; the op's
	// tensor result is returned unchanged.
	func.func @matmul_accumulate_DYNxDYNxf16_times_DYNxDYNxf16_into_DYNxDYNxf16(
	    %lhs: tensor<?x?xf16>,
	    %rhs: tensor<?x?xf16>,
	    %acc: tensor<?x?xf16>) -> tensor<?x?xf16> {
	  // Inputs, accumulator and result all share the same element type (f16).
	  %product = linalg.matmul
	      ins(%lhs, %rhs : tensor<?x?xf16>, tensor<?x?xf16>)
	      outs(%acc : tensor<?x?xf16>) -> tensor<?x?xf16>
	  func.return %product : tensor<?x?xf16>
	}

	func.func @matmul_accumulate_1x1xf16_times_1x1xf16_into_1x1xf16(%lhs: tensor<1x1xf16>, %rhs: tensor<1x1xf16>, %acc: tensor<1x1xf16>) -> tensor<1x1xf16> {
	%result = linalg.matmul ins(%lhs, %rhs: tensor<1x1xf16>, tensor<1x1xf16>) outs(%acc: tensor<1x1xf16>) -> tensor<1x1xf16>

	return %result: tensor<1x1xf16>

pashu123 / double_generic.mlir

Created August 12, 2024 12:34

	#map = affine_map<(d0) -> (d0)>
	#map1 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
	module {
	func.func @main_graph_dispatch_47_elementwise_64x56x56_f32(%arg0: tensor<200704xi8>, %arg1: tensor<64x56x56xf32>) -> tensor<64x56x56xf32> {
	%cst = arith.constant 0.000000e+00 : f32
	%cst_0 = arith.constant -1.280000e+02 : f32
	%cst_1 = arith.constant 1.270000e+02 : f32
	%cst_2 = arith.constant 1.562500e-02 : f32
	%0 = tensor.empty() : tensor<64x56x56xf32>
	%1 = tensor.empty() : tensor<200704xf32>

pashu123 / bug.mlir

Created August 19, 2024 10:56

	#map = affine_map<(d0, d1, d2) -> (d1, d2)>
	#map1 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
	#map2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
	#map3 = affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>
	#map4 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
	module {
	util.func public @matmul_broad(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub, iree.reflection = {iree.abi.declaration = "sync func @matmul_broad(%input0: tensor<?x?x3200xf32>, %input1: tensor<8640x3200xf16>) -> (%output0: tensor<?x?x8640xf32>)"}} {
	%cst = arith.constant 0.000000e+00 : f32
	%0 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[0] : index
	%1 = hal.buffer_view.dim<%arg0 : !hal.buffer_view>[1] : index

pashu123 / 2ndbug.mlir

Created August 19, 2024 16:46

	func.func @matmul_broad_dispatch_1_set_encoding_LHS_DxDx3200() {
	%c0 = arith.constant 0 : index
	%c32_i64 = arith.constant 32 : i64
	%0 = hal.interface.constant.load layout(<push_constants = 4, sets = [<0, bindings = [<0, storage_buffer, Indirect>], flags = Indirect>]>) ordinal(0) : i32
	%1 = hal.interface.constant.load layout(<push_constants = 4, sets = [<0, bindings = [<0, storage_buffer, Indirect>], flags = Indirect>]>) ordinal(1) : i32
	%2 = hal.interface.constant.load layout(<push_constants = 4, sets = [<0, bindings = [<0, storage_buffer, Indirect>], flags = Indirect>]>) ordinal(2) : i32
	%3 = hal.interface.constant.load layout(<push_constants = 4, sets = [<0, bindings = [<0, storage_buffer, Indirect>], flags = Indirect>]>) ordinal(3) : i32
	%4 = arith.extui %0 : i32 to i64
	%5 = arith.extui %1 : i32 to i64
	%6 = arith.shli %5, %c32_i64 : i64

pashu123 / all.txt

Created August 19, 2024 17:36

This file has been truncated, but you can view the full file.

	// -----// IR Dump After AutoInputConversionPipeline (iree-auto-input-conversion) //----- //
	#map = affine_map<(d0, d1, d2) -> (d1, d2)>
	#map1 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
	module {
	util.func public @matmul_broad(%arg0: tensor<?x?x3200xf32>, %arg1: tensor<8640x3200xf16>) -> tensor<?x?x8640xf32> {
	%cst = arith.constant 0.000000e+00 : f32
	%c0 = arith.constant 0 : index
	%c1 = arith.constant 1 : index
	%dim = tensor.dim %arg0, %c0 : tensor<?x?x3200xf32>
	%dim_0 = tensor.dim %arg0, %c1 : tensor<?x?x3200xf32>

pashu123 / all.txt

Created August 20, 2024 10:58

This file has been truncated, but you can view the full file.

	// -----// IR Dump After AutoInputConversionPipeline (iree-auto-input-conversion) //----- //
	#map = affine_map<(d0, d1, d2) -> (d1, d2)>
	#map1 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
	module {
	util.func public @matmul_broad(%arg0: tensor<?x?x3200xf32>, %arg1: tensor<8640x3200xf16>) -> tensor<?x?x8640xf32> {
	%cst = arith.constant 0.000000e+00 : f32
	%c0 = arith.constant 0 : index
	%c1 = arith.constant 1 : index
	%dim = tensor.dim %arg0, %c0 : tensor<?x?x3200xf32>
	%dim_0 = tensor.dim %arg0, %c1 : tensor<?x?x3200xf32>

pashu123 / test.mlir

Created September 6, 2024 15:50

	func.func @matmul_fusion_test(%arg0 : tensor<?x?xf32>,
	%arg1 : tensor<?x?xf32>) -> tensor<?x?xf32> {
	%c0 = arith.constant 0 : index
	%c1 = arith.constant 1 : index
	%c2 = arith.constant 2 : index
	%cst0 = arith.constant 0.0 : f32
	%M = tensor.dim %arg0, %c0 : tensor<?x?xf32>
	%N = tensor.dim %arg1, %c1 : tensor<?x?xf32>
	%K = tensor.dim %arg0, %c1 : tensor<?x?xf32>
	%empty = tensor.empty(%M, %N) : tensor<?x?xf32>

Prashant Kumar pashu123