pashu123 · April 23, 2025 23:36
diff --git a/test.mlir b/test.mlir
 // Indexing maps for the linalg.generic in @faulty, over the iteration space (d0, d1, d2):
 //   #map  addresses the first input at (d2, d0),
 //   #map1 addresses the second input at (d1, d2),
 //   #map2 addresses the output at (d0, d1).
 #map = affine_map<(d0, d1, d2) -> (d2, d0)>
 #map1 = affine_map<(d0, d1, d2) -> (d1, d2)>
 #map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
 // NOTE(review): #map3 and #map4 are not referenced anywhere in this module.
 #map3 = affine_map<(d0, d1) -> (d0, d1)>
 #map4 = affine_map<(d0, d1) -> (d1)>
 module {
  // @faulty: multiply-accumulate of two f16 tensors into an f32 result.
  //   %arg0 : tensor<2816x2xf16>, indexed as (d2, d0)
  //   %arg1 : tensor<1280x2816xf16>, indexed as (d1, d2)
  //   result: tensor<2x1280xf32>, indexed as (d0, d1)
  // NOTE(review): the name suggests this is a reproducer for some failure; the
  // failure itself is not shown here -- confirm before simplifying the IR.
  func.func @faulty(%arg0: tensor<2816x2xf16>, %arg1: tensor<1280x2816xf16>) -> tensor<2x1280xf32> {
    // f32 zero used to initialise the accumulator tensor.
    %cst = arith.constant 0.000000e+00 : f32
    // NOTE(review): %cst_0 and %0 have no uses in this function.
    %cst_0 = arith.constant 1.000000e+00 : f16
    %0 = tensor.empty() : tensor<2x1280xf16>
    // Accumulator: an empty 2x1280 f32 tensor filled with %cst (0.0).
    %1 = tensor.empty() : tensor<2x1280xf32>
    %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<2x1280xf32>) -> tensor<2x1280xf32>
    // d0 and d1 are parallel, d2 is the reduction: each output element (d0, d1)
    // accumulates arg0[d2, d0] * arg1[d1, d2] over d2, starting from the filled %2.
    %3 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]} ins(%arg0, %arg1 : tensor<2816x2xf16>, tensor<1280x2816xf16>) outs(%2 : tensor<2x1280xf32>) {
    ^bb0(%in: f16, %in_1: f16, %out: f32):
      // Widen both f16 operands to f32 first, so the multiply and the
      // accumulation are both performed in f32.
      %5 = arith.extf %in : f16 to f32
      %6 = arith.extf %in_1 : f16 to f32
      %7 = arith.mulf %5, %6 : f32
      // Add the product to the running value of the output element.
      %8 = arith.addf %out, %7 : f32
      linalg.yield %8 : f32
    } -> tensor<2x1280xf32>
    return %3 : tensor<2x1280xf32>
  }
 }
	// Same module text as the indented copy earlier in this paste, with every
	// line's indentation collapsed to a single tab.
	// Indexing maps over (d0, d1, d2): #map -> first input at (d2, d0),
	// #map1 -> second input at (d1, d2), #map2 -> output at (d0, d1).
	#map = affine_map<(d0, d1, d2) -> (d2, d0)>
	#map1 = affine_map<(d0, d1, d2) -> (d1, d2)>
	#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
	// NOTE(review): #map3 and #map4 are not referenced anywhere in this module.
	#map3 = affine_map<(d0, d1) -> (d0, d1)>
	#map4 = affine_map<(d0, d1) -> (d1)>
	module {
	// @faulty: multiply-accumulate of a 2816x2 f16 tensor and a 1280x2816 f16
	// tensor into a 2x1280 f32 tensor.
	func.func @faulty(%arg0: tensor<2816x2xf16>, %arg1: tensor<1280x2816xf16>) -> tensor<2x1280xf32> {
	// f32 zero used to initialise the accumulator tensor.
	%cst = arith.constant 0.000000e+00 : f32
	// NOTE(review): %cst_0 and %0 have no uses in this function.
	%cst_0 = arith.constant 1.000000e+00 : f16
	%0 = tensor.empty() : tensor<2x1280xf16>
	// Accumulator: an empty 2x1280 f32 tensor filled with %cst (0.0).
	%1 = tensor.empty() : tensor<2x1280xf32>
	%2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<2x1280xf32>) -> tensor<2x1280xf32>
	// d0 and d1 are parallel, d2 is the reduction: each output element (d0, d1)
	// accumulates arg0[d2, d0] * arg1[d1, d2] over d2.
	%3 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]} ins(%arg0, %arg1 : tensor<2816x2xf16>, tensor<1280x2816xf16>) outs(%2 : tensor<2x1280xf32>) {
	^bb0(%in: f16, %in_1: f16, %out: f32):
	// Widen both f16 operands to f32 so the multiply and add run in f32.
	%5 = arith.extf %in : f16 to f32
	%6 = arith.extf %in_1 : f16 to f32
	%7 = arith.mulf %5, %6 : f32
	// Add the product to the running value of the output element.
	%8 = arith.addf %out, %7 : f32
	linalg.yield %8 : f32
	} -> tensor<2x1280xf32>
	return %3 : tensor<2x1280xf32>
	}
	}