pashu123 · August 12, 2024 12:34
diff --git a/double_generic.mlir b/double_generic.mlir
 #map = affine_map<(d0) -> (d0)>
 #map1 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
 module {
  func.func @main_graph_dispatch_47_elementwise_64x56x56_f32(%arg0: tensor<200704xi8>, %arg1: tensor<64x56x56xf32>) -> tensor<64x56x56xf32> {
    %cst = arith.constant 0.000000e+00 : f32
    %cst_0 = arith.constant -1.280000e+02 : f32
    %cst_1 = arith.constant 1.270000e+02 : f32
    %cst_2 = arith.constant 1.562500e-02 : f32
    %0 = tensor.empty() : tensor<64x56x56xf32>
    %1 = tensor.empty() : tensor<200704xf32>
    %2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg0 : tensor<200704xi8>) outs(%1 : tensor<200704xf32>) {
    ^bb0(%in: i8, %out: f32):
      %4 = arith.extsi %in : i8 to i32
      %5 = arith.sitofp %4 : i32 to f32
      %6 = arith.mulf %5, %cst_2 : f32
      linalg.yield %6 : f32
    } -> tensor<200704xf32>
    %expanded = tensor.expand_shape %2 [[0, 1, 2]] output_shape [64, 56, 56] : tensor<200704xf32> into tensor<64x56x56xf32>
    %3 = linalg.generic {indexing_maps = [#map1, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%expanded, %arg1 : tensor<64x56x56xf32>, tensor<64x56x56xf32>) outs(%0 : tensor<64x56x56xf32>) {
    ^bb0(%in: f32, %in_3: f32, %out: f32):
      %4 = arith.divf %in_3, %cst_2 : f32
      %5 = math.roundeven %4 : f32
      %6 = arith.addf %5, %cst : f32
      %7 = arith.maximumf %6, %cst_0 : f32
      %8 = arith.minimumf %7, %cst_1 : f32
      %9 = arith.fptosi %8 : f32 to i8
      %10 = arith.extsi %9 : i8 to i32
      %11 = arith.sitofp %10 : i32 to f32
      %12 = arith.mulf %11, %cst_2 : f32
      %13 = arith.addf %in, %12 : f32
      %14 = arith.divf %13, %cst_2 : f32
      %15 = math.roundeven %14 : f32
      %16 = arith.addf %15, %cst : f32
      %17 = arith.maximumf %16, %cst_0 : f32
      %18 = arith.minimumf %17, %cst_1 : f32
      %19 = arith.fptosi %18 : f32 to i8
      %20 = arith.extsi %19 : i8 to i32
      %21 = arith.sitofp %20 : i32 to f32
      %22 = arith.mulf %21, %cst_2 : f32
      linalg.yield %22 : f32
    } -> tensor<64x56x56xf32>
    return %3 : tensor<64x56x56xf32>
  }
 }
	#map = affine_map<(d0) -> (d0)>
	#map1 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
	module {
	func.func @main_graph_dispatch_47_elementwise_64x56x56_f32(%arg0: tensor<200704xi8>, %arg1: tensor<64x56x56xf32>) -> tensor<64x56x56xf32> {
	%cst = arith.constant 0.000000e+00 : f32
	%cst_0 = arith.constant -1.280000e+02 : f32
	%cst_1 = arith.constant 1.270000e+02 : f32
	%cst_2 = arith.constant 1.562500e-02 : f32
	%0 = tensor.empty() : tensor<64x56x56xf32>
	%1 = tensor.empty() : tensor<200704xf32>
	%2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg0 : tensor<200704xi8>) outs(%1 : tensor<200704xf32>) {
	^bb0(%in: i8, %out: f32):
	%4 = arith.extsi %in : i8 to i32
	%5 = arith.sitofp %4 : i32 to f32
	%6 = arith.mulf %5, %cst_2 : f32
	linalg.yield %6 : f32
	} -> tensor<200704xf32>
	%expanded = tensor.expand_shape %2 [[0, 1, 2]] output_shape [64, 56, 56] : tensor<200704xf32> into tensor<64x56x56xf32>
	%3 = linalg.generic {indexing_maps = [#map1, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%expanded, %arg1 : tensor<64x56x56xf32>, tensor<64x56x56xf32>) outs(%0 : tensor<64x56x56xf32>) {
	^bb0(%in: f32, %in_3: f32, %out: f32):
	%4 = arith.divf %in_3, %cst_2 : f32
	%5 = math.roundeven %4 : f32
	%6 = arith.addf %5, %cst : f32
	%7 = arith.maximumf %6, %cst_0 : f32
	%8 = arith.minimumf %7, %cst_1 : f32
	%9 = arith.fptosi %8 : f32 to i8
	%10 = arith.extsi %9 : i8 to i32
	%11 = arith.sitofp %10 : i32 to f32
	%12 = arith.mulf %11, %cst_2 : f32
	%13 = arith.addf %in, %12 : f32
	%14 = arith.divf %13, %cst_2 : f32
	%15 = math.roundeven %14 : f32
	%16 = arith.addf %15, %cst : f32
	%17 = arith.maximumf %16, %cst_0 : f32
	%18 = arith.minimumf %17, %cst_1 : f32
	%19 = arith.fptosi %18 : f32 to i8
	%20 = arith.extsi %19 : i8 to i32
	%21 = arith.sitofp %20 : i32 to f32
	%22 = arith.mulf %21, %cst_2 : f32
	linalg.yield %22 : f32
	} -> tensor<64x56x56xf32>
	return %3 : tensor<64x56x56xf32>
	}
	}