bjacob · January 31, 2021 04:47
diff --git a/error.txt b/error.txt
 <stdin>:4:10: error: operand #3 does not dominate this use
    %0 = linalg.matmul ins(%lhs, %rhs: tensor<?x?xf32>, tensor<?x?xf32>)
         ^
 <stdin>:4:10: note: see current operation: %0 = "scf.for"(%c0, %c-1, %c1, %0) ( {
 ^bb0(%arg3: index, %arg4: tensor<?x?xf32>):  // no predecessors
  %1 = "scf.for"(%c0, %c-1, %c1, %0) ( {
  ^bb0(%arg5: index, %arg6: tensor<?x?xf32>):  // no predecessors
    %2 = "scf.for"(%c0, %c-1, %c1, %0) ( {
    ^bb0(%arg7: index, %arg8: tensor<?x?xf32>):  // no predecessors
      %3 = "std.subtensor"(%arg0, %arg3, %arg7, %c1, %c1, %c1, %c1) {operand_segment_sizes = dense<[1, 2, 2, 2]> : vector<4xi32>, static_offsets = [-9223372036854775808, -9223372036854775808], static_sizes = [-1, -1], static_strides = [-9223372036854775808, -9223372036854775808]} : (tensor<?x?xf32>, index, index, index, index, index, index) -> tensor<?x?xf32>
      %4 = "std.subtensor"(%arg0, %arg7, %arg5, %c1, %c1, %c1, %c1) {operand_segment_sizes = dense<[1, 2, 2, 2]> : vector<4xi32>, static_offsets = [-9223372036854775808, -9223372036854775808], static_sizes = [-1, -1], static_strides = [-9223372036854775808, -9223372036854775808]} : (tensor<?x?xf32>, index, index, index, index, index, index) -> tensor<?x?xf32>
      %5 = "linalg.matmul"(%3, %4, %3) ( {
      ^bb0(%arg9: f32, %arg10: f32, %arg11: f32):  // no predecessors
        %7 = "std.mulf"(%arg9, %arg10) : (f32, f32) -> f32
        %8 = "std.addf"(%arg11, %7) : (f32, f32) -> f32
        "linalg.yield"(%8) : (f32) -> ()
      }) {operand_segment_sizes = dense<[2, 1]> : vector<2xi32>} : (tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32>
      %6 = "std.subtensor_insert"(%5, %arg8, %arg3, %arg5, %c1, %c1, %c1, %c1) {operand_segment_sizes = dense<[1, 1, 2, 2, 2]> : vector<5xi32>, static_offsets = [-9223372036854775808, -9223372036854775808], static_sizes = [-1, -1], static_strides = [-9223372036854775808, -9223372036854775808]} : (tensor<?x?xf32>, tensor<?x?xf32>, index, index, index, index, index, index) -> tensor<?x?xf32>
      "scf.yield"(%6) : (tensor<?x?xf32>) -> ()
    }) : (index, index, index, tensor<?x?xf32>) -> tensor<?x?xf32>
    "scf.yield"(%2) : (tensor<?x?xf32>) -> ()
  }) : (index, index, index, tensor<?x?xf32>) -> tensor<?x?xf32>
  "scf.yield"(%1) : (tensor<?x?xf32>) -> ()
 }) : (index, index, index, tensor<?x?xf32>) -> tensor<?x?xf32>
 <stdin>:4:10: note: operand defined here (op in the same block)
diff --git a/foo.cc b/foo.cc
 // Thanks @NicoVaz for the advice. This is how far I got.
  
 // Now it gets farther, but reports an error, "operand #3 does not dominate this use"
 // See error.txt below.

 class MehMatmulToSCFPattern : public OpRewritePattern<meh::MatmulOp> {
 public:
  using OpRewritePattern<meh::MatmulOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(meh::MatmulOp op,
                                PatternRewriter &rewriter) const override {
    auto lhsVal = op.lhs();
    auto rhsVal = op.rhs();
    auto dstVal = op.dst();
    auto dstValType = dstVal.getType().cast<ShapedType>();
    auto dstValTensorTye = dstVal.getType();
    auto dstShape = dstValType.getShape();  // ArrayRef<int64_t>
    auto M = dstShape[0];
    auto N = dstShape[1];
    auto rhsValType = dstVal.getType().cast<ShapedType>();
    auto rhsShape = rhsValType.getShape();

    edsc::ScopedContext scope(rewriter, op.getLoc());

    auto K = rhsShape[0];

    Value zero = edsc::intrinsics::std_constant_index(0);
    Value one = edsc::intrinsics::std_constant_index(1);

    Value boundM = edsc::intrinsics::std_constant_index(M);
    Value boundN = edsc::intrinsics::std_constant_index(N);
    Value boundK = edsc::intrinsics::std_constant_index(K);

    auto to_valuevector = [](ValueRange r) {
      return scf::ValueVector(r.begin(), r.end());
    };

    auto loopnest = edsc::loopNestBuilder(zero, boundM, one, {dstVal}, [&](Value m, ValueRange iterArgs) {
      return to_valuevector(edsc::loopNestBuilder(zero, boundN, one, {dstVal}, [&](Value n, ValueRange iterArgs) {
        return to_valuevector(edsc::loopNestBuilder(zero, boundK, one, {dstVal}, [&](Value k, ValueRange iterArgs) {
          Value dst = iterArgs[0];
          Value lhs_entry = rewriter.create<SubTensorOp>(
            op.getLoc(), lhsVal,
            ValueRange{m, k},
            ValueRange{one, one},
            ValueRange{one, one});
          Value rhs_entry = rewriter.create<SubTensorOp>(
            op.getLoc(), lhsVal,
            ValueRange{k, n},
            ValueRange{one, one},
            ValueRange{one, one});
          auto product = rewriter.create<linalg::MatmulOp>(
            op.getLoc(), TypeRange{dstVal.getType()},
            ValueRange{lhs_entry, rhs_entry}, lhs_entry);
          auto inserted = rewriter.create<SubTensorInsertOp>(op.getLoc(), product.getResults()[0], dst, ValueRange{m, n},
            ValueRange{one, one},
            ValueRange{one, one});
          return to_valuevector({inserted});
        }).getResults());
      }).getResults());
    });

    rewriter.replaceOp(op, loopnest.getResults()[0]);
    return success();
  }
 };
	<stdin>:4:10: error: operand #3 does not dominate this use
	%0 = linalg.matmul ins(%lhs, %rhs: tensor<?x?xf32>, tensor<?x?xf32>)
	^
	<stdin>:4:10: note: see current operation: %0 = "scf.for"(%c0, %c-1, %c1, %0) ( {
	^bb0(%arg3: index, %arg4: tensor<?x?xf32>): // no predecessors
	%1 = "scf.for"(%c0, %c-1, %c1, %0) ( {
	^bb0(%arg5: index, %arg6: tensor<?x?xf32>): // no predecessors
	%2 = "scf.for"(%c0, %c-1, %c1, %0) ( {
	^bb0(%arg7: index, %arg8: tensor<?x?xf32>): // no predecessors
	%3 = "std.subtensor"(%arg0, %arg3, %arg7, %c1, %c1, %c1, %c1) {operand_segment_sizes = dense<[1, 2, 2, 2]> : vector<4xi32>, static_offsets = [-9223372036854775808, -9223372036854775808], static_sizes = [-1, -1], static_strides = [-9223372036854775808, -9223372036854775808]} : (tensor<?x?xf32>, index, index, index, index, index, index) -> tensor<?x?xf32>
	%4 = "std.subtensor"(%arg0, %arg7, %arg5, %c1, %c1, %c1, %c1) {operand_segment_sizes = dense<[1, 2, 2, 2]> : vector<4xi32>, static_offsets = [-9223372036854775808, -9223372036854775808], static_sizes = [-1, -1], static_strides = [-9223372036854775808, -9223372036854775808]} : (tensor<?x?xf32>, index, index, index, index, index, index) -> tensor<?x?xf32>
	%5 = "linalg.matmul"(%3, %4, %3) ( {
	^bb0(%arg9: f32, %arg10: f32, %arg11: f32): // no predecessors
	%7 = "std.mulf"(%arg9, %arg10) : (f32, f32) -> f32
	%8 = "std.addf"(%arg11, %7) : (f32, f32) -> f32
	"linalg.yield"(%8) : (f32) -> ()
	}) {operand_segment_sizes = dense<[2, 1]> : vector<2xi32>} : (tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32>
	%6 = "std.subtensor_insert"(%5, %arg8, %arg3, %arg5, %c1, %c1, %c1, %c1) {operand_segment_sizes = dense<[1, 1, 2, 2, 2]> : vector<5xi32>, static_offsets = [-9223372036854775808, -9223372036854775808], static_sizes = [-1, -1], static_strides = [-9223372036854775808, -9223372036854775808]} : (tensor<?x?xf32>, tensor<?x?xf32>, index, index, index, index, index, index) -> tensor<?x?xf32>
	"scf.yield"(%6) : (tensor<?x?xf32>) -> ()
	}) : (index, index, index, tensor<?x?xf32>) -> tensor<?x?xf32>
	"scf.yield"(%2) : (tensor<?x?xf32>) -> ()
	}) : (index, index, index, tensor<?x?xf32>) -> tensor<?x?xf32>
	"scf.yield"(%1) : (tensor<?x?xf32>) -> ()
	}) : (index, index, index, tensor<?x?xf32>) -> tensor<?x?xf32>
	<stdin>:4:10: note: operand defined here (op in the same block)
	// Thanks @NicoVaz for the advice. This is how far I got.

	// Now it gets farther, but reports an error, "operand #3 does not dominate this use"
	// See error.txt below.

	class MehMatmulToSCFPattern : public OpRewritePattern<meh::MatmulOp> {
	public:
	using OpRewritePattern<meh::MatmulOp>::OpRewritePattern;

	LogicalResult matchAndRewrite(meh::MatmulOp op,
	PatternRewriter &rewriter) const override {
	auto lhsVal = op.lhs();
	auto rhsVal = op.rhs();
	auto dstVal = op.dst();
	auto dstValType = dstVal.getType().cast<ShapedType>();
	auto dstValTensorTye = dstVal.getType();
	auto dstShape = dstValType.getShape(); // ArrayRef<int64_t>
	auto M = dstShape[0];
	auto N = dstShape[1];
	auto rhsValType = dstVal.getType().cast<ShapedType>();
	auto rhsShape = rhsValType.getShape();

	edsc::ScopedContext scope(rewriter, op.getLoc());

	auto K = rhsShape[0];

	Value zero = edsc::intrinsics::std_constant_index(0);
	Value one = edsc::intrinsics::std_constant_index(1);

	Value boundM = edsc::intrinsics::std_constant_index(M);
	Value boundN = edsc::intrinsics::std_constant_index(N);
	Value boundK = edsc::intrinsics::std_constant_index(K);

	auto to_valuevector = [](ValueRange r) {
	return scf::ValueVector(r.begin(), r.end());
	};

	auto loopnest = edsc::loopNestBuilder(zero, boundM, one, {dstVal}, [&](Value m, ValueRange iterArgs) {
	return to_valuevector(edsc::loopNestBuilder(zero, boundN, one, {dstVal}, [&](Value n, ValueRange iterArgs) {
	return to_valuevector(edsc::loopNestBuilder(zero, boundK, one, {dstVal}, [&](Value k, ValueRange iterArgs) {
	Value dst = iterArgs[0];
	Value lhs_entry = rewriter.create<SubTensorOp>(
	op.getLoc(), lhsVal,
	ValueRange{m, k},
	ValueRange{one, one},
	ValueRange{one, one});
	Value rhs_entry = rewriter.create<SubTensorOp>(
	op.getLoc(), lhsVal,
	ValueRange{k, n},
	ValueRange{one, one},
	ValueRange{one, one});
	auto product = rewriter.create<linalg::MatmulOp>(
	op.getLoc(), TypeRange{dstVal.getType()},
	ValueRange{lhs_entry, rhs_entry}, lhs_entry);
	auto inserted = rewriter.create<SubTensorInsertOp>(op.getLoc(), product.getResults()[0], dst, ValueRange{m, n},
	ValueRange{one, one},
	ValueRange{one, one});
	return to_valuevector({inserted});
	}).getResults());
	}).getResults());
	});

	rewriter.replaceOp(op, loopnest.getResults()[0]);
	return success();
	}
	};