
@bjacob
Created March 5, 2021 19:38
$ iree/experimental/runners/mlir-proto-opt -linalg-comprehensive-bufferize-inplace a.mlir

F0305 14:30:11.381034  760629 logging.cc:107] assert.h assertion failed at third_party/iree/experimental/runners/LinalgComprehensiveBufferizePass.cpp:800 in mlir::LogicalResult allocateBuffersForResults(mlir::OpBuilder &, mlir::Location, mlir::linalg::LinalgOp, SmallVectorImpl<mlir::Value> &, mlir::BlockAndValueMapping &): op.getNumOutputTensors() == op->getNumResults()

In GDB, the fact that the op has 4 parallel loops and 2 reduction loops identifies it as the mmt_4d_kernel op.
This op has 1 output tensor and 1 result.
Why does this testcase make the compiler believe that it has 0 results?
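
For context, the invariant the assert encodes (see the comment on line 799 in the listing below) is: for a Linalg op whose outputs are tensors, output tensors and results match 1-1, while for an op whose outputs are memrefs the outputs are written in place and the op has no results. A minimal sketch of that invariant, using the upstream linalg.matmul op purely as an illustration (not the experimental mmt_4d_kernel op):

    // Tensor form: one result per output tensor, so getNumResults() == getNumOutputTensors().
    %r = linalg.matmul ins(%a, %b : tensor<4x8xf32>, tensor<8x4xf32>)
                       outs(%c : tensor<4x4xf32>) -> tensor<4x4xf32>
    // Memref (buffer) form: the output is updated in place, so the op has 0 results.
    linalg.matmul ins(%am, %bm : memref<4x8xf32>, memref<8x4xf32>)
                  outs(%cm : memref<4x4xf32>)

In other words, getNumResults() == 0 is what a buffer-form op would report, yet getNumOutputTensors() == 1 says the outs operand is still a tensor; that mismatch is exactly what the assert rejects.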

    at third_party/iree/experimental/runners/LinalgComprehensiveBufferizePass.cpp:800
800	  assert(op.getNumOutputTensors() == op->getNumResults());
(gdb) p op
$1 = {<mlir::OpInterface<mlir::linalg::LinalgOp, mlir::linalg::detail::LinalgOpInterfaceTraits>> = {<mlir::detail::Interface<mlir::linalg::LinalgOp, mlir::Operation*, mlir::linalg::detail::LinalgOpInterfaceTraits, mlir::Op<mlir::linalg::LinalgOp>, OpTrait::TraitBase>> = {<mlir::Op<mlir::linalg::LinalgOp>> = {<mlir::OpState> = {state = 0x57a83fc6b010}, <No data fields>}, 
      impl = 0x57a83fd8f620}, <No data fields>}, <No data fields>}
(gdb) l
795	    SmallVectorImpl<Value> &resultBuffers, BlockAndValueMapping &bvm) {
796	  // Lazily compute loopRanges.
797	  SmallVector<Range, 4> loopRanges;
798	
799	  // Linalg invariant: output tensors and result match 1-1.
800	  assert(op.getNumOutputTensors() == op->getNumResults());
801	  for (auto &opOperand : op.getOutputOpOperands()) {
802	    Value output = opOperand.get();
803	    if (output.getType().isa<MemRefType>()) {
804	      resultBuffers.push_back(output);
(gdb) p op.getNumParallelLoops()
$4 = 4
(gdb) p op.getNumReductionLoops()
$5 = 2
(gdb) p op.getNumOutputTensors()
$6 = 1
(gdb) p op->getNumResults()
$7 = 0

The testcase, a.mlir:

func @main() {
  %lhs = constant dense<[[[[2.]]]]> : tensor<1x1x1x1xf32>
  %rhs = constant dense<[[[[3.]]]]> : tensor<1x1x1x1xf32>
  %accum = constant dense<[[[[1.]]]]> : tensor<1x1x1x1xf32>
  %result = linalg.mmt_4d_kernel ins(%lhs, %rhs : tensor<1x1x1x1xf32>, tensor<1x1x1x1xf32>)
                                 outs(%accum : tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32>
  %c0 = constant 0 : index
  %v0 = constant 0.0 : f32
  %result_vector = vector.transfer_read %result[%c0, %c0, %c0, %c0], %v0 : tensor<1x1x1x1xf32>, vector<1x1x1x1xf32>
  vector.print %result_vector : vector<1x1x1x1xf32>
  return
}