Command line:

blaze-bin/third_party/iree/experimental/runners/mlir-proto-opt -linalg-comprehensive-bufferize-inplace /tmp/a.mlir

Output:

return val does not fold: %0 = tensor.generate %arg0, %arg1, %arg2, %arg3  {
^bb0(%arg4: index, %arg5: index, %arg6: index, %arg7: index):  // no predecessors
  %1 = index_cast %arg4 : index to i32
benoitjacob@benoitjacob:/google/src/cloud/benoitjacob/fig1/google3$ blaze-bin/third_party/iree/experimental/runners/mlir-proto-opt -linalg-comprehensive-bufferize-inplace -print-ir-after-all -mlir-disable-threading /tmp/a.mlir
// *** IR Dump After Canonicalizer ***
func @generate_pseudorandom_4d_f32(%arg0: index, %arg1: index, %arg2: index, %arg3: index) -> tensor<?x?x?x?xf32> {
%0 = tensor.generate %arg0, %arg1, %arg2, %arg3 {
^bb0(%arg4: index, %arg5: index, %arg6: index, %arg7: index): // no predecessors
%1 = index_cast %arg4 : index to i32
%2 = sitofp %1 : i32 to f32
tensor.yield %2 : f32
} : tensor<?x?x?x?xf32>
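For reference, the generator body above computes sitofp(index_cast(%arg4)), i.e. it casts only the first index d0 to f32 and ignores the other three, so every element of the generated tensor equals float(d0). A minimal pure-Python sketch of that semantics (function name taken from the dump, shapes passed as plain ints):

```python
# Pure-Python sketch of the tensor.generate above: the body yields
# float(d0) at every (d0, d1, d2, d3) coordinate.
def generate_pseudorandom_4d_f32(d0, d1, d2, d3):
    return [[[[float(i0)
               for _i3 in range(d3)]
              for _i2 in range(d2)]
             for _i1 in range(d1)]
            for i0 in range(d0)]

t = generate_pseudorandom_4d_f32(2, 1, 1, 3)
# Every element of slice i0 equals float(i0).
```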
$ iree/experimental/runners/mlir-proto-opt -linalg-comprehensive-bufferize-inplace a.mlir

F0305 14:30:11.381034  760629 logging.cc:107] assert.h assertion failed at third_party/iree/experimental/runners/LinalgComprehensiveBufferizePass.cpp:800 in mlir::LogicalResult allocateBuffersForResults(mlir::OpBuilder &, mlir::Location, mlir::linalg::LinalgOp, SmallVectorImpl<mlir::Value> &, mlir::BlockAndValueMapping &): op.getNumOutputTensors() == op->getNumResults()

In GDB, the fact that the op has 4 parallel and 2 reduction loops shows that it is the mmt_4d_kernel op.
This op has 1 output tensor and 1 result.
Why does this test case make the compiler believe that it has 0 results?

    at third_party/iree/experimental/runners/LinalgComprehensiveBufferizePass.cpp:800
[ 1] // RUN: mlir-proto-opt %s -linalg-comprehensive-bufferize-inplace [FAIL]
mlir-proto-opt /usr/local/google/_blaze_benoitjacob/e44aed074990e2268fd21257b0410155/execroot/google3/blaze-out/k8-asan-dbg/bin/third_party/iree/experimental/runners/test/test_mmt_4d_kernel_unit_000.mlir.test.runfiles/google3/third_party/iree/experimental/runners/test/test_mmt_4d_kernel_unit_000.mlir -linalg-comprehensive-bufferize-inplace
=================================================================
==771875==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x60c0000889c0 at pc 0x55bc335ba9a1 bp 0x7ffd14fac140 sp 0x7ffd14fac138
READ of size 8 at 0x60c0000889c0 thread T0
#0 0x55bc335ba9a0 in insertIntoCurrent third_party/llvm/llvm-project/mlir/include/mlir/IR/UseDefLists.h:201:24
#1 0x55bc335ba9a0 in set third_party/llvm/llvm-project/mlir/include/mlir/IR/UseDefLists.h:129:5
#2 0x55bc335ba9a0 in set third_party/llvm/llvm-project/mlir/lib/IR/Value.cpp:215:38
#3 0x55bc335ba9a0 in mlir::IRObjectWithUseList<m
diff --git a/google3/third_party/llvm/llvm-project/mlir/lib/Bindings/Python/mlir/tools/linalg_opdsl/ops/core_named_ops.py b/google3/third_party/llvm/llvm-project/mlir/lib/Bindings/Python/mlir/tools/linalg_opdsl/ops/core_named_ops.py
--- a/google3/third_party/llvm/llvm-project/mlir/lib/Bindings/Python/mlir/tools/linalg_opdsl/ops/core_named_ops.py
+++ b/google3/third_party/llvm/llvm-project/mlir/lib/Bindings/Python/mlir/tools/linalg_opdsl/ops/core_named_ops.py
@@ -68,3 +68,37 @@ def dot(A=TensorDef(T1, S.M), B=TensorDe
"""
implements(ContractionOpInterface)
C[None] += cast(U, A[D.m]) * cast(U, B[D.m])
+
+@linalg_structured_op
+def mmt_4d_kernel(lhs=TensorDef(TV.LhsType, S.M, S.K, S.M0, S.K0),
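The diff is truncated after the lhs signature. Assuming the op follows the usual mmt4d convention (a guess from the name and the (M, K, M0, K0) lhs shape: rhs would be (N, K, N0, K0), the accumulator (M, N, M0, N0), with the rhs transposed in the inner dimensions), the intended contraction can be sketched in pure Python:

```python
# Hypothetical reference for an mmt4d-style kernel:
#   acc[m][n][m0][n0] += lhs[m][k][m0][k0] * rhs[n][k][n0][k0]
# The real mmt_4d_kernel signature is truncated in the diff above, so
# this only illustrates the assumed contraction, not the actual op.
def mmt4d_reference(lhs, rhs, acc):
    M, K = len(lhs), len(lhs[0])
    M0, K0 = len(lhs[0][0]), len(lhs[0][0][0])
    N, N0 = len(rhs), len(rhs[0][0])
    for m in range(M):
        for n in range(N):
            for k in range(K):
                for m0 in range(M0):
                    for n0 in range(N0):
                        for k0 in range(K0):
                            acc[m][n][m0][n0] += lhs[m][k][m0][k0] * rhs[n][k][n0][k0]
    return acc

lhs = [[[[1.0, 2.0], [3.0, 4.0]]]]  # M=1, K=1, M0=2, K0=2
rhs = [[[[5.0, 6.0], [7.0, 8.0]]]]  # N=1, K=1, N0=2, K0=2
acc = [[[[0.0, 0.0], [0.0, 0.0]]]]  # M=1, N=1, M0=2, N0=2
mmt4d_reference(lhs, rhs, acc)
```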

Steps to reproduce:

blaze build //third_party/llvm/llvm-project/mlir:mlir-opt
alias mlir-opt=blaze-bin/third_party/llvm/llvm-project/mlir/mlir-opt
@bjacob
bjacob / gist:baea1dbcb199201b9bf15d0eeffee0bb
Last active July 8, 2021 02:42
Comment on 'Geometric Rank of Tensors and Subrank of Matrix Multiplication'
Dear Professors!
Thank you for the most interesting paper (arXiv:2002.09472v2).
I would like to submit the following comment, counting on your benevolence in case I am mistaken, as I left mathematics long ago and am speaking from dim memories.
In section 8.2, 'Modular roots', it seems to me that both Lemma 8.5 and Theorem 8.7 may be viewed as applications of Chebotarev's density theorem (a fixture of algebraic number theory textbooks).
In the case of Lemma 8.5, apply Chebotarev's theorem to the Galois extension of Q generated by S (that is, Q(S'), where S' consists of the elements of S together with all their images under automorphisms of the algebraic closure of Q), and to the conjugacy class of the identity in Gal(Q(S')/Q). Then, if I'm not mistaken, Chebotarev's theorem says that the set of primes that split completely in Q(S') has positive density, namely 1/[Q(S') : Q], and any such completely-split prime satisfies the requirement of Lemma 8.5, namely, the desired map from Z[S] to F_p is obt
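For reference, the special case of the density theorem being invoked here (the trivial conjugacy class, i.e. completely split primes) can be stated as follows, for a finite Galois extension K/Q:

```latex
% Density of primes splitting completely in a finite Galois extension K/Q
\lim_{x \to \infty}
  \frac{\#\{\, p \le x : p \text{ splits completely in } K \,\}}
       {\#\{\, p \le x : p \text{ prime} \,\}}
  = \frac{1}{[K : \mathbb{Q}]}
```

so taking K = Q(S') gives the positive density 1/[Q(S') : Q] claimed above.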
@bjacob
bjacob / foo.txt
Created September 20, 2021 18:00
// -----// IR Dump After mlir::iree_compiler::IREE::ABI::WrapEntryPointsPass //----- //
#map0 = affine_map<(d0, d1, d2) -> (d0, d1)>
#map1 = affine_map<(d0, d1, d2) -> (d1, d2)>
#map2 = affine_map<(d0, d1, d2) -> (d0, d2)>
#map3 = affine_map<(d0, d1) -> (d0, d1)>
module {
func private @actual(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %arg2: tensor<?x?xf32>) -> tensor<?x?xf32> attributes {noinline} {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
return %0 : tensor<?x?xf32>
}
func private @expected(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %arg2: tensor<?x?xf32>) -> tensor<?x?xf32> attributes {noinline} {
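As a reference for what @actual computes: linalg.matmul accumulates into its outs operand, i.e. it returns init + a @ b rather than a plain product. A minimal pure-Python sketch of that semantics (function and variable names are mine):

```python
# Pure-Python sketch of linalg.matmul semantics: the `outs` tensor is an
# accumulator, so the result is init + a @ b, not just a @ b.
def linalg_matmul_reference(a, b, init):
    m, k, n = len(a), len(b), len(b[0])
    out = [row[:] for row in init]  # copy the init/outs operand
    for i in range(m):
        for j in range(n):
            for p in range(k):
                out[i][j] += a[i][p] * b[p][j]
    return out

a = [[1.0, 2.0]]      # 1x2
b = [[3.0], [4.0]]    # 2x1
init = [[100.0]]      # 1x1 accumulator
result = linalg_matmul_reference(a, b, init)
```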