Skip to content

Instantly share code, notes, and snippets.

@bjacob
Created March 5, 2021 19:02
Show Gist options
  • Save bjacob/9aec24b5e7c118d6c2e6b393bdd376a4 to your computer and use it in GitHub Desktop.
```
benoitjacob@benoitjacob:/google/src/cloud/benoitjacob/fig1/google3$ blaze-bin/third_party/iree/experimental/runners/mlir-proto-opt -linalg-comprehensive-bufferize-inplace -print-ir-after-all -mlir-disable-threading /tmp/a.mlir
// *** IR Dump After Canonicalizer ***
func @generate_pseudorandom_4d_f32(%arg0: index, %arg1: index, %arg2: index, %arg3: index) -> tensor<?x?x?x?xf32> {
%0 = tensor.generate %arg0, %arg1, %arg2, %arg3 {
^bb0(%arg4: index, %arg5: index, %arg6: index, %arg7: index): // no predecessors
%1 = index_cast %arg4 : index to i32
%2 = sitofp %1 : i32 to f32
tensor.yield %2 : f32
} : tensor<?x?x?x?xf32>
return %0 : tensor<?x?x?x?xf32>
}
// *** IR Dump After CSE ***
func @generate_pseudorandom_4d_f32(%arg0: index, %arg1: index, %arg2: index, %arg3: index) -> tensor<?x?x?x?xf32> {
%0 = tensor.generate %arg0, %arg1, %arg2, %arg3 {
^bb0(%arg4: index, %arg5: index, %arg6: index, %arg7: index): // no predecessors
%1 = index_cast %arg4 : index to i32
%2 = sitofp %1 : i32 to f32
tensor.yield %2 : f32
} : tensor<?x?x?x?xf32>
return %0 : tensor<?x?x?x?xf32>
}
// *** IR Dump After LoopInvariantCodeMotion ***
func @generate_pseudorandom_4d_f32(%arg0: index, %arg1: index, %arg2: index, %arg3: index) -> tensor<?x?x?x?xf32> {
%0 = tensor.generate %arg0, %arg1, %arg2, %arg3 {
^bb0(%arg4: index, %arg5: index, %arg6: index, %arg7: index): // no predecessors
%1 = index_cast %arg4 : index to i32
%2 = sitofp %1 : i32 to f32
tensor.yield %2 : f32
} : tensor<?x?x?x?xf32>
return %0 : tensor<?x?x?x?xf32>
}
// *** IR Dump After Canonicalizer ***
func @generate_pseudorandom_4d_f32(%arg0: index, %arg1: index, %arg2: index, %arg3: index) -> tensor<?x?x?x?xf32> {
%0 = tensor.generate %arg0, %arg1, %arg2, %arg3 {
^bb0(%arg4: index, %arg5: index, %arg6: index, %arg7: index): // no predecessors
%1 = index_cast %arg4 : index to i32
%2 = sitofp %1 : i32 to f32
tensor.yield %2 : f32
} : tensor<?x?x?x?xf32>
return %0 : tensor<?x?x?x?xf32>
}
// *** IR Dump After CSE ***
func @generate_pseudorandom_4d_f32(%arg0: index, %arg1: index, %arg2: index, %arg3: index) -> tensor<?x?x?x?xf32> {
%0 = tensor.generate %arg0, %arg1, %arg2, %arg3 {
^bb0(%arg4: index, %arg5: index, %arg6: index, %arg7: index): // no predecessors
%1 = index_cast %arg4 : index to i32
%2 = sitofp %1 : i32 to f32
tensor.yield %2 : f32
} : tensor<?x?x?x?xf32>
return %0 : tensor<?x?x?x?xf32>
}
// *** IR Dump After LoopInvariantCodeMotion ***
func @generate_pseudorandom_4d_f32(%arg0: index, %arg1: index, %arg2: index, %arg3: index) -> tensor<?x?x?x?xf32> {
%0 = tensor.generate %arg0, %arg1, %arg2, %arg3 {
^bb0(%arg4: index, %arg5: index, %arg6: index, %arg7: index): // no predecessors
%1 = index_cast %arg4 : index to i32
%2 = sitofp %1 : i32 to f32
tensor.yield %2 : f32
} : tensor<?x?x?x?xf32>
return %0 : tensor<?x?x?x?xf32>
}
// *** IR Dump After Canonicalizer ***
func @main() {
%c8 = constant 8 : index
%c4 = constant 4 : index
%c2 = constant 2 : index
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%0 = call @generate_pseudorandom_4d_f32(%c2, %c8, %c4, %c2) : (index, index, index, index) -> tensor<?x?x?x?xf32>
%1 = tensor.generate {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors
%5 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3)
%6 = index_cast %5 : index to i32
%7 = sitofp %6 : i32 to f32
tensor.yield %7 : f32
} : tensor<3x4x4x2xf32>
%2 = tensor.generate {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors
%5 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3)
%6 = index_cast %5 : index to i32
%7 = sitofp %6 : i32 to f32
tensor.yield %7 : f32
} : tensor<2x3x8x4xf32>
%3 = linalg.mmt_4d_kernel ins(%0, %1 : tensor<?x?x?x?xf32>, tensor<3x4x4x2xf32>) outs(%2 : tensor<2x3x8x4xf32>) -> tensor<2x3x8x4xf32>
%4 = vector.transfer_read %3[%c0, %c0, %c0, %c0], %cst {masked = [false, false, false, false]} : tensor<2x3x8x4xf32>, vector<2x3x8x4xf32>
vector.print %4 : vector<2x3x8x4xf32>
return
}
// *** IR Dump After CSE ***
func @main() {
%c8 = constant 8 : index
%c4 = constant 4 : index
%c2 = constant 2 : index
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%0 = call @generate_pseudorandom_4d_f32(%c2, %c8, %c4, %c2) : (index, index, index, index) -> tensor<?x?x?x?xf32>
%1 = tensor.generate {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors
%5 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3)
%6 = index_cast %5 : index to i32
%7 = sitofp %6 : i32 to f32
tensor.yield %7 : f32
} : tensor<3x4x4x2xf32>
%2 = tensor.generate {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors
%5 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3)
%6 = index_cast %5 : index to i32
%7 = sitofp %6 : i32 to f32
tensor.yield %7 : f32
} : tensor<2x3x8x4xf32>
%3 = linalg.mmt_4d_kernel ins(%0, %1 : tensor<?x?x?x?xf32>, tensor<3x4x4x2xf32>) outs(%2 : tensor<2x3x8x4xf32>) -> tensor<2x3x8x4xf32>
%4 = vector.transfer_read %3[%c0, %c0, %c0, %c0], %cst {masked = [false, false, false, false]} : tensor<2x3x8x4xf32>, vector<2x3x8x4xf32>
vector.print %4 : vector<2x3x8x4xf32>
return
}
// *** IR Dump After LoopInvariantCodeMotion ***
func @main() {
%c8 = constant 8 : index
%c4 = constant 4 : index
%c2 = constant 2 : index
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%0 = call @generate_pseudorandom_4d_f32(%c2, %c8, %c4, %c2) : (index, index, index, index) -> tensor<?x?x?x?xf32>
%1 = tensor.generate {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors
%5 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3)
%6 = index_cast %5 : index to i32
%7 = sitofp %6 : i32 to f32
tensor.yield %7 : f32
} : tensor<3x4x4x2xf32>
%2 = tensor.generate {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors
%5 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3)
%6 = index_cast %5 : index to i32
%7 = sitofp %6 : i32 to f32
tensor.yield %7 : f32
} : tensor<2x3x8x4xf32>
%3 = linalg.mmt_4d_kernel ins(%0, %1 : tensor<?x?x?x?xf32>, tensor<3x4x4x2xf32>) outs(%2 : tensor<2x3x8x4xf32>) -> tensor<2x3x8x4xf32>
%4 = vector.transfer_read %3[%c0, %c0, %c0, %c0], %cst {masked = [false, false, false, false]} : tensor<2x3x8x4xf32>, vector<2x3x8x4xf32>
vector.print %4 : vector<2x3x8x4xf32>
return
}
// *** IR Dump After Canonicalizer ***
func @main() {
%c8 = constant 8 : index
%c4 = constant 4 : index
%c2 = constant 2 : index
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%0 = call @generate_pseudorandom_4d_f32(%c2, %c8, %c4, %c2) : (index, index, index, index) -> tensor<?x?x?x?xf32>
%1 = tensor.generate {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors
%5 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3)
%6 = index_cast %5 : index to i32
%7 = sitofp %6 : i32 to f32
tensor.yield %7 : f32
} : tensor<3x4x4x2xf32>
%2 = tensor.generate {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors
%5 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3)
%6 = index_cast %5 : index to i32
%7 = sitofp %6 : i32 to f32
tensor.yield %7 : f32
} : tensor<2x3x8x4xf32>
%3 = linalg.mmt_4d_kernel ins(%0, %1 : tensor<?x?x?x?xf32>, tensor<3x4x4x2xf32>) outs(%2 : tensor<2x3x8x4xf32>) -> tensor<2x3x8x4xf32>
%4 = vector.transfer_read %3[%c0, %c0, %c0, %c0], %cst {masked = [false, false, false, false]} : tensor<2x3x8x4xf32>, vector<2x3x8x4xf32>
vector.print %4 : vector<2x3x8x4xf32>
return
}
// *** IR Dump After CSE ***
func @main() {
%c8 = constant 8 : index
%c4 = constant 4 : index
%c2 = constant 2 : index
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%0 = call @generate_pseudorandom_4d_f32(%c2, %c8, %c4, %c2) : (index, index, index, index) -> tensor<?x?x?x?xf32>
%1 = tensor.generate {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors
%5 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3)
%6 = index_cast %5 : index to i32
%7 = sitofp %6 : i32 to f32
tensor.yield %7 : f32
} : tensor<3x4x4x2xf32>
%2 = tensor.generate {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors
%5 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3)
%6 = index_cast %5 : index to i32
%7 = sitofp %6 : i32 to f32
tensor.yield %7 : f32
} : tensor<2x3x8x4xf32>
%3 = linalg.mmt_4d_kernel ins(%0, %1 : tensor<?x?x?x?xf32>, tensor<3x4x4x2xf32>) outs(%2 : tensor<2x3x8x4xf32>) -> tensor<2x3x8x4xf32>
%4 = vector.transfer_read %3[%c0, %c0, %c0, %c0], %cst {masked = [false, false, false, false]} : tensor<2x3x8x4xf32>, vector<2x3x8x4xf32>
vector.print %4 : vector<2x3x8x4xf32>
return
}
// *** IR Dump After LoopInvariantCodeMotion ***
func @main() {
%c8 = constant 8 : index
%c4 = constant 4 : index
%c2 = constant 2 : index
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%0 = call @generate_pseudorandom_4d_f32(%c2, %c8, %c4, %c2) : (index, index, index, index) -> tensor<?x?x?x?xf32>
%1 = tensor.generate {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors
%5 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3)
%6 = index_cast %5 : index to i32
%7 = sitofp %6 : i32 to f32
tensor.yield %7 : f32
} : tensor<3x4x4x2xf32>
%2 = tensor.generate {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors
%5 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3)
%6 = index_cast %5 : index to i32
%7 = sitofp %6 : i32 to f32
tensor.yield %7 : f32
} : tensor<2x3x8x4xf32>
%3 = linalg.mmt_4d_kernel ins(%0, %1 : tensor<?x?x?x?xf32>, tensor<3x4x4x2xf32>) outs(%2 : tensor<2x3x8x4xf32>) -> tensor<2x3x8x4xf32>
%4 = vector.transfer_read %3[%c0, %c0, %c0, %c0], %cst {masked = [false, false, false, false]} : tensor<2x3x8x4xf32>, vector<2x3x8x4xf32>
vector.print %4 : vector<2x3x8x4xf32>
return
}
// *** IR Dump After Canonicalizer ***
func @main() {
%c8 = constant 8 : index
%c4 = constant 4 : index
%c2 = constant 2 : index
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%0 = call @generate_pseudorandom_4d_f32(%c2, %c8, %c4, %c2) : (index, index, index, index) -> tensor<?x?x?x?xf32>
%1 = tensor_to_memref %0 : memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>
%2 = tensor.generate {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors
%7 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3)
%8 = index_cast %7 : index to i32
%9 = sitofp %8 : i32 to f32
tensor.yield %9 : f32
} : tensor<3x4x4x2xf32>
%3 = tensor_to_memref %2 : memref<3x4x4x2xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>
%4 = tensor.generate {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors
%7 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3)
%8 = index_cast %7 : index to i32
%9 = sitofp %8 : i32 to f32
tensor.yield %9 : f32
} : tensor<2x3x8x4xf32>
%5 = tensor_to_memref %4 : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>
linalg.mmt_4d_kernel ins(%1, %3 : memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>, memref<3x4x4x2xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>) outs(%5 : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>)
%6 = vector.transfer_read %5[%c0, %c0, %c0, %c0], %cst {masked = [false, false, false, false]} : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>, vector<2x3x8x4xf32>
vector.print %6 : vector<2x3x8x4xf32>
return
}
// *** IR Dump After CSE ***
func @main() {
%c8 = constant 8 : index
%c4 = constant 4 : index
%c2 = constant 2 : index
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%0 = call @generate_pseudorandom_4d_f32(%c2, %c8, %c4, %c2) : (index, index, index, index) -> tensor<?x?x?x?xf32>
%1 = tensor_to_memref %0 : memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>
%2 = tensor.generate {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors
%7 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3)
%8 = index_cast %7 : index to i32
%9 = sitofp %8 : i32 to f32
tensor.yield %9 : f32
} : tensor<3x4x4x2xf32>
%3 = tensor_to_memref %2 : memref<3x4x4x2xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>
%4 = tensor.generate {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors
%7 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3)
%8 = index_cast %7 : index to i32
%9 = sitofp %8 : i32 to f32
tensor.yield %9 : f32
} : tensor<2x3x8x4xf32>
%5 = tensor_to_memref %4 : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>
linalg.mmt_4d_kernel ins(%1, %3 : memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>, memref<3x4x4x2xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>) outs(%5 : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>)
%6 = vector.transfer_read %5[%c0, %c0, %c0, %c0], %cst {masked = [false, false, false, false]} : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>, vector<2x3x8x4xf32>
vector.print %6 : vector<2x3x8x4xf32>
return
}
// *** IR Dump After LoopInvariantCodeMotion ***
func @main() {
%c8 = constant 8 : index
%c4 = constant 4 : index
%c2 = constant 2 : index
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%0 = call @generate_pseudorandom_4d_f32(%c2, %c8, %c4, %c2) : (index, index, index, index) -> tensor<?x?x?x?xf32>
%1 = tensor_to_memref %0 : memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>
%2 = tensor.generate {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors
%7 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3)
%8 = index_cast %7 : index to i32
%9 = sitofp %8 : i32 to f32
tensor.yield %9 : f32
} : tensor<3x4x4x2xf32>
%3 = tensor_to_memref %2 : memref<3x4x4x2xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>
%4 = tensor.generate {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors
%7 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3)
%8 = index_cast %7 : index to i32
%9 = sitofp %8 : i32 to f32
tensor.yield %9 : f32
} : tensor<2x3x8x4xf32>
%5 = tensor_to_memref %4 : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>
linalg.mmt_4d_kernel ins(%1, %3 : memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>, memref<3x4x4x2xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>) outs(%5 : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>)
%6 = vector.transfer_read %5[%c0, %c0, %c0, %c0], %cst {masked = [false, false, false, false]} : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>, vector<2x3x8x4xf32>
vector.print %6 : vector<2x3x8x4xf32>
return
}
// *** IR Dump After Canonicalizer ***
func @main() {
%c8 = constant 8 : index
%c4 = constant 4 : index
%c2 = constant 2 : index
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%0 = call @generate_pseudorandom_4d_f32(%c2, %c8, %c4, %c2) : (index, index, index, index) -> tensor<?x?x?x?xf32>
%1 = tensor_to_memref %0 : memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>
%2 = tensor.generate {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors
%7 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3)
%8 = index_cast %7 : index to i32
%9 = sitofp %8 : i32 to f32
tensor.yield %9 : f32
} : tensor<3x4x4x2xf32>
%3 = tensor_to_memref %2 : memref<3x4x4x2xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>
%4 = tensor.generate {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors
%7 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3)
%8 = index_cast %7 : index to i32
%9 = sitofp %8 : i32 to f32
tensor.yield %9 : f32
} : tensor<2x3x8x4xf32>
%5 = tensor_to_memref %4 : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>
linalg.mmt_4d_kernel ins(%1, %3 : memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>, memref<3x4x4x2xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>) outs(%5 : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>)
%6 = vector.transfer_read %5[%c0, %c0, %c0, %c0], %cst {masked = [false, false, false, false]} : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>, vector<2x3x8x4xf32>
vector.print %6 : vector<2x3x8x4xf32>
return
}
// *** IR Dump After CSE ***
func @main() {
%c8 = constant 8 : index
%c4 = constant 4 : index
%c2 = constant 2 : index
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%0 = call @generate_pseudorandom_4d_f32(%c2, %c8, %c4, %c2) : (index, index, index, index) -> tensor<?x?x?x?xf32>
%1 = tensor_to_memref %0 : memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>
%2 = tensor.generate {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors
%7 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3)
%8 = index_cast %7 : index to i32
%9 = sitofp %8 : i32 to f32
tensor.yield %9 : f32
} : tensor<3x4x4x2xf32>
%3 = tensor_to_memref %2 : memref<3x4x4x2xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>
%4 = tensor.generate {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors
%7 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3)
%8 = index_cast %7 : index to i32
%9 = sitofp %8 : i32 to f32
tensor.yield %9 : f32
} : tensor<2x3x8x4xf32>
%5 = tensor_to_memref %4 : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>
linalg.mmt_4d_kernel ins(%1, %3 : memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>, memref<3x4x4x2xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>) outs(%5 : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>)
%6 = vector.transfer_read %5[%c0, %c0, %c0, %c0], %cst {masked = [false, false, false, false]} : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>, vector<2x3x8x4xf32>
vector.print %6 : vector<2x3x8x4xf32>
return
}
// *** IR Dump After LoopInvariantCodeMotion ***
func @main() {
%c8 = constant 8 : index
%c4 = constant 4 : index
%c2 = constant 2 : index
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%0 = call @generate_pseudorandom_4d_f32(%c2, %c8, %c4, %c2) : (index, index, index, index) -> tensor<?x?x?x?xf32>
%1 = tensor_to_memref %0 : memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>
%2 = tensor.generate {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors
%7 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3)
%8 = index_cast %7 : index to i32
%9 = sitofp %8 : i32 to f32
tensor.yield %9 : f32
} : tensor<3x4x4x2xf32>
%3 = tensor_to_memref %2 : memref<3x4x4x2xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>
%4 = tensor.generate {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors
%7 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3)
%8 = index_cast %7 : index to i32
%9 = sitofp %8 : i32 to f32
tensor.yield %9 : f32
} : tensor<2x3x8x4xf32>
%5 = tensor_to_memref %4 : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>
linalg.mmt_4d_kernel ins(%1, %3 : memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>, memref<3x4x4x2xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>) outs(%5 : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>)
%6 = vector.transfer_read %5[%c0, %c0, %c0, %c0], %cst {masked = [false, false, false, false]} : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>, vector<2x3x8x4xf32>
vector.print %6 : vector<2x3x8x4xf32>
return
}
return val does not fold: %0 = tensor.generate %arg0, %arg1, %arg2, %arg3 {
^bb0(%arg4: index, %arg5: index, %arg6: index, %arg7: index): // no predecessors
%1 = index_cast %arg4 : index to i32
%2 = sitofp %1 : i32 to f32
tensor.yield %2 : f32
} : tensor<?x?x?x?xf32>
/tmp/a.mlir:26:10: error: 'std.tensor_load' op requires the same shape for all operands and results
%lhs = call @generate_pseudorandom_4d_f32 (%M, %M0, %K, %K0) : (index, index, index, index) -> tensor<?x?x?x?xf32>
^
/tmp/a.mlir:26:10: note: see current operation: %1 = "std.tensor_load"(%0) : (tensor<?x?x?x?xf32>) -> none
/tmp/a.mlir:26:10: error: 'std.tensor_load' op requires the same shape for all operands and results
%lhs = call @generate_pseudorandom_4d_f32 (%M, %M0, %K, %K0) : (index, index, index, index) -> tensor<?x?x?x?xf32>
^
/tmp/a.mlir:26:10: note: see current operation: %1 = "std.tensor_load"(%0) : (tensor<?x?x?x?xf32>) -> none
// *** IR Dump After (anonymous namespace)::LinalgComprehensiveBufferizePass Failed ***
#map0 = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>
#map1 = affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>
#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d3, d5)>
#map3 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)>
#map4 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4)>
#map5 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
"module"() ( {
"func"() ( {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors
%0 = "tensor.generate"(%arg0, %arg1, %arg2, %arg3) ( {
^bb0(%arg4: index, %arg5: index, %arg6: index, %arg7: index): // no predecessors
%1 = "std.index_cast"(%arg4) : (index) -> i32
%2 = "std.sitofp"(%1) : (i32) -> f32
"tensor.yield"(%2) : (f32) -> ()
}) : (index, index, index, index) -> tensor<?x?x?x?xf32>
"std.return"(%0) : (tensor<?x?x?x?xf32>) -> ()
}) {sym_name = "generate_pseudorandom_4d_f32", type = (index, index, index, index) -> tensor<?x?x?x?xf32>} : () -> ()
"func"() ( {
%c8 = "std.constant"() {value = 8 : index} : () -> index
%c4 = "std.constant"() {value = 4 : index} : () -> index
%c2 = "std.constant"() {value = 2 : index} : () -> index
%c0 = "std.constant"() {value = 0 : index} : () -> index
%cst = "std.constant"() {value = 0.000000e+00 : f32} : () -> f32
%0 = "std.call"(%c2, %c8, %c4, %c2) {callee = @generate_pseudorandom_4d_f32} : (index, index, index, index) -> tensor<?x?x?x?xf32>
%1 = "std.tensor_load"(%0) : (tensor<?x?x?x?xf32>) -> none
%2 = "std.tensor_to_memref"(%1) : (none) -> memref<?x?x?x?xf32, #map0>
%3 = "tensor.generate"() ( {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors
%8 = "affine.apply"(%arg0, %arg1, %arg2, %arg3) {map = #map1} : (index, index, index, index) -> index
%9 = "std.index_cast"(%8) : (index) -> i32
%10 = "std.sitofp"(%9) : (i32) -> f32
"tensor.yield"(%10) : (f32) -> ()
}) : () -> tensor<3x4x4x2xf32>
%4 = "std.tensor_to_memref"(%3) : (tensor<3x4x4x2xf32>) -> memref<3x4x4x2xf32, #map0>
%5 = "tensor.generate"() ( {
^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors
%8 = "affine.apply"(%arg0, %arg1, %arg2, %arg3) {map = #map1} : (index, index, index, index) -> index
%9 = "std.index_cast"(%8) : (index) -> i32
%10 = "std.sitofp"(%9) : (i32) -> f32
"tensor.yield"(%10) : (f32) -> ()
}) : () -> tensor<2x3x8x4xf32>
%6 = "std.tensor_to_memref"(%5) : (tensor<2x3x8x4xf32>) -> memref<2x3x8x4xf32, #map0>
"linalg.mmt_4d_kernel"(%2, %4, %6) ( {
^bb0(%arg0: f32, %arg1: f32, %arg2: f32): // no predecessors
%8 = "std.mulf"(%arg0, %arg1) : (f32, f32) -> f32
%9 = "std.addf"(%arg2, %8) : (f32, f32) -> f32
"linalg.yield"(%9) : (f32) -> ()
}) {linalg.memoized_indexing_maps = [#map2, #map3, #map4], operand_segment_sizes = dense<[2, 1]> : vector<2xi32>} : (memref<?x?x?x?xf32, #map0>, memref<3x4x4x2xf32, #map0>, memref<2x3x8x4xf32, #map0>) -> ()
%7 = "vector.transfer_read"(%6, %c0, %c0, %c0, %c0, %cst) {masked = [false, false, false, false], permutation_map = #map5} : (memref<2x3x8x4xf32, #map0>, index, index, index, index, f32) -> vector<2x3x8x4xf32>
"vector.print"(%7) : (vector<2x3x8x4xf32>) -> ()
"std.dealloc"(%0) : (tensor<?x?x?x?xf32>) -> ()
"std.return"() : () -> ()
}) {sym_name = "main", type = () -> ()} : () -> ()
"module_terminator"() : () -> ()
}) : () -> ()
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment