Created March 5, 2021 19:02

Save bjacob/9aec24b5e7c118d6c2e6b393bdd376a4 to your computer and use it in GitHub Desktop.
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
```
| benoitjacob@benoitjacob:/google/src/cloud/benoitjacob/fig1/google3$ blaze-bin/third_party/iree/experimental/runners/mlir-proto-opt -linalg-comprehensive-bufferize-inplace -print-ir-after-all -mlir-disable-threading /tmp/a.mlir | |
| // *** IR Dump After Canonicalizer *** | |
| func @generate_pseudorandom_4d_f32(%arg0: index, %arg1: index, %arg2: index, %arg3: index) -> tensor<?x?x?x?xf32> { | |
| %0 = tensor.generate %arg0, %arg1, %arg2, %arg3 { | |
| ^bb0(%arg4: index, %arg5: index, %arg6: index, %arg7: index): // no predecessors | |
| %1 = index_cast %arg4 : index to i32 | |
| %2 = sitofp %1 : i32 to f32 | |
| tensor.yield %2 : f32 | |
| } : tensor<?x?x?x?xf32> | |
| return %0 : tensor<?x?x?x?xf32> | |
| } | |
| // *** IR Dump After CSE *** | |
| func @generate_pseudorandom_4d_f32(%arg0: index, %arg1: index, %arg2: index, %arg3: index) -> tensor<?x?x?x?xf32> { | |
| %0 = tensor.generate %arg0, %arg1, %arg2, %arg3 { | |
| ^bb0(%arg4: index, %arg5: index, %arg6: index, %arg7: index): // no predecessors | |
| %1 = index_cast %arg4 : index to i32 | |
| %2 = sitofp %1 : i32 to f32 | |
| tensor.yield %2 : f32 | |
| } : tensor<?x?x?x?xf32> | |
| return %0 : tensor<?x?x?x?xf32> | |
| } | |
| // *** IR Dump After LoopInvariantCodeMotion *** | |
| func @generate_pseudorandom_4d_f32(%arg0: index, %arg1: index, %arg2: index, %arg3: index) -> tensor<?x?x?x?xf32> { | |
| %0 = tensor.generate %arg0, %arg1, %arg2, %arg3 { | |
| ^bb0(%arg4: index, %arg5: index, %arg6: index, %arg7: index): // no predecessors | |
| %1 = index_cast %arg4 : index to i32 | |
| %2 = sitofp %1 : i32 to f32 | |
| tensor.yield %2 : f32 | |
| } : tensor<?x?x?x?xf32> | |
| return %0 : tensor<?x?x?x?xf32> | |
| } | |
| // *** IR Dump After Canonicalizer *** | |
| func @generate_pseudorandom_4d_f32(%arg0: index, %arg1: index, %arg2: index, %arg3: index) -> tensor<?x?x?x?xf32> { | |
| %0 = tensor.generate %arg0, %arg1, %arg2, %arg3 { | |
| ^bb0(%arg4: index, %arg5: index, %arg6: index, %arg7: index): // no predecessors | |
| %1 = index_cast %arg4 : index to i32 | |
| %2 = sitofp %1 : i32 to f32 | |
| tensor.yield %2 : f32 | |
| } : tensor<?x?x?x?xf32> | |
| return %0 : tensor<?x?x?x?xf32> | |
| } | |
| // *** IR Dump After CSE *** | |
| func @generate_pseudorandom_4d_f32(%arg0: index, %arg1: index, %arg2: index, %arg3: index) -> tensor<?x?x?x?xf32> { | |
| %0 = tensor.generate %arg0, %arg1, %arg2, %arg3 { | |
| ^bb0(%arg4: index, %arg5: index, %arg6: index, %arg7: index): // no predecessors | |
| %1 = index_cast %arg4 : index to i32 | |
| %2 = sitofp %1 : i32 to f32 | |
| tensor.yield %2 : f32 | |
| } : tensor<?x?x?x?xf32> | |
| return %0 : tensor<?x?x?x?xf32> | |
| } | |
| // *** IR Dump After LoopInvariantCodeMotion *** | |
| func @generate_pseudorandom_4d_f32(%arg0: index, %arg1: index, %arg2: index, %arg3: index) -> tensor<?x?x?x?xf32> { | |
| %0 = tensor.generate %arg0, %arg1, %arg2, %arg3 { | |
| ^bb0(%arg4: index, %arg5: index, %arg6: index, %arg7: index): // no predecessors | |
| %1 = index_cast %arg4 : index to i32 | |
| %2 = sitofp %1 : i32 to f32 | |
| tensor.yield %2 : f32 | |
| } : tensor<?x?x?x?xf32> | |
| return %0 : tensor<?x?x?x?xf32> | |
| } | |
| // *** IR Dump After Canonicalizer *** | |
| func @main() { | |
| %c8 = constant 8 : index | |
| %c4 = constant 4 : index | |
| %c2 = constant 2 : index | |
| %c0 = constant 0 : index | |
| %cst = constant 0.000000e+00 : f32 | |
| %0 = call @generate_pseudorandom_4d_f32(%c2, %c8, %c4, %c2) : (index, index, index, index) -> tensor<?x?x?x?xf32> | |
| %1 = tensor.generate { | |
| ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors | |
| %5 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3) | |
| %6 = index_cast %5 : index to i32 | |
| %7 = sitofp %6 : i32 to f32 | |
| tensor.yield %7 : f32 | |
| } : tensor<3x4x4x2xf32> | |
| %2 = tensor.generate { | |
| ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors | |
| %5 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3) | |
| %6 = index_cast %5 : index to i32 | |
| %7 = sitofp %6 : i32 to f32 | |
| tensor.yield %7 : f32 | |
| } : tensor<2x3x8x4xf32> | |
| %3 = linalg.mmt_4d_kernel ins(%0, %1 : tensor<?x?x?x?xf32>, tensor<3x4x4x2xf32>) outs(%2 : tensor<2x3x8x4xf32>) -> tensor<2x3x8x4xf32> | |
| %4 = vector.transfer_read %3[%c0, %c0, %c0, %c0], %cst {masked = [false, false, false, false]} : tensor<2x3x8x4xf32>, vector<2x3x8x4xf32> | |
| vector.print %4 : vector<2x3x8x4xf32> | |
| return | |
| } | |
| // *** IR Dump After CSE *** | |
| func @main() { | |
| %c8 = constant 8 : index | |
| %c4 = constant 4 : index | |
| %c2 = constant 2 : index | |
| %c0 = constant 0 : index | |
| %cst = constant 0.000000e+00 : f32 | |
| %0 = call @generate_pseudorandom_4d_f32(%c2, %c8, %c4, %c2) : (index, index, index, index) -> tensor<?x?x?x?xf32> | |
| %1 = tensor.generate { | |
| ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors | |
| %5 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3) | |
| %6 = index_cast %5 : index to i32 | |
| %7 = sitofp %6 : i32 to f32 | |
| tensor.yield %7 : f32 | |
| } : tensor<3x4x4x2xf32> | |
| %2 = tensor.generate { | |
| ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors | |
| %5 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3) | |
| %6 = index_cast %5 : index to i32 | |
| %7 = sitofp %6 : i32 to f32 | |
| tensor.yield %7 : f32 | |
| } : tensor<2x3x8x4xf32> | |
| %3 = linalg.mmt_4d_kernel ins(%0, %1 : tensor<?x?x?x?xf32>, tensor<3x4x4x2xf32>) outs(%2 : tensor<2x3x8x4xf32>) -> tensor<2x3x8x4xf32> | |
| %4 = vector.transfer_read %3[%c0, %c0, %c0, %c0], %cst {masked = [false, false, false, false]} : tensor<2x3x8x4xf32>, vector<2x3x8x4xf32> | |
| vector.print %4 : vector<2x3x8x4xf32> | |
| return | |
| } | |
| // *** IR Dump After LoopInvariantCodeMotion *** | |
| func @main() { | |
| %c8 = constant 8 : index | |
| %c4 = constant 4 : index | |
| %c2 = constant 2 : index | |
| %c0 = constant 0 : index | |
| %cst = constant 0.000000e+00 : f32 | |
| %0 = call @generate_pseudorandom_4d_f32(%c2, %c8, %c4, %c2) : (index, index, index, index) -> tensor<?x?x?x?xf32> | |
| %1 = tensor.generate { | |
| ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors | |
| %5 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3) | |
| %6 = index_cast %5 : index to i32 | |
| %7 = sitofp %6 : i32 to f32 | |
| tensor.yield %7 : f32 | |
| } : tensor<3x4x4x2xf32> | |
| %2 = tensor.generate { | |
| ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors | |
| %5 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3) | |
| %6 = index_cast %5 : index to i32 | |
| %7 = sitofp %6 : i32 to f32 | |
| tensor.yield %7 : f32 | |
| } : tensor<2x3x8x4xf32> | |
| %3 = linalg.mmt_4d_kernel ins(%0, %1 : tensor<?x?x?x?xf32>, tensor<3x4x4x2xf32>) outs(%2 : tensor<2x3x8x4xf32>) -> tensor<2x3x8x4xf32> | |
| %4 = vector.transfer_read %3[%c0, %c0, %c0, %c0], %cst {masked = [false, false, false, false]} : tensor<2x3x8x4xf32>, vector<2x3x8x4xf32> | |
| vector.print %4 : vector<2x3x8x4xf32> | |
| return | |
| } | |
| // *** IR Dump After Canonicalizer *** | |
| func @main() { | |
| %c8 = constant 8 : index | |
| %c4 = constant 4 : index | |
| %c2 = constant 2 : index | |
| %c0 = constant 0 : index | |
| %cst = constant 0.000000e+00 : f32 | |
| %0 = call @generate_pseudorandom_4d_f32(%c2, %c8, %c4, %c2) : (index, index, index, index) -> tensor<?x?x?x?xf32> | |
| %1 = tensor.generate { | |
| ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors | |
| %5 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3) | |
| %6 = index_cast %5 : index to i32 | |
| %7 = sitofp %6 : i32 to f32 | |
| tensor.yield %7 : f32 | |
| } : tensor<3x4x4x2xf32> | |
| %2 = tensor.generate { | |
| ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors | |
| %5 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3) | |
| %6 = index_cast %5 : index to i32 | |
| %7 = sitofp %6 : i32 to f32 | |
| tensor.yield %7 : f32 | |
| } : tensor<2x3x8x4xf32> | |
| %3 = linalg.mmt_4d_kernel ins(%0, %1 : tensor<?x?x?x?xf32>, tensor<3x4x4x2xf32>) outs(%2 : tensor<2x3x8x4xf32>) -> tensor<2x3x8x4xf32> | |
| %4 = vector.transfer_read %3[%c0, %c0, %c0, %c0], %cst {masked = [false, false, false, false]} : tensor<2x3x8x4xf32>, vector<2x3x8x4xf32> | |
| vector.print %4 : vector<2x3x8x4xf32> | |
| return | |
| } | |
| // *** IR Dump After CSE *** | |
| func @main() { | |
| %c8 = constant 8 : index | |
| %c4 = constant 4 : index | |
| %c2 = constant 2 : index | |
| %c0 = constant 0 : index | |
| %cst = constant 0.000000e+00 : f32 | |
| %0 = call @generate_pseudorandom_4d_f32(%c2, %c8, %c4, %c2) : (index, index, index, index) -> tensor<?x?x?x?xf32> | |
| %1 = tensor.generate { | |
| ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors | |
| %5 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3) | |
| %6 = index_cast %5 : index to i32 | |
| %7 = sitofp %6 : i32 to f32 | |
| tensor.yield %7 : f32 | |
| } : tensor<3x4x4x2xf32> | |
| %2 = tensor.generate { | |
| ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors | |
| %5 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3) | |
| %6 = index_cast %5 : index to i32 | |
| %7 = sitofp %6 : i32 to f32 | |
| tensor.yield %7 : f32 | |
| } : tensor<2x3x8x4xf32> | |
| %3 = linalg.mmt_4d_kernel ins(%0, %1 : tensor<?x?x?x?xf32>, tensor<3x4x4x2xf32>) outs(%2 : tensor<2x3x8x4xf32>) -> tensor<2x3x8x4xf32> | |
| %4 = vector.transfer_read %3[%c0, %c0, %c0, %c0], %cst {masked = [false, false, false, false]} : tensor<2x3x8x4xf32>, vector<2x3x8x4xf32> | |
| vector.print %4 : vector<2x3x8x4xf32> | |
| return | |
| } | |
| // *** IR Dump After LoopInvariantCodeMotion *** | |
| func @main() { | |
| %c8 = constant 8 : index | |
| %c4 = constant 4 : index | |
| %c2 = constant 2 : index | |
| %c0 = constant 0 : index | |
| %cst = constant 0.000000e+00 : f32 | |
| %0 = call @generate_pseudorandom_4d_f32(%c2, %c8, %c4, %c2) : (index, index, index, index) -> tensor<?x?x?x?xf32> | |
| %1 = tensor.generate { | |
| ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors | |
| %5 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3) | |
| %6 = index_cast %5 : index to i32 | |
| %7 = sitofp %6 : i32 to f32 | |
| tensor.yield %7 : f32 | |
| } : tensor<3x4x4x2xf32> | |
| %2 = tensor.generate { | |
| ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors | |
| %5 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3) | |
| %6 = index_cast %5 : index to i32 | |
| %7 = sitofp %6 : i32 to f32 | |
| tensor.yield %7 : f32 | |
| } : tensor<2x3x8x4xf32> | |
| %3 = linalg.mmt_4d_kernel ins(%0, %1 : tensor<?x?x?x?xf32>, tensor<3x4x4x2xf32>) outs(%2 : tensor<2x3x8x4xf32>) -> tensor<2x3x8x4xf32> | |
| %4 = vector.transfer_read %3[%c0, %c0, %c0, %c0], %cst {masked = [false, false, false, false]} : tensor<2x3x8x4xf32>, vector<2x3x8x4xf32> | |
| vector.print %4 : vector<2x3x8x4xf32> | |
| return | |
| } | |
| // *** IR Dump After Canonicalizer *** | |
| func @main() { | |
| %c8 = constant 8 : index | |
| %c4 = constant 4 : index | |
| %c2 = constant 2 : index | |
| %c0 = constant 0 : index | |
| %cst = constant 0.000000e+00 : f32 | |
| %0 = call @generate_pseudorandom_4d_f32(%c2, %c8, %c4, %c2) : (index, index, index, index) -> tensor<?x?x?x?xf32> | |
| %1 = tensor_to_memref %0 : memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>> | |
| %2 = tensor.generate { | |
| ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors | |
| %7 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3) | |
| %8 = index_cast %7 : index to i32 | |
| %9 = sitofp %8 : i32 to f32 | |
| tensor.yield %9 : f32 | |
| } : tensor<3x4x4x2xf32> | |
| %3 = tensor_to_memref %2 : memref<3x4x4x2xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>> | |
| %4 = tensor.generate { | |
| ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors | |
| %7 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3) | |
| %8 = index_cast %7 : index to i32 | |
| %9 = sitofp %8 : i32 to f32 | |
| tensor.yield %9 : f32 | |
| } : tensor<2x3x8x4xf32> | |
| %5 = tensor_to_memref %4 : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>> | |
| linalg.mmt_4d_kernel ins(%1, %3 : memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>, memref<3x4x4x2xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>) outs(%5 : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>) | |
| %6 = vector.transfer_read %5[%c0, %c0, %c0, %c0], %cst {masked = [false, false, false, false]} : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>, vector<2x3x8x4xf32> | |
| vector.print %6 : vector<2x3x8x4xf32> | |
| return | |
| } | |
| // *** IR Dump After CSE *** | |
| func @main() { | |
| %c8 = constant 8 : index | |
| %c4 = constant 4 : index | |
| %c2 = constant 2 : index | |
| %c0 = constant 0 : index | |
| %cst = constant 0.000000e+00 : f32 | |
| %0 = call @generate_pseudorandom_4d_f32(%c2, %c8, %c4, %c2) : (index, index, index, index) -> tensor<?x?x?x?xf32> | |
| %1 = tensor_to_memref %0 : memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>> | |
| %2 = tensor.generate { | |
| ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors | |
| %7 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3) | |
| %8 = index_cast %7 : index to i32 | |
| %9 = sitofp %8 : i32 to f32 | |
| tensor.yield %9 : f32 | |
| } : tensor<3x4x4x2xf32> | |
| %3 = tensor_to_memref %2 : memref<3x4x4x2xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>> | |
| %4 = tensor.generate { | |
| ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors | |
| %7 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3) | |
| %8 = index_cast %7 : index to i32 | |
| %9 = sitofp %8 : i32 to f32 | |
| tensor.yield %9 : f32 | |
| } : tensor<2x3x8x4xf32> | |
| %5 = tensor_to_memref %4 : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>> | |
| linalg.mmt_4d_kernel ins(%1, %3 : memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>, memref<3x4x4x2xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>) outs(%5 : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>) | |
| %6 = vector.transfer_read %5[%c0, %c0, %c0, %c0], %cst {masked = [false, false, false, false]} : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>, vector<2x3x8x4xf32> | |
| vector.print %6 : vector<2x3x8x4xf32> | |
| return | |
| } | |
| // *** IR Dump After LoopInvariantCodeMotion *** | |
| func @main() { | |
| %c8 = constant 8 : index | |
| %c4 = constant 4 : index | |
| %c2 = constant 2 : index | |
| %c0 = constant 0 : index | |
| %cst = constant 0.000000e+00 : f32 | |
| %0 = call @generate_pseudorandom_4d_f32(%c2, %c8, %c4, %c2) : (index, index, index, index) -> tensor<?x?x?x?xf32> | |
| %1 = tensor_to_memref %0 : memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>> | |
| %2 = tensor.generate { | |
| ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors | |
| %7 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3) | |
| %8 = index_cast %7 : index to i32 | |
| %9 = sitofp %8 : i32 to f32 | |
| tensor.yield %9 : f32 | |
| } : tensor<3x4x4x2xf32> | |
| %3 = tensor_to_memref %2 : memref<3x4x4x2xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>> | |
| %4 = tensor.generate { | |
| ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors | |
| %7 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3) | |
| %8 = index_cast %7 : index to i32 | |
| %9 = sitofp %8 : i32 to f32 | |
| tensor.yield %9 : f32 | |
| } : tensor<2x3x8x4xf32> | |
| %5 = tensor_to_memref %4 : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>> | |
| linalg.mmt_4d_kernel ins(%1, %3 : memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>, memref<3x4x4x2xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>) outs(%5 : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>) | |
| %6 = vector.transfer_read %5[%c0, %c0, %c0, %c0], %cst {masked = [false, false, false, false]} : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>, vector<2x3x8x4xf32> | |
| vector.print %6 : vector<2x3x8x4xf32> | |
| return | |
| } | |
| // *** IR Dump After Canonicalizer *** | |
| func @main() { | |
| %c8 = constant 8 : index | |
| %c4 = constant 4 : index | |
| %c2 = constant 2 : index | |
| %c0 = constant 0 : index | |
| %cst = constant 0.000000e+00 : f32 | |
| %0 = call @generate_pseudorandom_4d_f32(%c2, %c8, %c4, %c2) : (index, index, index, index) -> tensor<?x?x?x?xf32> | |
| %1 = tensor_to_memref %0 : memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>> | |
| %2 = tensor.generate { | |
| ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors | |
| %7 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3) | |
| %8 = index_cast %7 : index to i32 | |
| %9 = sitofp %8 : i32 to f32 | |
| tensor.yield %9 : f32 | |
| } : tensor<3x4x4x2xf32> | |
| %3 = tensor_to_memref %2 : memref<3x4x4x2xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>> | |
| %4 = tensor.generate { | |
| ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors | |
| %7 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3) | |
| %8 = index_cast %7 : index to i32 | |
| %9 = sitofp %8 : i32 to f32 | |
| tensor.yield %9 : f32 | |
| } : tensor<2x3x8x4xf32> | |
| %5 = tensor_to_memref %4 : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>> | |
| linalg.mmt_4d_kernel ins(%1, %3 : memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>, memref<3x4x4x2xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>) outs(%5 : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>) | |
| %6 = vector.transfer_read %5[%c0, %c0, %c0, %c0], %cst {masked = [false, false, false, false]} : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>, vector<2x3x8x4xf32> | |
| vector.print %6 : vector<2x3x8x4xf32> | |
| return | |
| } | |
| // *** IR Dump After CSE *** | |
| func @main() { | |
| %c8 = constant 8 : index | |
| %c4 = constant 4 : index | |
| %c2 = constant 2 : index | |
| %c0 = constant 0 : index | |
| %cst = constant 0.000000e+00 : f32 | |
| %0 = call @generate_pseudorandom_4d_f32(%c2, %c8, %c4, %c2) : (index, index, index, index) -> tensor<?x?x?x?xf32> | |
| %1 = tensor_to_memref %0 : memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>> | |
| %2 = tensor.generate { | |
| ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors | |
| %7 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3) | |
| %8 = index_cast %7 : index to i32 | |
| %9 = sitofp %8 : i32 to f32 | |
| tensor.yield %9 : f32 | |
| } : tensor<3x4x4x2xf32> | |
| %3 = tensor_to_memref %2 : memref<3x4x4x2xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>> | |
| %4 = tensor.generate { | |
| ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors | |
| %7 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3) | |
| %8 = index_cast %7 : index to i32 | |
| %9 = sitofp %8 : i32 to f32 | |
| tensor.yield %9 : f32 | |
| } : tensor<2x3x8x4xf32> | |
| %5 = tensor_to_memref %4 : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>> | |
| linalg.mmt_4d_kernel ins(%1, %3 : memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>, memref<3x4x4x2xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>) outs(%5 : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>) | |
| %6 = vector.transfer_read %5[%c0, %c0, %c0, %c0], %cst {masked = [false, false, false, false]} : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>, vector<2x3x8x4xf32> | |
| vector.print %6 : vector<2x3x8x4xf32> | |
| return | |
| } | |
| // *** IR Dump After LoopInvariantCodeMotion *** | |
| func @main() { | |
| %c8 = constant 8 : index | |
| %c4 = constant 4 : index | |
| %c2 = constant 2 : index | |
| %c0 = constant 0 : index | |
| %cst = constant 0.000000e+00 : f32 | |
| %0 = call @generate_pseudorandom_4d_f32(%c2, %c8, %c4, %c2) : (index, index, index, index) -> tensor<?x?x?x?xf32> | |
| %1 = tensor_to_memref %0 : memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>> | |
| %2 = tensor.generate { | |
| ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors | |
| %7 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3) | |
| %8 = index_cast %7 : index to i32 | |
| %9 = sitofp %8 : i32 to f32 | |
| tensor.yield %9 : f32 | |
| } : tensor<3x4x4x2xf32> | |
| %3 = tensor_to_memref %2 : memref<3x4x4x2xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>> | |
| %4 = tensor.generate { | |
| ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors | |
| %7 = affine.apply affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)>(%arg0, %arg1, %arg2, %arg3) | |
| %8 = index_cast %7 : index to i32 | |
| %9 = sitofp %8 : i32 to f32 | |
| tensor.yield %9 : f32 | |
| } : tensor<2x3x8x4xf32> | |
| %5 = tensor_to_memref %4 : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>> | |
| linalg.mmt_4d_kernel ins(%1, %3 : memref<?x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>, memref<3x4x4x2xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>) outs(%5 : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>) | |
| %6 = vector.transfer_read %5[%c0, %c0, %c0, %c0], %cst {masked = [false, false, false, false]} : memref<2x3x8x4xf32, affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>>, vector<2x3x8x4xf32> | |
| vector.print %6 : vector<2x3x8x4xf32> | |
| return | |
| } | |
| return val does not fold: %0 = tensor.generate %arg0, %arg1, %arg2, %arg3 { | |
| ^bb0(%arg4: index, %arg5: index, %arg6: index, %arg7: index): // no predecessors | |
| %1 = index_cast %arg4 : index to i32 | |
| %2 = sitofp %1 : i32 to f32 | |
| tensor.yield %2 : f32 | |
| } : tensor<?x?x?x?xf32> | |
| /tmp/a.mlir:26:10: error: 'std.tensor_load' op requires the same shape for all operands and results | |
| %lhs = call @generate_pseudorandom_4d_f32 (%M, %M0, %K, %K0) : (index, index, index, index) -> tensor<?x?x?x?xf32> | |
| ^ | |
| /tmp/a.mlir:26:10: note: see current operation: %1 = "std.tensor_load"(%0) : (tensor<?x?x?x?xf32>) -> none | |
| /tmp/a.mlir:26:10: error: 'std.tensor_load' op requires the same shape for all operands and results | |
| %lhs = call @generate_pseudorandom_4d_f32 (%M, %M0, %K, %K0) : (index, index, index, index) -> tensor<?x?x?x?xf32> | |
| ^ | |
| /tmp/a.mlir:26:10: note: see current operation: %1 = "std.tensor_load"(%0) : (tensor<?x?x?x?xf32>) -> none | |
| // *** IR Dump After (anonymous namespace)::LinalgComprehensiveBufferizePass Failed *** | |
| #map0 = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)> | |
| #map1 = affine_map<(d0, d1, d2, d3) -> (d0 + d1 + d2 + d3)> | |
| #map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d3, d5)> | |
| #map3 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)> | |
| #map4 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4)> | |
| #map5 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> | |
| "module"() ( { | |
| "func"() ( { | |
| ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors | |
| %0 = "tensor.generate"(%arg0, %arg1, %arg2, %arg3) ( { | |
| ^bb0(%arg4: index, %arg5: index, %arg6: index, %arg7: index): // no predecessors | |
| %1 = "std.index_cast"(%arg4) : (index) -> i32 | |
| %2 = "std.sitofp"(%1) : (i32) -> f32 | |
| "tensor.yield"(%2) : (f32) -> () | |
| }) : (index, index, index, index) -> tensor<?x?x?x?xf32> | |
| "std.return"(%0) : (tensor<?x?x?x?xf32>) -> () | |
| }) {sym_name = "generate_pseudorandom_4d_f32", type = (index, index, index, index) -> tensor<?x?x?x?xf32>} : () -> () | |
| "func"() ( { | |
| %c8 = "std.constant"() {value = 8 : index} : () -> index | |
| %c4 = "std.constant"() {value = 4 : index} : () -> index | |
| %c2 = "std.constant"() {value = 2 : index} : () -> index | |
| %c0 = "std.constant"() {value = 0 : index} : () -> index | |
| %cst = "std.constant"() {value = 0.000000e+00 : f32} : () -> f32 | |
| %0 = "std.call"(%c2, %c8, %c4, %c2) {callee = @generate_pseudorandom_4d_f32} : (index, index, index, index) -> tensor<?x?x?x?xf32> | |
| %1 = "std.tensor_load"(%0) : (tensor<?x?x?x?xf32>) -> none | |
| %2 = "std.tensor_to_memref"(%1) : (none) -> memref<?x?x?x?xf32, #map0> | |
| %3 = "tensor.generate"() ( { | |
| ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors | |
| %8 = "affine.apply"(%arg0, %arg1, %arg2, %arg3) {map = #map1} : (index, index, index, index) -> index | |
| %9 = "std.index_cast"(%8) : (index) -> i32 | |
| %10 = "std.sitofp"(%9) : (i32) -> f32 | |
| "tensor.yield"(%10) : (f32) -> () | |
| }) : () -> tensor<3x4x4x2xf32> | |
| %4 = "std.tensor_to_memref"(%3) : (tensor<3x4x4x2xf32>) -> memref<3x4x4x2xf32, #map0> | |
| %5 = "tensor.generate"() ( { | |
| ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index): // no predecessors | |
| %8 = "affine.apply"(%arg0, %arg1, %arg2, %arg3) {map = #map1} : (index, index, index, index) -> index | |
| %9 = "std.index_cast"(%8) : (index) -> i32 | |
| %10 = "std.sitofp"(%9) : (i32) -> f32 | |
| "tensor.yield"(%10) : (f32) -> () | |
| }) : () -> tensor<2x3x8x4xf32> | |
| %6 = "std.tensor_to_memref"(%5) : (tensor<2x3x8x4xf32>) -> memref<2x3x8x4xf32, #map0> | |
| "linalg.mmt_4d_kernel"(%2, %4, %6) ( { | |
| ^bb0(%arg0: f32, %arg1: f32, %arg2: f32): // no predecessors | |
| %8 = "std.mulf"(%arg0, %arg1) : (f32, f32) -> f32 | |
| %9 = "std.addf"(%arg2, %8) : (f32, f32) -> f32 | |
| "linalg.yield"(%9) : (f32) -> () | |
| }) {linalg.memoized_indexing_maps = [#map2, #map3, #map4], operand_segment_sizes = dense<[2, 1]> : vector<2xi32>} : (memref<?x?x?x?xf32, #map0>, memref<3x4x4x2xf32, #map0>, memref<2x3x8x4xf32, #map0>) -> () | |
| %7 = "vector.transfer_read"(%6, %c0, %c0, %c0, %c0, %cst) {masked = [false, false, false, false], permutation_map = #map5} : (memref<2x3x8x4xf32, #map0>, index, index, index, index, f32) -> vector<2x3x8x4xf32> | |
| "vector.print"(%7) : (vector<2x3x8x4xf32>) -> () | |
| "std.dealloc"(%0) : (tensor<?x?x?x?xf32>) -> () | |
| "std.return"() : () -> () | |
| }) {sym_name = "main", type = () -> ()} : () -> () | |
| "module_terminator"() : () -> () | |
| }) : () -> () | |
```
  
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.