Created
March 14, 2021 12:26
-
-
Save antiagainst/58d1232ecf68a18aafdab5fe06c19d09 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// *** IR Dump After mlir::mhlo::(anonymous namespace)::LegalizeControlFlowPass *** | |
func @pad_test() attributes {iree.module.export} { | |
%0 = iree.unfoldable_constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%1 = iree.unfoldable_constant dense<0> : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq_const(%2, dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32>) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::Flow::(anonymous namespace)::HLOToHLOPreprocessing *** | |
func @pad_test() attributes {iree.module.export} { | |
%0 = iree.unfoldable_constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%1 = iree.unfoldable_constant dense<0> : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq_const(%2, dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32>) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::DecomposeHLOClampPass *** | |
func @pad_test() attributes {iree.module.export} { | |
%0 = iree.unfoldable_constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%1 = iree.unfoldable_constant dense<0> : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq_const(%2, dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32>) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After RemoveShapeConstraints *** | |
func @pad_test() attributes {iree.module.export} { | |
%0 = iree.unfoldable_constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%1 = iree.unfoldable_constant dense<0> : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq_const(%2, dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32>) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After TosaToSCF *** | |
func @pad_test() attributes {iree.module.export} { | |
%0 = iree.unfoldable_constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%1 = iree.unfoldable_constant dense<0> : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq_const(%2, dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32>) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After SCFToStandard *** | |
func @pad_test() attributes {iree.module.export} { | |
%0 = iree.unfoldable_constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%1 = iree.unfoldable_constant dense<0> : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq_const(%2, dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32>) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After TosaToStandard *** | |
func @pad_test() attributes {iree.module.export} { | |
%0 = iree.unfoldable_constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%1 = iree.unfoldable_constant dense<0> : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq_const(%2, dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32>) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After TosaToLinalgOnTensors *** | |
func @pad_test() attributes {iree.module.export} { | |
%0 = iree.unfoldable_constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%1 = iree.unfoldable_constant dense<0> : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq_const(%2, dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32>) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After Canonicalizer *** | |
module { | |
func @pad_test() attributes {iree.module.export} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::Shape::(anonymous namespace)::ConvertShapeToShapex *** | |
module { | |
func @pad_test() attributes {iree.module.export} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::Flow::(anonymous namespace)::FlattenTuplesInCFGPass *** | |
module { | |
func @pad_test() attributes {iree.module.export} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
func @pad_test() attributes {iree.module.export} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After Inliner *** | |
module { | |
func @pad_test() attributes {iree.module.export} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
func @pad_test() attributes {iree.module.export} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After CSE *** | |
func @pad_test() attributes {iree.module.export} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::Flow::LegalizeInputTypesPass *** | |
module { | |
func @pad_test() attributes {iree.module.export} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::Flow::MaterializeReflectionAttrsPass *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::Flow::ExpandVariableDynamicDimsPass *** | |
module { | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::Shape::(anonymous namespace)::ExpandFunctionDynamicDimsPass *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::Shape::(anonymous namespace)::TieDynamicShapesPass *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::Shape::(anonymous namespace)::MaterializeShapeCalculationsPass *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::Shape::(anonymous namespace)::HoistShapeCalculations *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::Flow::PrePartitioningConversionPass *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After Canonicalizer *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::ConvertHLOToLinalgOnTensorsPass *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = tensor.extract %1[] : tensor<i32> | |
%c0 = constant 0 : index | |
%c1 = constant 1 : index | |
%c1_2 = constant 1 : index | |
%c5 = constant 5 : index | |
%c0_3 = constant 0 : index | |
%c2 = constant 2 : index | |
%c3 = constant 3 : index | |
%c1_4 = constant 1 : index | |
%c3_5 = constant 3 : index | |
%c9 = constant 9 : index | |
%3 = linalg.init_tensor [%c3, %c9] : tensor<?x?xi32> | |
%4 = linalg.fill(%3, %2) : tensor<?x?xi32>, i32 -> tensor<?x?xi32> | |
%5 = subtensor_insert %0 into %4[%c0, %c1_2] [%c2, %c3_5] [1, 1] : tensor<2x3xi32> into tensor<?x?xi32> | |
%6 = tensor.cast %5 : tensor<?x?xi32> to tensor<3x9xi32> | |
check.expect_eq(%6, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After LinalgFoldUnitExtentDims *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%c0 = constant 0 : index | |
%c1 = constant 1 : index | |
%c2 = constant 2 : index | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = tensor.extract %1[] : tensor<i32> | |
%3 = linalg.init_tensor [%c3, %c9] : tensor<?x?xi32> | |
%4 = linalg.fill(%3, %2) : tensor<?x?xi32>, i32 -> tensor<?x?xi32> | |
%5 = subtensor_insert %0 into %4[%c0, %c1] [%c2, %c3] [1, 1] : tensor<2x3xi32> into tensor<?x?xi32> | |
%6 = tensor.cast %5 : tensor<?x?xi32> to tensor<3x9xi32> | |
check.expect_eq(%6, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After Canonicalizer *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = tensor.extract %1[] : tensor<i32> | |
%3 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
%4 = linalg.fill(%3, %2) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
%5 = subtensor_insert %0 into %4[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
check.expect_eq(%5, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::FusionOfTensorOpsPass *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = tensor.extract %1[] : tensor<i32> | |
%3 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
%4 = linalg.fill(%3, %2) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
%5 = subtensor_insert %0 into %4[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
check.expect_eq(%5, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After CSE *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = tensor.extract %1[] : tensor<i32> | |
%3 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
%4 = linalg.fill(%3, %2) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
%5 = subtensor_insert %0 into %4[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
check.expect_eq(%5, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::Flow::(anonymous namespace)::DispatchLinalgOnTensorsPass *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
%c1 = constant 1 : index | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = flow.dispatch.workgroups[%c9, %c3, %c1](%1) : (tensor<i32>) -> tensor<3x9xi32> = | |
(%arg0: !flow.dispatch.tensor<readonly:i32>, %arg1: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
%4 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
%5 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%6 = tensor.extract %5[] : tensor<i32> | |
%7 = linalg.fill(%4, %6) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
flow.dispatch.tensor.store %7, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
flow.return | |
} | |
%3 = flow.dispatch.workgroups[%c9, %c3, %c1](%0, %2) : (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32> = | |
(%arg0: !flow.dispatch.tensor<readonly:2x3xi32>, %arg1: !flow.dispatch.tensor<readonly:3x9xi32>, %arg2: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
%4 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%5 = flow.dispatch.tensor.load %arg1 : !flow.dispatch.tensor<readonly:3x9xi32> -> tensor<3x9xi32> | |
%6 = subtensor_insert %4 into %5[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
flow.dispatch.tensor.store %6, %arg2 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
flow.return | |
} | |
check.expect_eq(%3, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After Canonicalizer *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
%c1 = constant 1 : index | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = flow.dispatch.workgroups[%c9, %c3, %c1](%1) : (tensor<i32>) -> tensor<3x9xi32> = | |
(%arg0: !flow.dispatch.tensor<readonly:i32>, %arg1: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
%4 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
%5 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%6 = tensor.extract %5[] : tensor<i32> | |
%7 = linalg.fill(%4, %6) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
flow.dispatch.tensor.store %7, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
flow.return | |
} | |
%3 = flow.dispatch.workgroups[%c9, %c3, %c1](%0, %2) : (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32> = | |
(%arg0: !flow.dispatch.tensor<readonly:2x3xi32>, %arg1: !flow.dispatch.tensor<readonly:3x9xi32>, %arg2: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
%4 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%5 = flow.dispatch.tensor.load %arg1 : !flow.dispatch.tensor<readonly:3x9xi32> -> tensor<3x9xi32> | |
%6 = subtensor_insert %4 into %5[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
flow.dispatch.tensor.store %6, %arg2 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
flow.return | |
} | |
check.expect_eq(%3, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::Flow::OutlineDispatchRegions2Pass *** | |
module { | |
flow.executable @pad_test_dispatch_0 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_0 attributes {signature = (tensor<i32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:i32>, %arg1: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
%0 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
%1 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%2 = tensor.extract %1[] : tensor<i32> | |
%3 = linalg.fill(%0, %2) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
flow.dispatch.tensor.store %3, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
} | |
} | |
flow.executable @pad_test_dispatch_1 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_1 attributes {signature = (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_1(%arg0: !flow.dispatch.tensor<readonly:2x3xi32>, %arg1: !flow.dispatch.tensor<readonly:3x9xi32>, %arg2: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
%0 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%1 = flow.dispatch.tensor.load %arg1 : !flow.dispatch.tensor<readonly:3x9xi32> -> tensor<3x9xi32> | |
%2 = subtensor_insert %0 into %1[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
flow.dispatch.tensor.store %2, %arg2 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
} | |
} | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
%c1 = constant 1 : index | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%1) : (tensor<i32>) -> tensor<3x9xi32> | |
%3 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%0, %2) : (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32> | |
check.expect_eq(%3, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
%c1 = constant 1 : index | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%1) : (tensor<i32>) -> tensor<3x9xi32> | |
%3 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%0, %2) : (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32> | |
check.expect_eq(%3, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::Flow::DeduplicateExecutablesPass *** | |
module { | |
flow.executable @pad_test_dispatch_0 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_0 attributes {signature = (tensor<i32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:i32>, %arg1: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
%0 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
%1 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%2 = tensor.extract %1[] : tensor<i32> | |
%3 = linalg.fill(%0, %2) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
flow.dispatch.tensor.store %3, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
} | |
} | |
flow.executable @pad_test_dispatch_1 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_1 attributes {signature = (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_1(%arg0: !flow.dispatch.tensor<readonly:2x3xi32>, %arg1: !flow.dispatch.tensor<readonly:3x9xi32>, %arg2: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
%0 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%1 = flow.dispatch.tensor.load %arg1 : !flow.dispatch.tensor<readonly:3x9xi32> -> tensor<3x9xi32> | |
%2 = subtensor_insert %0 into %1[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
flow.dispatch.tensor.store %2, %arg2 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
} | |
} | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
%c1 = constant 1 : index | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%1) : (tensor<i32>) -> tensor<3x9xi32> | |
%3 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%0, %2) : (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32> | |
check.expect_eq(%3, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::Flow::PostPartitioningConversionPass *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
%c1 = constant 1 : index | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%1) : (tensor<i32>) -> tensor<3x9xi32> | |
%3 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%0, %2) : (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32> | |
check.expect_eq(%3, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After Canonicalizer *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
%c1 = constant 1 : index | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%1) : (tensor<i32>) -> tensor<3x9xi32> | |
%3 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%0, %2) : (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32> | |
check.expect_eq(%3, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After CSE *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
%c1 = constant 1 : index | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%1) : (tensor<i32>) -> tensor<3x9xi32> | |
%3 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%0, %2) : (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32> | |
check.expect_eq(%3, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::Flow::(anonymous namespace)::HoistUnstreamableOps *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%c1 = constant 1 : index | |
%c9 = constant 9 : index | |
%c3 = constant 3 : index | |
%cst = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%0 = iree.do_not_optimize(%cst_1) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%1) : (tensor<i32>) -> tensor<3x9xi32> | |
%3 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%0, %2) : (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32> | |
check.expect_eq(%3, %cst) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After Canonicalizer *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%c1 = constant 1 : index | |
%c9 = constant 9 : index | |
%c3 = constant 3 : index | |
%cst = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%0 = iree.do_not_optimize(%cst_1) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%1) : (tensor<i32>) -> tensor<3x9xi32> | |
%3 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%0, %2) : (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32> | |
check.expect_eq(%3, %cst) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::Flow::FormStreamsPass *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%c1 = constant 1 : index | |
%c9 = constant 9 : index | |
%c3 = constant 3 : index | |
%cst = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%0 = iree.do_not_optimize(%cst_1) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = flow.ex.stream.fragment(%c9, %c3, %c1, %1, %0) : (index, index, index, tensor<i32>, tensor<2x3xi32>) -> tensor<3x9xi32> = | |
(%arg0: index, %arg1: index, %arg2: index, %arg3: tensor<i32>, %arg4: tensor<2x3xi32>) -> tensor<3x9xi32> { | |
%3 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%arg0, %arg1, %arg2](%arg3) : (tensor<i32>) -> tensor<3x9xi32> | |
%4 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%arg0, %arg1, %arg2](%arg4, %3) : (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32> | |
flow.return %4 : tensor<3x9xi32> | |
} | |
check.expect_eq(%2, %cst) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::Flow::OutlineLargeConstantsPass *** | |
module { | |
flow.executable @pad_test_dispatch_0 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_0 attributes {signature = (tensor<i32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:i32>, %arg1: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
%0 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
%1 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%2 = tensor.extract %1[] : tensor<i32> | |
%3 = linalg.fill(%0, %2) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
flow.dispatch.tensor.store %3, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
} | |
} | |
flow.executable @pad_test_dispatch_1 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_1 attributes {signature = (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_1(%arg0: !flow.dispatch.tensor<readonly:2x3xi32>, %arg1: !flow.dispatch.tensor<readonly:3x9xi32>, %arg2: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
%0 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%1 = flow.dispatch.tensor.load %arg1 : !flow.dispatch.tensor<readonly:3x9xi32> -> tensor<3x9xi32> | |
%2 = subtensor_insert %0 into %1[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
flow.dispatch.tensor.store %2, %arg2 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
} | |
} | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%c1 = constant 1 : index | |
%c9 = constant 9 : index | |
%c3 = constant 3 : index | |
%cst = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%0 = iree.do_not_optimize(%cst_1) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = flow.ex.stream.fragment(%c9, %c3, %c1, %1, %0) : (index, index, index, tensor<i32>, tensor<2x3xi32>) -> tensor<3x9xi32> = | |
(%arg0: index, %arg1: index, %arg2: index, %arg3: tensor<i32>, %arg4: tensor<2x3xi32>) -> tensor<3x9xi32> { | |
%3 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%arg0, %arg1, %arg2](%arg3) : (tensor<i32>) -> tensor<3x9xi32> | |
%4 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%arg0, %arg1, %arg2](%arg4, %3) : (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32> | |
flow.return %4 : tensor<3x9xi32> | |
} | |
check.expect_eq(%2, %cst) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%0 = iree.do_not_optimize(%cst_1) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = flow.ex.stream.fragment(%1, %0) : (tensor<i32>, tensor<2x3xi32>) -> tensor<3x9xi32> = | |
(%arg0: tensor<i32>, %arg1: tensor<2x3xi32>) -> tensor<3x9xi32> { | |
%c9 = constant 9 : index | |
%c3 = constant 3 : index | |
%c1 = constant 1 : index | |
%3 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%arg0) : (tensor<i32>) -> tensor<3x9xi32> | |
%4 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%arg1, %3) : (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32> | |
flow.return %4 : tensor<3x9xi32> | |
} | |
check.expect_eq(%2, %cst) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After CSE *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%0 = iree.do_not_optimize(%cst_1) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = flow.ex.stream.fragment(%1, %0) : (tensor<i32>, tensor<2x3xi32>) -> tensor<3x9xi32> = | |
(%arg0: tensor<i32>, %arg1: tensor<2x3xi32>) -> tensor<3x9xi32> { | |
%c9 = constant 9 : index | |
%c3 = constant 3 : index | |
%c1 = constant 1 : index | |
%3 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%arg0) : (tensor<i32>) -> tensor<3x9xi32> | |
%4 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%arg1, %3) : (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32> | |
flow.return %4 : tensor<3x9xi32> | |
} | |
check.expect_eq(%2, %cst) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After SymbolDCE *** | |
module { | |
flow.executable @pad_test_dispatch_0 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_0 attributes {signature = (tensor<i32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:i32>, %arg1: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
%0 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
%1 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%2 = tensor.extract %1[] : tensor<i32> | |
%3 = linalg.fill(%0, %2) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
flow.dispatch.tensor.store %3, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
} | |
} | |
flow.executable @pad_test_dispatch_1 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_1 attributes {signature = (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_1(%arg0: !flow.dispatch.tensor<readonly:2x3xi32>, %arg1: !flow.dispatch.tensor<readonly:3x9xi32>, %arg2: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
%0 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%1 = flow.dispatch.tensor.load %arg1 : !flow.dispatch.tensor<readonly:3x9xi32> -> tensor<3x9xi32> | |
%2 = subtensor_insert %0 into %1[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
flow.dispatch.tensor.store %2, %arg2 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
} | |
} | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%0 = iree.do_not_optimize(%cst_1) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = flow.ex.stream.fragment(%1, %0) : (tensor<i32>, tensor<2x3xi32>) -> tensor<3x9xi32> = | |
(%arg0: tensor<i32>, %arg1: tensor<2x3xi32>) -> tensor<3x9xi32> { | |
%c9 = constant 9 : index | |
%c3 = constant 3 : index | |
%c1 = constant 1 : index | |
%3 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%arg0) : (tensor<i32>) -> tensor<3x9xi32> | |
%4 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%arg1, %3) : (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32> | |
flow.return %4 : tensor<3x9xi32> | |
} | |
check.expect_eq(%2, %cst) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
module { | |
flow.executable @pad_test_dispatch_0 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_0 attributes {signature = (tensor<i32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:i32>, %arg1: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
%0 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
%1 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%2 = tensor.extract %1[] : tensor<i32> | |
%3 = linalg.fill(%0, %2) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
flow.dispatch.tensor.store %3, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
} | |
} | |
flow.executable @pad_test_dispatch_1 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_1 attributes {signature = (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_1(%arg0: !flow.dispatch.tensor<readonly:2x3xi32>, %arg1: !flow.dispatch.tensor<readonly:3x9xi32>, %arg2: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
%0 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%1 = flow.dispatch.tensor.load %arg1 : !flow.dispatch.tensor<readonly:3x9xi32> -> tensor<3x9xi32> | |
%2 = subtensor_insert %0 into %1[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
flow.dispatch.tensor.store %2, %arg2 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
} | |
} | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%0 = iree.do_not_optimize(%cst_1) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = flow.ex.stream.fragment(%1, %0) : (tensor<i32>, tensor<2x3xi32>) -> tensor<3x9xi32> = | |
(%arg0: tensor<i32>, %arg1: tensor<2x3xi32>) -> tensor<3x9xi32> { | |
%c9 = constant 9 : index | |
%c3 = constant 3 : index | |
%c1 = constant 1 : index | |
%3 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%arg0) : (tensor<i32>) -> tensor<3x9xi32> | |
%4 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%arg1, %3) : (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32> | |
flow.return %4 : tensor<3x9xi32> | |
} | |
check.expect_eq(%2, %cst) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::IdentifyConstantPoolsPass *** | |
module { | |
flow.executable @pad_test_dispatch_0 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_0 attributes {signature = (tensor<i32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:i32>, %arg1: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
%0 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
%1 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%2 = tensor.extract %1[] : tensor<i32> | |
%3 = linalg.fill(%0, %2) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
flow.dispatch.tensor.store %3, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
} | |
} | |
flow.executable @pad_test_dispatch_1 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_1 attributes {signature = (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_1(%arg0: !flow.dispatch.tensor<readonly:2x3xi32>, %arg1: !flow.dispatch.tensor<readonly:3x9xi32>, %arg2: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
%0 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%1 = flow.dispatch.tensor.load %arg1 : !flow.dispatch.tensor<readonly:3x9xi32> -> tensor<3x9xi32> | |
%2 = subtensor_insert %0 into %1[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
flow.dispatch.tensor.store %2, %arg2 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
} | |
} | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%0 = iree.do_not_optimize(%cst_1) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = flow.ex.stream.fragment(%1, %0) : (tensor<i32>, tensor<2x3xi32>) -> tensor<3x9xi32> = | |
(%arg0: tensor<i32>, %arg1: tensor<2x3xi32>) -> tensor<3x9xi32> { | |
%c9 = constant 9 : index | |
%c3 = constant 3 : index | |
%c1 = constant 1 : index | |
%3 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%arg0) : (tensor<i32>) -> tensor<3x9xi32> | |
%4 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%arg1, %3) : (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32> | |
flow.return %4 : tensor<3x9xi32> | |
} | |
check.expect_eq(%2, %cst) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::MaterializeConstantPoolBuffersPass *** | |
module { | |
flow.executable @pad_test_dispatch_0 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_0 attributes {signature = (tensor<i32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:i32>, %arg1: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
%0 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
%1 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%2 = tensor.extract %1[] : tensor<i32> | |
%3 = linalg.fill(%0, %2) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
flow.dispatch.tensor.store %3, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
} | |
} | |
flow.executable @pad_test_dispatch_1 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_1 attributes {signature = (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_1(%arg0: !flow.dispatch.tensor<readonly:2x3xi32>, %arg1: !flow.dispatch.tensor<readonly:3x9xi32>, %arg2: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
%0 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%1 = flow.dispatch.tensor.load %arg1 : !flow.dispatch.tensor<readonly:3x9xi32> -> tensor<3x9xi32> | |
%2 = subtensor_insert %0 into %1[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
flow.dispatch.tensor.store %2, %arg2 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
} | |
} | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%0 = iree.do_not_optimize(%cst_1) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = flow.ex.stream.fragment(%1, %0) : (tensor<i32>, tensor<2x3xi32>) -> tensor<3x9xi32> = | |
(%arg0: tensor<i32>, %arg1: tensor<2x3xi32>) -> tensor<3x9xi32> { | |
%c9 = constant 9 : index | |
%c3 = constant 3 : index | |
%c1 = constant 1 : index | |
%3 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%arg0) : (tensor<i32>) -> tensor<3x9xi32> | |
%4 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%arg1, %3) : (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32> | |
flow.return %4 : tensor<3x9xi32> | |
} | |
check.expect_eq(%2, %cst) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
module { | |
flow.executable @pad_test_dispatch_0 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_0 attributes {signature = (tensor<i32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:i32>, %arg1: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
%0 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
%1 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%2 = tensor.extract %1[] : tensor<i32> | |
%3 = linalg.fill(%0, %2) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
flow.dispatch.tensor.store %3, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
} | |
} | |
flow.executable @pad_test_dispatch_1 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_1 attributes {signature = (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_1(%arg0: !flow.dispatch.tensor<readonly:2x3xi32>, %arg1: !flow.dispatch.tensor<readonly:3x9xi32>, %arg2: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
%0 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%1 = flow.dispatch.tensor.load %arg1 : !flow.dispatch.tensor<readonly:3x9xi32> -> tensor<3x9xi32> | |
%2 = subtensor_insert %0 into %1[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
flow.dispatch.tensor.store %2, %arg2 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
} | |
} | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%0 = iree.do_not_optimize(%cst_1) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = flow.ex.stream.fragment(%1, %0) : (tensor<i32>, tensor<2x3xi32>) -> tensor<3x9xi32> = | |
(%arg0: tensor<i32>, %arg1: tensor<2x3xi32>) -> tensor<3x9xi32> { | |
%c9 = constant 9 : index | |
%c3 = constant 3 : index | |
%c1 = constant 1 : index | |
%3 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%arg0) : (tensor<i32>) -> tensor<3x9xi32> | |
%4 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%arg1, %3) : (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32> | |
flow.return %4 : tensor<3x9xi32> | |
} | |
check.expect_eq(%2, %cst) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After SymbolDCE *** | |
module { | |
flow.executable @pad_test_dispatch_0 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_0 attributes {signature = (tensor<i32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:i32>, %arg1: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
%0 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
%1 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%2 = tensor.extract %1[] : tensor<i32> | |
%3 = linalg.fill(%0, %2) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
flow.dispatch.tensor.store %3, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
} | |
} | |
flow.executable @pad_test_dispatch_1 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_1 attributes {signature = (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_1(%arg0: !flow.dispatch.tensor<readonly:2x3xi32>, %arg1: !flow.dispatch.tensor<readonly:3x9xi32>, %arg2: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
%0 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%1 = flow.dispatch.tensor.load %arg1 : !flow.dispatch.tensor<readonly:3x9xi32> -> tensor<3x9xi32> | |
%2 = subtensor_insert %0 into %1[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
flow.dispatch.tensor.store %2, %arg2 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
} | |
} | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%0 = iree.do_not_optimize(%cst_1) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = flow.ex.stream.fragment(%1, %0) : (tensor<i32>, tensor<2x3xi32>) -> tensor<3x9xi32> = | |
(%arg0: tensor<i32>, %arg1: tensor<2x3xi32>) -> tensor<3x9xi32> { | |
%c9 = constant 9 : index | |
%c3 = constant 3 : index | |
%c1 = constant 1 : index | |
%3 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%arg0) : (tensor<i32>) -> tensor<3x9xi32> | |
%4 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%arg1, %3) : (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32> | |
flow.return %4 : tensor<3x9xi32> | |
} | |
check.expect_eq(%2, %cst) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::MaterializeInterfacesPass *** | |
module { | |
hal.executable @pad_test_dispatch_0 attributes {sym_visibility = "private"} { | |
hal.interface @legacy_io { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
hal.executable.entry_point @pad_test_dispatch_0 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:i32>, !flow.dispatch.tensor<writeonly:3x9xi32>) -> ()} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : !flow.dispatch.tensor<readonly:i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : !flow.dispatch.tensor<writeonly:3x9xi32> | |
%2 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
%3 = flow.dispatch.tensor.load %0 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%4 = tensor.extract %3[] : tensor<i32> | |
%5 = linalg.fill(%2, %4) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
flow.dispatch.tensor.store %5, %1 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
} | |
} | |
hal.executable @pad_test_dispatch_1 attributes {sym_visibility = "private"} { | |
hal.interface @legacy_io { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @ro1, set=0, binding=1, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo2, set=0, binding=2, type="StorageBuffer", access="Write|Discard" | |
} | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
hal.executable.entry_point @pad_test_dispatch_1 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:2x3xi32>, !flow.dispatch.tensor<readonly:3x9xi32>, !flow.dispatch.tensor<writeonly:3x9xi32>) -> ()} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_1() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : !flow.dispatch.tensor<readonly:2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@ro1[%c0] : !flow.dispatch.tensor<readonly:3x9xi32> | |
%2 = hal.interface.binding.subspan @legacy_io::@wo2[%c0] : !flow.dispatch.tensor<writeonly:3x9xi32> | |
%3 = flow.dispatch.tensor.load %0 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%4 = flow.dispatch.tensor.load %1 : !flow.dispatch.tensor<readonly:3x9xi32> -> tensor<3x9xi32> | |
%5 = subtensor_insert %3 into %4[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
flow.dispatch.tensor.store %5, %2 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @ro1, set=0, binding=1, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo2, set=0, binding=2, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
} | |
} | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%0 = iree.do_not_optimize(%cst_1) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = flow.ex.stream.fragment(%1, %0) : (tensor<i32>, tensor<2x3xi32>) -> tensor<3x9xi32> = | |
(%arg0: tensor<i32>, %arg1: tensor<2x3xi32>) -> tensor<3x9xi32> { | |
%c9 = constant 9 : index | |
%c3 = constant 3 : index | |
%c1 = constant 1 : index | |
%3 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%arg0) : (tensor<i32>) -> tensor<3x9xi32> | |
%4 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%arg1, %3) : (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32> | |
flow.return %4 : tensor<3x9xi32> | |
} | |
check.expect_eq(%2, %cst) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::PropagateConstantWorkgroupInfoPass *** | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
hal.executable.entry_point @pad_test_dispatch_0 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:i32>, !flow.dispatch.tensor<writeonly:3x9xi32>) -> ()} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : !flow.dispatch.tensor<readonly:i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : !flow.dispatch.tensor<writeonly:3x9xi32> | |
%2 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
%3 = flow.dispatch.tensor.load %0 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%4 = tensor.extract %3[] : tensor<i32> | |
%5 = linalg.fill(%2, %4) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
flow.dispatch.tensor.store %5, %1 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
func @pad_test_dispatch_0() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : !flow.dispatch.tensor<readonly:i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : !flow.dispatch.tensor<writeonly:3x9xi32> | |
%2 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
%3 = flow.dispatch.tensor.load %0 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%4 = tensor.extract %3[] : tensor<i32> | |
%5 = linalg.fill(%2, %4) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
flow.dispatch.tensor.store %5, %1 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
// *** IR Dump After Inliner *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : !flow.dispatch.tensor<readonly:i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : !flow.dispatch.tensor<writeonly:3x9xi32> | |
%2 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
%3 = flow.dispatch.tensor.load %0 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%4 = tensor.extract %3[] : tensor<i32> | |
%5 = linalg.fill(%2, %4) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
flow.dispatch.tensor.store %5, %1 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::LinalgBufferizePass *** | |
func @pad_test_dispatch_0() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : !flow.dispatch.tensor<readonly:i32> | |
%2 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%3 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : !flow.dispatch.tensor<writeonly:3x9xi32> | |
%4 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
%5 = flow.dispatch.tensor.load %1 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%6 = load %0[] : memref<i32> | |
%7 = tensor.extract %5[] : tensor<i32> | |
linalg.fill(%2, %6) : memref<3x9xi32>, i32 | |
%8 = linalg.fill(%4, %7) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After Canonicalizer *** | |
func @pad_test_dispatch_0() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : !flow.dispatch.tensor<readonly:i32> | |
%2 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%3 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : !flow.dispatch.tensor<writeonly:3x9xi32> | |
%4 = load %0[] : memref<i32> | |
linalg.fill(%2, %4) : memref<3x9xi32>, i32 | |
return | |
} | |
// *** IR Dump After CSE *** | |
func @pad_test_dispatch_0() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : !flow.dispatch.tensor<readonly:i32> | |
%2 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%3 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : !flow.dispatch.tensor<writeonly:3x9xi32> | |
%4 = load %0[] : memref<i32> | |
linalg.fill(%2, %4) : memref<3x9xi32>, i32 | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::RemoveDeadMemAllocsPass *** | |
func @pad_test_dispatch_0() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
linalg.fill(%1, %2) : memref<3x9xi32>, i32 | |
return | |
} | |
// *** IR Dump After (anonymous namespace)::CopyRemovalPass *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
linalg.fill(%1, %2) : memref<3x9xi32>, i32 | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
linalg.fill(%1, %2) : memref<3x9xi32>, i32 | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After CSE *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
linalg.fill(%1, %2) : memref<3x9xi32>, i32 | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::ConcretizeTileAmongWorkgroupsPass *** | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
hal.executable.entry_point @pad_test_dispatch_0 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:i32>, !flow.dispatch.tensor<writeonly:3x9xi32>) -> ()} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
linalg.fill(%1, %2) : memref<3x9xi32>, i32 | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::TileAndVectorizeInOneWorkgroupPass *** | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
hal.executable.entry_point @pad_test_dispatch_0 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:i32>, !flow.dispatch.tensor<writeonly:3x9xi32>) -> ()} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
linalg.fill(%1, %2) : memref<3x9xi32>, i32 | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
linalg.fill(%1, %2) : memref<3x9xi32>, i32 | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::ConvertToGPUPass *** | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
hal.executable.entry_point @pad_test_dispatch_0 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:i32>, !flow.dispatch.tensor<writeonly:3x9xi32>) -> ()} { | |
^bb0(%arg0: index, %arg1: index, %arg2: index): // no predecessors | |
%c1 = constant 1 : index | |
%0 = affine.apply affine_map<()[s0, s1, s2] -> (((s0 * s1) * s2) ceildiv 32)>()[%arg0, %arg1, %arg2] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
%c0_0 = constant 0 : index | |
%3 = dim %1, %c0_0 : memref<3x9xi32> | |
%c1 = constant 1 : index | |
%4 = dim %1, %c1 : memref<3x9xi32> | |
%c0_1 = constant 0 : index | |
%c1_2 = constant 1 : index | |
%c1_3 = constant 1 : index | |
%5 = subi %4, %c0_1 : index | |
%6 = divi_signed %5, %c1_2 : index | |
%7 = muli %c1_3, %6 : index | |
%8 = subi %3, %c0_1 : index | |
%9 = divi_signed %8, %c1_2 : index | |
%10 = muli %7, %9 : index | |
%c0_4 = constant 0 : index | |
%c1_5 = constant 1 : index | |
%11 = "gpu.grid_dim"() {dimension = "x"} : () -> index | |
%12 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%13 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%14 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%15 = muli %12, %13 : index | |
%16 = addi %15, %14 : index | |
%17 = muli %13, %11 : index | |
%18 = muli %16, %c1_5 : index | |
%19 = addi %c0_4, %18 : index | |
%20 = cmpi slt, %19, %10 : index | |
scf.if %20 { | |
%21 = divi_signed %19, %7 : index | |
%22 = affine.apply affine_map<(d0) -> (d0)>(%21) | |
%23 = affine.apply affine_map<(d0) -> (d0)>(%21) | |
%24 = remi_signed %19, %7 : index | |
%25 = divi_signed %24, %c1_3 : index | |
%26 = affine.apply affine_map<(d0) -> (d0)>(%25) | |
%27 = affine.apply affine_map<(d0) -> (d0)>(%25) | |
%28 = remi_signed %24, %c1_3 : index | |
%29 = affine.apply affine_map<(d0) -> (d0)>(%23) | |
%30 = affine.apply affine_map<(d0) -> (d0)>(%27) | |
%31 = load %1[%29, %30] : memref<3x9xi32> | |
%32 = affine.apply affine_map<(d0) -> (d0)>(%23) | |
%33 = affine.apply affine_map<(d0) -> (d0)>(%27) | |
store %2, %1[%32, %33] : memref<3x9xi32> | |
} | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::ConvertVectorToGPUPass *** | |
func @pad_test_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c27 = constant 27 : index | |
%c9 = constant 9 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
%8 = cmpi slt, %7, %c27 : index | |
scf.if %8 { | |
%9 = divi_signed %7, %c9 : index | |
%10 = remi_signed %7, %c9 : index | |
store %2, %1[%9, %10] : memref<3x9xi32> | |
} | |
return | |
} | |
// *** IR Dump After ConvertAffineToStandard *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c27 = constant 27 : index | |
%c9 = constant 9 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
%8 = cmpi slt, %7, %c27 : index | |
scf.if %8 { | |
%9 = divi_signed %7, %c9 : index | |
%10 = remi_signed %7, %c9 : index | |
store %2, %1[%9, %10] : memref<3x9xi32> | |
} | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c27 = constant 27 : index | |
%c9 = constant 9 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
%8 = cmpi slt, %7, %c27 : index | |
scf.if %8 { | |
%9 = divi_signed %7, %c9 : index | |
%10 = remi_signed %7, %c9 : index | |
store %2, %1[%9, %10] : memref<3x9xi32> | |
} | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After CSE *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c27 = constant 27 : index | |
%c9 = constant 9 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
%8 = cmpi slt, %7, %c27 : index | |
scf.if %8 { | |
%9 = divi_signed %7, %c9 : index | |
%10 = remi_signed %7, %c9 : index | |
store %2, %1[%9, %10] : memref<3x9xi32> | |
} | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::ResolveShapeOpsPass *** | |
func @pad_test_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c27 = constant 27 : index | |
%c9 = constant 9 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
%8 = cmpi slt, %7, %c27 : index | |
scf.if %8 { | |
%9 = divi_signed %7, %c9 : index | |
%10 = remi_signed %7, %c9 : index | |
store %2, %1[%9, %10] : memref<3x9xi32> | |
} | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::VectorTransferOptimizationPass *** | |
func @pad_test_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c27 = constant 27 : index | |
%c9 = constant 9 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
%8 = cmpi slt, %7, %c27 : index | |
scf.if %8 { | |
%9 = divi_signed %7, %c9 : index | |
%10 = remi_signed %7, %c9 : index | |
store %2, %1[%9, %10] : memref<3x9xi32> | |
} | |
return | |
} | |
// *** IR Dump After LegalizeStandardForSPIRV *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c27 = constant 27 : index | |
%c9 = constant 9 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
%8 = cmpi slt, %7, %c27 : index | |
scf.if %8 { | |
%9 = divi_signed %7, %c9 : index | |
%10 = remi_signed %7, %c9 : index | |
store %2, %1[%9, %10] : memref<3x9xi32> | |
} | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c27 = constant 27 : index | |
%c9 = constant 9 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
%8 = cmpi slt, %7, %c27 : index | |
scf.if %8 { | |
%9 = divi_signed %7, %c9 : index | |
%10 = remi_signed %7, %c9 : index | |
store %2, %1[%9, %10] : memref<3x9xi32> | |
} | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After CSE *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c27 = constant 27 : index | |
%c9 = constant 9 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
%8 = cmpi slt, %7, %c27 : index | |
scf.if %8 { | |
%9 = divi_signed %7, %c9 : index | |
%10 = remi_signed %7, %c9 : index | |
store %2, %1[%9, %10] : memref<3x9xi32> | |
} | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::VectorizeMemRefPass *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c27 = constant 27 : index | |
%c9 = constant 9 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
%8 = cmpi slt, %7, %c27 : index | |
scf.if %8 { | |
%9 = divi_signed %7, %c9 : index | |
%10 = remi_signed %7, %c9 : index | |
store %2, %1[%9, %10] : memref<3x9xi32> | |
} | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::ForOpCanonicalizationPass *** | |
func @pad_test_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c27 = constant 27 : index | |
%c9 = constant 9 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
%8 = cmpi slt, %7, %c27 : index | |
scf.if %8 { | |
%9 = divi_signed %7, %c9 : index | |
%10 = remi_signed %7, %c9 : index | |
store %2, %1[%9, %10] : memref<3x9xi32> | |
} | |
return | |
} | |
// *** IR Dump After Canonicalizer *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c27 = constant 27 : index | |
%c9 = constant 9 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
%8 = cmpi slt, %7, %c27 : index | |
scf.if %8 { | |
%9 = divi_signed %7, %c9 : index | |
%10 = remi_signed %7, %c9 : index | |
store %2, %1[%9, %10] : memref<3x9xi32> | |
} | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After CSE *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c27 = constant 27 : index | |
%c9 = constant 9 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
%8 = cmpi slt, %7, %c27 : index | |
scf.if %8 { | |
%9 = divi_signed %7, %c9 : index | |
%10 = remi_signed %7, %c9 : index | |
store %2, %1[%9, %10] : memref<3x9xi32> | |
} | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::ConvertToSPIRVPass *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
spv.module Logical GLSL450 { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__resource_var_183226960__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_182700608__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_0() "None" attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%0 = spv.Constant 0 : i32 | |
%1 = spv.Constant 27 : i32 | |
%2 = spv.Constant 9 : i32 | |
%3 = spv.mlir.addressof @__resource_var_182700608__ : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
%4 = spv.mlir.addressof @__resource_var_183226960__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
%5 = spv.Constant 0 : i32 | |
%6 = spv.AccessChain %3[%5, %5] : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%7 = spv.Load "StorageBuffer" %6 : i32 | |
%8 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%9 = spv.Load "Input" %8 : vector<3xi32> | |
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32> | |
%11 = spv.Constant 32 : i32 | |
%12 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%13 = spv.Load "Input" %12 : vector<3xi32> | |
%14 = spv.CompositeExtract %13[0 : i32] : vector<3xi32> | |
%15 = spv.IMul %10, %11 : i32 | |
%16 = spv.IAdd %15, %14 : i32 | |
%17 = spv.SLessThan %16, %1 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %17, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%18 = spv.SDiv %16, %2 : i32 | |
%19 = spv.GLSL.SAbs %16 : i32 | |
%20 = spv.GLSL.SAbs %2 : i32 | |
%21 = spv.UMod %19, %20 : i32 | |
%22 = spv.IEqual %16, %19 : i32 | |
%23 = spv.SNegate %21 : i32 | |
%24 = spv.Select %22, %21, %23 : i1, i32 | |
%25 = spv.Constant 0 : i32 | |
%26 = spv.Constant 0 : i32 | |
%27 = spv.Constant 9 : i32 | |
%28 = spv.IMul %27, %18 : i32 | |
%29 = spv.IAdd %26, %28 : i32 | |
%30 = spv.Constant 1 : i32 | |
%31 = spv.IMul %30, %24 : i32 | |
%32 = spv.IAdd %29, %31 : i32 | |
%33 = spv.AccessChain %4[%25, %32] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %33, %7 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After SPIRVLowerABIAttributes *** | |
spv.module Logical GLSL450 { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__resource_var_183226960__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_182700608__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_0() "None" { | |
%0 = spv.Constant 0 : i32 | |
%1 = spv.Constant 27 : i32 | |
%2 = spv.Constant 9 : i32 | |
%3 = spv.mlir.addressof @__resource_var_182700608__ : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
%4 = spv.mlir.addressof @__resource_var_183226960__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
%5 = spv.Constant 0 : i32 | |
%6 = spv.AccessChain %3[%5, %5] : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%7 = spv.Load "StorageBuffer" %6 : i32 | |
%8 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%9 = spv.Load "Input" %8 : vector<3xi32> | |
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32> | |
%11 = spv.Constant 32 : i32 | |
%12 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%13 = spv.Load "Input" %12 : vector<3xi32> | |
%14 = spv.CompositeExtract %13[0 : i32] : vector<3xi32> | |
%15 = spv.IMul %10, %11 : i32 | |
%16 = spv.IAdd %15, %14 : i32 | |
%17 = spv.SLessThan %16, %1 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %17, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%18 = spv.SDiv %16, %2 : i32 | |
%19 = spv.GLSL.SAbs %16 : i32 | |
%20 = spv.GLSL.SAbs %2 : i32 | |
%21 = spv.UMod %19, %20 : i32 | |
%22 = spv.IEqual %16, %19 : i32 | |
%23 = spv.SNegate %21 : i32 | |
%24 = spv.Select %22, %21, %23 : i1, i32 | |
%25 = spv.Constant 0 : i32 | |
%26 = spv.Constant 0 : i32 | |
%27 = spv.Constant 9 : i32 | |
%28 = spv.IMul %27, %18 : i32 | |
%29 = spv.IAdd %26, %28 : i32 | |
%30 = spv.Constant 1 : i32 | |
%31 = spv.IMul %30, %24 : i32 | |
%32 = spv.IAdd %29, %31 : i32 | |
%33 = spv.AccessChain %4[%25, %32] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %33, %7 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
spv.EntryPoint "GLCompute" @pad_test_dispatch_0, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
spv.ExecutionMode @pad_test_dispatch_0 "LocalSize", 32, 1, 1 | |
} | |
// *** IR Dump After Canonicalizer *** | |
spv.module Logical GLSL450 { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__resource_var_183226960__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_182700608__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_0() "None" { | |
%0 = spv.Constant 27 : i32 | |
%1 = spv.Constant 32 : i32 | |
%2 = spv.Constant 0 : i32 | |
%3 = spv.Constant 9 : i32 | |
%4 = spv.mlir.addressof @__resource_var_182700608__ : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
%5 = spv.mlir.addressof @__resource_var_183226960__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
%6 = spv.AccessChain %4[%2, %2] : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%7 = spv.Load "StorageBuffer" %6 : i32 | |
%8 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%9 = spv.Load "Input" %8 : vector<3xi32> | |
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32> | |
%11 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%12 = spv.Load "Input" %11 : vector<3xi32> | |
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32> | |
%14 = spv.IMul %10, %1 : i32 | |
%15 = spv.IAdd %14, %13 : i32 | |
%16 = spv.SLessThan %15, %0 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %16, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%17 = spv.SDiv %15, %3 : i32 | |
%18 = spv.GLSL.SAbs %15 : i32 | |
%19 = spv.GLSL.SAbs %3 : i32 | |
%20 = spv.UMod %18, %19 : i32 | |
%21 = spv.IEqual %15, %18 : i32 | |
%22 = spv.SNegate %20 : i32 | |
%23 = spv.Select %21, %20, %22 : i1, i32 | |
%24 = spv.IMul %17, %3 : i32 | |
%25 = spv.IAdd %24, %23 : i32 | |
%26 = spv.AccessChain %5[%2, %25] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %26, %7 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
spv.EntryPoint "GLCompute" @pad_test_dispatch_0, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
spv.ExecutionMode @pad_test_dispatch_0 "LocalSize", 32, 1, 1 | |
} | |
// *** IR Dump After CSE *** | |
spv.module Logical GLSL450 { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__resource_var_183226960__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_182700608__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_0() "None" { | |
%0 = spv.Constant 27 : i32 | |
%1 = spv.Constant 32 : i32 | |
%2 = spv.Constant 0 : i32 | |
%3 = spv.Constant 9 : i32 | |
%4 = spv.mlir.addressof @__resource_var_182700608__ : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
%5 = spv.mlir.addressof @__resource_var_183226960__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
%6 = spv.AccessChain %4[%2, %2] : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%7 = spv.Load "StorageBuffer" %6 : i32 | |
%8 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%9 = spv.Load "Input" %8 : vector<3xi32> | |
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32> | |
%11 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%12 = spv.Load "Input" %11 : vector<3xi32> | |
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32> | |
%14 = spv.IMul %10, %1 : i32 | |
%15 = spv.IAdd %14, %13 : i32 | |
%16 = spv.SLessThan %15, %0 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %16, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%17 = spv.SDiv %15, %3 : i32 | |
%18 = spv.GLSL.SAbs %15 : i32 | |
%19 = spv.GLSL.SAbs %3 : i32 | |
%20 = spv.UMod %18, %19 : i32 | |
%21 = spv.IEqual %15, %18 : i32 | |
%22 = spv.SNegate %20 : i32 | |
%23 = spv.Select %21, %20, %22 : i1, i32 | |
%24 = spv.IMul %17, %3 : i32 | |
%25 = spv.IAdd %24, %23 : i32 | |
%26 = spv.AccessChain %5[%2, %25] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %26, %7 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
spv.EntryPoint "GLCompute" @pad_test_dispatch_0, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
spv.ExecutionMode @pad_test_dispatch_0 "LocalSize", 32, 1, 1 | |
} | |
// *** IR Dump After SPIRVUpdateVCE *** | |
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__resource_var_183226960__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_182700608__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_0() "None" { | |
%0 = spv.Constant 27 : i32 | |
%1 = spv.Constant 32 : i32 | |
%2 = spv.Constant 0 : i32 | |
%3 = spv.Constant 9 : i32 | |
%4 = spv.mlir.addressof @__resource_var_182700608__ : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
%5 = spv.mlir.addressof @__resource_var_183226960__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
%6 = spv.AccessChain %4[%2, %2] : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%7 = spv.Load "StorageBuffer" %6 : i32 | |
%8 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%9 = spv.Load "Input" %8 : vector<3xi32> | |
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32> | |
%11 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%12 = spv.Load "Input" %11 : vector<3xi32> | |
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32> | |
%14 = spv.IMul %10, %1 : i32 | |
%15 = spv.IAdd %14, %13 : i32 | |
%16 = spv.SLessThan %15, %0 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %16, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%17 = spv.SDiv %15, %3 : i32 | |
%18 = spv.GLSL.SAbs %15 : i32 | |
%19 = spv.GLSL.SAbs %3 : i32 | |
%20 = spv.UMod %18, %19 : i32 | |
%21 = spv.IEqual %15, %18 : i32 | |
%22 = spv.SNegate %20 : i32 | |
%23 = spv.Select %21, %20, %22 : i1, i32 | |
%24 = spv.IMul %17, %3 : i32 | |
%25 = spv.IAdd %24, %23 : i32 | |
%26 = spv.AccessChain %5[%2, %25] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %26, %7 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
spv.EntryPoint "GLCompute" @pad_test_dispatch_0, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
spv.ExecutionMode @pad_test_dispatch_0 "LocalSize", 32, 1, 1 | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::TranslateExecutablesPass *** | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
hal.executable.entry_point @pad_test_dispatch_0 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:i32>, !flow.dispatch.tensor<writeonly:3x9xi32>) -> ()} { | |
^bb0(%arg0: index, %arg1: index, %arg2: index): // no predecessors | |
%c1 = constant 1 : index | |
%0 = affine.apply affine_map<()[s0, s1, s2] -> (((s0 * s1) * s2) ceildiv 32)>()[%arg0, %arg1, %arg2] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__resource_var_183226960__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_182700608__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_0() "None" { | |
%0 = spv.Constant 27 : i32 | |
%1 = spv.Constant 32 : i32 | |
%2 = spv.Constant 0 : i32 | |
%3 = spv.Constant 9 : i32 | |
%4 = spv.mlir.addressof @__resource_var_182700608__ : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
%5 = spv.mlir.addressof @__resource_var_183226960__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
%6 = spv.AccessChain %4[%2, %2] : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%7 = spv.Load "StorageBuffer" %6 : i32 | |
%8 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%9 = spv.Load "Input" %8 : vector<3xi32> | |
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32> | |
%11 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%12 = spv.Load "Input" %11 : vector<3xi32> | |
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32> | |
%14 = spv.IMul %10, %1 : i32 | |
%15 = spv.IAdd %14, %13 : i32 | |
%16 = spv.SLessThan %15, %0 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %16, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%17 = spv.SDiv %15, %3 : i32 | |
%18 = spv.GLSL.SAbs %15 : i32 | |
%19 = spv.GLSL.SAbs %3 : i32 | |
%20 = spv.UMod %18, %19 : i32 | |
%21 = spv.IEqual %15, %18 : i32 | |
%22 = spv.SNegate %20 : i32 | |
%23 = spv.Select %21, %20, %22 : i1, i32 | |
%24 = spv.IMul %17, %3 : i32 | |
%25 = spv.IAdd %24, %23 : i32 | |
%26 = spv.AccessChain %5[%2, %25] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %26, %7 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
spv.EntryPoint "GLCompute" @pad_test_dispatch_0, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
spv.ExecutionMode @pad_test_dispatch_0 "LocalSize", 32, 1, 1 | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::PropagateConstantWorkgroupInfoPass *** | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
hal.executable.entry_point @pad_test_dispatch_1 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:2x3xi32>, !flow.dispatch.tensor<readonly:3x9xi32>, !flow.dispatch.tensor<writeonly:3x9xi32>) -> ()} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_1() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : !flow.dispatch.tensor<readonly:2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@ro1[%c0] : !flow.dispatch.tensor<readonly:3x9xi32> | |
%2 = hal.interface.binding.subspan @legacy_io::@wo2[%c0] : !flow.dispatch.tensor<writeonly:3x9xi32> | |
%3 = flow.dispatch.tensor.load %0 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%4 = flow.dispatch.tensor.load %1 : !flow.dispatch.tensor<readonly:3x9xi32> -> tensor<3x9xi32> | |
%5 = subtensor_insert %3 into %4[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
flow.dispatch.tensor.store %5, %2 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @ro1, set=0, binding=1, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo2, set=0, binding=2, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
func @pad_test_dispatch_1() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : !flow.dispatch.tensor<readonly:2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@ro1[%c0] : !flow.dispatch.tensor<readonly:3x9xi32> | |
%2 = hal.interface.binding.subspan @legacy_io::@wo2[%c0] : !flow.dispatch.tensor<writeonly:3x9xi32> | |
%3 = flow.dispatch.tensor.load %0 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%4 = flow.dispatch.tensor.load %1 : !flow.dispatch.tensor<readonly:3x9xi32> -> tensor<3x9xi32> | |
%5 = subtensor_insert %3 into %4[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
flow.dispatch.tensor.store %5, %2 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
// *** IR Dump After Inliner *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_1() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : !flow.dispatch.tensor<readonly:2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@ro1[%c0] : !flow.dispatch.tensor<readonly:3x9xi32> | |
%2 = hal.interface.binding.subspan @legacy_io::@wo2[%c0] : !flow.dispatch.tensor<writeonly:3x9xi32> | |
%3 = flow.dispatch.tensor.load %0 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%4 = flow.dispatch.tensor.load %1 : !flow.dispatch.tensor<readonly:3x9xi32> -> tensor<3x9xi32> | |
%5 = subtensor_insert %3 into %4[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
flow.dispatch.tensor.store %5, %2 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @ro1, set=0, binding=1, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo2, set=0, binding=2, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::LinalgBufferizePass *** | |
func @pad_test_dispatch_1() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : !flow.dispatch.tensor<readonly:2x3xi32> | |
%2 = hal.interface.binding.subspan @legacy_io::@ro1[%c0] : memref<3x9xi32> | |
%3 = hal.interface.binding.subspan @legacy_io::@ro1[%c0] : !flow.dispatch.tensor<readonly:3x9xi32> | |
%4 = hal.interface.binding.subspan @legacy_io::@wo2[%c0] : memref<3x9xi32> | |
%5 = hal.interface.binding.subspan @legacy_io::@wo2[%c0] : !flow.dispatch.tensor<writeonly:3x9xi32> | |
%6 = flow.dispatch.tensor.load %1 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%7 = flow.dispatch.tensor.load %3 : !flow.dispatch.tensor<readonly:3x9xi32> -> tensor<3x9xi32> | |
%c0_0 = constant 0 : index | |
%c3 = constant 3 : index | |
%c1 = constant 1 : index | |
%c9 = constant 9 : index | |
linalg.copy(%2, %4) : memref<3x9xi32>, memref<3x9xi32> | |
%8 = subview %4[0, 1] [2, 3] [1, 1] : memref<3x9xi32> to memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
linalg.copy(%0, %8) : memref<2x3xi32>, memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
%9 = subtensor_insert %6 into %7[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After Canonicalizer *** | |
func @pad_test_dispatch_1() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : !flow.dispatch.tensor<readonly:2x3xi32> | |
%2 = hal.interface.binding.subspan @legacy_io::@ro1[%c0] : memref<3x9xi32> | |
%3 = hal.interface.binding.subspan @legacy_io::@ro1[%c0] : !flow.dispatch.tensor<readonly:3x9xi32> | |
%4 = hal.interface.binding.subspan @legacy_io::@wo2[%c0] : memref<3x9xi32> | |
%5 = hal.interface.binding.subspan @legacy_io::@wo2[%c0] : !flow.dispatch.tensor<writeonly:3x9xi32> | |
linalg.copy(%2, %4) : memref<3x9xi32>, memref<3x9xi32> | |
%6 = subview %4[0, 1] [2, 3] [1, 1] : memref<3x9xi32> to memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
linalg.copy(%0, %6) : memref<2x3xi32>, memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
return | |
} | |
// *** IR Dump After CSE *** | |
func @pad_test_dispatch_1() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : !flow.dispatch.tensor<readonly:2x3xi32> | |
%2 = hal.interface.binding.subspan @legacy_io::@ro1[%c0] : memref<3x9xi32> | |
%3 = hal.interface.binding.subspan @legacy_io::@ro1[%c0] : !flow.dispatch.tensor<readonly:3x9xi32> | |
%4 = hal.interface.binding.subspan @legacy_io::@wo2[%c0] : memref<3x9xi32> | |
%5 = hal.interface.binding.subspan @legacy_io::@wo2[%c0] : !flow.dispatch.tensor<writeonly:3x9xi32> | |
linalg.copy(%2, %4) : memref<3x9xi32>, memref<3x9xi32> | |
%6 = subview %4[0, 1] [2, 3] [1, 1] : memref<3x9xi32> to memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
linalg.copy(%0, %6) : memref<2x3xi32>, memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::RemoveDeadMemAllocsPass *** | |
func @pad_test_dispatch_1() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@ro1[%c0] : memref<3x9xi32> | |
%2 = hal.interface.binding.subspan @legacy_io::@wo2[%c0] : memref<3x9xi32> | |
linalg.copy(%1, %2) : memref<3x9xi32>, memref<3x9xi32> | |
%3 = subview %2[0, 1] [2, 3] [1, 1] : memref<3x9xi32> to memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
linalg.copy(%0, %3) : memref<2x3xi32>, memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
return | |
} | |
// *** IR Dump After (anonymous namespace)::CopyRemovalPass *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_1() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@ro1[%c0] : memref<3x9xi32> | |
%2 = hal.interface.binding.subspan @legacy_io::@wo2[%c0] : memref<3x9xi32> | |
linalg.copy(%1, %2) : memref<3x9xi32>, memref<3x9xi32> | |
%3 = subview %2[0, 1] [2, 3] [1, 1] : memref<3x9xi32> to memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
linalg.copy(%0, %3) : memref<2x3xi32>, memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @ro1, set=0, binding=1, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo2, set=0, binding=2, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_1() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@ro1[%c0] : memref<3x9xi32> | |
%2 = hal.interface.binding.subspan @legacy_io::@wo2[%c0] : memref<3x9xi32> | |
linalg.copy(%1, %2) : memref<3x9xi32>, memref<3x9xi32> | |
%3 = subview %2[0, 1] [2, 3] [1, 1] : memref<3x9xi32> to memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
linalg.copy(%0, %3) : memref<2x3xi32>, memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @ro1, set=0, binding=1, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo2, set=0, binding=2, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After CSE *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_1() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@ro1[%c0] : memref<3x9xi32> | |
%2 = hal.interface.binding.subspan @legacy_io::@wo2[%c0] : memref<3x9xi32> | |
linalg.copy(%1, %2) : memref<3x9xi32>, memref<3x9xi32> | |
%3 = subview %2[0, 1] [2, 3] [1, 1] : memref<3x9xi32> to memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
linalg.copy(%0, %3) : memref<2x3xi32>, memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @ro1, set=0, binding=1, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo2, set=0, binding=2, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::ConcretizeTileAmongWorkgroupsPass *** | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
hal.executable.entry_point @pad_test_dispatch_1 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:2x3xi32>, !flow.dispatch.tensor<readonly:3x9xi32>, !flow.dispatch.tensor<writeonly:3x9xi32>) -> ()} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_1() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@ro1[%c0] : memref<3x9xi32> | |
%2 = hal.interface.binding.subspan @legacy_io::@wo2[%c0] : memref<3x9xi32> | |
linalg.copy(%1, %2) : memref<3x9xi32>, memref<3x9xi32> | |
%3 = subview %2[0, 1] [2, 3] [1, 1] : memref<3x9xi32> to memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
linalg.copy(%0, %3) : memref<2x3xi32>, memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @ro1, set=0, binding=1, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo2, set=0, binding=2, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::TileAndVectorizeInOneWorkgroupPass *** | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
hal.executable.entry_point @pad_test_dispatch_1 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:2x3xi32>, !flow.dispatch.tensor<readonly:3x9xi32>, !flow.dispatch.tensor<writeonly:3x9xi32>) -> ()} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_1() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@ro1[%c0] : memref<3x9xi32> | |
%2 = hal.interface.binding.subspan @legacy_io::@wo2[%c0] : memref<3x9xi32> | |
linalg.copy(%1, %2) : memref<3x9xi32>, memref<3x9xi32> | |
%3 = subview %2[0, 1] [2, 3] [1, 1] : memref<3x9xi32> to memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
linalg.copy(%0, %3) : memref<2x3xi32>, memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @ro1, set=0, binding=1, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo2, set=0, binding=2, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_1() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@ro1[%c0] : memref<3x9xi32> | |
%2 = hal.interface.binding.subspan @legacy_io::@wo2[%c0] : memref<3x9xi32> | |
linalg.copy(%1, %2) : memref<3x9xi32>, memref<3x9xi32> | |
%3 = subview %2[0, 1] [2, 3] [1, 1] : memref<3x9xi32> to memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
linalg.copy(%0, %3) : memref<2x3xi32>, memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @ro1, set=0, binding=1, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo2, set=0, binding=2, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
../iree/test/e2e/xla_ops/pad.mlir:4:10: error: 'hal.executable.entry_point' op cannot override workgroup_count_region | |
%res = "mhlo.pad"(%input, %c0) { | |
^ | |
../iree/test/e2e/xla_ops/pad.mlir:4:10: note: see current operation: "hal.executable.entry_point"() ( { | |
^bb0(%arg0: index, %arg1: index, %arg2: index): // no predecessors | |
%c1 = "std.constant"() {value = 1 : index} : () -> index | |
%0 = "affine.apply"(%arg0, %arg1, %arg2) {map = affine_map<()[s0, s1, s2] -> (((s0 * s1) * s2) ceildiv 32)>} : (index, index, index) -> index | |
"hal.return"(%0, %c1, %c1) : (index, index, index) -> () | |
}) {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:2x3xi32>, !flow.dispatch.tensor<readonly:3x9xi32>, !flow.dispatch.tensor<writeonly:3x9xi32>) -> (), sym_name = "pad_test_dispatch_1"} : () -> () | |
../iree/test/e2e/xla_ops/pad.mlir:4:10: error: failed to legalize operation 'linalg.copy' | |
%res = "mhlo.pad"(%input, %c0) { | |
^ | |
../iree/test/e2e/xla_ops/pad.mlir:4:10: note: see current operation: "linalg.copy"(%0, %23) ( { | |
^bb0(%arg0: i32, %arg1: i32): // no predecessors | |
"linalg.yield"(%arg0) : (i32) -> () | |
}) : (memref<2x3xi32>, memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>>) -> () | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::ConvertToGPUPass Failed *** | |
"hal.executable.target"() ( { | |
"module"() ( { | |
"func"() ( { | |
%c0 = "std.constant"() {value = 0 : index} : () -> index | |
%0 = "hal.interface.binding.subspan"(%c0) {binding = @legacy_io::@ro0} : (index) -> memref<2x3xi32> | |
%1 = "hal.interface.binding.subspan"(%c0) {binding = @legacy_io::@ro1} : (index) -> memref<3x9xi32> | |
%2 = "hal.interface.binding.subspan"(%c0) {binding = @legacy_io::@wo2} : (index) -> memref<3x9xi32> | |
"linalg.copy"(%1, %2) ( { | |
^bb0(%arg0: i32, %arg1: i32): // no predecessors | |
"linalg.yield"(%arg0) : (i32) -> () | |
}) : (memref<3x9xi32>, memref<3x9xi32>) -> () | |
%3 = "std.subview"(%2) {operand_segment_sizes = dense<[1, 0, 0, 0]> : vector<4xi32>, static_offsets = [0, 1], static_sizes = [2, 3], static_strides = [1, 1]} : (memref<3x9xi32>) -> memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
"linalg.copy"(%0, %3) ( { | |
^bb0(%arg0: i32, %arg1: i32): // no predecessors | |
"linalg.yield"(%arg0) : (i32) -> () | |
}) : (memref<2x3xi32>, memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>>) -> () | |
"std.return"() : () -> () | |
}) {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}, sym_name = "pad_test_dispatch_1", type = () -> ()} : () -> () | |
"hal.interface"() ( { | |
"hal.interface.binding"() {access = 1 : i32, binding = 0 : i32, set = 0 : i32, sym_name = "ro0", type = 7 : i32} : () -> () | |
"hal.interface.binding"() {access = 1 : i32, binding = 1 : i32, set = 0 : i32, sym_name = "ro1", type = 7 : i32} : () -> () | |
"hal.interface.binding"() {access = 6 : i32, binding = 2 : i32, set = 0 : i32, sym_name = "wo2", type = 7 : i32} : () -> () | |
"hal.interface_end"() : () -> () | |
}) {sym_name = "legacy_io", sym_visibility = "private"} : () -> () | |
"module_terminator"() : () -> () | |
}) {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} : () -> () | |
"hal.executable.target_end"() : () -> () | |
}) {sym_name = "vulkan_spirv", target_backend_filter = "vulkan*"} : () -> () | |
../iree/test/e2e/xla_ops/pad.mlir:4:10: error: failed to run translation of source executable to target executable for backend vulkan* | |
%res = "mhlo.pad"(%input, %c0) { | |
^ | |
../iree/test/e2e/xla_ops/pad.mlir:4:10: note: see current operation: "hal.executable.target"() ( { | |
"module"() ( { | |
"func"() ( { | |
%c0 = "std.constant"() {value = 0 : index} : () -> index | |
%0 = "hal.interface.binding.subspan"(%c0) {binding = @legacy_io::@ro0} : (index) -> memref<2x3xi32> | |
%1 = "hal.interface.binding.subspan"(%c0) {binding = @legacy_io::@ro1} : (index) -> memref<3x9xi32> | |
%2 = "hal.interface.binding.subspan"(%c0) {binding = @legacy_io::@wo2} : (index) -> memref<3x9xi32> | |
"linalg.copy"(%1, %2) ( { | |
^bb0(%arg0: i32, %arg1: i32): // no predecessors | |
"linalg.yield"(%arg0) : (i32) -> () | |
}) : (memref<3x9xi32>, memref<3x9xi32>) -> () | |
%3 = "std.subview"(%2) {operand_segment_sizes = dense<[1, 0, 0, 0]> : vector<4xi32>, static_offsets = [0, 1], static_sizes = [2, 3], static_strides = [1, 1]} : (memref<3x9xi32>) -> memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
"linalg.copy"(%0, %3) ( { | |
^bb0(%arg0: i32, %arg1: i32): // no predecessors | |
"linalg.yield"(%arg0) : (i32) -> () | |
}) : (memref<2x3xi32>, memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>>) -> () | |
"std.return"() : () -> () | |
}) {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}, sym_name = "pad_test_dispatch_1", type = () -> ()} : () -> () | |
"hal.interface"() ( { | |
"hal.interface.binding"() {access = 1 : i32, binding = 0 : i32, set = 0 : i32, sym_name = "ro0", type = 7 : i32} : () -> () | |
"hal.interface.binding"() {access = 1 : i32, binding = 1 : i32, set = 0 : i32, sym_name = "ro1", type = 7 : i32} : () -> () | |
"hal.interface.binding"() {access = 6 : i32, binding = 2 : i32, set = 0 : i32, sym_name = "wo2", type = 7 : i32} : () -> () | |
"hal.interface_end"() : () -> () | |
}) {sym_name = "legacy_io", sym_visibility = "private"} : () -> () | |
"module_terminator"() : () -> () | |
}) {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} : () -> () | |
"hal.executable.target_end"() : () -> () | |
}) {sym_name = "vulkan_spirv", target_backend_filter = "vulkan*"} : () -> () | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::TranslateExecutablesPass Failed *** | |
"hal.executable.target"() ( { | |
"module"() ( { | |
"func"() ( { | |
%c0 = "std.constant"() {value = 0 : index} : () -> index | |
%0 = "hal.interface.binding.subspan"(%c0) {binding = @legacy_io::@ro0} : (index) -> memref<2x3xi32> | |
%1 = "hal.interface.binding.subspan"(%c0) {binding = @legacy_io::@ro1} : (index) -> memref<3x9xi32> | |
%2 = "hal.interface.binding.subspan"(%c0) {binding = @legacy_io::@wo2} : (index) -> memref<3x9xi32> | |
"linalg.copy"(%1, %2) ( { | |
^bb0(%arg0: i32, %arg1: i32): // no predecessors | |
"linalg.yield"(%arg0) : (i32) -> () | |
}) : (memref<3x9xi32>, memref<3x9xi32>) -> () | |
%3 = "std.subview"(%2) {operand_segment_sizes = dense<[1, 0, 0, 0]> : vector<4xi32>, static_offsets = [0, 1], static_sizes = [2, 3], static_strides = [1, 1]} : (memref<3x9xi32>) -> memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
"linalg.copy"(%0, %3) ( { | |
^bb0(%arg0: i32, %arg1: i32): // no predecessors | |
"linalg.yield"(%arg0) : (i32) -> () | |
}) : (memref<2x3xi32>, memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>>) -> () | |
"std.return"() : () -> () | |
}) {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}, sym_name = "pad_test_dispatch_1", type = () -> ()} : () -> () | |
"hal.interface"() ( { | |
"hal.interface.binding"() {access = 1 : i32, binding = 0 : i32, set = 0 : i32, sym_name = "ro0", type = 7 : i32} : () -> () | |
"hal.interface.binding"() {access = 1 : i32, binding = 1 : i32, set = 0 : i32, sym_name = "ro1", type = 7 : i32} : () -> () | |
"hal.interface.binding"() {access = 6 : i32, binding = 2 : i32, set = 0 : i32, sym_name = "wo2", type = 7 : i32} : () -> () | |
"hal.interface_end"() : () -> () | |
}) {sym_name = "legacy_io", sym_visibility = "private"} : () -> () | |
"module_terminator"() : () -> () | |
}) {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} : () -> () | |
"hal.executable.target_end"() : () -> () | |
}) {sym_name = "vulkan_spirv", target_backend_filter = "vulkan*"} : () -> () |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment