Created
March 14, 2021 12:28
-
-
Save antiagainst/fa3af0a00fdd14958b298b2646692b47 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// *** IR Dump After mlir::mhlo::(anonymous namespace)::LegalizeControlFlowPass *** | |
func @pad_test() attributes {iree.module.export} { | |
%0 = iree.unfoldable_constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%1 = iree.unfoldable_constant dense<0> : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq_const(%2, dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32>) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::Flow::(anonymous namespace)::HLOToHLOPreprocessing *** | |
func @pad_test() attributes {iree.module.export} { | |
%0 = iree.unfoldable_constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%1 = iree.unfoldable_constant dense<0> : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq_const(%2, dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32>) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::DecomposeHLOClampPass *** | |
func @pad_test() attributes {iree.module.export} { | |
%0 = iree.unfoldable_constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%1 = iree.unfoldable_constant dense<0> : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq_const(%2, dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32>) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After RemoveShapeConstraints *** | |
func @pad_test() attributes {iree.module.export} { | |
%0 = iree.unfoldable_constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%1 = iree.unfoldable_constant dense<0> : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq_const(%2, dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32>) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After TosaToSCF *** | |
func @pad_test() attributes {iree.module.export} { | |
%0 = iree.unfoldable_constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%1 = iree.unfoldable_constant dense<0> : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq_const(%2, dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32>) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After SCFToStandard *** | |
func @pad_test() attributes {iree.module.export} { | |
%0 = iree.unfoldable_constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%1 = iree.unfoldable_constant dense<0> : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq_const(%2, dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32>) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After TosaToStandard *** | |
func @pad_test() attributes {iree.module.export} { | |
%0 = iree.unfoldable_constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%1 = iree.unfoldable_constant dense<0> : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq_const(%2, dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32>) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After TosaToLinalgOnTensors *** | |
func @pad_test() attributes {iree.module.export} { | |
%0 = iree.unfoldable_constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%1 = iree.unfoldable_constant dense<0> : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq_const(%2, dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32>) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After Canonicalizer *** | |
module { | |
func @pad_test() attributes {iree.module.export} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::Shape::(anonymous namespace)::ConvertShapeToShapex *** | |
module { | |
func @pad_test() attributes {iree.module.export} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::Flow::(anonymous namespace)::FlattenTuplesInCFGPass *** | |
module { | |
func @pad_test() attributes {iree.module.export} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
func @pad_test() attributes {iree.module.export} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After Inliner *** | |
module { | |
func @pad_test() attributes {iree.module.export} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
func @pad_test() attributes {iree.module.export} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After CSE *** | |
func @pad_test() attributes {iree.module.export} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::Flow::LegalizeInputTypesPass *** | |
module { | |
func @pad_test() attributes {iree.module.export} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::Flow::MaterializeReflectionAttrsPass *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::Flow::ExpandVariableDynamicDimsPass *** | |
module { | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::Shape::(anonymous namespace)::ExpandFunctionDynamicDimsPass *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::Shape::(anonymous namespace)::TieDynamicShapesPass *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::Shape::(anonymous namespace)::MaterializeShapeCalculationsPass *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::Shape::(anonymous namespace)::HoistShapeCalculations *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::Flow::PrePartitioningConversionPass *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After Canonicalizer *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = "mhlo.pad"(%0, %1) {edge_padding_high = dense<[1, 5]> : tensor<2xi64>, edge_padding_low = dense<[0, 1]> : tensor<2xi64>, interior_padding = dense<0> : tensor<2xi64>} : (tensor<2x3xi32>, tensor<i32>) -> tensor<3x9xi32> | |
check.expect_eq(%2, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::ConvertHLOToLinalgOnTensorsPass *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = tensor.extract %1[] : tensor<i32> | |
%c0 = constant 0 : index | |
%c1 = constant 1 : index | |
%c1_2 = constant 1 : index | |
%c5 = constant 5 : index | |
%c0_3 = constant 0 : index | |
%c2 = constant 2 : index | |
%c3 = constant 3 : index | |
%c1_4 = constant 1 : index | |
%c3_5 = constant 3 : index | |
%c9 = constant 9 : index | |
%3 = linalg.init_tensor [%c3, %c9] : tensor<?x?xi32> | |
%4 = linalg.fill(%3, %2) : tensor<?x?xi32>, i32 -> tensor<?x?xi32> | |
%5 = subtensor_insert %0 into %4[%c0, %c1_2] [%c2, %c3_5] [1, 1] : tensor<2x3xi32> into tensor<?x?xi32> | |
%6 = tensor.cast %5 : tensor<?x?xi32> to tensor<3x9xi32> | |
check.expect_eq(%6, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After LinalgFoldUnitExtentDims *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%c0 = constant 0 : index | |
%c1 = constant 1 : index | |
%c2 = constant 2 : index | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = tensor.extract %1[] : tensor<i32> | |
%3 = linalg.init_tensor [%c3, %c9] : tensor<?x?xi32> | |
%4 = linalg.fill(%3, %2) : tensor<?x?xi32>, i32 -> tensor<?x?xi32> | |
%5 = subtensor_insert %0 into %4[%c0, %c1] [%c2, %c3] [1, 1] : tensor<2x3xi32> into tensor<?x?xi32> | |
%6 = tensor.cast %5 : tensor<?x?xi32> to tensor<3x9xi32> | |
check.expect_eq(%6, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After Canonicalizer *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = tensor.extract %1[] : tensor<i32> | |
%3 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
%4 = linalg.fill(%3, %2) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
%5 = subtensor_insert %0 into %4[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
check.expect_eq(%5, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::FusionOfTensorOpsPass *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = tensor.extract %1[] : tensor<i32> | |
%3 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
%4 = linalg.fill(%3, %2) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
%5 = subtensor_insert %0 into %4[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
check.expect_eq(%5, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After CSE *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = tensor.extract %1[] : tensor<i32> | |
%3 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
%4 = linalg.fill(%3, %2) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
%5 = subtensor_insert %0 into %4[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
check.expect_eq(%5, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::Flow::(anonymous namespace)::DispatchLinalgOnTensorsPass *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
%c1 = constant 1 : index | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = flow.dispatch.workgroups[%c9, %c3, %c1](%1) : (tensor<i32>) -> tensor<3x9xi32> = | |
(%arg0: !flow.dispatch.tensor<readonly:i32>, %arg1: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
%4 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
%5 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%6 = tensor.extract %5[] : tensor<i32> | |
%7 = linalg.fill(%4, %6) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
flow.dispatch.tensor.store %7, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
flow.return | |
} | |
%3 = flow.dispatch.workgroups[%c9, %c3, %c1](%0, %2) : (tensor<2x3xi32>, tensor<3x9xi32>) -> %2 = | |
(%arg0: !flow.dispatch.tensor<readonly:2x3xi32>, %arg1: !flow.dispatch.tensor<readwrite:3x9xi32>) { | |
%4 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%5 = flow.dispatch.tensor.load %arg1 : !flow.dispatch.tensor<readwrite:3x9xi32> -> tensor<3x9xi32> | |
%6 = subtensor_insert %4 into %5[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
flow.dispatch.tensor.store %6, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<readwrite:3x9xi32> | |
flow.return | |
} | |
check.expect_eq(%3, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After Canonicalizer *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
%c1 = constant 1 : index | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = flow.dispatch.workgroups[%c9, %c3, %c1](%1) : (tensor<i32>) -> tensor<3x9xi32> = | |
(%arg0: !flow.dispatch.tensor<readonly:i32>, %arg1: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
%4 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
%5 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%6 = tensor.extract %5[] : tensor<i32> | |
%7 = linalg.fill(%4, %6) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
flow.dispatch.tensor.store %7, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
flow.return | |
} | |
%3 = flow.dispatch.workgroups[%c9, %c3, %c1](%0, %2) : (tensor<2x3xi32>, tensor<3x9xi32>) -> %2 = | |
(%arg0: !flow.dispatch.tensor<readonly:2x3xi32>, %arg1: !flow.dispatch.tensor<readwrite:3x9xi32>) { | |
%4 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%5 = flow.dispatch.tensor.load %arg1 : !flow.dispatch.tensor<readwrite:3x9xi32> -> tensor<3x9xi32> | |
%6 = subtensor_insert %4 into %5[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
flow.dispatch.tensor.store %6, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<readwrite:3x9xi32> | |
flow.return | |
} | |
check.expect_eq(%3, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::Flow::OutlineDispatchRegions2Pass *** | |
module { | |
flow.executable @pad_test_dispatch_0 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_0 attributes {signature = (tensor<i32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:i32>, %arg1: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
%0 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
%1 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%2 = tensor.extract %1[] : tensor<i32> | |
%3 = linalg.fill(%0, %2) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
flow.dispatch.tensor.store %3, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
} | |
} | |
flow.executable @pad_test_dispatch_1 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_1 attributes {signature = (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_1(%arg0: !flow.dispatch.tensor<readonly:2x3xi32>, %arg1: !flow.dispatch.tensor<readwrite:3x9xi32>) { | |
%0 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%1 = flow.dispatch.tensor.load %arg1 : !flow.dispatch.tensor<readwrite:3x9xi32> -> tensor<3x9xi32> | |
%2 = subtensor_insert %0 into %1[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
flow.dispatch.tensor.store %2, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<readwrite:3x9xi32> | |
return | |
} | |
} | |
} | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
%c1 = constant 1 : index | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%1) : (tensor<i32>) -> tensor<3x9xi32> | |
%3 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%0, %2) : (tensor<2x3xi32>, tensor<3x9xi32>) -> %2 | |
check.expect_eq(%3, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
%c1 = constant 1 : index | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%1) : (tensor<i32>) -> tensor<3x9xi32> | |
%3 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%0, %2) : (tensor<2x3xi32>, tensor<3x9xi32>) -> %2 | |
check.expect_eq(%3, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::Flow::DeduplicateExecutablesPass *** | |
// Whole-module view (after DeduplicateExecutablesPass, per the dump header
// above): two private flow.executables and the exported @pad_test function.
module { | |
// Executable 0: fills a 3x9 i32 tensor with the scalar loaded from %arg0.
flow.executable @pad_test_dispatch_0 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_0 attributes {signature = (tensor<i32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:i32>, %arg1: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
// 3x9 tensor that the fill below writes into.
%0 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
// Load the rank-0 input and extract its single i32 element.
%1 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%2 = tensor.extract %1[] : tensor<i32> | |
// Fill with the scalar and store to the write-only output %arg1.
%3 = linalg.fill(%0, %2) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
flow.dispatch.tensor.store %3, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
} | |
} | |
// Executable 1: inserts the 2x3 input into the 3x9 tensor held by the
// read-write argument %arg1 and stores the result back to %arg1.
flow.executable @pad_test_dispatch_1 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_1 attributes {signature = (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_1(%arg0: !flow.dispatch.tensor<readonly:2x3xi32>, %arg1: !flow.dispatch.tensor<readwrite:3x9xi32>) { | |
%0 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%1 = flow.dispatch.tensor.load %arg1 : !flow.dispatch.tensor<readwrite:3x9xi32> -> tensor<3x9xi32> | |
// Bracket groups are offsets [0, 1], sizes [2, 3], strides [1, 1].
%2 = subtensor_insert %0 into %1[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
flow.dispatch.tensor.store %2, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<readwrite:3x9xi32> | |
return | |
} | |
} | |
} | |
// Exported test: runs dispatch 0 then dispatch 1 and checks the result.
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
// %cst: 2x3 input, %cst_0: scalar 0 used as the fill value, %cst_1: expected 3x9 result.
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
%c1 = constant 1 : index | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
// Fill dispatch: scalar in, 3x9 tensor out.
%2 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%1) : (tensor<i32>) -> tensor<3x9xi32> | |
// Insert dispatch: result is written as `-> %2`, matching the readwrite %arg1 of the callee.
%3 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%0, %2) : (tensor<2x3xi32>, tensor<3x9xi32>) -> %2 | |
check.expect_eq(%3, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::Flow::PostPartitioningConversionPass *** | |
// Exported test entry point as printed after PostPartitioningConversionPass
// (see the dump header above). Two dispatches build the 3x9 result, which is
// then checked against the expected constant.
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
// %cst: 2x3 input, %cst_0: rank-0 scalar 0, %cst_1: expected 3x9 result.
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
// Bracketed operands of the dispatches below (9, 3, 1 in use order).
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
%c1 = constant 1 : index | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
// Dispatch 0: scalar in, 3x9 tensor out.
%2 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%1) : (tensor<i32>) -> tensor<3x9xi32> | |
// Dispatch 1: 2x3 input plus %2; result is written as `-> %2`.
// NOTE(review): presumably an in-place/tied result - confirm.
%3 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%0, %2) : (tensor<2x3xi32>, tensor<3x9xi32>) -> %2 | |
// Compare the dispatched result with the expected tensor.
check.expect_eq(%3, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Exported test entry point as printed after a Canonicalizer run (see the
// dump header above): two dispatches followed by an equality check.
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
// %cst: 2x3 input, %cst_0: rank-0 scalar 0, %cst_1: expected 3x9 result.
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
// Bracketed operands of the dispatches below.
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
%c1 = constant 1 : index | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
// Dispatch 0: scalar in, 3x9 tensor out.
%2 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%1) : (tensor<i32>) -> tensor<3x9xi32> | |
// Dispatch 1: 2x3 input plus %2; result is written as `-> %2`.
%3 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%0, %2) : (tensor<2x3xi32>, tensor<3x9xi32>) -> %2 | |
// Compare the dispatched result with the expected tensor.
check.expect_eq(%3, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After CSE *** | |
// Exported test entry point as printed after CSE (see the dump header
// above): two dispatches followed by an equality check.
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
// %cst: 2x3 input, %cst_0: rank-0 scalar 0, %cst_1: expected 3x9 result.
%cst = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
// Bracketed operands of the dispatches below.
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
%c1 = constant 1 : index | |
%0 = iree.do_not_optimize(%cst) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
// Dispatch 0: scalar in, 3x9 tensor out.
%2 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%1) : (tensor<i32>) -> tensor<3x9xi32> | |
// Dispatch 1: 2x3 input plus %2; result is written as `-> %2`.
%3 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%0, %2) : (tensor<2x3xi32>, tensor<3x9xi32>) -> %2 | |
// Compare the dispatched result with the expected tensor.
check.expect_eq(%3, %cst_1) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::Flow::(anonymous namespace)::HoistUnstreamableOps *** | |
// Exported test entry point as printed after HoistUnstreamableOps (see the
// dump header above). Compared with the preceding dumps in this file, the
// constants appear in a different order and the %cst / %cst_1 names are
// swapped: %cst is now the expected result and %cst_1 the input.
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
// Bracketed operands of the dispatches below.
%c1 = constant 1 : index | |
%c9 = constant 9 : index | |
%c3 = constant 3 : index | |
// %cst: expected 3x9 result, %cst_0: rank-0 scalar 0, %cst_1: 2x3 input.
%cst = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%0 = iree.do_not_optimize(%cst_1) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
// Dispatch 0: scalar in, 3x9 tensor out.
%2 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%1) : (tensor<i32>) -> tensor<3x9xi32> | |
// Dispatch 1: 2x3 input plus %2; result is written as `-> %2`.
%3 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%0, %2) : (tensor<2x3xi32>, tensor<3x9xi32>) -> %2 | |
// Compare the dispatched result with the expected tensor.
check.expect_eq(%3, %cst) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Exported test entry point as printed after a Canonicalizer run (see the
// dump header above): two dispatches followed by an equality check.
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
// Bracketed operands of the dispatches below.
%c1 = constant 1 : index | |
%c9 = constant 9 : index | |
%c3 = constant 3 : index | |
// %cst: expected 3x9 result, %cst_0: rank-0 scalar 0, %cst_1: 2x3 input.
%cst = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%0 = iree.do_not_optimize(%cst_1) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
// Dispatch 0: scalar in, 3x9 tensor out.
%2 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%1) : (tensor<i32>) -> tensor<3x9xi32> | |
// Dispatch 1: 2x3 input plus %2; result is written as `-> %2`.
%3 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%0, %2) : (tensor<2x3xi32>, tensor<3x9xi32>) -> %2 | |
// Compare the dispatched result with the expected tensor.
check.expect_eq(%3, %cst) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::Flow::FormStreamsPass *** | |
// Exported test entry point as printed after FormStreamsPass (see the dump
// header above). Both dispatches now live inside one flow.ex.stream.fragment
// region; the index constants and both tensors are passed in as operands.
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%c1 = constant 1 : index | |
%c9 = constant 9 : index | |
%c3 = constant 3 : index | |
// %cst: expected 3x9 result, %cst_0: rank-0 scalar 0, %cst_1: 2x3 input.
%cst = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%0 = iree.do_not_optimize(%cst_1) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
// Fragment operands map positionally onto the region arguments on the next line:
// %arg0..%arg2 = 9, 3, 1; %arg3 = scalar; %arg4 = 2x3 input.
%2 = flow.ex.stream.fragment(%c9, %c3, %c1, %1, %0) : (index, index, index, tensor<i32>, tensor<2x3xi32>) -> tensor<3x9xi32> = | |
(%arg0: index, %arg1: index, %arg2: index, %arg3: tensor<i32>, %arg4: tensor<2x3xi32>) -> tensor<3x9xi32> { | |
// Dispatch 0: scalar in, 3x9 tensor out.
%3 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%arg0, %arg1, %arg2](%arg3) : (tensor<i32>) -> tensor<3x9xi32> | |
// Dispatch 1: 2x3 input plus %3; result is written as `-> %3`.
%4 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%arg0, %arg1, %arg2](%arg4, %3) : (tensor<2x3xi32>, tensor<3x9xi32>) -> %3 | |
// The fragment yields the second dispatch's result.
flow.return %4 : tensor<3x9xi32> | |
} | |
// Compare the fragment result with the expected tensor.
check.expect_eq(%2, %cst) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::Flow::OutlineLargeConstantsPass *** | |
// Whole-module view (after OutlineLargeConstantsPass, per the dump header
// above): two private flow.executables and the exported @pad_test function,
// whose dispatches are wrapped in a flow.ex.stream.fragment.
module { | |
// Executable 0: fills a 3x9 i32 tensor with the scalar loaded from %arg0.
flow.executable @pad_test_dispatch_0 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_0 attributes {signature = (tensor<i32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:i32>, %arg1: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
// 3x9 tensor that the fill below writes into.
%0 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
// Load the rank-0 input and extract its single i32 element.
%1 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%2 = tensor.extract %1[] : tensor<i32> | |
// Fill with the scalar and store to the write-only output %arg1.
%3 = linalg.fill(%0, %2) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
flow.dispatch.tensor.store %3, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
} | |
} | |
// Executable 1: inserts the 2x3 input into the 3x9 tensor held by the
// read-write argument %arg1 and stores the result back to %arg1.
flow.executable @pad_test_dispatch_1 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_1 attributes {signature = (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_1(%arg0: !flow.dispatch.tensor<readonly:2x3xi32>, %arg1: !flow.dispatch.tensor<readwrite:3x9xi32>) { | |
%0 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%1 = flow.dispatch.tensor.load %arg1 : !flow.dispatch.tensor<readwrite:3x9xi32> -> tensor<3x9xi32> | |
// Bracket groups are offsets [0, 1], sizes [2, 3], strides [1, 1].
%2 = subtensor_insert %0 into %1[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
flow.dispatch.tensor.store %2, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<readwrite:3x9xi32> | |
return | |
} | |
} | |
} | |
// Exported test: runs both dispatches inside a stream fragment and checks the result.
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%c1 = constant 1 : index | |
%c9 = constant 9 : index | |
%c3 = constant 3 : index | |
// %cst: expected 3x9 result, %cst_0: scalar 0 used as the fill value, %cst_1: 2x3 input.
%cst = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%0 = iree.do_not_optimize(%cst_1) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
// Fragment operands map positionally onto the region arguments on the next line.
%2 = flow.ex.stream.fragment(%c9, %c3, %c1, %1, %0) : (index, index, index, tensor<i32>, tensor<2x3xi32>) -> tensor<3x9xi32> = | |
(%arg0: index, %arg1: index, %arg2: index, %arg3: tensor<i32>, %arg4: tensor<2x3xi32>) -> tensor<3x9xi32> { | |
// Fill dispatch, then insert dispatch whose result is written as `-> %3`.
%3 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%arg0, %arg1, %arg2](%arg3) : (tensor<i32>) -> tensor<3x9xi32> | |
%4 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%arg0, %arg1, %arg2](%arg4, %3) : (tensor<2x3xi32>, tensor<3x9xi32>) -> %3 | |
flow.return %4 : tensor<3x9xi32> | |
} | |
check.expect_eq(%2, %cst) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Exported test entry point as printed after a Canonicalizer run (see the
// dump header above). The index constants 9, 3, 1 are now defined inside the
// stream fragment region, so the fragment only captures the two tensors.
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
// %cst: expected 3x9 result, %cst_0: rank-0 scalar 0, %cst_1: 2x3 input.
%cst = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%0 = iree.do_not_optimize(%cst_1) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
// Region arguments: %arg0 = scalar, %arg1 = 2x3 input.
%2 = flow.ex.stream.fragment(%1, %0) : (tensor<i32>, tensor<2x3xi32>) -> tensor<3x9xi32> = | |
(%arg0: tensor<i32>, %arg1: tensor<2x3xi32>) -> tensor<3x9xi32> { | |
// Bracketed operands of the dispatches below.
%c9 = constant 9 : index | |
%c3 = constant 3 : index | |
%c1 = constant 1 : index | |
// Dispatch 0: scalar in, 3x9 tensor out.
%3 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%arg0) : (tensor<i32>) -> tensor<3x9xi32> | |
// Dispatch 1: 2x3 input plus %3; result is written as `-> %3`.
%4 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%arg1, %3) : (tensor<2x3xi32>, tensor<3x9xi32>) -> %3 | |
flow.return %4 : tensor<3x9xi32> | |
} | |
// Compare the fragment result with the expected tensor.
check.expect_eq(%2, %cst) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After CSE *** | |
// Exported test entry point as printed after CSE (see the dump header
// above): a stream fragment holding both dispatches, then an equality check.
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
// %cst: expected 3x9 result, %cst_0: rank-0 scalar 0, %cst_1: 2x3 input.
%cst = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%0 = iree.do_not_optimize(%cst_1) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
// Region arguments: %arg0 = scalar, %arg1 = 2x3 input.
%2 = flow.ex.stream.fragment(%1, %0) : (tensor<i32>, tensor<2x3xi32>) -> tensor<3x9xi32> = | |
(%arg0: tensor<i32>, %arg1: tensor<2x3xi32>) -> tensor<3x9xi32> { | |
// Bracketed operands of the dispatches below.
%c9 = constant 9 : index | |
%c3 = constant 3 : index | |
%c1 = constant 1 : index | |
// Dispatch 0: scalar in, 3x9 tensor out.
%3 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%arg0) : (tensor<i32>) -> tensor<3x9xi32> | |
// Dispatch 1: 2x3 input plus %3; result is written as `-> %3`.
%4 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%arg1, %3) : (tensor<2x3xi32>, tensor<3x9xi32>) -> %3 | |
flow.return %4 : tensor<3x9xi32> | |
} | |
// Compare the fragment result with the expected tensor.
check.expect_eq(%2, %cst) : tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After SymbolDCE *** | |
// Whole-module view (after SymbolDCE, per the dump header above): two private
// flow.executables and the exported @pad_test function.
module { | |
// Executable 0: fills a 3x9 i32 tensor with the scalar loaded from %arg0.
flow.executable @pad_test_dispatch_0 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_0 attributes {signature = (tensor<i32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:i32>, %arg1: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
// 3x9 tensor that the fill below writes into.
%0 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
// Load the rank-0 input and extract its single i32 element.
%1 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%2 = tensor.extract %1[] : tensor<i32> | |
// Fill with the scalar and store to the write-only output %arg1.
%3 = linalg.fill(%0, %2) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
flow.dispatch.tensor.store %3, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
} | |
} | |
// Executable 1: inserts the 2x3 input into the 3x9 tensor held by the
// read-write argument %arg1 and stores the result back to %arg1.
flow.executable @pad_test_dispatch_1 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_1 attributes {signature = (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_1(%arg0: !flow.dispatch.tensor<readonly:2x3xi32>, %arg1: !flow.dispatch.tensor<readwrite:3x9xi32>) { | |
%0 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%1 = flow.dispatch.tensor.load %arg1 : !flow.dispatch.tensor<readwrite:3x9xi32> -> tensor<3x9xi32> | |
// Bracket groups are offsets [0, 1], sizes [2, 3], strides [1, 1].
%2 = subtensor_insert %0 into %1[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
flow.dispatch.tensor.store %2, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<readwrite:3x9xi32> | |
return | |
} | |
} | |
} | |
// Exported test: runs both dispatches inside a stream fragment and checks the result.
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
// %cst: expected 3x9 result, %cst_0: scalar 0 used as the fill value, %cst_1: 2x3 input.
%cst = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%0 = iree.do_not_optimize(%cst_1) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
// Region arguments: %arg0 = scalar, %arg1 = 2x3 input.
%2 = flow.ex.stream.fragment(%1, %0) : (tensor<i32>, tensor<2x3xi32>) -> tensor<3x9xi32> = | |
(%arg0: tensor<i32>, %arg1: tensor<2x3xi32>) -> tensor<3x9xi32> { | |
%c9 = constant 9 : index | |
%c3 = constant 3 : index | |
%c1 = constant 1 : index | |
// Fill dispatch, then insert dispatch whose result is written as `-> %3`.
%3 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%arg0) : (tensor<i32>) -> tensor<3x9xi32> | |
%4 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%arg1, %3) : (tensor<2x3xi32>, tensor<3x9xi32>) -> %3 | |
flow.return %4 : tensor<3x9xi32> | |
} | |
check.expect_eq(%2, %cst) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Whole-module view (after a Canonicalizer run, per the dump header above):
// two private flow.executables and the exported @pad_test function.
module { | |
// Executable 0: fills a 3x9 i32 tensor with the scalar loaded from %arg0.
flow.executable @pad_test_dispatch_0 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_0 attributes {signature = (tensor<i32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:i32>, %arg1: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
// 3x9 tensor that the fill below writes into.
%0 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
// Load the rank-0 input and extract its single i32 element.
%1 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%2 = tensor.extract %1[] : tensor<i32> | |
// Fill with the scalar and store to the write-only output %arg1.
%3 = linalg.fill(%0, %2) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
flow.dispatch.tensor.store %3, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
} | |
} | |
// Executable 1: inserts the 2x3 input into the 3x9 tensor held by the
// read-write argument %arg1 and stores the result back to %arg1.
flow.executable @pad_test_dispatch_1 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_1 attributes {signature = (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_1(%arg0: !flow.dispatch.tensor<readonly:2x3xi32>, %arg1: !flow.dispatch.tensor<readwrite:3x9xi32>) { | |
%0 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%1 = flow.dispatch.tensor.load %arg1 : !flow.dispatch.tensor<readwrite:3x9xi32> -> tensor<3x9xi32> | |
// Bracket groups are offsets [0, 1], sizes [2, 3], strides [1, 1].
%2 = subtensor_insert %0 into %1[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
flow.dispatch.tensor.store %2, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<readwrite:3x9xi32> | |
return | |
} | |
} | |
} | |
// Exported test: runs both dispatches inside a stream fragment and checks the result.
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
// %cst: expected 3x9 result, %cst_0: scalar 0 used as the fill value, %cst_1: 2x3 input.
%cst = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%0 = iree.do_not_optimize(%cst_1) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
// Region arguments: %arg0 = scalar, %arg1 = 2x3 input.
%2 = flow.ex.stream.fragment(%1, %0) : (tensor<i32>, tensor<2x3xi32>) -> tensor<3x9xi32> = | |
(%arg0: tensor<i32>, %arg1: tensor<2x3xi32>) -> tensor<3x9xi32> { | |
%c9 = constant 9 : index | |
%c3 = constant 3 : index | |
%c1 = constant 1 : index | |
// Fill dispatch, then insert dispatch whose result is written as `-> %3`.
%3 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%arg0) : (tensor<i32>) -> tensor<3x9xi32> | |
%4 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%arg1, %3) : (tensor<2x3xi32>, tensor<3x9xi32>) -> %3 | |
flow.return %4 : tensor<3x9xi32> | |
} | |
check.expect_eq(%2, %cst) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::IdentifyConstantPoolsPass *** | |
// Whole-module view (after HAL IdentifyConstantPoolsPass, per the dump header
// above): two private flow.executables and the exported @pad_test function.
module { | |
// Executable 0: fills a 3x9 i32 tensor with the scalar loaded from %arg0.
flow.executable @pad_test_dispatch_0 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_0 attributes {signature = (tensor<i32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:i32>, %arg1: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
// 3x9 tensor that the fill below writes into.
%0 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
// Load the rank-0 input and extract its single i32 element.
%1 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%2 = tensor.extract %1[] : tensor<i32> | |
// Fill with the scalar and store to the write-only output %arg1.
%3 = linalg.fill(%0, %2) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
flow.dispatch.tensor.store %3, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
} | |
} | |
// Executable 1: inserts the 2x3 input into the 3x9 tensor held by the
// read-write argument %arg1 and stores the result back to %arg1.
flow.executable @pad_test_dispatch_1 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_1 attributes {signature = (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_1(%arg0: !flow.dispatch.tensor<readonly:2x3xi32>, %arg1: !flow.dispatch.tensor<readwrite:3x9xi32>) { | |
%0 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%1 = flow.dispatch.tensor.load %arg1 : !flow.dispatch.tensor<readwrite:3x9xi32> -> tensor<3x9xi32> | |
// Bracket groups are offsets [0, 1], sizes [2, 3], strides [1, 1].
%2 = subtensor_insert %0 into %1[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
flow.dispatch.tensor.store %2, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<readwrite:3x9xi32> | |
return | |
} | |
} | |
} | |
// Exported test: runs both dispatches inside a stream fragment and checks the result.
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
// %cst: expected 3x9 result, %cst_0: scalar 0 used as the fill value, %cst_1: 2x3 input.
%cst = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%0 = iree.do_not_optimize(%cst_1) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
// Region arguments: %arg0 = scalar, %arg1 = 2x3 input.
%2 = flow.ex.stream.fragment(%1, %0) : (tensor<i32>, tensor<2x3xi32>) -> tensor<3x9xi32> = | |
(%arg0: tensor<i32>, %arg1: tensor<2x3xi32>) -> tensor<3x9xi32> { | |
%c9 = constant 9 : index | |
%c3 = constant 3 : index | |
%c1 = constant 1 : index | |
// Fill dispatch, then insert dispatch whose result is written as `-> %3`.
%3 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%arg0) : (tensor<i32>) -> tensor<3x9xi32> | |
%4 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%arg1, %3) : (tensor<2x3xi32>, tensor<3x9xi32>) -> %3 | |
flow.return %4 : tensor<3x9xi32> | |
} | |
check.expect_eq(%2, %cst) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::MaterializeConstantPoolBuffersPass *** | |
// Whole-module view (after HAL MaterializeConstantPoolBuffersPass, per the
// dump header above): two private flow.executables and the exported
// @pad_test function.
module { | |
// Executable 0: fills a 3x9 i32 tensor with the scalar loaded from %arg0.
flow.executable @pad_test_dispatch_0 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_0 attributes {signature = (tensor<i32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:i32>, %arg1: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
// 3x9 tensor that the fill below writes into.
%0 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
// Load the rank-0 input and extract its single i32 element.
%1 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%2 = tensor.extract %1[] : tensor<i32> | |
// Fill with the scalar and store to the write-only output %arg1.
%3 = linalg.fill(%0, %2) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
flow.dispatch.tensor.store %3, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
} | |
} | |
// Executable 1: inserts the 2x3 input into the 3x9 tensor held by the
// read-write argument %arg1 and stores the result back to %arg1.
flow.executable @pad_test_dispatch_1 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_1 attributes {signature = (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_1(%arg0: !flow.dispatch.tensor<readonly:2x3xi32>, %arg1: !flow.dispatch.tensor<readwrite:3x9xi32>) { | |
%0 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%1 = flow.dispatch.tensor.load %arg1 : !flow.dispatch.tensor<readwrite:3x9xi32> -> tensor<3x9xi32> | |
// Bracket groups are offsets [0, 1], sizes [2, 3], strides [1, 1].
%2 = subtensor_insert %0 into %1[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
flow.dispatch.tensor.store %2, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<readwrite:3x9xi32> | |
return | |
} | |
} | |
} | |
// Exported test: runs both dispatches inside a stream fragment and checks the result.
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
// %cst: expected 3x9 result, %cst_0: scalar 0 used as the fill value, %cst_1: 2x3 input.
%cst = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%0 = iree.do_not_optimize(%cst_1) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
// Region arguments: %arg0 = scalar, %arg1 = 2x3 input.
%2 = flow.ex.stream.fragment(%1, %0) : (tensor<i32>, tensor<2x3xi32>) -> tensor<3x9xi32> = | |
(%arg0: tensor<i32>, %arg1: tensor<2x3xi32>) -> tensor<3x9xi32> { | |
%c9 = constant 9 : index | |
%c3 = constant 3 : index | |
%c1 = constant 1 : index | |
// Fill dispatch, then insert dispatch whose result is written as `-> %3`.
%3 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%arg0) : (tensor<i32>) -> tensor<3x9xi32> | |
%4 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%arg1, %3) : (tensor<2x3xi32>, tensor<3x9xi32>) -> %3 | |
flow.return %4 : tensor<3x9xi32> | |
} | |
check.expect_eq(%2, %cst) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Whole-module view (after a Canonicalizer run, per the dump header above):
// two private flow.executables and the exported @pad_test function.
module { | |
// Executable 0: fills a 3x9 i32 tensor with the scalar loaded from %arg0.
flow.executable @pad_test_dispatch_0 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_0 attributes {signature = (tensor<i32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:i32>, %arg1: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
// 3x9 tensor that the fill below writes into.
%0 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
// Load the rank-0 input and extract its single i32 element.
%1 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%2 = tensor.extract %1[] : tensor<i32> | |
// Fill with the scalar and store to the write-only output %arg1.
%3 = linalg.fill(%0, %2) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
flow.dispatch.tensor.store %3, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
} | |
} | |
// Executable 1: inserts the 2x3 input into the 3x9 tensor held by the
// read-write argument %arg1 and stores the result back to %arg1.
flow.executable @pad_test_dispatch_1 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_1 attributes {signature = (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_1(%arg0: !flow.dispatch.tensor<readonly:2x3xi32>, %arg1: !flow.dispatch.tensor<readwrite:3x9xi32>) { | |
%0 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%1 = flow.dispatch.tensor.load %arg1 : !flow.dispatch.tensor<readwrite:3x9xi32> -> tensor<3x9xi32> | |
// Bracket groups are offsets [0, 1], sizes [2, 3], strides [1, 1].
%2 = subtensor_insert %0 into %1[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
flow.dispatch.tensor.store %2, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<readwrite:3x9xi32> | |
return | |
} | |
} | |
} | |
// Exported test: runs both dispatches inside a stream fragment and checks the result.
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
// %cst: expected 3x9 result, %cst_0: scalar 0 used as the fill value, %cst_1: 2x3 input.
%cst = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%0 = iree.do_not_optimize(%cst_1) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
// Region arguments: %arg0 = scalar, %arg1 = 2x3 input.
%2 = flow.ex.stream.fragment(%1, %0) : (tensor<i32>, tensor<2x3xi32>) -> tensor<3x9xi32> = | |
(%arg0: tensor<i32>, %arg1: tensor<2x3xi32>) -> tensor<3x9xi32> { | |
%c9 = constant 9 : index | |
%c3 = constant 3 : index | |
%c1 = constant 1 : index | |
// Fill dispatch, then insert dispatch whose result is written as `-> %3`.
%3 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%arg0) : (tensor<i32>) -> tensor<3x9xi32> | |
%4 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%arg1, %3) : (tensor<2x3xi32>, tensor<3x9xi32>) -> %3 | |
flow.return %4 : tensor<3x9xi32> | |
} | |
check.expect_eq(%2, %cst) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After SymbolDCE *** | |
// Whole-module view (after SymbolDCE, per the dump header above): two private
// flow.executables and the exported @pad_test function. This is the last
// dump in this part of the file that still shows flow.executable ops; the
// following dump shows hal.executable ops instead.
module { | |
// Executable 0: fills a 3x9 i32 tensor with the scalar loaded from %arg0.
flow.executable @pad_test_dispatch_0 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_0 attributes {signature = (tensor<i32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:i32>, %arg1: !flow.dispatch.tensor<writeonly:3x9xi32>) { | |
// 3x9 tensor that the fill below writes into.
%0 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
// Load the rank-0 input and extract its single i32 element.
%1 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%2 = tensor.extract %1[] : tensor<i32> | |
// Fill with the scalar and store to the write-only output %arg1.
%3 = linalg.fill(%0, %2) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
flow.dispatch.tensor.store %3, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
} | |
} | |
// Executable 1: inserts the 2x3 input into the 3x9 tensor held by the
// read-write argument %arg1 and stores the result back to %arg1.
flow.executable @pad_test_dispatch_1 attributes {sym_visibility = "private"} { | |
flow.dispatch.entry @pad_test_dispatch_1 attributes {signature = (tensor<2x3xi32>, tensor<3x9xi32>) -> tensor<3x9xi32>, workgroup_rank = 3 : index} | |
module { | |
func @pad_test_dispatch_1(%arg0: !flow.dispatch.tensor<readonly:2x3xi32>, %arg1: !flow.dispatch.tensor<readwrite:3x9xi32>) { | |
%0 = flow.dispatch.tensor.load %arg0 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%1 = flow.dispatch.tensor.load %arg1 : !flow.dispatch.tensor<readwrite:3x9xi32> -> tensor<3x9xi32> | |
// Bracket groups are offsets [0, 1], sizes [2, 3], strides [1, 1].
%2 = subtensor_insert %0 into %1[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
flow.dispatch.tensor.store %2, %arg1 : tensor<3x9xi32> -> !flow.dispatch.tensor<readwrite:3x9xi32> | |
return | |
} | |
} | |
} | |
// Exported test: runs both dispatches inside a stream fragment and checks the result.
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
// %cst: expected 3x9 result, %cst_0: scalar 0 used as the fill value, %cst_1: 2x3 input.
%cst = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%0 = iree.do_not_optimize(%cst_1) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
// Region arguments: %arg0 = scalar, %arg1 = 2x3 input.
%2 = flow.ex.stream.fragment(%1, %0) : (tensor<i32>, tensor<2x3xi32>) -> tensor<3x9xi32> = | |
(%arg0: tensor<i32>, %arg1: tensor<2x3xi32>) -> tensor<3x9xi32> { | |
%c9 = constant 9 : index | |
%c3 = constant 3 : index | |
%c1 = constant 1 : index | |
// Fill dispatch, then insert dispatch whose result is written as `-> %3`.
%3 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%arg0) : (tensor<i32>) -> tensor<3x9xi32> | |
%4 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%arg1, %3) : (tensor<2x3xi32>, tensor<3x9xi32>) -> %3 | |
flow.return %4 : tensor<3x9xi32> | |
} | |
check.expect_eq(%2, %cst) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::MaterializeInterfacesPass *** | |
module { | |
// HAL form of dispatch 0 (after MaterializeInterfacesPass, per the dump
// header above). The entry function takes no arguments; it reaches its
// scalar input and 3x9 output through the @legacy_io interface bindings.
hal.executable @pad_test_dispatch_0 attributes {sym_visibility = "private"} { | |
// Two StorageBuffer bindings in set 0: @ro0 (Read) and @wo1 (Write|Discard).
hal.interface @legacy_io { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
// Target variant named @vulkan_spirv with filter "vulkan*".
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
// Entry point signature now lists both dispatch tensors as inputs and returns nothing.
hal.executable.entry_point @pad_test_dispatch_0 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:i32>, !flow.dispatch.tensor<writeonly:3x9xi32>) -> ()} | |
// Inner module carries the SPIR-V target environment (v1.3, ARM:IntegratedGPU, resource limits) as an attribute.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() { | |
// Both bindings are taken with operand %c0 (0).
// NOTE(review): presumably a byte offset into the binding - confirm.
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : !flow.dispatch.tensor<readonly:i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : !flow.dispatch.tensor<writeonly:3x9xi32> | |
// 3x9 tensor that the fill below writes into.
%2 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
// Load the rank-0 input and extract its single i32 element.
%3 = flow.dispatch.tensor.load %0 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%4 = tensor.extract %3[] : tensor<i32> | |
// Fill with the scalar and store to the write-only binding.
%5 = linalg.fill(%2, %4) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
flow.dispatch.tensor.store %5, %1 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
// Private copy of the interface nested inside the target module; same bindings as above.
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
} | |
} | |
// HAL form of dispatch 1 (after MaterializeInterfacesPass, per the dump
// header above). The entry function takes no arguments; it reads the 2x3
// input from @ro0 and updates the 3x9 tensor behind the read-write @rw1.
hal.executable @pad_test_dispatch_1 attributes {sym_visibility = "private"} { | |
// Two StorageBuffer bindings in set 0: @ro0 (Read) and @rw1 (Read|Write).
hal.interface @legacy_io { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
// Target variant named @vulkan_spirv with filter "vulkan*".
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
// Entry point signature now lists both dispatch tensors as inputs and returns nothing.
hal.executable.entry_point @pad_test_dispatch_1 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:2x3xi32>, !flow.dispatch.tensor<readwrite:3x9xi32>) -> ()} | |
// Inner module carries the SPIR-V target environment (v1.3, ARM:IntegratedGPU, resource limits) as an attribute.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_1() { | |
// Both bindings are taken with operand %c0 (0).
// NOTE(review): presumably a byte offset into the binding - confirm.
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : !flow.dispatch.tensor<readonly:2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : !flow.dispatch.tensor<readwrite:3x9xi32> | |
// Load the 2x3 source and the current 3x9 contents of the read-write binding.
%2 = flow.dispatch.tensor.load %0 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%3 = flow.dispatch.tensor.load %1 : !flow.dispatch.tensor<readwrite:3x9xi32> -> tensor<3x9xi32> | |
// Bracket groups are offsets [0, 1], sizes [2, 3], strides [1, 1].
%4 = subtensor_insert %2 into %3[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
// Write the updated tensor back to the same read-write binding.
flow.dispatch.tensor.store %4, %1 : tensor<3x9xi32> -> !flow.dispatch.tensor<readwrite:3x9xi32> | |
return | |
} | |
// Private copy of the interface nested inside the target module; same bindings as above.
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
} | |
} | |
} | |
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%cst = constant dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%cst_0 = constant dense<0> : tensor<i32> | |
%cst_1 = constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%0 = iree.do_not_optimize(%cst_1) : tensor<2x3xi32> | |
%1 = iree.do_not_optimize(%cst_0) : tensor<i32> | |
%2 = flow.ex.stream.fragment(%1, %0) : (tensor<i32>, tensor<2x3xi32>) -> tensor<3x9xi32> = | |
(%arg0: tensor<i32>, %arg1: tensor<2x3xi32>) -> tensor<3x9xi32> { | |
%c9 = constant 9 : index | |
%c3 = constant 3 : index | |
%c1 = constant 1 : index | |
%3 = flow.dispatch @pad_test_dispatch_0::@pad_test_dispatch_0[%c9, %c3, %c1](%arg0) : (tensor<i32>) -> tensor<3x9xi32> | |
%4 = flow.dispatch @pad_test_dispatch_1::@pad_test_dispatch_1[%c9, %c3, %c1](%arg1, %3) : (tensor<2x3xi32>, tensor<3x9xi32>) -> %3 | |
flow.return %4 : tensor<3x9xi32> | |
} | |
check.expect_eq(%2, %cst) : tensor<3x9xi32> | |
return | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::PropagateConstantWorkgroupInfoPass *** | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
hal.executable.entry_point @pad_test_dispatch_0 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:i32>, !flow.dispatch.tensor<writeonly:3x9xi32>) -> ()} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : !flow.dispatch.tensor<readonly:i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : !flow.dispatch.tensor<writeonly:3x9xi32> | |
%2 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
%3 = flow.dispatch.tensor.load %0 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%4 = tensor.extract %3[] : tensor<i32> | |
%5 = linalg.fill(%2, %4) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
flow.dispatch.tensor.store %5, %1 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
func @pad_test_dispatch_0() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : !flow.dispatch.tensor<readonly:i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : !flow.dispatch.tensor<writeonly:3x9xi32> | |
%2 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
%3 = flow.dispatch.tensor.load %0 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%4 = tensor.extract %3[] : tensor<i32> | |
%5 = linalg.fill(%2, %4) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
flow.dispatch.tensor.store %5, %1 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
// *** IR Dump After Inliner *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : !flow.dispatch.tensor<readonly:i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : !flow.dispatch.tensor<writeonly:3x9xi32> | |
%2 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
%3 = flow.dispatch.tensor.load %0 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%4 = tensor.extract %3[] : tensor<i32> | |
%5 = linalg.fill(%2, %4) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
flow.dispatch.tensor.store %5, %1 : tensor<3x9xi32> -> !flow.dispatch.tensor<writeonly:3x9xi32> | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::LinalgBufferizePass *** | |
func @pad_test_dispatch_0() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : !flow.dispatch.tensor<readonly:i32> | |
%2 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%3 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : !flow.dispatch.tensor<writeonly:3x9xi32> | |
%4 = linalg.init_tensor [3, 9] : tensor<3x9xi32> | |
%5 = flow.dispatch.tensor.load %1 : !flow.dispatch.tensor<readonly:i32> -> tensor<i32> | |
%6 = load %0[] : memref<i32> | |
%7 = tensor.extract %5[] : tensor<i32> | |
linalg.fill(%2, %6) : memref<3x9xi32>, i32 | |
%8 = linalg.fill(%4, %7) : tensor<3x9xi32>, i32 -> tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After Canonicalizer *** | |
func @pad_test_dispatch_0() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : !flow.dispatch.tensor<readonly:i32> | |
%2 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%3 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : !flow.dispatch.tensor<writeonly:3x9xi32> | |
%4 = load %0[] : memref<i32> | |
linalg.fill(%2, %4) : memref<3x9xi32>, i32 | |
return | |
} | |
// *** IR Dump After CSE *** | |
func @pad_test_dispatch_0() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : !flow.dispatch.tensor<readonly:i32> | |
%2 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%3 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : !flow.dispatch.tensor<writeonly:3x9xi32> | |
%4 = load %0[] : memref<i32> | |
linalg.fill(%2, %4) : memref<3x9xi32>, i32 | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::RemoveDeadMemAllocsPass *** | |
func @pad_test_dispatch_0() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
linalg.fill(%1, %2) : memref<3x9xi32>, i32 | |
return | |
} | |
// *** IR Dump After (anonymous namespace)::CopyRemovalPass *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
linalg.fill(%1, %2) : memref<3x9xi32>, i32 | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
linalg.fill(%1, %2) : memref<3x9xi32>, i32 | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After CSE *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
linalg.fill(%1, %2) : memref<3x9xi32>, i32 | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::ConcretizeTileAmongWorkgroupsPass *** | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
hal.executable.entry_point @pad_test_dispatch_0 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:i32>, !flow.dispatch.tensor<writeonly:3x9xi32>) -> ()} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
linalg.fill(%1, %2) : memref<3x9xi32>, i32 | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::TileAndVectorizeInOneWorkgroupPass *** | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
hal.executable.entry_point @pad_test_dispatch_0 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:i32>, !flow.dispatch.tensor<writeonly:3x9xi32>) -> ()} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
linalg.fill(%1, %2) : memref<3x9xi32>, i32 | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
linalg.fill(%1, %2) : memref<3x9xi32>, i32 | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::ConvertToGPUPass *** | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
hal.executable.entry_point @pad_test_dispatch_0 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:i32>, !flow.dispatch.tensor<writeonly:3x9xi32>) -> ()} { | |
^bb0(%arg0: index, %arg1: index, %arg2: index): // no predecessors | |
%c1 = constant 1 : index | |
%0 = affine.apply affine_map<()[s0, s1, s2] -> (((s0 * s1) * s2) ceildiv 32)>()[%arg0, %arg1, %arg2] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
%c0_0 = constant 0 : index | |
%3 = dim %1, %c0_0 : memref<3x9xi32> | |
%c1 = constant 1 : index | |
%4 = dim %1, %c1 : memref<3x9xi32> | |
%c0_1 = constant 0 : index | |
%c1_2 = constant 1 : index | |
%c1_3 = constant 1 : index | |
%5 = subi %4, %c0_1 : index | |
%6 = divi_signed %5, %c1_2 : index | |
%7 = muli %c1_3, %6 : index | |
%8 = subi %3, %c0_1 : index | |
%9 = divi_signed %8, %c1_2 : index | |
%10 = muli %7, %9 : index | |
%c0_4 = constant 0 : index | |
%c1_5 = constant 1 : index | |
%11 = "gpu.grid_dim"() {dimension = "x"} : () -> index | |
%12 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%13 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%14 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%15 = muli %12, %13 : index | |
%16 = addi %15, %14 : index | |
%17 = muli %13, %11 : index | |
%18 = muli %16, %c1_5 : index | |
%19 = addi %c0_4, %18 : index | |
%20 = cmpi slt, %19, %10 : index | |
scf.if %20 { | |
%21 = divi_signed %19, %7 : index | |
%22 = affine.apply affine_map<(d0) -> (d0)>(%21) | |
%23 = affine.apply affine_map<(d0) -> (d0)>(%21) | |
%24 = remi_signed %19, %7 : index | |
%25 = divi_signed %24, %c1_3 : index | |
%26 = affine.apply affine_map<(d0) -> (d0)>(%25) | |
%27 = affine.apply affine_map<(d0) -> (d0)>(%25) | |
%28 = remi_signed %24, %c1_3 : index | |
%29 = affine.apply affine_map<(d0) -> (d0)>(%23) | |
%30 = affine.apply affine_map<(d0) -> (d0)>(%27) | |
%31 = load %1[%29, %30] : memref<3x9xi32> | |
%32 = affine.apply affine_map<(d0) -> (d0)>(%23) | |
%33 = affine.apply affine_map<(d0) -> (d0)>(%27) | |
store %2, %1[%32, %33] : memref<3x9xi32> | |
} | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::ConvertVectorToGPUPass *** | |
func @pad_test_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c27 = constant 27 : index | |
%c9 = constant 9 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
%8 = cmpi slt, %7, %c27 : index | |
scf.if %8 { | |
%9 = divi_signed %7, %c9 : index | |
%10 = remi_signed %7, %c9 : index | |
store %2, %1[%9, %10] : memref<3x9xi32> | |
} | |
return | |
} | |
// *** IR Dump After ConvertAffineToStandard *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c27 = constant 27 : index | |
%c9 = constant 9 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
%8 = cmpi slt, %7, %c27 : index | |
scf.if %8 { | |
%9 = divi_signed %7, %c9 : index | |
%10 = remi_signed %7, %c9 : index | |
store %2, %1[%9, %10] : memref<3x9xi32> | |
} | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c27 = constant 27 : index | |
%c9 = constant 9 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
%8 = cmpi slt, %7, %c27 : index | |
scf.if %8 { | |
%9 = divi_signed %7, %c9 : index | |
%10 = remi_signed %7, %c9 : index | |
store %2, %1[%9, %10] : memref<3x9xi32> | |
} | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After CSE *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c27 = constant 27 : index | |
%c9 = constant 9 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
%8 = cmpi slt, %7, %c27 : index | |
scf.if %8 { | |
%9 = divi_signed %7, %c9 : index | |
%10 = remi_signed %7, %c9 : index | |
store %2, %1[%9, %10] : memref<3x9xi32> | |
} | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::ResolveShapeOpsPass *** | |
func @pad_test_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c27 = constant 27 : index | |
%c9 = constant 9 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
%8 = cmpi slt, %7, %c27 : index | |
scf.if %8 { | |
%9 = divi_signed %7, %c9 : index | |
%10 = remi_signed %7, %c9 : index | |
store %2, %1[%9, %10] : memref<3x9xi32> | |
} | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::VectorTransferOptimizationPass *** | |
func @pad_test_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c27 = constant 27 : index | |
%c9 = constant 9 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
%8 = cmpi slt, %7, %c27 : index | |
scf.if %8 { | |
%9 = divi_signed %7, %c9 : index | |
%10 = remi_signed %7, %c9 : index | |
store %2, %1[%9, %10] : memref<3x9xi32> | |
} | |
return | |
} | |
// *** IR Dump After LegalizeStandardForSPIRV *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c27 = constant 27 : index | |
%c9 = constant 9 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
%8 = cmpi slt, %7, %c27 : index | |
scf.if %8 { | |
%9 = divi_signed %7, %c9 : index | |
%10 = remi_signed %7, %c9 : index | |
store %2, %1[%9, %10] : memref<3x9xi32> | |
} | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c27 = constant 27 : index | |
%c9 = constant 9 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
%8 = cmpi slt, %7, %c27 : index | |
scf.if %8 { | |
%9 = divi_signed %7, %c9 : index | |
%10 = remi_signed %7, %c9 : index | |
store %2, %1[%9, %10] : memref<3x9xi32> | |
} | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After CSE *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c27 = constant 27 : index | |
%c9 = constant 9 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
%8 = cmpi slt, %7, %c27 : index | |
scf.if %8 { | |
%9 = divi_signed %7, %c9 : index | |
%10 = remi_signed %7, %c9 : index | |
store %2, %1[%9, %10] : memref<3x9xi32> | |
} | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::VectorizeMemRefPass *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c27 = constant 27 : index | |
%c9 = constant 9 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
%8 = cmpi slt, %7, %c27 : index | |
scf.if %8 { | |
%9 = divi_signed %7, %c9 : index | |
%10 = remi_signed %7, %c9 : index | |
store %2, %1[%9, %10] : memref<3x9xi32> | |
} | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::ForOpCanonicalizationPass *** | |
func @pad_test_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c27 = constant 27 : index | |
%c9 = constant 9 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
%8 = cmpi slt, %7, %c27 : index | |
scf.if %8 { | |
%9 = divi_signed %7, %c9 : index | |
%10 = remi_signed %7, %c9 : index | |
store %2, %1[%9, %10] : memref<3x9xi32> | |
} | |
return | |
} | |
// *** IR Dump After Canonicalizer *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c27 = constant 27 : index | |
%c9 = constant 9 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
%8 = cmpi slt, %7, %c27 : index | |
scf.if %8 { | |
%9 = divi_signed %7, %c9 : index | |
%10 = remi_signed %7, %c9 : index | |
store %2, %1[%9, %10] : memref<3x9xi32> | |
} | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After CSE *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_0() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c27 = constant 27 : index | |
%c9 = constant 9 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<i32> | |
%1 = hal.interface.binding.subspan @legacy_io::@wo1[%c0] : memref<3x9xi32> | |
%2 = load %0[] : memref<i32> | |
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
%8 = cmpi slt, %7, %c27 : index | |
scf.if %8 { | |
%9 = divi_signed %7, %c9 : index | |
%10 = remi_signed %7, %c9 : index | |
store %2, %1[%9, %10] : memref<3x9xi32> | |
} | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::ConvertToSPIRVPass *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
spv.module Logical GLSL450 { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__resource_var_183968000__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_183752544__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_0() "None" attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%0 = spv.Constant 0 : i32 | |
%1 = spv.Constant 27 : i32 | |
%2 = spv.Constant 9 : i32 | |
%3 = spv.mlir.addressof @__resource_var_183752544__ : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
%4 = spv.mlir.addressof @__resource_var_183968000__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
%5 = spv.Constant 0 : i32 | |
%6 = spv.AccessChain %3[%5, %5] : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%7 = spv.Load "StorageBuffer" %6 : i32 | |
%8 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%9 = spv.Load "Input" %8 : vector<3xi32> | |
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32> | |
%11 = spv.Constant 32 : i32 | |
%12 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%13 = spv.Load "Input" %12 : vector<3xi32> | |
%14 = spv.CompositeExtract %13[0 : i32] : vector<3xi32> | |
%15 = spv.IMul %10, %11 : i32 | |
%16 = spv.IAdd %15, %14 : i32 | |
%17 = spv.SLessThan %16, %1 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %17, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%18 = spv.SDiv %16, %2 : i32 | |
%19 = spv.GLSL.SAbs %16 : i32 | |
%20 = spv.GLSL.SAbs %2 : i32 | |
%21 = spv.UMod %19, %20 : i32 | |
%22 = spv.IEqual %16, %19 : i32 | |
%23 = spv.SNegate %21 : i32 | |
%24 = spv.Select %22, %21, %23 : i1, i32 | |
%25 = spv.Constant 0 : i32 | |
%26 = spv.Constant 0 : i32 | |
%27 = spv.Constant 9 : i32 | |
%28 = spv.IMul %27, %18 : i32 | |
%29 = spv.IAdd %26, %28 : i32 | |
%30 = spv.Constant 1 : i32 | |
%31 = spv.IMul %30, %24 : i32 | |
%32 = spv.IAdd %29, %31 : i32 | |
%33 = spv.AccessChain %4[%25, %32] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %33, %7 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
// *** IR Dump After SPIRVLowerABIAttributes *** | |
spv.module Logical GLSL450 { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__resource_var_183968000__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_183752544__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_0() "None" { | |
%0 = spv.Constant 0 : i32 | |
%1 = spv.Constant 27 : i32 | |
%2 = spv.Constant 9 : i32 | |
%3 = spv.mlir.addressof @__resource_var_183752544__ : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
%4 = spv.mlir.addressof @__resource_var_183968000__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
%5 = spv.Constant 0 : i32 | |
%6 = spv.AccessChain %3[%5, %5] : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%7 = spv.Load "StorageBuffer" %6 : i32 | |
%8 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%9 = spv.Load "Input" %8 : vector<3xi32> | |
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32> | |
%11 = spv.Constant 32 : i32 | |
%12 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%13 = spv.Load "Input" %12 : vector<3xi32> | |
%14 = spv.CompositeExtract %13[0 : i32] : vector<3xi32> | |
%15 = spv.IMul %10, %11 : i32 | |
%16 = spv.IAdd %15, %14 : i32 | |
%17 = spv.SLessThan %16, %1 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %17, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%18 = spv.SDiv %16, %2 : i32 | |
%19 = spv.GLSL.SAbs %16 : i32 | |
%20 = spv.GLSL.SAbs %2 : i32 | |
%21 = spv.UMod %19, %20 : i32 | |
%22 = spv.IEqual %16, %19 : i32 | |
%23 = spv.SNegate %21 : i32 | |
%24 = spv.Select %22, %21, %23 : i1, i32 | |
%25 = spv.Constant 0 : i32 | |
%26 = spv.Constant 0 : i32 | |
%27 = spv.Constant 9 : i32 | |
%28 = spv.IMul %27, %18 : i32 | |
%29 = spv.IAdd %26, %28 : i32 | |
%30 = spv.Constant 1 : i32 | |
%31 = spv.IMul %30, %24 : i32 | |
%32 = spv.IAdd %29, %31 : i32 | |
%33 = spv.AccessChain %4[%25, %32] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %33, %7 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
spv.EntryPoint "GLCompute" @pad_test_dispatch_0, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
spv.ExecutionMode @pad_test_dispatch_0 "LocalSize", 32, 1, 1 | |
} | |
// *** IR Dump After Canonicalizer *** | |
spv.module Logical GLSL450 { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__resource_var_183968000__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_183752544__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_0() "None" { | |
%0 = spv.Constant 27 : i32 | |
%1 = spv.Constant 32 : i32 | |
%2 = spv.Constant 0 : i32 | |
%3 = spv.Constant 9 : i32 | |
%4 = spv.mlir.addressof @__resource_var_183752544__ : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
%5 = spv.mlir.addressof @__resource_var_183968000__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
%6 = spv.AccessChain %4[%2, %2] : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%7 = spv.Load "StorageBuffer" %6 : i32 | |
%8 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%9 = spv.Load "Input" %8 : vector<3xi32> | |
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32> | |
%11 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%12 = spv.Load "Input" %11 : vector<3xi32> | |
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32> | |
%14 = spv.IMul %10, %1 : i32 | |
%15 = spv.IAdd %14, %13 : i32 | |
%16 = spv.SLessThan %15, %0 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %16, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%17 = spv.SDiv %15, %3 : i32 | |
%18 = spv.GLSL.SAbs %15 : i32 | |
%19 = spv.GLSL.SAbs %3 : i32 | |
%20 = spv.UMod %18, %19 : i32 | |
%21 = spv.IEqual %15, %18 : i32 | |
%22 = spv.SNegate %20 : i32 | |
%23 = spv.Select %21, %20, %22 : i1, i32 | |
%24 = spv.IMul %17, %3 : i32 | |
%25 = spv.IAdd %24, %23 : i32 | |
%26 = spv.AccessChain %5[%2, %25] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %26, %7 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
spv.EntryPoint "GLCompute" @pad_test_dispatch_0, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
spv.ExecutionMode @pad_test_dispatch_0 "LocalSize", 32, 1, 1 | |
} | |
// *** IR Dump After CSE *** | |
spv.module Logical GLSL450 { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__resource_var_183968000__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_183752544__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_0() "None" { | |
%0 = spv.Constant 27 : i32 | |
%1 = spv.Constant 32 : i32 | |
%2 = spv.Constant 0 : i32 | |
%3 = spv.Constant 9 : i32 | |
%4 = spv.mlir.addressof @__resource_var_183752544__ : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
%5 = spv.mlir.addressof @__resource_var_183968000__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
%6 = spv.AccessChain %4[%2, %2] : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%7 = spv.Load "StorageBuffer" %6 : i32 | |
%8 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%9 = spv.Load "Input" %8 : vector<3xi32> | |
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32> | |
%11 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%12 = spv.Load "Input" %11 : vector<3xi32> | |
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32> | |
%14 = spv.IMul %10, %1 : i32 | |
%15 = spv.IAdd %14, %13 : i32 | |
%16 = spv.SLessThan %15, %0 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %16, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%17 = spv.SDiv %15, %3 : i32 | |
%18 = spv.GLSL.SAbs %15 : i32 | |
%19 = spv.GLSL.SAbs %3 : i32 | |
%20 = spv.UMod %18, %19 : i32 | |
%21 = spv.IEqual %15, %18 : i32 | |
%22 = spv.SNegate %20 : i32 | |
%23 = spv.Select %21, %20, %22 : i1, i32 | |
%24 = spv.IMul %17, %3 : i32 | |
%25 = spv.IAdd %24, %23 : i32 | |
%26 = spv.AccessChain %5[%2, %25] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %26, %7 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
spv.EntryPoint "GLCompute" @pad_test_dispatch_0, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
spv.ExecutionMode @pad_test_dispatch_0 "LocalSize", 32, 1, 1 | |
} | |
// *** IR Dump After SPIRVUpdateVCE *** | |
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__resource_var_183968000__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_183752544__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_0() "None" { | |
%0 = spv.Constant 27 : i32 | |
%1 = spv.Constant 32 : i32 | |
%2 = spv.Constant 0 : i32 | |
%3 = spv.Constant 9 : i32 | |
%4 = spv.mlir.addressof @__resource_var_183752544__ : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
%5 = spv.mlir.addressof @__resource_var_183968000__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
%6 = spv.AccessChain %4[%2, %2] : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%7 = spv.Load "StorageBuffer" %6 : i32 | |
%8 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%9 = spv.Load "Input" %8 : vector<3xi32> | |
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32> | |
%11 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%12 = spv.Load "Input" %11 : vector<3xi32> | |
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32> | |
%14 = spv.IMul %10, %1 : i32 | |
%15 = spv.IAdd %14, %13 : i32 | |
%16 = spv.SLessThan %15, %0 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %16, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%17 = spv.SDiv %15, %3 : i32 | |
%18 = spv.GLSL.SAbs %15 : i32 | |
%19 = spv.GLSL.SAbs %3 : i32 | |
%20 = spv.UMod %18, %19 : i32 | |
%21 = spv.IEqual %15, %18 : i32 | |
%22 = spv.SNegate %20 : i32 | |
%23 = spv.Select %21, %20, %22 : i1, i32 | |
%24 = spv.IMul %17, %3 : i32 | |
%25 = spv.IAdd %24, %23 : i32 | |
%26 = spv.AccessChain %5[%2, %25] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %26, %7 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
spv.EntryPoint "GLCompute" @pad_test_dispatch_0, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
spv.ExecutionMode @pad_test_dispatch_0 "LocalSize", 32, 1, 1 | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::TranslateExecutablesPass *** | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
hal.executable.entry_point @pad_test_dispatch_0 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:i32>, !flow.dispatch.tensor<writeonly:3x9xi32>) -> ()} { | |
^bb0(%arg0: index, %arg1: index, %arg2: index): // no predecessors | |
%c1 = constant 1 : index | |
%0 = affine.apply affine_map<()[s0, s1, s2] -> (((s0 * s1) * s2) ceildiv 32)>()[%arg0, %arg1, %arg2] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__resource_var_183968000__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_183752544__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_0() "None" { | |
%0 = spv.Constant 27 : i32 | |
%1 = spv.Constant 32 : i32 | |
%2 = spv.Constant 0 : i32 | |
%3 = spv.Constant 9 : i32 | |
%4 = spv.mlir.addressof @__resource_var_183752544__ : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
%5 = spv.mlir.addressof @__resource_var_183968000__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
%6 = spv.AccessChain %4[%2, %2] : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%7 = spv.Load "StorageBuffer" %6 : i32 | |
%8 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%9 = spv.Load "Input" %8 : vector<3xi32> | |
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32> | |
%11 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%12 = spv.Load "Input" %11 : vector<3xi32> | |
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32> | |
%14 = spv.IMul %10, %1 : i32 | |
%15 = spv.IAdd %14, %13 : i32 | |
%16 = spv.SLessThan %15, %0 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %16, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%17 = spv.SDiv %15, %3 : i32 | |
%18 = spv.GLSL.SAbs %15 : i32 | |
%19 = spv.GLSL.SAbs %3 : i32 | |
%20 = spv.UMod %18, %19 : i32 | |
%21 = spv.IEqual %15, %18 : i32 | |
%22 = spv.SNegate %20 : i32 | |
%23 = spv.Select %21, %20, %22 : i1, i32 | |
%24 = spv.IMul %17, %3 : i32 | |
%25 = spv.IAdd %24, %23 : i32 | |
%26 = spv.AccessChain %5[%2, %25] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %26, %7 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
spv.EntryPoint "GLCompute" @pad_test_dispatch_0, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
spv.ExecutionMode @pad_test_dispatch_0 "LocalSize", 32, 1, 1 | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::PropagateConstantWorkgroupInfoPass *** | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
hal.executable.entry_point @pad_test_dispatch_1 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:2x3xi32>, !flow.dispatch.tensor<readwrite:3x9xi32>) -> ()} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_1() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : !flow.dispatch.tensor<readonly:2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : !flow.dispatch.tensor<readwrite:3x9xi32> | |
%2 = flow.dispatch.tensor.load %0 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%3 = flow.dispatch.tensor.load %1 : !flow.dispatch.tensor<readwrite:3x9xi32> -> tensor<3x9xi32> | |
%4 = subtensor_insert %2 into %3[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
flow.dispatch.tensor.store %4, %1 : tensor<3x9xi32> -> !flow.dispatch.tensor<readwrite:3x9xi32> | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
func @pad_test_dispatch_1() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : !flow.dispatch.tensor<readonly:2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : !flow.dispatch.tensor<readwrite:3x9xi32> | |
%2 = flow.dispatch.tensor.load %0 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%3 = flow.dispatch.tensor.load %1 : !flow.dispatch.tensor<readwrite:3x9xi32> -> tensor<3x9xi32> | |
%4 = subtensor_insert %2 into %3[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
flow.dispatch.tensor.store %4, %1 : tensor<3x9xi32> -> !flow.dispatch.tensor<readwrite:3x9xi32> | |
return | |
} | |
// *** IR Dump After Inliner *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_1() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : !flow.dispatch.tensor<readonly:2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : !flow.dispatch.tensor<readwrite:3x9xi32> | |
%2 = flow.dispatch.tensor.load %0 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%3 = flow.dispatch.tensor.load %1 : !flow.dispatch.tensor<readwrite:3x9xi32> -> tensor<3x9xi32> | |
%4 = subtensor_insert %2 into %3[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
flow.dispatch.tensor.store %4, %1 : tensor<3x9xi32> -> !flow.dispatch.tensor<readwrite:3x9xi32> | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::LinalgBufferizePass *** | |
func @pad_test_dispatch_1() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : !flow.dispatch.tensor<readonly:2x3xi32> | |
%2 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : memref<3x9xi32> | |
%3 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : !flow.dispatch.tensor<readwrite:3x9xi32> | |
%4 = flow.dispatch.tensor.load %1 : !flow.dispatch.tensor<readonly:2x3xi32> -> tensor<2x3xi32> | |
%5 = flow.dispatch.tensor.load %3 : !flow.dispatch.tensor<readwrite:3x9xi32> -> tensor<3x9xi32> | |
%c0_0 = constant 0 : index | |
%c3 = constant 3 : index | |
%c1 = constant 1 : index | |
%c9 = constant 9 : index | |
%6 = subview %2[0, 1] [2, 3] [1, 1] : memref<3x9xi32> to memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
linalg.copy(%0, %6) : memref<2x3xi32>, memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
%7 = subtensor_insert %4 into %5[0, 1] [2, 3] [1, 1] : tensor<2x3xi32> into tensor<3x9xi32> | |
return | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Dispatch function reduced to the buffer-level copy: a 2x3 input buffer is
// copied into a 2x3 window of the 3x9 output buffer.
func @pad_test_dispatch_1() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
// NOTE(review): the dispatch-tensor subspans %1 and %3 have no users in this
// function; only the memref subspans %0 and %2 are consumed.
%1 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : !flow.dispatch.tensor<readonly:2x3xi32> | |
%2 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : memref<3x9xi32> | |
%3 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : !flow.dispatch.tensor<readwrite:3x9xi32> | |
// Window at offsets [0, 1], sizes [2, 3], strides [1, 1] of the 3x9 buffer.
%4 = subview %2[0, 1] [2, 3] [1, 1] : memref<3x9xi32> to memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
// Copy the input buffer into the window.
linalg.copy(%0, %4) : memref<2x3xi32>, memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
return | |
} | |
// *** IR Dump After CSE *** | |
// Dispatch function: copies the 2x3 buffer bound at @ro0 into a strided 2x3
// view of the 3x9 buffer bound at @rw1.
func @pad_test_dispatch_1() { | |
%c0 = constant 0 : index | |
// Each binding appears with two result types (memref and dispatch tensor);
// the dispatch-tensor values %1 and %3 are not used below.
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : !flow.dispatch.tensor<readonly:2x3xi32> | |
%2 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : memref<3x9xi32> | |
%3 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : !flow.dispatch.tensor<readwrite:3x9xi32> | |
// Destination view: rows 0..1, columns 1..3 of the 3x9 buffer (offsets
// [0, 1], sizes [2, 3], unit strides).
%4 = subview %2[0, 1] [2, 3] [1, 1] : memref<3x9xi32> to memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
linalg.copy(%0, %4) : memref<2x3xi32>, memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::RemoveDeadMemAllocsPass *** | |
// Dispatch function with only memref-typed bindings: copy the 2x3 input
// buffer into a 2x3 window of the 3x9 output buffer.
func @pad_test_dispatch_1() { | |
%c0 = constant 0 : index | |
// Source buffer (@ro0) and destination buffer (@rw1), both at offset %c0.
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : memref<3x9xi32> | |
// Window at offsets [0, 1], sizes [2, 3], strides [1, 1]; its layout map is
// d0 * 9 + d1 + 1.
%2 = subview %1[0, 1] [2, 3] [1, 1] : memref<3x9xi32> to memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
linalg.copy(%0, %2) : memref<2x3xi32>, memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
return | |
} | |
// *** IR Dump After (anonymous namespace)::CopyRemovalPass *** | |
// Module holding the dispatch function and its I/O interface. The
// spv.target_env attribute names v1.3, ARM:IntegratedGPU and subgroup_size = 16.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
// Copies the 2x3 @ro0 buffer into a 2x3 window of the 3x9 @rw1 buffer.
func @pad_test_dispatch_1() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : memref<3x9xi32> | |
// Window at offsets [0, 1], sizes [2, 3], strides [1, 1].
%2 = subview %1[0, 1] [2, 3] [1, 1] : memref<3x9xi32> to memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
// The linalg.copy is still present in this snapshot.
linalg.copy(%0, %2) : memref<2x3xi32>, memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
return | |
} | |
// Interface with two StorageBuffer bindings in set 0: binding 0 (@ro0, Read)
// and binding 1 (@rw1, Read|Write).
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Module-level snapshot: one dispatch function plus the @legacy_io interface
// it reads its bindings from.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_1() { | |
%c0 = constant 0 : index | |
// Input (2x3) and output (3x9) buffers.
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : memref<3x9xi32> | |
// 2x3 destination view starting at row 0, column 1 of the output buffer.
%2 = subview %1[0, 1] [2, 3] [1, 1] : memref<3x9xi32> to memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
linalg.copy(%0, %2) : memref<2x3xi32>, memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
return | |
} | |
// Binding table: @ro0 is set 0 / binding 0 (Read), @rw1 is set 0 / binding 1
// (Read|Write); both are StorageBuffer.
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
} | |
// *** IR Dump After CSE *** | |
// Module containing @pad_test_dispatch_1 and the private @legacy_io interface.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
// Body: take a strided 2x3 view of the 3x9 output and copy the input into it.
func @pad_test_dispatch_1() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : memref<3x9xi32> | |
%2 = subview %1[0, 1] [2, 3] [1, 1] : memref<3x9xi32> to memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
linalg.copy(%0, %2) : memref<2x3xi32>, memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
return | |
} | |
// Two StorageBuffer bindings, both in set 0: @ro0 (binding 0, Read) and
// @rw1 (binding 1, Read|Write).
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::ConcretizeTileAmongWorkgroupsPass *** | |
// Executable target @vulkan_spirv (filter "vulkan*") wrapping the entry point
// declaration and the module with the dispatch function.
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
// Entry point: ordinal 0, bound to interface @legacy_io; the signature takes a
// read-only 2x3 and a read-write 3x9 dispatch tensor and returns nothing.
// It has no region in this snapshot.
hal.executable.entry_point @pad_test_dispatch_1 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:2x3xi32>, !flow.dispatch.tensor<readwrite:3x9xi32>) -> ()} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
// The function body is a single linalg.copy into a subview; no loops or GPU
// id ops appear in it.
func @pad_test_dispatch_1() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : memref<3x9xi32> | |
%2 = subview %1[0, 1] [2, 3] [1, 1] : memref<3x9xi32> to memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
linalg.copy(%0, %2) : memref<2x3xi32>, memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::TileAndVectorizeInOneWorkgroupPass *** | |
// Executable target for "vulkan*" backends: entry point declaration followed
// by the module that defines the function of the same name.
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
// Region-less entry point declaration (ordinal 0, interface @legacy_io).
hal.executable.entry_point @pad_test_dispatch_1 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:2x3xi32>, !flow.dispatch.tensor<readwrite:3x9xi32>) -> ()} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_1() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : memref<3x9xi32> | |
// Destination window: offsets [0, 1], sizes [2, 3], strides [1, 1].
%2 = subview %1[0, 1] [2, 3] [1, 1] : memref<3x9xi32> to memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
// The copy is still a single linalg.copy on i32 memrefs in this snapshot; no
// vector-typed ops appear.
linalg.copy(%0, %2) : memref<2x3xi32>, memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Inner module only (no enclosing hal.executable.target is printed in this
// snapshot): the dispatch function and the @legacy_io interface.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_1() { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : memref<3x9xi32> | |
// Copy the 2x3 input into the [0, 1]-offset 2x3 view of the 3x9 output.
%2 = subview %1[0, 1] [2, 3] [1, 1] : memref<3x9xi32> to memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
linalg.copy(%0, %2) : memref<2x3xi32>, memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
return | |
} | |
// StorageBuffer bindings in set 0: @ro0 = binding 0 (Read), @rw1 = binding 1
// (Read|Write).
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::ConvertToGPUPass *** | |
// Executable target in which the copy has been expanded into per-thread
// scalar code: the entry point carries a region returning three index values,
// and the function is annotated with local_size = [32, 1, 1].
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
// Entry point region: returns (ceildiv(arg0 * arg1 * arg2, 32), 1, 1).
// NOTE(review): presumably this is the workgroup count along x/y/z, with 32
// matching the x local size below; confirm against the pass.
hal.executable.entry_point @pad_test_dispatch_1 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:2x3xi32>, !flow.dispatch.tensor<readwrite:3x9xi32>) -> ()} { | |
^bb0(%arg0: index, %arg1: index, %arg2: index): // no predecessors | |
%c1 = constant 1 : index | |
%0 = affine.apply affine_map<()[s0, s1, s2] -> (((s0 * s1) * s2) ceildiv 32)>()[%arg0, %arg1, %arg2] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
// Each invocation handles at most one element: it forms a linear id, checks
// it against the element count of the source, and copies one i32.
func @pad_test_dispatch_1() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : memref<3x9xi32> | |
%2 = subview %1[0, 1] [2, 3] [1, 1] : memref<3x9xi32> to memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
// Extents of the source (%3 = dim 0, %4 = dim 1) and of the destination view
// (%5, %6). %5 and %6 have no uses below.
%c0_0 = constant 0 : index | |
%3 = dim %0, %c0_0 : memref<2x3xi32> | |
%c1 = constant 1 : index | |
%4 = dim %0, %c1 : memref<2x3xi32> | |
%c0_1 = constant 0 : index | |
%5 = dim %2, %c0_1 : memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
%c1_2 = constant 1 : index | |
%6 = dim %2, %c1_2 : memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
%c0_3 = constant 0 : index | |
%c1_4 = constant 1 : index | |
%c1_5 = constant 1 : index | |
// %9 = 1 * ((dim1 - 0) / 1) and %12 = %9 * ((dim0 - 0) / 1): with lower
// bound 0 and step 1 these are the dim-1 extent and the product of both
// extents of %0.
%7 = subi %4, %c0_3 : index | |
%8 = divi_signed %7, %c1_4 : index | |
%9 = muli %c1_5, %8 : index | |
%10 = subi %3, %c0_3 : index | |
%11 = divi_signed %10, %c1_4 : index | |
%12 = muli %9, %11 : index | |
%c0_6 = constant 0 : index | |
%c1_7 = constant 1 : index | |
// Linear id along x: %18 = block_id * block_dim + thread_id.
// %19 = block_dim * grid_dim is computed but has no uses in this function.
%13 = "gpu.grid_dim"() {dimension = "x"} : () -> index | |
%14 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%15 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%16 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%17 = muli %14, %15 : index | |
%18 = addi %17, %16 : index | |
%19 = muli %15, %13 : index | |
// %21 = 0 + %18 * 1; only ids below the element count %12 do any work.
%20 = muli %18, %c1_7 : index | |
%21 = addi %c0_6, %20 : index | |
%22 = cmpi slt, %21, %12 : index | |
scf.if %22 { | |
// Split the linear id: %23 = id / %9 (first index), %27 = (id rem %9) / 1
// (second index). All affine.apply ops here use the identity map.
// %24, %28 and %30 have no uses.
%23 = divi_signed %21, %9 : index | |
%24 = affine.apply affine_map<(d0) -> (d0)>(%23) | |
%25 = affine.apply affine_map<(d0) -> (d0)>(%23) | |
%26 = remi_signed %21, %9 : index | |
%27 = divi_signed %26, %c1_5 : index | |
%28 = affine.apply affine_map<(d0) -> (d0)>(%27) | |
%29 = affine.apply affine_map<(d0) -> (d0)>(%27) | |
%30 = remi_signed %26, %c1_5 : index | |
// Read one element of the source.
%31 = affine.apply affine_map<(d0) -> (d0)>(%25) | |
%32 = affine.apply affine_map<(d0) -> (d0)>(%29) | |
%33 = load %0[%31, %32] : memref<2x3xi32> | |
// NOTE(review): %36 loads the current destination element but has no uses.
%34 = affine.apply affine_map<(d0) -> (d0)>(%25) | |
%35 = affine.apply affine_map<(d0) -> (d0)>(%29) | |
%36 = load %2[%34, %35] : memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
// Write the source element to the same [row, col] of the destination view.
%37 = affine.apply affine_map<(d0) -> (d0)>(%25) | |
%38 = affine.apply affine_map<(d0) -> (d0)>(%29) | |
store %33, %2[%37, %38] : memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
} | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::ConvertVectorToGPUPass *** | |
// Per-thread copy with constant bounds: ids 0..5 each move one i32 from the
// 2x3 source into the strided 2x3 view of the 3x9 destination.
func @pad_test_dispatch_1() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
// 6 is the bound compared against the linear id; 3 is the divisor that
// splits it into two indices.
%c6 = constant 6 : index | |
%c3 = constant 3 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : memref<3x9xi32> | |
%2 = subview %1[0, 1] [2, 3] [1, 1] : memref<3x9xi32> to memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
// Linear id along x: %7 = block_id * block_dim + thread_id.
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
// Ids >= 6 skip the body.
%8 = cmpi slt, %7, %c6 : index | |
scf.if %8 { | |
// %9 = id / 3 and %10 = id rem 3 index both the source and the view.
%9 = divi_signed %7, %c3 : index | |
%10 = remi_signed %7, %c3 : index | |
%11 = load %0[%9, %10] : memref<2x3xi32> | |
store %11, %2[%9, %10] : memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
} | |
return | |
} | |
// *** IR Dump After ConvertAffineToStandard *** | |
// Module with the per-thread copy function and its I/O interface. The function
// body contains no affine.apply ops.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_1() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c6 = constant 6 : index | |
%c3 = constant 3 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : memref<3x9xi32> | |
// The destination is still addressed through a subview with a layout map.
%2 = subview %1[0, 1] [2, 3] [1, 1] : memref<3x9xi32> to memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
// %7 = block_id.x * block_dim.x + thread_id.x.
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
// Guard: only ids below 6 copy an element.
%8 = cmpi slt, %7, %c6 : index | |
scf.if %8 { | |
%9 = divi_signed %7, %c3 : index | |
%10 = remi_signed %7, %c3 : index | |
%11 = load %0[%9, %10] : memref<2x3xi32> | |
store %11, %2[%9, %10] : memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
} | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Module snapshot: guarded one-element-per-thread copy plus the @legacy_io
// interface declaration.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
// local_size is [32, 1, 1]; the guard below lets only ids 0..5 store.
func @pad_test_dispatch_1() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c6 = constant 6 : index | |
%c3 = constant 3 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : memref<3x9xi32> | |
%2 = subview %1[0, 1] [2, 3] [1, 1] : memref<3x9xi32> to memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
%8 = cmpi slt, %7, %c6 : index | |
scf.if %8 { | |
// Index pair derived from the linear id: (id / 3, id rem 3).
%9 = divi_signed %7, %c3 : index | |
%10 = remi_signed %7, %c3 : index | |
%11 = load %0[%9, %10] : memref<2x3xi32> | |
store %11, %2[%9, %10] : memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
} | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
} | |
// *** IR Dump After CSE *** | |
// Module with @pad_test_dispatch_1 (scalar per-thread copy) and the private
// @legacy_io interface.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_1() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c6 = constant 6 : index | |
%c3 = constant 3 : index | |
// Source buffer, destination buffer, and the 2x3 destination view at
// offsets [0, 1].
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : memref<3x9xi32> | |
%2 = subview %1[0, 1] [2, 3] [1, 1] : memref<3x9xi32> to memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
// Linear id %7 from block id, block dim and thread id along x.
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
%8 = cmpi slt, %7, %c6 : index | |
// One load and one store per id that passes the `< 6` check.
scf.if %8 { | |
%9 = divi_signed %7, %c3 : index | |
%10 = remi_signed %7, %c3 : index | |
%11 = load %0[%9, %10] : memref<2x3xi32> | |
store %11, %2[%9, %10] : memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
} | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::ResolveShapeOpsPass *** | |
// Per-thread copy: for linear ids below 6, move element (id / 3, id rem 3) of
// the 2x3 source into the same position of the 2x3 destination view.
// No dim or shape ops appear in this body; the bounds are the constants 6 and 3.
func @pad_test_dispatch_1() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c6 = constant 6 : index | |
%c3 = constant 3 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : memref<3x9xi32> | |
%2 = subview %1[0, 1] [2, 3] [1, 1] : memref<3x9xi32> to memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
// %7 = block_id * block_dim + thread_id.
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
%8 = cmpi slt, %7, %c6 : index | |
scf.if %8 { | |
%9 = divi_signed %7, %c3 : index | |
%10 = remi_signed %7, %c3 : index | |
%11 = load %0[%9, %10] : memref<2x3xi32> | |
store %11, %2[%9, %10] : memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
} | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::VectorTransferOptimizationPass *** | |
// Scalar per-thread copy from the @ro0 buffer into a strided view of the @rw1
// buffer. The body contains only scalar load/store ops; no vector-typed values
// appear.
func @pad_test_dispatch_1() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c6 = constant 6 : index | |
%c3 = constant 3 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : memref<3x9xi32> | |
// View of rows 0..1, columns 1..3 of the 3x9 buffer.
%2 = subview %1[0, 1] [2, 3] [1, 1] : memref<3x9xi32> to memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
%3 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%4 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%5 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%6 = muli %3, %4 : index | |
%7 = addi %6, %5 : index | |
// Only linear ids 0..5 perform the copy.
%8 = cmpi slt, %7, %c6 : index | |
scf.if %8 { | |
%9 = divi_signed %7, %c3 : index | |
%10 = remi_signed %7, %c3 : index | |
%11 = load %0[%9, %10] : memref<2x3xi32> | |
store %11, %2[%9, %10] : memref<2x3xi32, affine_map<(d0, d1) -> (d0 * 9 + d1 + 1)>> | |
} | |
return | |
} | |
// *** IR Dump After LegalizeStandardForSPIRV *** | |
// Module snapshot in which the function no longer contains a subview: the
// store targets the 3x9 buffer directly and adds 1 to the column index.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_1() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c6 = constant 6 : index | |
%c3 = constant 3 : index | |
// %c1 is the column offset added before the store.
%c1 = constant 1 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : memref<3x9xi32> | |
// Linear id %6 = block_id * block_dim + thread_id along x.
%2 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%3 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%4 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%5 = muli %2, %3 : index | |
%6 = addi %5, %4 : index | |
%7 = cmpi slt, %6, %c6 : index | |
scf.if %7 { | |
// Source element at (id / 3, id rem 3) goes to (id / 3, id rem 3 + 1) of the
// 3x9 buffer.
%8 = divi_signed %6, %c3 : index | |
%9 = remi_signed %6, %c3 : index | |
%10 = load %0[%8, %9] : memref<2x3xi32> | |
%11 = addi %9, %c1 : index | |
store %10, %1[%8, %11] : memref<3x9xi32> | |
} | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Module with the dispatch function writing straight into the 3x9 buffer
// (explicit +1 on the column index) and the @legacy_io interface.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_1() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c6 = constant 6 : index | |
%c3 = constant 3 : index | |
%c1 = constant 1 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : memref<3x9xi32> | |
%2 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%3 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%4 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%5 = muli %2, %3 : index | |
%6 = addi %5, %4 : index | |
// Ids at or above 6 do nothing.
%7 = cmpi slt, %6, %c6 : index | |
scf.if %7 { | |
%8 = divi_signed %6, %c3 : index | |
%9 = remi_signed %6, %c3 : index | |
%10 = load %0[%8, %9] : memref<2x3xi32> | |
// Shift the column by one before storing into the 3x9 buffer.
%11 = addi %9, %c1 : index | |
store %10, %1[%8, %11] : memref<3x9xi32> | |
} | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
} | |
// *** IR Dump After CSE *** | |
// Module: @pad_test_dispatch_1 copies six i32 values, one per linear id, from
// the 2x3 buffer into columns 1..3 of rows 0..1 of the 3x9 buffer.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_1() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c6 = constant 6 : index | |
%c3 = constant 3 : index | |
%c1 = constant 1 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : memref<3x9xi32> | |
%2 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%3 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%4 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
// Linear id along x.
%5 = muli %2, %3 : index | |
%6 = addi %5, %4 : index | |
%7 = cmpi slt, %6, %c6 : index | |
scf.if %7 { | |
// Row = id / 3; source column = id rem 3; destination column = source + 1.
%8 = divi_signed %6, %c3 : index | |
%9 = remi_signed %6, %c3 : index | |
%10 = load %0[%8, %9] : memref<2x3xi32> | |
%11 = addi %9, %c1 : index | |
store %10, %1[%8, %11] : memref<3x9xi32> | |
} | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::VectorizeMemRefPass *** | |
// Module snapshot: both bindings keep their scalar i32 memref types
// (memref<2x3xi32>, memref<3x9xi32>) and the copy is a scalar load/store pair.
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_1() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c6 = constant 6 : index | |
%c3 = constant 3 : index | |
%c1 = constant 1 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : memref<3x9xi32> | |
// %6 = block_id.x * block_dim.x + thread_id.x; compared against 6 below.
%2 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%3 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%4 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%5 = muli %2, %3 : index | |
%6 = addi %5, %4 : index | |
%7 = cmpi slt, %6, %c6 : index | |
scf.if %7 { | |
%8 = divi_signed %6, %c3 : index | |
%9 = remi_signed %6, %c3 : index | |
%10 = load %0[%8, %9] : memref<2x3xi32> | |
// Destination column is the source column plus 1.
%11 = addi %9, %c1 : index | |
store %10, %1[%8, %11] : memref<3x9xi32> | |
} | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::ForOpCanonicalizationPass *** | |
func @pad_test_dispatch_1() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c6 = constant 6 : index | |
%c3 = constant 3 : index | |
%c1 = constant 1 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : memref<3x9xi32> | |
%2 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%3 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%4 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%5 = muli %2, %3 : index | |
%6 = addi %5, %4 : index | |
%7 = cmpi slt, %6, %c6 : index | |
scf.if %7 { | |
%8 = divi_signed %6, %c3 : index | |
%9 = remi_signed %6, %c3 : index | |
%10 = load %0[%8, %9] : memref<2x3xi32> | |
%11 = addi %9, %c1 : index | |
store %10, %1[%8, %11] : memref<3x9xi32> | |
} | |
return | |
} | |
// *** IR Dump After Canonicalizer *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_1() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c6 = constant 6 : index | |
%c3 = constant 3 : index | |
%c1 = constant 1 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : memref<3x9xi32> | |
%2 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%3 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%4 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%5 = muli %2, %3 : index | |
%6 = addi %5, %4 : index | |
%7 = cmpi slt, %6, %c6 : index | |
scf.if %7 { | |
%8 = divi_signed %6, %c3 : index | |
%9 = remi_signed %6, %c3 : index | |
%10 = load %0[%8, %9] : memref<2x3xi32> | |
%11 = addi %9, %c1 : index | |
store %10, %1[%8, %11] : memref<3x9xi32> | |
} | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
} | |
// *** IR Dump After CSE *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
func @pad_test_dispatch_1() attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%c0 = constant 0 : index | |
%c6 = constant 6 : index | |
%c3 = constant 3 : index | |
%c1 = constant 1 : index | |
%0 = hal.interface.binding.subspan @legacy_io::@ro0[%c0] : memref<2x3xi32> | |
%1 = hal.interface.binding.subspan @legacy_io::@rw1[%c0] : memref<3x9xi32> | |
%2 = "gpu.block_id"() {dimension = "x"} : () -> index | |
%3 = "gpu.block_dim"() {dimension = "x"} : () -> index | |
%4 = "gpu.thread_id"() {dimension = "x"} : () -> index | |
%5 = muli %2, %3 : index | |
%6 = addi %5, %4 : index | |
%7 = cmpi slt, %6, %c6 : index | |
scf.if %7 { | |
%8 = divi_signed %6, %c3 : index | |
%9 = remi_signed %6, %c3 : index | |
%10 = load %0[%8, %9] : memref<2x3xi32> | |
%11 = addi %9, %c1 : index | |
store %10, %1[%8, %11] : memref<3x9xi32> | |
} | |
return | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::(anonymous namespace)::ConvertToSPIRVPass *** | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
spv.module Logical GLSL450 { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__resource_var_184152960__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_183752544__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_1() "None" attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]> : vector<3xi32>}} { | |
%0 = spv.Constant 0 : i32 | |
%1 = spv.Constant 6 : i32 | |
%2 = spv.Constant 3 : i32 | |
%3 = spv.Constant 1 : i32 | |
%4 = spv.mlir.addressof @__resource_var_183752544__ : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer> | |
%5 = spv.mlir.addressof @__resource_var_184152960__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
%6 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%7 = spv.Load "Input" %6 : vector<3xi32> | |
%8 = spv.CompositeExtract %7[0 : i32] : vector<3xi32> | |
%9 = spv.Constant 32 : i32 | |
%10 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%11 = spv.Load "Input" %10 : vector<3xi32> | |
%12 = spv.CompositeExtract %11[0 : i32] : vector<3xi32> | |
%13 = spv.IMul %8, %9 : i32 | |
%14 = spv.IAdd %13, %12 : i32 | |
%15 = spv.SLessThan %14, %1 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %15, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%16 = spv.SDiv %14, %2 : i32 | |
%17 = spv.GLSL.SAbs %14 : i32 | |
%18 = spv.GLSL.SAbs %2 : i32 | |
%19 = spv.UMod %17, %18 : i32 | |
%20 = spv.IEqual %14, %17 : i32 | |
%21 = spv.SNegate %19 : i32 | |
%22 = spv.Select %20, %19, %21 : i1, i32 | |
%23 = spv.Constant 0 : i32 | |
%24 = spv.Constant 0 : i32 | |
%25 = spv.Constant 3 : i32 | |
%26 = spv.IMul %25, %16 : i32 | |
%27 = spv.IAdd %24, %26 : i32 | |
%28 = spv.Constant 1 : i32 | |
%29 = spv.IMul %28, %22 : i32 | |
%30 = spv.IAdd %27, %29 : i32 | |
%31 = spv.AccessChain %4[%23, %30] : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%32 = spv.Load "StorageBuffer" %31 : i32 | |
%33 = spv.IAdd %22, %3 : i32 | |
%34 = spv.Constant 0 : i32 | |
%35 = spv.Constant 0 : i32 | |
%36 = spv.Constant 9 : i32 | |
%37 = spv.IMul %36, %16 : i32 | |
%38 = spv.IAdd %35, %37 : i32 | |
%39 = spv.Constant 1 : i32 | |
%40 = spv.IMul %39, %33 : i32 | |
%41 = spv.IAdd %38, %40 : i32 | |
%42 = spv.AccessChain %5[%34, %41] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %42, %32 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
} | |
// *** IR Dump After SPIRVLowerABIAttributes *** | |
spv.module Logical GLSL450 { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__resource_var_184152960__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_183752544__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_1() "None" { | |
%0 = spv.Constant 0 : i32 | |
%1 = spv.Constant 6 : i32 | |
%2 = spv.Constant 3 : i32 | |
%3 = spv.Constant 1 : i32 | |
%4 = spv.mlir.addressof @__resource_var_183752544__ : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer> | |
%5 = spv.mlir.addressof @__resource_var_184152960__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
%6 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%7 = spv.Load "Input" %6 : vector<3xi32> | |
%8 = spv.CompositeExtract %7[0 : i32] : vector<3xi32> | |
%9 = spv.Constant 32 : i32 | |
%10 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%11 = spv.Load "Input" %10 : vector<3xi32> | |
%12 = spv.CompositeExtract %11[0 : i32] : vector<3xi32> | |
%13 = spv.IMul %8, %9 : i32 | |
%14 = spv.IAdd %13, %12 : i32 | |
%15 = spv.SLessThan %14, %1 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %15, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%16 = spv.SDiv %14, %2 : i32 | |
%17 = spv.GLSL.SAbs %14 : i32 | |
%18 = spv.GLSL.SAbs %2 : i32 | |
%19 = spv.UMod %17, %18 : i32 | |
%20 = spv.IEqual %14, %17 : i32 | |
%21 = spv.SNegate %19 : i32 | |
%22 = spv.Select %20, %19, %21 : i1, i32 | |
%23 = spv.Constant 0 : i32 | |
%24 = spv.Constant 0 : i32 | |
%25 = spv.Constant 3 : i32 | |
%26 = spv.IMul %25, %16 : i32 | |
%27 = spv.IAdd %24, %26 : i32 | |
%28 = spv.Constant 1 : i32 | |
%29 = spv.IMul %28, %22 : i32 | |
%30 = spv.IAdd %27, %29 : i32 | |
%31 = spv.AccessChain %4[%23, %30] : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%32 = spv.Load "StorageBuffer" %31 : i32 | |
%33 = spv.IAdd %22, %3 : i32 | |
%34 = spv.Constant 0 : i32 | |
%35 = spv.Constant 0 : i32 | |
%36 = spv.Constant 9 : i32 | |
%37 = spv.IMul %36, %16 : i32 | |
%38 = spv.IAdd %35, %37 : i32 | |
%39 = spv.Constant 1 : i32 | |
%40 = spv.IMul %39, %33 : i32 | |
%41 = spv.IAdd %38, %40 : i32 | |
%42 = spv.AccessChain %5[%34, %41] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %42, %32 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
spv.EntryPoint "GLCompute" @pad_test_dispatch_1, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
spv.ExecutionMode @pad_test_dispatch_1 "LocalSize", 32, 1, 1 | |
} | |
// *** IR Dump After Canonicalizer *** | |
spv.module Logical GLSL450 { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__resource_var_184152960__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_183752544__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_1() "None" { | |
%0 = spv.Constant 6 : i32 | |
%1 = spv.Constant 1 : i32 | |
%2 = spv.Constant 32 : i32 | |
%3 = spv.Constant 3 : i32 | |
%4 = spv.Constant 0 : i32 | |
%5 = spv.Constant 9 : i32 | |
%6 = spv.mlir.addressof @__resource_var_183752544__ : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer> | |
%7 = spv.mlir.addressof @__resource_var_184152960__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
%8 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%9 = spv.Load "Input" %8 : vector<3xi32> | |
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32> | |
%11 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%12 = spv.Load "Input" %11 : vector<3xi32> | |
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32> | |
%14 = spv.IMul %10, %2 : i32 | |
%15 = spv.IAdd %14, %13 : i32 | |
%16 = spv.SLessThan %15, %0 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %16, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%17 = spv.SDiv %15, %3 : i32 | |
%18 = spv.GLSL.SAbs %15 : i32 | |
%19 = spv.GLSL.SAbs %3 : i32 | |
%20 = spv.UMod %18, %19 : i32 | |
%21 = spv.IEqual %15, %18 : i32 | |
%22 = spv.SNegate %20 : i32 | |
%23 = spv.Select %21, %20, %22 : i1, i32 | |
%24 = spv.IMul %17, %3 : i32 | |
%25 = spv.IAdd %24, %23 : i32 | |
%26 = spv.AccessChain %6[%4, %25] : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%27 = spv.Load "StorageBuffer" %26 : i32 | |
%28 = spv.IAdd %23, %1 : i32 | |
%29 = spv.IMul %17, %5 : i32 | |
%30 = spv.IAdd %29, %28 : i32 | |
%31 = spv.AccessChain %7[%4, %30] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %31, %27 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
spv.EntryPoint "GLCompute" @pad_test_dispatch_1, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
spv.ExecutionMode @pad_test_dispatch_1 "LocalSize", 32, 1, 1 | |
} | |
// *** IR Dump After CSE *** | |
spv.module Logical GLSL450 { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__resource_var_184152960__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_183752544__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_1() "None" { | |
%0 = spv.Constant 6 : i32 | |
%1 = spv.Constant 1 : i32 | |
%2 = spv.Constant 32 : i32 | |
%3 = spv.Constant 3 : i32 | |
%4 = spv.Constant 0 : i32 | |
%5 = spv.Constant 9 : i32 | |
%6 = spv.mlir.addressof @__resource_var_183752544__ : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer> | |
%7 = spv.mlir.addressof @__resource_var_184152960__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
%8 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%9 = spv.Load "Input" %8 : vector<3xi32> | |
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32> | |
%11 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%12 = spv.Load "Input" %11 : vector<3xi32> | |
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32> | |
%14 = spv.IMul %10, %2 : i32 | |
%15 = spv.IAdd %14, %13 : i32 | |
%16 = spv.SLessThan %15, %0 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %16, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%17 = spv.SDiv %15, %3 : i32 | |
%18 = spv.GLSL.SAbs %15 : i32 | |
%19 = spv.GLSL.SAbs %3 : i32 | |
%20 = spv.UMod %18, %19 : i32 | |
%21 = spv.IEqual %15, %18 : i32 | |
%22 = spv.SNegate %20 : i32 | |
%23 = spv.Select %21, %20, %22 : i1, i32 | |
%24 = spv.IMul %17, %3 : i32 | |
%25 = spv.IAdd %24, %23 : i32 | |
%26 = spv.AccessChain %6[%4, %25] : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%27 = spv.Load "StorageBuffer" %26 : i32 | |
%28 = spv.IAdd %23, %1 : i32 | |
%29 = spv.IMul %17, %5 : i32 | |
%30 = spv.IAdd %29, %28 : i32 | |
%31 = spv.AccessChain %7[%4, %30] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %31, %27 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
spv.EntryPoint "GLCompute" @pad_test_dispatch_1, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
spv.ExecutionMode @pad_test_dispatch_1 "LocalSize", 32, 1, 1 | |
} | |
// *** IR Dump After SPIRVUpdateVCE *** | |
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__resource_var_184152960__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_183752544__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_1() "None" { | |
%0 = spv.Constant 6 : i32 | |
%1 = spv.Constant 1 : i32 | |
%2 = spv.Constant 32 : i32 | |
%3 = spv.Constant 3 : i32 | |
%4 = spv.Constant 0 : i32 | |
%5 = spv.Constant 9 : i32 | |
%6 = spv.mlir.addressof @__resource_var_183752544__ : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer> | |
%7 = spv.mlir.addressof @__resource_var_184152960__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
%8 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%9 = spv.Load "Input" %8 : vector<3xi32> | |
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32> | |
%11 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%12 = spv.Load "Input" %11 : vector<3xi32> | |
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32> | |
%14 = spv.IMul %10, %2 : i32 | |
%15 = spv.IAdd %14, %13 : i32 | |
%16 = spv.SLessThan %15, %0 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %16, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%17 = spv.SDiv %15, %3 : i32 | |
%18 = spv.GLSL.SAbs %15 : i32 | |
%19 = spv.GLSL.SAbs %3 : i32 | |
%20 = spv.UMod %18, %19 : i32 | |
%21 = spv.IEqual %15, %18 : i32 | |
%22 = spv.SNegate %20 : i32 | |
%23 = spv.Select %21, %20, %22 : i1, i32 | |
%24 = spv.IMul %17, %3 : i32 | |
%25 = spv.IAdd %24, %23 : i32 | |
%26 = spv.AccessChain %6[%4, %25] : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%27 = spv.Load "StorageBuffer" %26 : i32 | |
%28 = spv.IAdd %23, %1 : i32 | |
%29 = spv.IMul %17, %5 : i32 | |
%30 = spv.IAdd %29, %28 : i32 | |
%31 = spv.AccessChain %7[%4, %30] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %31, %27 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
spv.EntryPoint "GLCompute" @pad_test_dispatch_1, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
spv.ExecutionMode @pad_test_dispatch_1 "LocalSize", 32, 1, 1 | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::TranslateExecutablesPass *** | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
hal.executable.entry_point @pad_test_dispatch_1 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:2x3xi32>, !flow.dispatch.tensor<readwrite:3x9xi32>) -> ()} { | |
^bb0(%arg0: index, %arg1: index, %arg2: index): // no predecessors | |
%c1 = constant 1 : index | |
%0 = affine.apply affine_map<()[s0, s1, s2] -> (((s0 * s1) * s2) ceildiv 32)>()[%arg0, %arg1, %arg2] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__resource_var_184152960__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_183752544__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_1() "None" { | |
%0 = spv.Constant 6 : i32 | |
%1 = spv.Constant 1 : i32 | |
%2 = spv.Constant 32 : i32 | |
%3 = spv.Constant 3 : i32 | |
%4 = spv.Constant 0 : i32 | |
%5 = spv.Constant 9 : i32 | |
%6 = spv.mlir.addressof @__resource_var_183752544__ : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer> | |
%7 = spv.mlir.addressof @__resource_var_184152960__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
%8 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%9 = spv.Load "Input" %8 : vector<3xi32> | |
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32> | |
%11 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%12 = spv.Load "Input" %11 : vector<3xi32> | |
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32> | |
%14 = spv.IMul %10, %2 : i32 | |
%15 = spv.IAdd %14, %13 : i32 | |
%16 = spv.SLessThan %15, %0 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %16, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%17 = spv.SDiv %15, %3 : i32 | |
%18 = spv.GLSL.SAbs %15 : i32 | |
%19 = spv.GLSL.SAbs %3 : i32 | |
%20 = spv.UMod %18, %19 : i32 | |
%21 = spv.IEqual %15, %18 : i32 | |
%22 = spv.SNegate %20 : i32 | |
%23 = spv.Select %21, %20, %22 : i1, i32 | |
%24 = spv.IMul %17, %3 : i32 | |
%25 = spv.IAdd %24, %23 : i32 | |
%26 = spv.AccessChain %6[%4, %25] : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%27 = spv.Load "StorageBuffer" %26 : i32 | |
%28 = spv.IAdd %23, %1 : i32 | |
%29 = spv.IMul %17, %5 : i32 | |
%30 = spv.IAdd %29, %28 : i32 | |
%31 = spv.AccessChain %7[%4, %30] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %31, %27 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
spv.EntryPoint "GLCompute" @pad_test_dispatch_1, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
spv.ExecutionMode @pad_test_dispatch_1 "LocalSize", 32, 1, 1 | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::(anonymous namespace)::ConvertToHALPass *** | |
#map = affine_map<()[s0, s1, s2] -> (((s0 * s1) * s2) ceildiv 32)> | |
module { | |
hal.executable @pad_test_dispatch_0 attributes {sym_visibility = "private"} { | |
hal.interface @legacy_io { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
hal.executable.entry_point @pad_test_dispatch_0 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:i32>, !flow.dispatch.tensor<writeonly:3x9xi32>) -> ()} { | |
^bb0(%arg0: index, %arg1: index, %arg2: index): // no predecessors | |
%c1 = constant 1 : index | |
%0 = affine.apply #map()[%arg0, %arg1, %arg2] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__resource_var_183968000__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_183752544__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_0() "None" { | |
%0 = spv.Constant 27 : i32 | |
%1 = spv.Constant 32 : i32 | |
%2 = spv.Constant 0 : i32 | |
%3 = spv.Constant 9 : i32 | |
%4 = spv.mlir.addressof @__resource_var_183752544__ : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
%5 = spv.mlir.addressof @__resource_var_183968000__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
%6 = spv.AccessChain %4[%2, %2] : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%7 = spv.Load "StorageBuffer" %6 : i32 | |
%8 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%9 = spv.Load "Input" %8 : vector<3xi32> | |
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32> | |
%11 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%12 = spv.Load "Input" %11 : vector<3xi32> | |
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32> | |
%14 = spv.IMul %10, %1 : i32 | |
%15 = spv.IAdd %14, %13 : i32 | |
%16 = spv.SLessThan %15, %0 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %16, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%17 = spv.SDiv %15, %3 : i32 | |
%18 = spv.GLSL.SAbs %15 : i32 | |
%19 = spv.GLSL.SAbs %3 : i32 | |
%20 = spv.UMod %18, %19 : i32 | |
%21 = spv.IEqual %15, %18 : i32 | |
%22 = spv.SNegate %20 : i32 | |
%23 = spv.Select %21, %20, %22 : i1, i32 | |
%24 = spv.IMul %17, %3 : i32 | |
%25 = spv.IAdd %24, %23 : i32 | |
%26 = spv.AccessChain %5[%2, %25] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %26, %7 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
spv.EntryPoint "GLCompute" @pad_test_dispatch_0, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
spv.ExecutionMode @pad_test_dispatch_0 "LocalSize", 32, 1, 1 | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
} | |
} | |
hal.executable @pad_test_dispatch_1 attributes {sym_visibility = "private"} { | |
hal.interface @legacy_io { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
hal.executable.entry_point @pad_test_dispatch_1 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:2x3xi32>, !flow.dispatch.tensor<readwrite:3x9xi32>) -> ()} { | |
^bb0(%arg0: index, %arg1: index, %arg2: index): // no predecessors | |
%c1 = constant 1 : index | |
%0 = affine.apply #map()[%arg0, %arg1, %arg2] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__resource_var_184152960__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_183752544__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_1() "None" { | |
%0 = spv.Constant 6 : i32 | |
%1 = spv.Constant 1 : i32 | |
%2 = spv.Constant 32 : i32 | |
%3 = spv.Constant 3 : i32 | |
%4 = spv.Constant 0 : i32 | |
%5 = spv.Constant 9 : i32 | |
%6 = spv.mlir.addressof @__resource_var_183752544__ : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer> | |
%7 = spv.mlir.addressof @__resource_var_184152960__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
%8 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%9 = spv.Load "Input" %8 : vector<3xi32> | |
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32> | |
%11 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%12 = spv.Load "Input" %11 : vector<3xi32> | |
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32> | |
%14 = spv.IMul %10, %2 : i32 | |
%15 = spv.IAdd %14, %13 : i32 | |
%16 = spv.SLessThan %15, %0 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %16, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%17 = spv.SDiv %15, %3 : i32 | |
%18 = spv.GLSL.SAbs %15 : i32 | |
%19 = spv.GLSL.SAbs %3 : i32 | |
%20 = spv.UMod %18, %19 : i32 | |
%21 = spv.IEqual %15, %18 : i32 | |
%22 = spv.SNegate %20 : i32 | |
%23 = spv.Select %21, %20, %22 : i1, i32 | |
%24 = spv.IMul %17, %3 : i32 | |
%25 = spv.IAdd %24, %23 : i32 | |
%26 = spv.AccessChain %6[%4, %25] : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%27 = spv.Load "StorageBuffer" %26 : i32 | |
%28 = spv.IAdd %23, %1 : i32 | |
%29 = spv.IMul %17, %5 : i32 | |
%30 = spv.IAdd %29, %28 : i32 | |
%31 = spv.AccessChain %7[%4, %30] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %31, %27 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
spv.EntryPoint "GLCompute" @pad_test_dispatch_1, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
spv.ExecutionMode @pad_test_dispatch_1 "LocalSize", 32, 1, 1 | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
} | |
} | |
} | |
// Host-side entry point @pad_test.
// Allocates three constant buffers (expected 3x9 result, scalar 0, 2x3 input),
// allocates a 3x9 result buffer, records two dispatches into a one-shot command
// buffer, submits it and waits, then compares the result with the expected constant.
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
// Expected 3x9 result; only consumed by the buffer view compared at the end.
%dev = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator %dev : !hal.allocator | |
%cbuffer = hal.allocator.allocate.const %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch" : !hal.buffer = dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
// Scalar 0 constant; becomes %1 and is bound as binding 0 of the first dispatch.
%dev_0 = hal.ex.shared_device : !hal.device | |
%allocator_1 = hal.device.allocator %dev_0 : !hal.allocator | |
%cbuffer_2 = hal.allocator.allocate.const %allocator_1, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch" : !hal.buffer = dense<0> : tensor<i32> | |
// 2x3 input constant; becomes %0 and is bound as binding 0 of the second dispatch.
%dev_3 = hal.ex.shared_device : !hal.device | |
%allocator_4 = hal.device.allocator %dev_3 : !hal.allocator | |
%cbuffer_5 = hal.allocator.allocate.const %allocator_4, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch" : !hal.buffer = dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%0 = iree.do_not_optimize(%cbuffer_5) : !hal.buffer | |
%1 = iree.do_not_optimize(%cbuffer_2) : !hal.buffer | |
// Result buffer sized for shape [3, 9].
// NOTE(review): 16777248 (0x01000020) is presumably the encoded element type for 32-bit integers - confirm.
%dev_6 = hal.ex.shared_device : !hal.device | |
%allocator_7 = hal.device.allocator %dev_6 : !hal.allocator | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
%c16777248_i32 = constant 16777248 : i32 | |
%sz = hal.allocator.compute_size %allocator_7, shape = [%c3, %c9], element_type = %c16777248_i32 | |
%buffer = hal.allocator.allocate %allocator_7, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %sz : !hal.buffer | |
// Start recording the one-shot command buffer.
%cmd = hal.command_buffer.create %dev_6, OneShot, "Transfer|Dispatch" : !hal.command_buffer | |
hal.command_buffer.begin %cmd | |
// Workload operands (9, 3, 1) forwarded to both hal.device.switch regions below.
%c9_8 = constant 9 : index | |
%c3_9 = constant 3 : index | |
%c1 = constant 1 : index | |
// First dispatch: binding 0 is read-only, binding 1 is "Write|Discard".
%executable_layout = hal.executable_layout.lookup %dev_6, set_layouts = [[#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Write|Discard">]] : !hal.executable_layout | |
%c0 = constant 0 : index | |
%allocator_10 = hal.buffer.allocator %1 : !hal.allocator | |
%c16777248_i32_11 = constant 16777248 : i32 | |
// Size of the scalar (rank-0 shape).
%sz_12 = hal.allocator.compute_size %allocator_10, shape = [], element_type = %c16777248_i32_11 | |
%c0_13 = constant 0 : index | |
%c3_14 = constant 3 : index | |
%c9_15 = constant 9 : index | |
%allocator_16 = hal.buffer.allocator %buffer : !hal.allocator | |
%c16777248_i32_17 = constant 16777248 : i32 | |
%sz_18 = hal.allocator.compute_size %allocator_16, shape = [%c3_14, %c9_15], element_type = %c16777248_i32_17 | |
%c1_19 = constant 1 : index | |
%c0_20 = constant 0 : index | |
// Bind the scalar (%1) at binding 0 and the result buffer at binding 1, both at offset 0.
hal.command_buffer.push_descriptor_set %cmd, %executable_layout, set = %c0_20, bindings = [%c0_13 = (%1, %c0, %sz_12), %c1_19 = (%buffer, %c0, %sz_18)] | |
hal.device.switch(%dev_6 : !hal.device) | |
#hal.device.match.id<"vulkan*">(%arg0 = %cmd : !hal.command_buffer, %arg1 = %c9_8 : index, %arg2 = %c3_9 : index, %arg3 = %c1 : index) { | |
%c1_42 = constant 1 : index | |
// X workgroup count comes from #map (defined outside this function) applied to (9, 3, 1); Y and Z are 1.
%2 = affine.apply #map()[%arg1, %arg2, %arg3] | |
hal.command_buffer.dispatch.symbol %arg0, @pad_test_dispatch_0::@vulkan_spirv::@pad_test_dispatch_0, workgroup_xyz = [%2, %c1_42, %c1_42] | |
hal.return | |
} | |
// Barrier between the two dispatches, which both bind %buffer at binding 1.
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
// Second dispatch: binding 0 is read-only, binding 1 is "Read|Write".
%executable_layout_21 = hal.executable_layout.lookup %dev_6, set_layouts = [[#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Read|Write">]] : !hal.executable_layout | |
%c0_22 = constant 0 : index | |
%c2 = constant 2 : index | |
%c3_23 = constant 3 : index | |
%allocator_24 = hal.buffer.allocator %0 : !hal.allocator | |
%c16777248_i32_25 = constant 16777248 : i32 | |
// Size of the 2x3 input.
%sz_26 = hal.allocator.compute_size %allocator_24, shape = [%c2, %c3_23], element_type = %c16777248_i32_25 | |
%c0_27 = constant 0 : index | |
%c3_28 = constant 3 : index | |
%c9_29 = constant 9 : index | |
%allocator_30 = hal.buffer.allocator %buffer : !hal.allocator | |
%c16777248_i32_31 = constant 16777248 : i32 | |
%sz_32 = hal.allocator.compute_size %allocator_30, shape = [%c3_28, %c9_29], element_type = %c16777248_i32_31 | |
%c1_33 = constant 1 : index | |
%c0_34 = constant 0 : index | |
// Bind the input (%0) at binding 0 and the same result buffer at binding 1.
hal.command_buffer.push_descriptor_set %cmd, %executable_layout_21, set = %c0_34, bindings = [%c0_27 = (%0, %c0_22, %sz_26), %c1_33 = (%buffer, %c0_22, %sz_32)] | |
hal.device.switch(%dev_6 : !hal.device) | |
#hal.device.match.id<"vulkan*">(%arg0 = %cmd : !hal.command_buffer, %arg1 = %c9_8 : index, %arg2 = %c3_9 : index, %arg3 = %c1 : index) { | |
%c1_42 = constant 1 : index | |
// Same workload operands (9, 3, 1) as the first dispatch.
%2 = affine.apply #map()[%arg1, %arg2, %arg3] | |
hal.command_buffer.dispatch.symbol %arg0, @pad_test_dispatch_1::@vulkan_spirv::@pad_test_dispatch_1, workgroup_xyz = [%2, %c1_42, %c1_42] | |
hal.return | |
} | |
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
hal.command_buffer.end %cmd | |
// Submit and block until the recorded work has completed.
hal.ex.submit_and_wait %dev_6, %cmd | |
// Wrap the result and the expected constant as 3x9 buffer views and compare them.
%c3_35 = constant 3 : index | |
%c9_36 = constant 9 : index | |
%c16777248_i32_37 = constant 16777248 : i32 | |
%view = hal.buffer_view.create %buffer, element_type = %c16777248_i32_37, shape = [%c3_35, %c9_36] : !hal.buffer_view | |
%c3_38 = constant 3 : index | |
%c9_39 = constant 9 : index | |
%c16777248_i32_40 = constant 16777248 : i32 | |
%view_41 = hal.buffer_view.create %cbuffer, element_type = %c16777248_i32_40, shape = [%c3_38, %c9_39] : !hal.buffer_view | |
check.expect_eq(%view, %view_41) : !hal.buffer_view | |
return | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::Shape::(anonymous namespace)::ExpandFunctionRankedShapeDimsPass *** | |
// Host-side entry point @pad_test as dumped after ExpandFunctionRankedShapeDimsPass.
// In this dump the function is printed on its own and the workgroup-count
// affine map is written inline at each affine.apply.
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
// Expected 3x9 result; only consumed by the final comparison.
%dev = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator %dev : !hal.allocator | |
%cbuffer = hal.allocator.allocate.const %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch" : !hal.buffer = dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
// Scalar 0 constant (later %1).
%dev_0 = hal.ex.shared_device : !hal.device | |
%allocator_1 = hal.device.allocator %dev_0 : !hal.allocator | |
%cbuffer_2 = hal.allocator.allocate.const %allocator_1, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch" : !hal.buffer = dense<0> : tensor<i32> | |
// 2x3 input constant (later %0).
%dev_3 = hal.ex.shared_device : !hal.device | |
%allocator_4 = hal.device.allocator %dev_3 : !hal.allocator | |
%cbuffer_5 = hal.allocator.allocate.const %allocator_4, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch" : !hal.buffer = dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%0 = iree.do_not_optimize(%cbuffer_5) : !hal.buffer | |
%1 = iree.do_not_optimize(%cbuffer_2) : !hal.buffer | |
// Result buffer sized for shape [3, 9].
// NOTE(review): 16777248 (0x01000020) is presumably the encoded element type for 32-bit integers - confirm.
%dev_6 = hal.ex.shared_device : !hal.device | |
%allocator_7 = hal.device.allocator %dev_6 : !hal.allocator | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
%c16777248_i32 = constant 16777248 : i32 | |
%sz = hal.allocator.compute_size %allocator_7, shape = [%c3, %c9], element_type = %c16777248_i32 | |
%buffer = hal.allocator.allocate %allocator_7, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %sz : !hal.buffer | |
// Start recording the one-shot command buffer.
%cmd = hal.command_buffer.create %dev_6, OneShot, "Transfer|Dispatch" : !hal.command_buffer | |
hal.command_buffer.begin %cmd | |
// Workload operands (9, 3, 1) forwarded to both hal.device.switch regions.
%c9_8 = constant 9 : index | |
%c3_9 = constant 3 : index | |
%c1 = constant 1 : index | |
// First dispatch layout: binding 0 "Read", binding 1 "Write|Discard".
%executable_layout = hal.executable_layout.lookup %dev_6, set_layouts = [[#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Write|Discard">]] : !hal.executable_layout | |
%c0 = constant 0 : index | |
%allocator_10 = hal.buffer.allocator %1 : !hal.allocator | |
%c16777248_i32_11 = constant 16777248 : i32 | |
%sz_12 = hal.allocator.compute_size %allocator_10, shape = [], element_type = %c16777248_i32_11 | |
%c0_13 = constant 0 : index | |
%c3_14 = constant 3 : index | |
%c9_15 = constant 9 : index | |
%allocator_16 = hal.buffer.allocator %buffer : !hal.allocator | |
%c16777248_i32_17 = constant 16777248 : i32 | |
%sz_18 = hal.allocator.compute_size %allocator_16, shape = [%c3_14, %c9_15], element_type = %c16777248_i32_17 | |
%c1_19 = constant 1 : index | |
%c0_20 = constant 0 : index | |
// Bind the scalar (%1) at binding 0 and the result buffer at binding 1.
hal.command_buffer.push_descriptor_set %cmd, %executable_layout, set = %c0_20, bindings = [%c0_13 = (%1, %c0, %sz_12), %c1_19 = (%buffer, %c0, %sz_18)] | |
hal.device.switch(%dev_6 : !hal.device) | |
#hal.device.match.id<"vulkan*">(%arg0 = %cmd : !hal.command_buffer, %arg1 = %c9_8 : index, %arg2 = %c3_9 : index, %arg3 = %c1 : index) { | |
%c1_42 = constant 1 : index | |
// X workgroup count = ceildiv(9 * 3 * 1, 32); Y and Z are 1.
%2 = affine.apply affine_map<()[s0, s1, s2] -> (((s0 * s1) * s2) ceildiv 32)>()[%arg1, %arg2, %arg3] | |
hal.command_buffer.dispatch.symbol %arg0, @pad_test_dispatch_0::@vulkan_spirv::@pad_test_dispatch_0, workgroup_xyz = [%2, %c1_42, %c1_42] | |
hal.return | |
} | |
// Barrier between the two dispatches, which both bind %buffer at binding 1.
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
// Second dispatch layout: binding 0 "Read", binding 1 "Read|Write".
%executable_layout_21 = hal.executable_layout.lookup %dev_6, set_layouts = [[#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Read|Write">]] : !hal.executable_layout | |
%c0_22 = constant 0 : index | |
%c2 = constant 2 : index | |
%c3_23 = constant 3 : index | |
%allocator_24 = hal.buffer.allocator %0 : !hal.allocator | |
%c16777248_i32_25 = constant 16777248 : i32 | |
%sz_26 = hal.allocator.compute_size %allocator_24, shape = [%c2, %c3_23], element_type = %c16777248_i32_25 | |
%c0_27 = constant 0 : index | |
%c3_28 = constant 3 : index | |
%c9_29 = constant 9 : index | |
%allocator_30 = hal.buffer.allocator %buffer : !hal.allocator | |
%c16777248_i32_31 = constant 16777248 : i32 | |
%sz_32 = hal.allocator.compute_size %allocator_30, shape = [%c3_28, %c9_29], element_type = %c16777248_i32_31 | |
%c1_33 = constant 1 : index | |
%c0_34 = constant 0 : index | |
// Bind the 2x3 input (%0) at binding 0 and the same result buffer at binding 1.
hal.command_buffer.push_descriptor_set %cmd, %executable_layout_21, set = %c0_34, bindings = [%c0_27 = (%0, %c0_22, %sz_26), %c1_33 = (%buffer, %c0_22, %sz_32)] | |
hal.device.switch(%dev_6 : !hal.device) | |
#hal.device.match.id<"vulkan*">(%arg0 = %cmd : !hal.command_buffer, %arg1 = %c9_8 : index, %arg2 = %c3_9 : index, %arg3 = %c1 : index) { | |
%c1_42 = constant 1 : index | |
// Same workgroup-count expression and operands as the first dispatch.
%2 = affine.apply affine_map<()[s0, s1, s2] -> (((s0 * s1) * s2) ceildiv 32)>()[%arg1, %arg2, %arg3] | |
hal.command_buffer.dispatch.symbol %arg0, @pad_test_dispatch_1::@vulkan_spirv::@pad_test_dispatch_1, workgroup_xyz = [%2, %c1_42, %c1_42] | |
hal.return | |
} | |
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
hal.command_buffer.end %cmd | |
// Submit and block until the recorded work has completed.
hal.ex.submit_and_wait %dev_6, %cmd | |
// Compare the result buffer with the expected constant as 3x9 buffer views.
%c3_35 = constant 3 : index | |
%c9_36 = constant 9 : index | |
%c16777248_i32_37 = constant 16777248 : i32 | |
%view = hal.buffer_view.create %buffer, element_type = %c16777248_i32_37, shape = [%c3_35, %c9_36] : !hal.buffer_view | |
%c3_38 = constant 3 : index | |
%c9_39 = constant 9 : index | |
%c16777248_i32_40 = constant 16777248 : i32 | |
%view_41 = hal.buffer_view.create %cbuffer, element_type = %c16777248_i32_40, shape = [%c3_38, %c9_39] : !hal.buffer_view | |
check.expect_eq(%view, %view_41) : !hal.buffer_view | |
return | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Host-side entry point @pad_test as dumped after the Canonicalizer.
// Constants are hoisted to the top, buffer sizes appear as plain index constants,
// and the constant data is produced by iree.byte_buffer.constant + hal.allocator.map.
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
// %c-1 is passed as the length operand of every hal.allocator.map below.
// NOTE(review): presumably -1 means "map the whole byte buffer" - confirm.
%c-1 = constant -1 : index | |
// Byte lengths used in the bindings below: 4 (scalar), 24 (2x3 input), 108 (3x9 result).
%c4 = constant 4 : index | |
%c24 = constant 24 : index | |
%c108 = constant 108 : index | |
%c1 = constant 1 : index | |
%c0 = constant 0 : index | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
// NOTE(review): 16777248 (0x01000020) is presumably the encoded element type for 32-bit integers - confirm.
%c16777248_i32 = constant 16777248 : i32 | |
// Expected 3x9 result (%mapped); only used for the final comparison.
%dev = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator %dev : !hal.allocator | |
%0 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%mapped = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %0[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
// Scalar 0 constant (%mapped_2, later %4).
%dev_0 = hal.ex.shared_device : !hal.device | |
%allocator_1 = hal.device.allocator %dev_0 : !hal.allocator | |
%1 = iree.byte_buffer.constant : !iree.byte_buffer = dense<0> : tensor<i32> | |
%mapped_2 = hal.allocator.map %allocator_1, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %1[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
// 2x3 input constant (%mapped_5, later %3).
%dev_3 = hal.ex.shared_device : !hal.device | |
%allocator_4 = hal.device.allocator %dev_3 : !hal.allocator | |
%2 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%mapped_5 = hal.allocator.map %allocator_4, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %2[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%3 = iree.do_not_optimize(%mapped_5) : !hal.buffer | |
%4 = iree.do_not_optimize(%mapped_2) : !hal.buffer | |
// 108-byte result buffer shared by both dispatches.
%dev_6 = hal.ex.shared_device : !hal.device | |
%allocator_7 = hal.device.allocator %dev_6 : !hal.allocator | |
%buffer = hal.allocator.allocate %allocator_7, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %c108 : !hal.buffer | |
%cmd = hal.command_buffer.create %dev_6, OneShot, "Transfer|Dispatch" : !hal.command_buffer | |
hal.command_buffer.begin %cmd | |
// First dispatch: scalar (%4, 4 bytes) at binding 0, result buffer (108 bytes, "Write|Discard") at binding 1.
%executable_layout = hal.executable_layout.lookup %dev_6, set_layouts = [[#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Write|Discard">]] : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %executable_layout, set = %c0, bindings = [%c0 = (%4, %c0, %c4), %c1 = (%buffer, %c0, %c108)] | |
hal.device.switch(%dev_6 : !hal.device) | |
#hal.device.match.id<"vulkan*">(%arg0 = %cmd : !hal.command_buffer, %arg1 = %c9 : index, %arg2 = %c3 : index, %arg3 = %c1 : index) { | |
%c1_10 = constant 1 : index | |
// X workgroup count = ceildiv(9 * 3 * 1, 32); Y and Z are 1.
%5 = affine.apply affine_map<()[s0, s1, s2] -> (((s0 * s1) * s2) ceildiv 32)>()[%arg1, %arg2, %arg3] | |
hal.command_buffer.dispatch.symbol %arg0, @pad_test_dispatch_0::@vulkan_spirv::@pad_test_dispatch_0, workgroup_xyz = [%5, %c1_10, %c1_10] | |
hal.return | |
} | |
// Barrier between the two dispatches, which both bind %buffer at binding 1.
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
// Second dispatch: input (%3, 24 bytes) at binding 0, result buffer (108 bytes, "Read|Write") at binding 1.
%executable_layout_8 = hal.executable_layout.lookup %dev_6, set_layouts = [[#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Read|Write">]] : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %executable_layout_8, set = %c0, bindings = [%c0 = (%3, %c0, %c24), %c1 = (%buffer, %c0, %c108)] | |
hal.device.switch(%dev_6 : !hal.device) | |
#hal.device.match.id<"vulkan*">(%arg0 = %cmd : !hal.command_buffer, %arg1 = %c9 : index, %arg2 = %c3 : index, %arg3 = %c1 : index) { | |
%c1_10 = constant 1 : index | |
%5 = affine.apply affine_map<()[s0, s1, s2] -> (((s0 * s1) * s2) ceildiv 32)>()[%arg1, %arg2, %arg3] | |
hal.command_buffer.dispatch.symbol %arg0, @pad_test_dispatch_1::@vulkan_spirv::@pad_test_dispatch_1, workgroup_xyz = [%5, %c1_10, %c1_10] | |
hal.return | |
} | |
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
hal.command_buffer.end %cmd | |
// Submit and block until the recorded work has completed.
hal.ex.submit_and_wait %dev_6, %cmd | |
// Compare the result buffer with the expected constant as 3x9 buffer views.
%view = hal.buffer_view.create %buffer, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
%view_9 = hal.buffer_view.create %mapped, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
check.expect_eq(%view, %view_9) : !hal.buffer_view | |
return | |
} | |
// *** IR Dump After CSE *** | |
// Host-side entry point @pad_test as dumped after CSE.
// A single %dev / %allocator pair is used for every allocation, mapping,
// layout lookup, and the final submit in this version.
func @pad_test() attributes {iree.module.export, iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
// %c-1 is passed as the length operand of every hal.allocator.map below.
// NOTE(review): presumably -1 means "map the whole byte buffer" - confirm.
%c-1 = constant -1 : index | |
// Byte lengths used in the bindings below: 4 (scalar), 24 (2x3 input), 108 (3x9 result).
%c4 = constant 4 : index | |
%c24 = constant 24 : index | |
%c108 = constant 108 : index | |
%c1 = constant 1 : index | |
%c0 = constant 0 : index | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
// NOTE(review): 16777248 (0x01000020) is presumably the encoded element type for 32-bit integers - confirm.
%c16777248_i32 = constant 16777248 : i32 | |
%dev = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator %dev : !hal.allocator | |
// Expected 3x9 result (%mapped); only used for the final comparison.
%0 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%mapped = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %0[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
// Scalar 0 constant (%mapped_0, later %4).
%1 = iree.byte_buffer.constant : !iree.byte_buffer = dense<0> : tensor<i32> | |
%mapped_0 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %1[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
// 2x3 input constant (%mapped_1, later %3).
%2 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%mapped_1 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %2[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%3 = iree.do_not_optimize(%mapped_1) : !hal.buffer | |
%4 = iree.do_not_optimize(%mapped_0) : !hal.buffer | |
// 108-byte result buffer shared by both dispatches.
%buffer = hal.allocator.allocate %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %c108 : !hal.buffer | |
%cmd = hal.command_buffer.create %dev, OneShot, "Transfer|Dispatch" : !hal.command_buffer | |
hal.command_buffer.begin %cmd | |
// First dispatch: scalar (%4, 4 bytes) at binding 0, result buffer (108 bytes, "Write|Discard") at binding 1.
%executable_layout = hal.executable_layout.lookup %dev, set_layouts = [[#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Write|Discard">]] : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %executable_layout, set = %c0, bindings = [%c0 = (%4, %c0, %c4), %c1 = (%buffer, %c0, %c108)] | |
hal.device.switch(%dev : !hal.device) | |
#hal.device.match.id<"vulkan*">(%arg0 = %cmd : !hal.command_buffer, %arg1 = %c9 : index, %arg2 = %c3 : index, %arg3 = %c1 : index) { | |
%c1_4 = constant 1 : index | |
// X workgroup count = ceildiv(9 * 3 * 1, 32); Y and Z are 1.
%5 = affine.apply affine_map<()[s0, s1, s2] -> (((s0 * s1) * s2) ceildiv 32)>()[%arg1, %arg2, %arg3] | |
hal.command_buffer.dispatch.symbol %arg0, @pad_test_dispatch_0::@vulkan_spirv::@pad_test_dispatch_0, workgroup_xyz = [%5, %c1_4, %c1_4] | |
hal.return | |
} | |
// Barrier between the two dispatches, which both bind %buffer at binding 1.
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
// Second dispatch: input (%3, 24 bytes) at binding 0, result buffer (108 bytes, "Read|Write") at binding 1.
%executable_layout_2 = hal.executable_layout.lookup %dev, set_layouts = [[#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Read|Write">]] : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %executable_layout_2, set = %c0, bindings = [%c0 = (%3, %c0, %c24), %c1 = (%buffer, %c0, %c108)] | |
hal.device.switch(%dev : !hal.device) | |
#hal.device.match.id<"vulkan*">(%arg0 = %cmd : !hal.command_buffer, %arg1 = %c9 : index, %arg2 = %c3 : index, %arg3 = %c1 : index) { | |
%c1_4 = constant 1 : index | |
%5 = affine.apply affine_map<()[s0, s1, s2] -> (((s0 * s1) * s2) ceildiv 32)>()[%arg1, %arg2, %arg3] | |
hal.command_buffer.dispatch.symbol %arg0, @pad_test_dispatch_1::@vulkan_spirv::@pad_test_dispatch_1, workgroup_xyz = [%5, %c1_4, %c1_4] | |
hal.return | |
} | |
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
hal.command_buffer.end %cmd | |
// Submit and block until the recorded work has completed.
hal.ex.submit_and_wait %dev, %cmd | |
// Compare the result buffer with the expected constant as 3x9 buffer views.
%view = hal.buffer_view.create %buffer, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
%view_3 = hal.buffer_view.create %mapped, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
check.expect_eq(%view, %view_3) : !hal.buffer_view | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::(anonymous namespace)::PublicABIGenerationPass *** | |
#map = affine_map<()[s0, s1, s2] -> (((s0 * s1) * s2) ceildiv 32)> | |
module { | |
// Executable @pad_test_dispatch_0: a SPIR-V compute kernel that copies the single
// i32 at binding (0, 0) into each of the 27 elements of the buffer at binding (0, 1).
hal.executable @pad_test_dispatch_0 attributes {sym_visibility = "private"} { | |
// Interface: binding 0 is read-only, binding 1 is write/discard.
hal.interface @legacy_io { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
// Entry point region: returns the workgroup count (#map of the three index arguments, 1, 1).
hal.executable.entry_point @pad_test_dispatch_0 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:i32>, !flow.dispatch.tensor<writeonly:3x9xi32>) -> ()} { | |
^bb0(%arg0: index, %arg1: index, %arg2: index): // no predecessors | |
%c1 = constant 1 : index | |
%0 = affine.apply #map()[%arg0, %arg1, %arg2] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
// Output: 27 x i32 at (set 0, binding 1). Input: 1 x i32 at (set 0, binding 0).
spv.GlobalVariable @__resource_var_183968000__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_183752544__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_0() "None" { | |
// %0 = element count bound, %1 = local size in X, %3 = row width used for div/rem.
%0 = spv.Constant 27 : i32 | |
%1 = spv.Constant 32 : i32 | |
%2 = spv.Constant 0 : i32 | |
%3 = spv.Constant 9 : i32 | |
%4 = spv.mlir.addressof @__resource_var_183752544__ : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
%5 = spv.mlir.addressof @__resource_var_183968000__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
// Load the scalar fill value; this happens before the bounds check.
%6 = spv.AccessChain %4[%2, %2] : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%7 = spv.Load "StorageBuffer" %6 : i32 | |
%8 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%9 = spv.Load "Input" %8 : vector<3xi32> | |
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32> | |
%11 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%12 = spv.Load "Input" %11 : vector<3xi32> | |
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32> | |
// Linear index %15 = WorkgroupId.x * 32 + LocalInvocationId.x.
%14 = spv.IMul %10, %1 : i32 | |
%15 = spv.IAdd %14, %13 : i32 | |
// Only invocations with %15 < 27 perform the store.
%16 = spv.SLessThan %15, %0 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %16, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
// %17 = %15 / 9.
%17 = spv.SDiv %15, %3 : i32 | |
// %18..%23: |%15| umod |9|, negated when %15 differs from its absolute value.
%18 = spv.GLSL.SAbs %15 : i32 | |
%19 = spv.GLSL.SAbs %3 : i32 | |
%20 = spv.UMod %18, %19 : i32 | |
%21 = spv.IEqual %15, %18 : i32 | |
%22 = spv.SNegate %20 : i32 | |
%23 = spv.Select %21, %20, %22 : i1, i32 | |
// Destination element index %25 = %17 * 9 + %23.
%24 = spv.IMul %17, %3 : i32 | |
%25 = spv.IAdd %24, %23 : i32 | |
%26 = spv.AccessChain %5[%2, %25] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %26, %7 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
spv.EntryPoint "GLCompute" @pad_test_dispatch_0, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
// Workgroup size 32x1x1, matching the multiplier 32 used for the linear index.
spv.ExecutionMode @pad_test_dispatch_0 "LocalSize", 32, 1, 1 | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
} | |
} | |
// Executable @pad_test_dispatch_1: a SPIR-V compute kernel that copies the 6 input
// elements at binding (0, 0) into the 27-element buffer at binding (0, 1).
// Source index is row * 3 + col; destination index is row * 9 + col + 1.
hal.executable @pad_test_dispatch_1 attributes {sym_visibility = "private"} { | |
// Interface: binding 0 is read-only, binding 1 is read/write.
hal.interface @legacy_io { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
// Entry point region: returns the workgroup count (#map of the three index arguments, 1, 1).
hal.executable.entry_point @pad_test_dispatch_1 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:2x3xi32>, !flow.dispatch.tensor<readwrite:3x9xi32>) -> ()} { | |
^bb0(%arg0: index, %arg1: index, %arg2: index): // no predecessors | |
%c1 = constant 1 : index | |
%0 = affine.apply #map()[%arg0, %arg1, %arg2] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
// Output: 27 x i32 at (set 0, binding 1). Input: 6 x i32 at (set 0, binding 0).
spv.GlobalVariable @__resource_var_184152960__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_183752544__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_1() "None" { | |
// %0 = input element count bound, %1 = column offset added on the output side,
// %2 = local size in X, %3 = input row width, %5 = output row width.
%0 = spv.Constant 6 : i32 | |
%1 = spv.Constant 1 : i32 | |
%2 = spv.Constant 32 : i32 | |
%3 = spv.Constant 3 : i32 | |
%4 = spv.Constant 0 : i32 | |
%5 = spv.Constant 9 : i32 | |
%6 = spv.mlir.addressof @__resource_var_183752544__ : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer> | |
%7 = spv.mlir.addressof @__resource_var_184152960__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
%8 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%9 = spv.Load "Input" %8 : vector<3xi32> | |
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32> | |
%11 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%12 = spv.Load "Input" %11 : vector<3xi32> | |
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32> | |
// Linear index %15 = WorkgroupId.x * 32 + LocalInvocationId.x.
%14 = spv.IMul %10, %2 : i32 | |
%15 = spv.IAdd %14, %13 : i32 | |
// Only invocations with %15 < 6 perform the copy.
%16 = spv.SLessThan %15, %0 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %16, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
// Row %17 = %15 / 3.
%17 = spv.SDiv %15, %3 : i32 | |
// Column %23: |%15| umod |3|, negated when %15 differs from its absolute value.
%18 = spv.GLSL.SAbs %15 : i32 | |
%19 = spv.GLSL.SAbs %3 : i32 | |
%20 = spv.UMod %18, %19 : i32 | |
%21 = spv.IEqual %15, %18 : i32 | |
%22 = spv.SNegate %20 : i32 | |
%23 = spv.Select %21, %20, %22 : i1, i32 | |
// Load input element at row * 3 + col.
%24 = spv.IMul %17, %3 : i32 | |
%25 = spv.IAdd %24, %23 : i32 | |
%26 = spv.AccessChain %6[%4, %25] : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%27 = spv.Load "StorageBuffer" %26 : i32 | |
// Store it at output index row * 9 + (col + 1).
%28 = spv.IAdd %23, %1 : i32 | |
%29 = spv.IMul %17, %5 : i32 | |
%30 = spv.IAdd %29, %28 : i32 | |
%31 = spv.AccessChain %7[%4, %30] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %31, %27 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
spv.EntryPoint "GLCompute" @pad_test_dispatch_1, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
// Workgroup size 32x1x1, matching the multiplier 32 used for the linear index.
spv.ExecutionMode @pad_test_dispatch_1 "LocalSize", 32, 1, 1 | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
} | |
} | |
} | |
// Host-side function @pad_test as dumped after PublicABIGenerationPass.
// It is exported as "pad_test$raw" and marked noinline. It maps three constants
// to buffers, records two dispatches into one command buffer, waits for
// completion, and checks the 3x9 result against the expected constant.
func @pad_test() attributes {iree.module.export = "pad_test$raw", noinline} { | |
// %c-1 is passed as the length operand of every hal.allocator.map below.
// NOTE(review): presumably -1 means "map the whole byte buffer" - confirm.
%c-1 = constant -1 : index | |
// Byte lengths used in the bindings below: 4 (scalar), 24 (2x3 input), 108 (3x9 result).
%c4 = constant 4 : index | |
%c24 = constant 24 : index | |
%c108 = constant 108 : index | |
%c1 = constant 1 : index | |
%c0 = constant 0 : index | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
// NOTE(review): 16777248 (0x01000020) is presumably the encoded element type for 32-bit integers - confirm.
%c16777248_i32 = constant 16777248 : i32 | |
%dev = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator %dev : !hal.allocator | |
// Expected 3x9 result (%mapped); only used for the final comparison.
%0 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%mapped = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %0[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
// Scalar 0 constant (%mapped_0, later %4).
%1 = iree.byte_buffer.constant : !iree.byte_buffer = dense<0> : tensor<i32> | |
%mapped_0 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %1[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
// 2x3 input constant (%mapped_1, later %3).
%2 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%mapped_1 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %2[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%3 = iree.do_not_optimize(%mapped_1) : !hal.buffer | |
%4 = iree.do_not_optimize(%mapped_0) : !hal.buffer | |
// 108-byte result buffer shared by both dispatches.
%buffer = hal.allocator.allocate %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %c108 : !hal.buffer | |
%cmd = hal.command_buffer.create %dev, OneShot, "Transfer|Dispatch" : !hal.command_buffer | |
hal.command_buffer.begin %cmd | |
// First dispatch: scalar (%4, 4 bytes) at binding 0, result buffer (108 bytes, "Write|Discard") at binding 1.
%executable_layout = hal.executable_layout.lookup %dev, set_layouts = [[#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Write|Discard">]] : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %executable_layout, set = %c0, bindings = [%c0 = (%4, %c0, %c4), %c1 = (%buffer, %c0, %c108)] | |
hal.device.switch(%dev : !hal.device) | |
#hal.device.match.id<"vulkan*">(%arg0 = %cmd : !hal.command_buffer, %arg1 = %c9 : index, %arg2 = %c3 : index, %arg3 = %c1 : index) { | |
%c1_4 = constant 1 : index | |
// X workgroup count = #map applied to (9, 3, 1), i.e. ceildiv(9 * 3 * 1, 32); Y and Z are 1.
%5 = affine.apply #map()[%arg1, %arg2, %arg3] | |
hal.command_buffer.dispatch.symbol %arg0, @pad_test_dispatch_0::@vulkan_spirv::@pad_test_dispatch_0, workgroup_xyz = [%5, %c1_4, %c1_4] | |
hal.return | |
} | |
// Barrier between the two dispatches, which both bind %buffer at binding 1.
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
// Second dispatch: input (%3, 24 bytes) at binding 0, result buffer (108 bytes, "Read|Write") at binding 1.
%executable_layout_2 = hal.executable_layout.lookup %dev, set_layouts = [[#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Read|Write">]] : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %executable_layout_2, set = %c0, bindings = [%c0 = (%3, %c0, %c24), %c1 = (%buffer, %c0, %c108)] | |
hal.device.switch(%dev : !hal.device) | |
#hal.device.match.id<"vulkan*">(%arg0 = %cmd : !hal.command_buffer, %arg1 = %c9 : index, %arg2 = %c3 : index, %arg3 = %c1 : index) { | |
%c1_4 = constant 1 : index | |
%5 = affine.apply #map()[%arg1, %arg2, %arg3] | |
hal.command_buffer.dispatch.symbol %arg0, @pad_test_dispatch_1::@vulkan_spirv::@pad_test_dispatch_1, workgroup_xyz = [%5, %c1_4, %c1_4] | |
hal.return | |
} | |
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
hal.command_buffer.end %cmd | |
// Submit and block until the recorded work has completed.
hal.ex.submit_and_wait %dev, %cmd | |
// Compare the result buffer with the expected constant as 3x9 buffer views.
%view = hal.buffer_view.create %buffer, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
%view_3 = hal.buffer_view.create %mapped, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
check.expect_eq(%view, %view_3) : !hal.buffer_view | |
return | |
} | |
func @pad_test$async(%arg0: !hal.semaphore, %arg1: index, %arg2: !hal.semaphore, %arg3: index) attributes {iree.module.export = "pad_test$async"} { | |
%0 = hal.semaphore.await %arg0, min_value = %arg1 : i32 | |
hal.check_success %0, "semaphore wait failed" | |
call @pad_test() : () -> () | |
hal.semaphore.signal %arg2, value = %arg3 | |
return | |
} | |
func @pad_test$sync() attributes {iree.abi.stub, iree.module.export = "pad_test", iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%c0 = constant 0 : index | |
%c1 = constant 1 : index | |
%dev = hal.ex.shared_device : !hal.device | |
%semaphore = hal.semaphore.create %dev, initial_value = %c0 : !hal.semaphore | |
call @pad_test$async(%semaphore, %c0, %semaphore, %c1) : (!hal.semaphore, index, !hal.semaphore, index) -> () | |
%0 = hal.semaphore.await %semaphore, min_value = %c1 : i32 | |
hal.check_success %0, "semaphore wait failed" | |
return | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::LinkExecutablesPass *** | |
#map = affine_map<()[s0, s1, s2] -> (((s0 * s1) * s2) ceildiv 32)> | |
module { | |
hal.executable @pad_test_dispatch_0 attributes {sym_visibility = "private"} { | |
hal.interface @legacy_io { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
hal.executable.entry_point @pad_test_dispatch_0 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:i32>, !flow.dispatch.tensor<writeonly:3x9xi32>) -> ()} { | |
^bb0(%arg0: index, %arg1: index, %arg2: index): // no predecessors | |
%c1 = constant 1 : index | |
%0 = affine.apply #map()[%arg0, %arg1, %arg2] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__resource_var_183968000__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_183752544__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_0() "None" { | |
%0 = spv.Constant 27 : i32 | |
%1 = spv.Constant 32 : i32 | |
%2 = spv.Constant 0 : i32 | |
%3 = spv.Constant 9 : i32 | |
%4 = spv.mlir.addressof @__resource_var_183752544__ : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
%5 = spv.mlir.addressof @__resource_var_183968000__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
%6 = spv.AccessChain %4[%2, %2] : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%7 = spv.Load "StorageBuffer" %6 : i32 | |
%8 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%9 = spv.Load "Input" %8 : vector<3xi32> | |
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32> | |
%11 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%12 = spv.Load "Input" %11 : vector<3xi32> | |
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32> | |
%14 = spv.IMul %10, %1 : i32 | |
%15 = spv.IAdd %14, %13 : i32 | |
%16 = spv.SLessThan %15, %0 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %16, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%17 = spv.SDiv %15, %3 : i32 | |
%18 = spv.GLSL.SAbs %15 : i32 | |
%19 = spv.GLSL.SAbs %3 : i32 | |
%20 = spv.UMod %18, %19 : i32 | |
%21 = spv.IEqual %15, %18 : i32 | |
%22 = spv.SNegate %20 : i32 | |
%23 = spv.Select %21, %20, %22 : i1, i32 | |
%24 = spv.IMul %17, %3 : i32 | |
%25 = spv.IAdd %24, %23 : i32 | |
%26 = spv.AccessChain %5[%2, %25] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %26, %7 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
spv.EntryPoint "GLCompute" @pad_test_dispatch_0, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
spv.ExecutionMode @pad_test_dispatch_0 "LocalSize", 32, 1, 1 | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
} | |
} | |
hal.executable @pad_test_dispatch_1 attributes {sym_visibility = "private"} { | |
hal.interface @legacy_io { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
hal.executable.entry_point @pad_test_dispatch_1 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:2x3xi32>, !flow.dispatch.tensor<readwrite:3x9xi32>) -> ()} { | |
^bb0(%arg0: index, %arg1: index, %arg2: index): // no predecessors | |
%c1 = constant 1 : index | |
%0 = affine.apply #map()[%arg0, %arg1, %arg2] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__resource_var_184152960__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_183752544__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_1() "None" { | |
%0 = spv.Constant 6 : i32 | |
%1 = spv.Constant 1 : i32 | |
%2 = spv.Constant 32 : i32 | |
%3 = spv.Constant 3 : i32 | |
%4 = spv.Constant 0 : i32 | |
%5 = spv.Constant 9 : i32 | |
%6 = spv.mlir.addressof @__resource_var_183752544__ : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer> | |
%7 = spv.mlir.addressof @__resource_var_184152960__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
%8 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%9 = spv.Load "Input" %8 : vector<3xi32> | |
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32> | |
%11 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%12 = spv.Load "Input" %11 : vector<3xi32> | |
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32> | |
%14 = spv.IMul %10, %2 : i32 | |
%15 = spv.IAdd %14, %13 : i32 | |
%16 = spv.SLessThan %15, %0 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %16, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%17 = spv.SDiv %15, %3 : i32 | |
%18 = spv.GLSL.SAbs %15 : i32 | |
%19 = spv.GLSL.SAbs %3 : i32 | |
%20 = spv.UMod %18, %19 : i32 | |
%21 = spv.IEqual %15, %18 : i32 | |
%22 = spv.SNegate %20 : i32 | |
%23 = spv.Select %21, %20, %22 : i1, i32 | |
%24 = spv.IMul %17, %3 : i32 | |
%25 = spv.IAdd %24, %23 : i32 | |
%26 = spv.AccessChain %6[%4, %25] : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%27 = spv.Load "StorageBuffer" %26 : i32 | |
%28 = spv.IAdd %23, %1 : i32 | |
%29 = spv.IMul %17, %5 : i32 | |
%30 = spv.IAdd %29, %28 : i32 | |
%31 = spv.AccessChain %7[%4, %30] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %31, %27 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
spv.EntryPoint "GLCompute" @pad_test_dispatch_1, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
spv.ExecutionMode @pad_test_dispatch_1 "LocalSize", 32, 1, 1 | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
} | |
} | |
} | |
func @pad_test() attributes {iree.module.export = "pad_test$raw", noinline} { | |
%c-1 = constant -1 : index | |
%c4 = constant 4 : index | |
%c24 = constant 24 : index | |
%c108 = constant 108 : index | |
%c1 = constant 1 : index | |
%c0 = constant 0 : index | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
%c16777248_i32 = constant 16777248 : i32 | |
%dev = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator %dev : !hal.allocator | |
%0 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%mapped = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %0[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%1 = iree.byte_buffer.constant : !iree.byte_buffer = dense<0> : tensor<i32> | |
%mapped_0 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %1[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%2 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%mapped_1 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %2[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%3 = iree.do_not_optimize(%mapped_1) : !hal.buffer | |
%4 = iree.do_not_optimize(%mapped_0) : !hal.buffer | |
%buffer = hal.allocator.allocate %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %c108 : !hal.buffer | |
%cmd = hal.command_buffer.create %dev, OneShot, "Transfer|Dispatch" : !hal.command_buffer | |
hal.command_buffer.begin %cmd | |
%executable_layout = hal.executable_layout.lookup %dev, set_layouts = [[#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Write|Discard">]] : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %executable_layout, set = %c0, bindings = [%c0 = (%4, %c0, %c4), %c1 = (%buffer, %c0, %c108)] | |
hal.device.switch(%dev : !hal.device) | |
#hal.device.match.id<"vulkan*">(%arg0 = %cmd : !hal.command_buffer, %arg1 = %c9 : index, %arg2 = %c3 : index, %arg3 = %c1 : index) { | |
%c1_4 = constant 1 : index | |
%5 = affine.apply #map()[%arg1, %arg2, %arg3] | |
hal.command_buffer.dispatch.symbol %arg0, @pad_test_dispatch_0::@vulkan_spirv::@pad_test_dispatch_0, workgroup_xyz = [%5, %c1_4, %c1_4] | |
hal.return | |
} | |
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
%executable_layout_2 = hal.executable_layout.lookup %dev, set_layouts = [[#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Read|Write">]] : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %executable_layout_2, set = %c0, bindings = [%c0 = (%3, %c0, %c24), %c1 = (%buffer, %c0, %c108)] | |
hal.device.switch(%dev : !hal.device) | |
#hal.device.match.id<"vulkan*">(%arg0 = %cmd : !hal.command_buffer, %arg1 = %c9 : index, %arg2 = %c3 : index, %arg3 = %c1 : index) { | |
%c1_4 = constant 1 : index | |
%5 = affine.apply #map()[%arg1, %arg2, %arg3] | |
hal.command_buffer.dispatch.symbol %arg0, @pad_test_dispatch_1::@vulkan_spirv::@pad_test_dispatch_1, workgroup_xyz = [%5, %c1_4, %c1_4] | |
hal.return | |
} | |
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
hal.command_buffer.end %cmd | |
hal.ex.submit_and_wait %dev, %cmd | |
%view = hal.buffer_view.create %buffer, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
%view_3 = hal.buffer_view.create %mapped, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
check.expect_eq(%view, %view_3) : !hal.buffer_view | |
return | |
} | |
func @pad_test$async(%arg0: !hal.semaphore, %arg1: index, %arg2: !hal.semaphore, %arg3: index) attributes {iree.module.export = "pad_test$async"} { | |
%0 = hal.semaphore.await %arg0, min_value = %arg1 : i32 | |
hal.check_success %0, "semaphore wait failed" | |
call @pad_test() : () -> () | |
hal.semaphore.signal %arg2, value = %arg3 | |
return | |
} | |
func @pad_test$sync() attributes {iree.abi.stub, iree.module.export = "pad_test", iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%c0 = constant 0 : index | |
%c1 = constant 1 : index | |
%dev = hal.ex.shared_device : !hal.device | |
%semaphore = hal.semaphore.create %dev, initial_value = %c0 : !hal.semaphore | |
call @pad_test$async(%semaphore, %c0, %semaphore, %c1) : (!hal.semaphore, index, !hal.semaphore, index) -> () | |
%0 = hal.semaphore.await %semaphore, min_value = %c1 : i32 | |
hal.check_success %0, "semaphore wait failed" | |
return | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::ResolveEntryPointOrdinalsPass *** | |
#map = affine_map<()[s0, s1, s2] -> (((s0 * s1) * s2) ceildiv 32)> | |
module { | |
hal.executable @pad_test_dispatch_0 attributes {sym_visibility = "private"} { | |
hal.interface @legacy_io { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
hal.executable.entry_point @pad_test_dispatch_0 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:i32>, !flow.dispatch.tensor<writeonly:3x9xi32>) -> ()} { | |
^bb0(%arg0: index, %arg1: index, %arg2: index): // no predecessors | |
%c1 = constant 1 : index | |
%0 = affine.apply #map()[%arg0, %arg1, %arg2] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__resource_var_183968000__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_183752544__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_0() "None" { | |
%0 = spv.Constant 27 : i32 | |
%1 = spv.Constant 32 : i32 | |
%2 = spv.Constant 0 : i32 | |
%3 = spv.Constant 9 : i32 | |
%4 = spv.mlir.addressof @__resource_var_183752544__ : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
%5 = spv.mlir.addressof @__resource_var_183968000__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
%6 = spv.AccessChain %4[%2, %2] : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%7 = spv.Load "StorageBuffer" %6 : i32 | |
%8 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%9 = spv.Load "Input" %8 : vector<3xi32> | |
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32> | |
%11 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%12 = spv.Load "Input" %11 : vector<3xi32> | |
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32> | |
%14 = spv.IMul %10, %1 : i32 | |
%15 = spv.IAdd %14, %13 : i32 | |
%16 = spv.SLessThan %15, %0 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %16, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%17 = spv.SDiv %15, %3 : i32 | |
%18 = spv.GLSL.SAbs %15 : i32 | |
%19 = spv.GLSL.SAbs %3 : i32 | |
%20 = spv.UMod %18, %19 : i32 | |
%21 = spv.IEqual %15, %18 : i32 | |
%22 = spv.SNegate %20 : i32 | |
%23 = spv.Select %21, %20, %22 : i1, i32 | |
%24 = spv.IMul %17, %3 : i32 | |
%25 = spv.IAdd %24, %23 : i32 | |
%26 = spv.AccessChain %5[%2, %25] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %26, %7 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
spv.EntryPoint "GLCompute" @pad_test_dispatch_0, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
spv.ExecutionMode @pad_test_dispatch_0 "LocalSize", 32, 1, 1 | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
} | |
} | |
hal.executable @pad_test_dispatch_1 attributes {sym_visibility = "private"} { | |
hal.interface @legacy_io { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
hal.executable.entry_point @pad_test_dispatch_1 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:2x3xi32>, !flow.dispatch.tensor<readwrite:3x9xi32>) -> ()} { | |
^bb0(%arg0: index, %arg1: index, %arg2: index): // no predecessors | |
%c1 = constant 1 : index | |
%0 = affine.apply #map()[%arg0, %arg1, %arg2] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__resource_var_184152960__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_183752544__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_1() "None" { | |
%0 = spv.Constant 6 : i32 | |
%1 = spv.Constant 1 : i32 | |
%2 = spv.Constant 32 : i32 | |
%3 = spv.Constant 3 : i32 | |
%4 = spv.Constant 0 : i32 | |
%5 = spv.Constant 9 : i32 | |
%6 = spv.mlir.addressof @__resource_var_183752544__ : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer> | |
%7 = spv.mlir.addressof @__resource_var_184152960__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
%8 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%9 = spv.Load "Input" %8 : vector<3xi32> | |
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32> | |
%11 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%12 = spv.Load "Input" %11 : vector<3xi32> | |
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32> | |
%14 = spv.IMul %10, %2 : i32 | |
%15 = spv.IAdd %14, %13 : i32 | |
%16 = spv.SLessThan %15, %0 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %16, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%17 = spv.SDiv %15, %3 : i32 | |
%18 = spv.GLSL.SAbs %15 : i32 | |
%19 = spv.GLSL.SAbs %3 : i32 | |
%20 = spv.UMod %18, %19 : i32 | |
%21 = spv.IEqual %15, %18 : i32 | |
%22 = spv.SNegate %20 : i32 | |
%23 = spv.Select %21, %20, %22 : i1, i32 | |
%24 = spv.IMul %17, %3 : i32 | |
%25 = spv.IAdd %24, %23 : i32 | |
%26 = spv.AccessChain %6[%4, %25] : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%27 = spv.Load "StorageBuffer" %26 : i32 | |
%28 = spv.IAdd %23, %1 : i32 | |
%29 = spv.IMul %17, %5 : i32 | |
%30 = spv.IAdd %29, %28 : i32 | |
%31 = spv.AccessChain %7[%4, %30] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %31, %27 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
spv.EntryPoint "GLCompute" @pad_test_dispatch_1, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
spv.ExecutionMode @pad_test_dispatch_1 "LocalSize", 32, 1, 1 | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
} | |
} | |
} | |
func @pad_test() attributes {iree.module.export = "pad_test$raw", noinline} { | |
%c-1 = constant -1 : index | |
%c4 = constant 4 : index | |
%c24 = constant 24 : index | |
%c108 = constant 108 : index | |
%c1 = constant 1 : index | |
%c0 = constant 0 : index | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
%c16777248_i32 = constant 16777248 : i32 | |
%dev = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator %dev : !hal.allocator | |
%0 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%mapped = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %0[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%1 = iree.byte_buffer.constant : !iree.byte_buffer = dense<0> : tensor<i32> | |
%mapped_0 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %1[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%2 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%mapped_1 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %2[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%3 = iree.do_not_optimize(%mapped_1) : !hal.buffer | |
%4 = iree.do_not_optimize(%mapped_0) : !hal.buffer | |
%buffer = hal.allocator.allocate %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %c108 : !hal.buffer | |
%cmd = hal.command_buffer.create %dev, OneShot, "Transfer|Dispatch" : !hal.command_buffer | |
hal.command_buffer.begin %cmd | |
%executable_layout = hal.executable_layout.lookup %dev, set_layouts = [[#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Write|Discard">]] : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %executable_layout, set = %c0, bindings = [%c0 = (%4, %c0, %c4), %c1 = (%buffer, %c0, %c108)] | |
hal.device.switch(%dev : !hal.device) | |
#hal.device.match.id<"vulkan*">(%arg0 = %cmd : !hal.command_buffer, %arg1 = %c9 : index, %arg2 = %c3 : index, %arg3 = %c1 : index) { | |
%c1_4 = constant 1 : index | |
%5 = affine.apply #map()[%arg1, %arg2, %arg3] | |
%6 = hal.command_buffer.device %arg0 : !hal.device | |
%exe = hal.executable.lookup %6, @pad_test_dispatch_0 : !hal.executable | |
hal.command_buffer.dispatch %arg0, %exe, entry_point = 0, workgroup_xyz = [%5, %c1_4, %c1_4] | |
hal.return | |
} | |
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
%executable_layout_2 = hal.executable_layout.lookup %dev, set_layouts = [[#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Read|Write">]] : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %executable_layout_2, set = %c0, bindings = [%c0 = (%3, %c0, %c24), %c1 = (%buffer, %c0, %c108)] | |
hal.device.switch(%dev : !hal.device) | |
#hal.device.match.id<"vulkan*">(%arg0 = %cmd : !hal.command_buffer, %arg1 = %c9 : index, %arg2 = %c3 : index, %arg3 = %c1 : index) { | |
%c1_4 = constant 1 : index | |
%5 = affine.apply #map()[%arg1, %arg2, %arg3] | |
%6 = hal.command_buffer.device %arg0 : !hal.device | |
%exe = hal.executable.lookup %6, @pad_test_dispatch_1 : !hal.executable | |
hal.command_buffer.dispatch %arg0, %exe, entry_point = 0, workgroup_xyz = [%5, %c1_4, %c1_4] | |
hal.return | |
} | |
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
hal.command_buffer.end %cmd | |
hal.ex.submit_and_wait %dev, %cmd | |
%view = hal.buffer_view.create %buffer, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
%view_3 = hal.buffer_view.create %mapped, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
check.expect_eq(%view, %view_3) : !hal.buffer_view | |
return | |
} | |
func @pad_test$async(%arg0: !hal.semaphore, %arg1: index, %arg2: !hal.semaphore, %arg3: index) attributes {iree.module.export = "pad_test$async"} { | |
%0 = hal.semaphore.await %arg0, min_value = %arg1 : i32 | |
hal.check_success %0, "semaphore wait failed" | |
call @pad_test() : () -> () | |
hal.semaphore.signal %arg2, value = %arg3 | |
return | |
} | |
func @pad_test$sync() attributes {iree.abi.stub, iree.module.export = "pad_test", iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%c0 = constant 0 : index | |
%c1 = constant 1 : index | |
%dev = hal.ex.shared_device : !hal.device | |
%semaphore = hal.semaphore.create %dev, initial_value = %c0 : !hal.semaphore | |
call @pad_test$async(%semaphore, %c0, %semaphore, %c1) : (!hal.semaphore, index, !hal.semaphore, index) -> () | |
%0 = hal.semaphore.await %semaphore, min_value = %c1 : i32 | |
hal.check_success %0, "semaphore wait failed" | |
return | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
func @pad_test() attributes {iree.module.export = "pad_test$raw", noinline} { | |
%c-1 = constant -1 : index | |
%c4 = constant 4 : index | |
%c24 = constant 24 : index | |
%c108 = constant 108 : index | |
%c1 = constant 1 : index | |
%c0 = constant 0 : index | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
%c16777248_i32 = constant 16777248 : i32 | |
%dev = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator %dev : !hal.allocator | |
%0 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%mapped = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %0[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%1 = iree.byte_buffer.constant : !iree.byte_buffer = dense<0> : tensor<i32> | |
%mapped_0 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %1[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%2 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%mapped_1 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %2[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%3 = iree.do_not_optimize(%mapped_1) : !hal.buffer | |
%4 = iree.do_not_optimize(%mapped_0) : !hal.buffer | |
%buffer = hal.allocator.allocate %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %c108 : !hal.buffer | |
%cmd = hal.command_buffer.create %dev, OneShot, "Transfer|Dispatch" : !hal.command_buffer | |
hal.command_buffer.begin %cmd | |
%executable_layout = hal.executable_layout.lookup %dev, set_layouts = [[#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Write|Discard">]] : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %executable_layout, set = %c0, bindings = [%c0 = (%4, %c0, %c4), %c1 = (%buffer, %c0, %c108)] | |
hal.device.switch(%dev : !hal.device) | |
#hal.device.match.id<"vulkan*">(%arg0 = %cmd : !hal.command_buffer, %arg1 = %c9 : index, %arg2 = %c3 : index, %arg3 = %c1 : index) { | |
%c1_4 = constant 1 : index | |
%5 = affine.apply affine_map<()[s0, s1, s2] -> (((s0 * s1) * s2) ceildiv 32)>()[%arg1, %arg2, %arg3] | |
%6 = hal.command_buffer.device %arg0 : !hal.device | |
%exe = hal.executable.lookup %6, @pad_test_dispatch_0 : !hal.executable | |
hal.command_buffer.dispatch %arg0, %exe, entry_point = 0, workgroup_xyz = [%5, %c1_4, %c1_4] | |
hal.return | |
} | |
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
%executable_layout_2 = hal.executable_layout.lookup %dev, set_layouts = [[#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Read|Write">]] : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %executable_layout_2, set = %c0, bindings = [%c0 = (%3, %c0, %c24), %c1 = (%buffer, %c0, %c108)] | |
hal.device.switch(%dev : !hal.device) | |
#hal.device.match.id<"vulkan*">(%arg0 = %cmd : !hal.command_buffer, %arg1 = %c9 : index, %arg2 = %c3 : index, %arg3 = %c1 : index) { | |
%c1_4 = constant 1 : index | |
%5 = affine.apply affine_map<()[s0, s1, s2] -> (((s0 * s1) * s2) ceildiv 32)>()[%arg1, %arg2, %arg3] | |
%6 = hal.command_buffer.device %arg0 : !hal.device | |
%exe = hal.executable.lookup %6, @pad_test_dispatch_1 : !hal.executable | |
hal.command_buffer.dispatch %arg0, %exe, entry_point = 0, workgroup_xyz = [%5, %c1_4, %c1_4] | |
hal.return | |
} | |
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
hal.command_buffer.end %cmd | |
hal.ex.submit_and_wait %dev, %cmd | |
%view = hal.buffer_view.create %buffer, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
%view_3 = hal.buffer_view.create %mapped, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
check.expect_eq(%view, %view_3) : !hal.buffer_view | |
return | |
} | |
// *** IR Dump After CSE *** | |
// Raw entry point of the pad test (exported as "pad_test$raw", marked noinline).
// It maps three constant byte buffers into HAL buffers, allocates a 108-byte
// result buffer, records two dispatches (@pad_test_dispatch_0 followed by
// @pad_test_dispatch_1) with an execution barrier after each, submits the
// command buffer and waits, then compares the result against the expected
// 3x9 constant with check.expect_eq.
func @pad_test() attributes {iree.module.export = "pad_test$raw", noinline} { | |
%c-1 = constant -1 : index | |
%c4 = constant 4 : index | |
%c24 = constant 24 : index | |
%c108 = constant 108 : index | |
%c1 = constant 1 : index | |
%c0 = constant 0 : index | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
%c16777248_i32 = constant 16777248 : i32 | |
%dev = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator %dev : !hal.allocator | |
// %0 holds the expected 3x9 result, %1 the scalar 0 passed as the pad value, and
// %2 the 2x3 input. Each is mapped from offset %c0 with length %c-1.
// NOTE(review): a length of -1 presumably means "whole buffer" - confirm.
%0 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%mapped = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %0[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%1 = iree.byte_buffer.constant : !iree.byte_buffer = dense<0> : tensor<i32> | |
%mapped_0 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %1[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%2 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%mapped_1 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %2[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
// The input (%3) and pad value (%4) are routed through iree.do_not_optimize;
// the expected-result buffer %mapped is not.
%3 = iree.do_not_optimize(%mapped_1) : !hal.buffer | |
%4 = iree.do_not_optimize(%mapped_0) : !hal.buffer | |
// Result buffer of 108 bytes, which matches 27 (3x9) i32 elements at 4 bytes each.
%buffer = hal.allocator.allocate %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %c108 : !hal.buffer | |
%cmd = hal.command_buffer.create %dev, OneShot, "Transfer|Dispatch" : !hal.command_buffer | |
hal.command_buffer.begin %cmd | |
// Dispatch 0: binding 0 = pad value (%4, 4 bytes, "Read"),
// binding 1 = result buffer (108 bytes, "Write|Discard").
%executable_layout = hal.executable_layout.lookup %dev, set_layouts = [[#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Write|Discard">]] : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %executable_layout, set = %c0, bindings = [%c0 = (%4, %c0, %c4), %c1 = (%buffer, %c0, %c108)] | |
hal.device.switch(%dev : !hal.device) | |
#hal.device.match.id<"vulkan*">(%arg0 = %cmd : !hal.command_buffer, %arg1 = %c9 : index, %arg2 = %c3 : index, %arg3 = %c1 : index) { | |
%c1_4 = constant 1 : index | |
// Workgroup count X = ceildiv(%arg1 * %arg2 * %arg3, 32), i.e. ceildiv(9 * 3 * 1, 32); Y and Z are 1.
%5 = affine.apply affine_map<()[s0, s1, s2] -> (((s0 * s1) * s2) ceildiv 32)>()[%arg1, %arg2, %arg3] | |
%6 = hal.command_buffer.device %arg0 : !hal.device | |
%exe = hal.executable.lookup %6, @pad_test_dispatch_0 : !hal.executable | |
hal.command_buffer.dispatch %arg0, %exe, entry_point = 0, workgroup_xyz = [%5, %c1_4, %c1_4] | |
hal.return | |
} | |
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
// Dispatch 1: binding 0 = input (%3, 24 bytes, "Read"),
// binding 1 = the same result buffer, now bound as "Read|Write".
%executable_layout_2 = hal.executable_layout.lookup %dev, set_layouts = [[#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Read|Write">]] : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %executable_layout_2, set = %c0, bindings = [%c0 = (%3, %c0, %c24), %c1 = (%buffer, %c0, %c108)] | |
hal.device.switch(%dev : !hal.device) | |
#hal.device.match.id<"vulkan*">(%arg0 = %cmd : !hal.command_buffer, %arg1 = %c9 : index, %arg2 = %c3 : index, %arg3 = %c1 : index) { | |
%c1_4 = constant 1 : index | |
// Same workgroup-count computation as for dispatch 0.
%5 = affine.apply affine_map<()[s0, s1, s2] -> (((s0 * s1) * s2) ceildiv 32)>()[%arg1, %arg2, %arg3] | |
%6 = hal.command_buffer.device %arg0 : !hal.device | |
%exe = hal.executable.lookup %6, @pad_test_dispatch_1 : !hal.executable | |
hal.command_buffer.dispatch %arg0, %exe, entry_point = 0, workgroup_xyz = [%5, %c1_4, %c1_4] | |
hal.return | |
} | |
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
hal.command_buffer.end %cmd | |
hal.ex.submit_and_wait %dev, %cmd | |
// Wrap the computed result and the expected constant as 3x9 buffer views and compare them.
// NOTE(review): element_type 16777248 presumably encodes i32 - confirm against the HAL element type encoding.
%view = hal.buffer_view.create %buffer, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
%view_3 = hal.buffer_view.create %mapped, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
check.expect_eq(%view, %view_3) : !hal.buffer_view | |
return | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Async wrapper exported as "pad_test$async".
// Awaits semaphore %arg0 with min_value %arg1, checks the returned status,
// calls @pad_test, then signals semaphore %arg2 with value %arg3.
func @pad_test$async(%arg0: !hal.semaphore, %arg1: index, %arg2: !hal.semaphore, %arg3: index) attributes {iree.module.export = "pad_test$async"} { | |
%0 = hal.semaphore.await %arg0, min_value = %arg1 : i32 | |
hal.check_success %0, "semaphore wait failed" | |
call @pad_test() : () -> () | |
hal.semaphore.signal %arg2, value = %arg3 | |
return | |
} | |
// *** IR Dump After CSE *** | |
// Async wrapper exported as "pad_test$async" (text is identical to the dump before CSE).
// Awaits semaphore %arg0 with min_value %arg1, checks the returned status,
// calls @pad_test, then signals semaphore %arg2 with value %arg3.
func @pad_test$async(%arg0: !hal.semaphore, %arg1: index, %arg2: !hal.semaphore, %arg3: index) attributes {iree.module.export = "pad_test$async"} { | |
%0 = hal.semaphore.await %arg0, min_value = %arg1 : i32 | |
hal.check_success %0, "semaphore wait failed" | |
call @pad_test() : () -> () | |
hal.semaphore.signal %arg2, value = %arg3 | |
return | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Synchronous stub exported as "pad_test" (carries iree.abi.stub and reflection attributes).
// Creates a semaphore with initial value 0 on the shared device, calls
// @pad_test$async using that one semaphore as both the wait semaphore (value 0)
// and the signal semaphore (value 1), then awaits value 1 and checks the status.
func @pad_test$sync() attributes {iree.abi.stub, iree.module.export = "pad_test", iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%c0 = constant 0 : index | |
%c1 = constant 1 : index | |
%dev = hal.ex.shared_device : !hal.device | |
%semaphore = hal.semaphore.create %dev, initial_value = %c0 : !hal.semaphore | |
call @pad_test$async(%semaphore, %c0, %semaphore, %c1) : (!hal.semaphore, index, !hal.semaphore, index) -> () | |
%0 = hal.semaphore.await %semaphore, min_value = %c1 : i32 | |
hal.check_success %0, "semaphore wait failed" | |
return | |
} | |
// *** IR Dump After CSE *** | |
// Synchronous stub exported as "pad_test" (text is identical to the dump before CSE).
// Creates a semaphore with initial value 0 on the shared device, calls
// @pad_test$async using that one semaphore as both the wait semaphore (value 0)
// and the signal semaphore (value 1), then awaits value 1 and checks the status.
func @pad_test$sync() attributes {iree.abi.stub, iree.module.export = "pad_test", iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%c0 = constant 0 : index | |
%c1 = constant 1 : index | |
%dev = hal.ex.shared_device : !hal.device | |
%semaphore = hal.semaphore.create %dev, initial_value = %c0 : !hal.semaphore | |
call @pad_test$async(%semaphore, %c0, %semaphore, %c1) : (!hal.semaphore, index, !hal.semaphore, index) -> () | |
%0 = hal.semaphore.await %semaphore, min_value = %c1 : i32 | |
hal.check_success %0, "semaphore wait failed" | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::MaterializeResourceCachesPass *** | |
// Whole-module dump. In this dump, descriptor set layouts, executable layouts and
// executables are held in module-level hal.variable ops, each with its own
// initializer function, and @pad_test loads them with hal.variable.load.
// #map computes ceildiv(s0 * s1 * s2, 32); it is used for the workgroup count X.
#map = affine_map<()[s0, s1, s2] -> (((s0 * s1) * s2) ceildiv 32)> | |
module { | |
// Descriptor set layout 0: binding 0 = StorageBuffer "Read", binding 1 = StorageBuffer "Write|Discard".
hal.variable @_descriptor_set_layout_0 init(@_descriptor_set_layout_0_initializer) : !hal.descriptor_set_layout attributes {sym_visibility = "private"} | |
func private @_descriptor_set_layout_0_initializer() -> !hal.descriptor_set_layout { | |
%dev = hal.ex.shared_device : !hal.device | |
%descriptor_set_layout = hal.descriptor_set_layout.create %dev, PushOnly, bindings = [#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Write|Discard">] : !hal.descriptor_set_layout | |
return %descriptor_set_layout : !hal.descriptor_set_layout | |
} | |
// Executable layout 0: no push constants, one set layout (@_descriptor_set_layout_0).
hal.variable @_executable_layout_0 init(@_executable_layout_0_initializer) : !hal.executable_layout attributes {sym_visibility = "private"} | |
func private @_executable_layout_0_initializer() -> !hal.executable_layout { | |
%0 = hal.variable.load @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
%dev = hal.ex.shared_device : !hal.device | |
%executable_layout = hal.executable_layout.create %dev, push_constants = 0, set_layouts = [%0] : !hal.executable_layout | |
return %executable_layout : !hal.executable_layout | |
} | |
// Descriptor set layout 1: binding 0 = StorageBuffer "Read", binding 1 = StorageBuffer "Read|Write".
hal.variable @_descriptor_set_layout_1 init(@_descriptor_set_layout_1_initializer) : !hal.descriptor_set_layout attributes {sym_visibility = "private"} | |
func private @_descriptor_set_layout_1_initializer() -> !hal.descriptor_set_layout { | |
%dev = hal.ex.shared_device : !hal.device | |
%descriptor_set_layout = hal.descriptor_set_layout.create %dev, PushOnly, bindings = [#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Read|Write">] : !hal.descriptor_set_layout | |
return %descriptor_set_layout : !hal.descriptor_set_layout | |
} | |
// Executable layout 1: no push constants, one set layout (@_descriptor_set_layout_1).
hal.variable @_executable_layout_1 init(@_executable_layout_1_initializer) : !hal.executable_layout attributes {sym_visibility = "private"} | |
func private @_executable_layout_1_initializer() -> !hal.executable_layout { | |
%0 = hal.variable.load @_descriptor_set_layout_1 : !hal.descriptor_set_layout | |
%dev = hal.ex.shared_device : !hal.device | |
%executable_layout = hal.executable_layout.create %dev, push_constants = 0, set_layouts = [%0] : !hal.executable_layout | |
return %executable_layout : !hal.executable_layout | |
} | |
// Executable for dispatch 0: created from the @vulkan_spirv target with layout 0 when the
// device id matches "vulkan*"; the always-matching fallback case yields a null executable.
hal.variable @_executable_pad_test_dispatch_0 init(@_executable_pad_test_dispatch_0_initializer) : !hal.executable attributes {sym_visibility = "private"} | |
func private @_executable_pad_test_dispatch_0_initializer() -> !hal.executable { | |
%dev = hal.ex.shared_device : !hal.device | |
%0 = hal.device.switch(%dev : !hal.device) -> !hal.executable | |
#hal.device.match.id<"vulkan*">(%arg0 = %dev : !hal.device) { | |
%1 = hal.variable.load @_executable_layout_0 : !hal.executable_layout | |
%exe = hal.executable.create %arg0, @pad_test_dispatch_0::@vulkan_spirv, layouts = [%1] : !hal.executable | |
hal.return %exe : !hal.executable | |
}, | |
#hal.match.always() { | |
%1 = iree.null : !hal.executable | |
hal.return %1 : !hal.executable | |
} | |
return %0 : !hal.executable | |
} | |
// Executable for dispatch 1: same structure as above, using layout 1 and @pad_test_dispatch_1.
hal.variable @_executable_pad_test_dispatch_1 init(@_executable_pad_test_dispatch_1_initializer) : !hal.executable attributes {sym_visibility = "private"} | |
func private @_executable_pad_test_dispatch_1_initializer() -> !hal.executable { | |
%dev = hal.ex.shared_device : !hal.device | |
%0 = hal.device.switch(%dev : !hal.device) -> !hal.executable | |
#hal.device.match.id<"vulkan*">(%arg0 = %dev : !hal.device) { | |
%1 = hal.variable.load @_executable_layout_1 : !hal.executable_layout | |
%exe = hal.executable.create %arg0, @pad_test_dispatch_1::@vulkan_spirv, layouts = [%1] : !hal.executable | |
hal.return %exe : !hal.executable | |
}, | |
#hal.match.always() { | |
%1 = iree.null : !hal.executable | |
hal.return %1 : !hal.executable | |
} | |
return %0 : !hal.executable | |
} | |
// Dispatch 0 kernel: writes the scalar read from binding 0 into each of the 27 elements
// of the binding-1 output array.
hal.executable @pad_test_dispatch_0 attributes {sym_visibility = "private"} { | |
hal.interface @legacy_io { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
hal.executable.entry_point @pad_test_dispatch_0 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:i32>, !flow.dispatch.tensor<writeonly:3x9xi32>) -> ()} { | |
// Entry-point region: returns the workgroup count (#map of the three index arguments, 1, 1).
^bb0(%arg0: index, %arg1: index, %arg2: index): // no predecessors | |
%c1 = constant 1 : index | |
%0 = affine.apply #map()[%arg0, %arg1, %arg2] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
// Binding (0, 1): 27 x i32 output array; binding (0, 0): 1 x i32 input array.
spv.GlobalVariable @__resource_var_183968000__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_183752544__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_0() "None" { | |
%0 = spv.Constant 27 : i32 | |
%1 = spv.Constant 32 : i32 | |
%2 = spv.Constant 0 : i32 | |
%3 = spv.Constant 9 : i32 | |
%4 = spv.mlir.addressof @__resource_var_183752544__ : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
%5 = spv.mlir.addressof @__resource_var_183968000__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
// %7 = the scalar at element 0 of the binding-0 array.
%6 = spv.AccessChain %4[%2, %2] : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%7 = spv.Load "StorageBuffer" %6 : i32 | |
%8 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%9 = spv.Load "Input" %8 : vector<3xi32> | |
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32> | |
%11 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%12 = spv.Load "Input" %11 : vector<3xi32> | |
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32> | |
// %15 = WorkgroupId.x * 32 + LocalInvocationId.x; only indices < 27 take the store path.
%14 = spv.IMul %10, %1 : i32 | |
%15 = spv.IAdd %14, %13 : i32 | |
%16 = spv.SLessThan %15, %0 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %16, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
// %17 = %15 / 9; %23 = remainder of %15 by 9, built from |%15| umod |9| with the
// sign restored by the select; %25 = %17 * 9 + %23 is the output element index.
%17 = spv.SDiv %15, %3 : i32 | |
%18 = spv.GLSL.SAbs %15 : i32 | |
%19 = spv.GLSL.SAbs %3 : i32 | |
%20 = spv.UMod %18, %19 : i32 | |
%21 = spv.IEqual %15, %18 : i32 | |
%22 = spv.SNegate %20 : i32 | |
%23 = spv.Select %21, %20, %22 : i1, i32 | |
%24 = spv.IMul %17, %3 : i32 | |
%25 = spv.IAdd %24, %23 : i32 | |
%26 = spv.AccessChain %5[%2, %25] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %26, %7 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
spv.EntryPoint "GLCompute" @pad_test_dispatch_0, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
spv.ExecutionMode @pad_test_dispatch_0 "LocalSize", 32, 1, 1 | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
} | |
} | |
// Dispatch 1 kernel: copies the 6 elements of the binding-0 input array into the
// 27-element binding-1 array at index (i / 3) * 9 + (i rem 3) + 1.
hal.executable @pad_test_dispatch_1 attributes {sym_visibility = "private"} { | |
hal.interface @legacy_io { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
hal.executable.entry_point @pad_test_dispatch_1 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:2x3xi32>, !flow.dispatch.tensor<readwrite:3x9xi32>) -> ()} { | |
// Entry-point region: returns the workgroup count (#map of the three index arguments, 1, 1).
^bb0(%arg0: index, %arg1: index, %arg2: index): // no predecessors | |
%c1 = constant 1 : index | |
%0 = affine.apply #map()[%arg0, %arg1, %arg2] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
// Binding (0, 1): 27 x i32 output array; binding (0, 0): 6 x i32 input array.
spv.GlobalVariable @__resource_var_184152960__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_183752544__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_1() "None" { | |
%0 = spv.Constant 6 : i32 | |
%1 = spv.Constant 1 : i32 | |
%2 = spv.Constant 32 : i32 | |
%3 = spv.Constant 3 : i32 | |
%4 = spv.Constant 0 : i32 | |
%5 = spv.Constant 9 : i32 | |
%6 = spv.mlir.addressof @__resource_var_183752544__ : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer> | |
%7 = spv.mlir.addressof @__resource_var_184152960__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
%8 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%9 = spv.Load "Input" %8 : vector<3xi32> | |
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32> | |
%11 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%12 = spv.Load "Input" %11 : vector<3xi32> | |
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32> | |
// %15 = WorkgroupId.x * 32 + LocalInvocationId.x; only indices < 6 take the copy path.
%14 = spv.IMul %10, %2 : i32 | |
%15 = spv.IAdd %14, %13 : i32 | |
%16 = spv.SLessThan %15, %0 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %16, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
// %17 = %15 / 3 and %23 = remainder of %15 by 3 (sign restored by the select);
// %25 = %17 * 3 + %23 is the input element index.
%17 = spv.SDiv %15, %3 : i32 | |
%18 = spv.GLSL.SAbs %15 : i32 | |
%19 = spv.GLSL.SAbs %3 : i32 | |
%20 = spv.UMod %18, %19 : i32 | |
%21 = spv.IEqual %15, %18 : i32 | |
%22 = spv.SNegate %20 : i32 | |
%23 = spv.Select %21, %20, %22 : i1, i32 | |
%24 = spv.IMul %17, %3 : i32 | |
%25 = spv.IAdd %24, %23 : i32 | |
%26 = spv.AccessChain %6[%4, %25] : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%27 = spv.Load "StorageBuffer" %26 : i32 | |
// Output element index %30 = %17 * 9 + (%23 + 1).
%28 = spv.IAdd %23, %1 : i32 | |
%29 = spv.IMul %17, %5 : i32 | |
%30 = spv.IAdd %29, %28 : i32 | |
%31 = spv.AccessChain %7[%4, %30] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %31, %27 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
spv.EntryPoint "GLCompute" @pad_test_dispatch_1, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
spv.ExecutionMode @pad_test_dispatch_1 "LocalSize", 32, 1, 1 | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
} | |
} | |
} | |
// Raw test entry point (exported as "pad_test$raw"). Executable layouts and executables
// are read from the module-level variables declared above via hal.variable.load.
func @pad_test() attributes {iree.module.export = "pad_test$raw", noinline} { | |
%c-1 = constant -1 : index | |
%c4 = constant 4 : index | |
%c24 = constant 24 : index | |
%c108 = constant 108 : index | |
%c1 = constant 1 : index | |
%c0 = constant 0 : index | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
%c16777248_i32 = constant 16777248 : i32 | |
%dev = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator %dev : !hal.allocator | |
// %0 = expected 3x9 result, %1 = scalar 0 pad value, %2 = 2x3 input.
%0 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%mapped = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %0[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%1 = iree.byte_buffer.constant : !iree.byte_buffer = dense<0> : tensor<i32> | |
%mapped_0 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %1[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%2 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%mapped_1 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %2[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%3 = iree.do_not_optimize(%mapped_1) : !hal.buffer | |
%4 = iree.do_not_optimize(%mapped_0) : !hal.buffer | |
%buffer = hal.allocator.allocate %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %c108 : !hal.buffer | |
%cmd = hal.command_buffer.create %dev, OneShot, "Transfer|Dispatch" : !hal.command_buffer | |
hal.command_buffer.begin %cmd | |
// Dispatch 0: binding 0 = pad value (%4, 4 bytes), binding 1 = result buffer (108 bytes).
%5 = hal.variable.load @_executable_layout_0 : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %5, set = %c0, bindings = [%c0 = (%4, %c0, %c4), %c1 = (%buffer, %c0, %c108)] | |
hal.device.switch(%dev : !hal.device) | |
#hal.device.match.id<"vulkan*">(%arg0 = %cmd : !hal.command_buffer, %arg1 = %c9 : index, %arg2 = %c3 : index, %arg3 = %c1 : index) { | |
%c1_3 = constant 1 : index | |
%7 = affine.apply #map()[%arg1, %arg2, %arg3] | |
// %8 is not referenced again in this region; the executable comes from the variable load.
%8 = hal.command_buffer.device %arg0 : !hal.device | |
%9 = hal.variable.load @_executable_pad_test_dispatch_0 : !hal.executable | |
hal.command_buffer.dispatch %arg0, %9, entry_point = 0, workgroup_xyz = [%7, %c1_3, %c1_3] | |
hal.return | |
} | |
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
// Dispatch 1: binding 0 = input (%3, 24 bytes), binding 1 = the same result buffer.
%6 = hal.variable.load @_executable_layout_1 : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %6, set = %c0, bindings = [%c0 = (%3, %c0, %c24), %c1 = (%buffer, %c0, %c108)] | |
hal.device.switch(%dev : !hal.device) | |
#hal.device.match.id<"vulkan*">(%arg0 = %cmd : !hal.command_buffer, %arg1 = %c9 : index, %arg2 = %c3 : index, %arg3 = %c1 : index) { | |
%c1_3 = constant 1 : index | |
%7 = affine.apply #map()[%arg1, %arg2, %arg3] | |
%8 = hal.command_buffer.device %arg0 : !hal.device | |
%9 = hal.variable.load @_executable_pad_test_dispatch_1 : !hal.executable | |
hal.command_buffer.dispatch %arg0, %9, entry_point = 0, workgroup_xyz = [%7, %c1_3, %c1_3] | |
hal.return | |
} | |
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
hal.command_buffer.end %cmd | |
hal.ex.submit_and_wait %dev, %cmd | |
// Compare the computed 3x9 result with the expected constant.
%view = hal.buffer_view.create %buffer, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
%view_2 = hal.buffer_view.create %mapped, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
check.expect_eq(%view, %view_2) : !hal.buffer_view | |
return | |
} | |
// Async wrapper: await %arg0 (min_value %arg1), check status, call @pad_test, signal %arg2 with %arg3.
func @pad_test$async(%arg0: !hal.semaphore, %arg1: index, %arg2: !hal.semaphore, %arg3: index) attributes {iree.module.export = "pad_test$async"} { | |
%0 = hal.semaphore.await %arg0, min_value = %arg1 : i32 | |
hal.check_success %0, "semaphore wait failed" | |
call @pad_test() : () -> () | |
hal.semaphore.signal %arg2, value = %arg3 | |
return | |
} | |
// Sync stub: creates a semaphore at 0, calls @pad_test$async with it as both wait (0) and
// signal (1) semaphore, then awaits value 1 and checks the status.
func @pad_test$sync() attributes {iree.abi.stub, iree.module.export = "pad_test", iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%c0 = constant 0 : index | |
%c1 = constant 1 : index | |
%dev = hal.ex.shared_device : !hal.device | |
%semaphore = hal.semaphore.create %dev, initial_value = %c0 : !hal.semaphore | |
call @pad_test$async(%semaphore, %c0, %semaphore, %c1) : (!hal.semaphore, index, !hal.semaphore, index) -> () | |
%0 = hal.semaphore.await %semaphore, min_value = %c1 : i32 | |
hal.check_success %0, "semaphore wait failed" | |
return | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::InlineDeviceSwitchesPass *** | |
// Returns a PushOnly descriptor set layout created on the shared device with
// binding 0 = StorageBuffer "Read" and binding 1 = StorageBuffer "Write|Discard".
func private @_descriptor_set_layout_0_initializer() -> !hal.descriptor_set_layout { | |
%dev = hal.ex.shared_device : !hal.device | |
%descriptor_set_layout = hal.descriptor_set_layout.create %dev, PushOnly, bindings = [#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Write|Discard">] : !hal.descriptor_set_layout | |
return %descriptor_set_layout : !hal.descriptor_set_layout | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::InlineDeviceSwitchesPass *** | |
// Returns an executable layout created on the shared device with no push constants
// and a single set layout loaded from the variable @_descriptor_set_layout_0.
func private @_executable_layout_0_initializer() -> !hal.executable_layout { | |
%0 = hal.variable.load @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
%dev = hal.ex.shared_device : !hal.device | |
%executable_layout = hal.executable_layout.create %dev, push_constants = 0, set_layouts = [%0] : !hal.executable_layout | |
return %executable_layout : !hal.executable_layout | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::InlineDeviceSwitchesPass *** | |
// Returns a PushOnly descriptor set layout created on the shared device with
// binding 0 = StorageBuffer "Read" and binding 1 = StorageBuffer "Read|Write".
func private @_descriptor_set_layout_1_initializer() -> !hal.descriptor_set_layout { | |
%dev = hal.ex.shared_device : !hal.device | |
%descriptor_set_layout = hal.descriptor_set_layout.create %dev, PushOnly, bindings = [#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Read|Write">] : !hal.descriptor_set_layout | |
return %descriptor_set_layout : !hal.descriptor_set_layout | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::InlineDeviceSwitchesPass *** | |
// Returns an executable layout created on the shared device with no push constants
// and a single set layout loaded from the variable @_descriptor_set_layout_1.
func private @_executable_layout_1_initializer() -> !hal.executable_layout { | |
%0 = hal.variable.load @_descriptor_set_layout_1 : !hal.descriptor_set_layout | |
%dev = hal.ex.shared_device : !hal.device | |
%executable_layout = hal.executable_layout.create %dev, push_constants = 0, set_layouts = [%0] : !hal.executable_layout | |
return %executable_layout : !hal.executable_layout | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::InlineDeviceSwitchesPass *** | |
// Returns the executable for @pad_test_dispatch_0, expressed as explicit control flow.
// If the shared device id matches "vulkan*", ^bb1 creates the executable from the
// @vulkan_spirv target with @_executable_layout_0; otherwise ^bb2 branches on a
// constant-true condition to ^bb3, which yields a null executable.
func private @_executable_pad_test_dispatch_0_initializer() -> !hal.executable { | |
%dev = hal.ex.shared_device : !hal.device | |
%0 = hal.device.match.id %dev, pattern = ["vulkan*"] : (!hal.device) -> i1 | |
cond_br %0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%1 = hal.variable.load @_executable_layout_0 : !hal.executable_layout | |
%exe = hal.executable.create %dev, @pad_test_dispatch_0::@vulkan_spirv, layouts = [%1] : !hal.executable | |
br ^bb5(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
// The condition is the constant true, so ^bb4 is never the taken successor here.
%true = constant true | |
cond_br %true, ^bb3, ^bb4 | |
^bb3: // pred: ^bb2 | |
%2 = iree.null : !hal.executable | |
br ^bb5(%2 : !hal.executable) | |
^bb4: // pred: ^bb2 | |
iree.unreachable | |
// Join block: %3 is either the created executable (from ^bb1) or null (from ^bb3).
^bb5(%3: !hal.executable): // 2 preds: ^bb1, ^bb3 | |
return %3 : !hal.executable | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::InlineDeviceSwitchesPass *** | |
// Returns the executable for @pad_test_dispatch_1, expressed as explicit control flow.
// If the shared device id matches "vulkan*", ^bb1 creates the executable from the
// @vulkan_spirv target with @_executable_layout_1; otherwise ^bb2 branches on a
// constant-true condition to ^bb3, which yields a null executable.
func private @_executable_pad_test_dispatch_1_initializer() -> !hal.executable { | |
%dev = hal.ex.shared_device : !hal.device | |
%0 = hal.device.match.id %dev, pattern = ["vulkan*"] : (!hal.device) -> i1 | |
cond_br %0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%1 = hal.variable.load @_executable_layout_1 : !hal.executable_layout | |
%exe = hal.executable.create %dev, @pad_test_dispatch_1::@vulkan_spirv, layouts = [%1] : !hal.executable | |
br ^bb5(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
// The condition is the constant true, so ^bb4 is never the taken successor here.
%true = constant true | |
cond_br %true, ^bb3, ^bb4 | |
^bb3: // pred: ^bb2 | |
%2 = iree.null : !hal.executable | |
br ^bb5(%2 : !hal.executable) | |
^bb4: // pred: ^bb2 | |
iree.unreachable | |
// Join block: %3 is either the created executable (from ^bb1) or null (from ^bb3).
^bb5(%3: !hal.executable): // 2 preds: ^bb1, ^bb3 | |
return %3 : !hal.executable | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::InlineDeviceSwitchesPass *** | |
// Raw entry point of the pad test (exported as "pad_test$raw", marked noinline),
// with device selection expressed as explicit branches.
// Each dispatch is guarded by a hal.device.match.id check against "vulkan*";
// the non-matching successor of each check is an iree.unreachable block.
func @pad_test() attributes {iree.module.export = "pad_test$raw", noinline} { | |
%c-1 = constant -1 : index | |
%c4 = constant 4 : index | |
%c24 = constant 24 : index | |
%c108 = constant 108 : index | |
%c1 = constant 1 : index | |
%c0 = constant 0 : index | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
%c16777248_i32 = constant 16777248 : i32 | |
%dev = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator %dev : !hal.allocator | |
// %0 = expected 3x9 result, %1 = scalar 0 pad value, %2 = 2x3 input.
%0 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%mapped = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %0[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%1 = iree.byte_buffer.constant : !iree.byte_buffer = dense<0> : tensor<i32> | |
%mapped_0 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %1[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%2 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%mapped_1 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %2[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%3 = iree.do_not_optimize(%mapped_1) : !hal.buffer | |
%4 = iree.do_not_optimize(%mapped_0) : !hal.buffer | |
// Result buffer of 108 bytes, which matches 27 (3x9) i32 elements at 4 bytes each.
%buffer = hal.allocator.allocate %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %c108 : !hal.buffer | |
%cmd = hal.command_buffer.create %dev, OneShot, "Transfer|Dispatch" : !hal.command_buffer | |
hal.command_buffer.begin %cmd | |
// Dispatch 0: binding 0 = pad value (%4, 4 bytes), binding 1 = result buffer (108 bytes).
%5 = hal.variable.load @_executable_layout_0 : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %5, set = %c0, bindings = [%c0 = (%4, %c0, %c4), %c1 = (%buffer, %c0, %c108)] | |
%6 = hal.device.match.id %dev, pattern = ["vulkan*"] : (!hal.device) -> i1 | |
cond_br %6, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%c1_2 = constant 1 : index | |
// Workgroup count X = ceildiv(9 * 3 * 1, 32); Y and Z are 1.
%7 = affine.apply affine_map<()[s0, s1, s2] -> (((s0 * s1) * s2) ceildiv 32)>()[%c9, %c3, %c1] | |
// %8 is not referenced again in this function.
%8 = hal.command_buffer.device %cmd : !hal.device | |
%9 = hal.variable.load @_executable_pad_test_dispatch_0 : !hal.executable | |
hal.command_buffer.dispatch %cmd, %9, entry_point = 0, workgroup_xyz = [%7, %c1_2, %c1_2] | |
br ^bb3 | |
^bb2: // pred: ^bb0 | |
iree.unreachable | |
^bb3: // pred: ^bb1 | |
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
// Dispatch 1: binding 0 = input (%3, 24 bytes), binding 1 = the same result buffer.
%10 = hal.variable.load @_executable_layout_1 : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %10, set = %c0, bindings = [%c0 = (%3, %c0, %c24), %c1 = (%buffer, %c0, %c108)] | |
// The device id is matched against "vulkan*" a second time for this dispatch.
%11 = hal.device.match.id %dev, pattern = ["vulkan*"] : (!hal.device) -> i1 | |
cond_br %11, ^bb4, ^bb5 | |
^bb4: // pred: ^bb3 | |
%c1_3 = constant 1 : index | |
%12 = affine.apply affine_map<()[s0, s1, s2] -> (((s0 * s1) * s2) ceildiv 32)>()[%c9, %c3, %c1] | |
// %13 is not referenced again in this function.
%13 = hal.command_buffer.device %cmd : !hal.device | |
%14 = hal.variable.load @_executable_pad_test_dispatch_1 : !hal.executable | |
hal.command_buffer.dispatch %cmd, %14, entry_point = 0, workgroup_xyz = [%12, %c1_3, %c1_3] | |
br ^bb6 | |
^bb5: // pred: ^bb3 | |
iree.unreachable | |
^bb6: // pred: ^bb4 | |
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
hal.command_buffer.end %cmd | |
hal.ex.submit_and_wait %dev, %cmd | |
// Compare the computed 3x9 result with the expected constant.
%view = hal.buffer_view.create %buffer, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
%view_4 = hal.buffer_view.create %mapped, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
check.expect_eq(%view, %view_4) : !hal.buffer_view | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::InlineDeviceSwitchesPass *** | |
// Async export wrapper around @pad_test: waits on %arg0 until it reaches at
// least %arg1, runs the test body, then signals %arg2 with the value %arg3.
func @pad_test$async(%arg0: !hal.semaphore, %arg1: index, %arg2: !hal.semaphore, %arg3: index) attributes {iree.module.export = "pad_test$async"} { | |
// The await yields an i32 status that hal.check_success validates.
%0 = hal.semaphore.await %arg0, min_value = %arg1 : i32 | |
hal.check_success %0, "semaphore wait failed" | |
call @pad_test() : () -> () | |
// Signalled only after the call to @pad_test has returned.
hal.semaphore.signal %arg2, value = %arg3 | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::InlineDeviceSwitchesPass *** | |
// Synchronous export stub (exported as "pad_test"): creates one semaphore,
// passes it to @pad_test$async as both the wait and the signal semaphore, and
// then waits for the signalled value before returning.
func @pad_test$sync() attributes {iree.abi.stub, iree.module.export = "pad_test", iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%c0 = constant 0 : index | |
%c1 = constant 1 : index | |
%dev = hal.ex.shared_device : !hal.device | |
// Semaphore starts at 0, which is also the wait value handed to the async call.
%semaphore = hal.semaphore.create %dev, initial_value = %c0 : !hal.semaphore | |
// Wait value = 0 (%c0), signal value = 1 (%c1), both on the same semaphore.
call @pad_test$async(%semaphore, %c0, %semaphore, %c1) : (!hal.semaphore, index, !hal.semaphore, index) -> () | |
// Wait for the value 1 that the async function signals.
%0 = hal.semaphore.await %semaphore, min_value = %c1 : i32 | |
hal.check_success %0, "semaphore wait failed" | |
return | |
} | |
// *** IR Dump After ConvertAffineToStandard *** | |
module { | |
// Private global holding the descriptor set layout used with dispatch 0; its
// value is produced by the initializer function declared right below.
hal.variable @_descriptor_set_layout_0 init(@_descriptor_set_layout_0_initializer) : !hal.descriptor_set_layout attributes {sym_visibility = "private"} | |
// Creates a PushOnly layout on the shared device with two storage-buffer
// bindings: binding 0 is "Read", binding 1 is "Write|Discard".
func private @_descriptor_set_layout_0_initializer() -> !hal.descriptor_set_layout { | |
%dev = hal.ex.shared_device : !hal.device | |
%descriptor_set_layout = hal.descriptor_set_layout.create %dev, PushOnly, bindings = [#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Write|Discard">] : !hal.descriptor_set_layout | |
return %descriptor_set_layout : !hal.descriptor_set_layout | |
} | |
// Private global holding the executable layout built from
// @_descriptor_set_layout_0; initialized by the function below.
hal.variable @_executable_layout_0 init(@_executable_layout_0_initializer) : !hal.executable_layout attributes {sym_visibility = "private"} | |
// Builds an executable layout with no push constants and a single set layout
// (the one loaded from @_descriptor_set_layout_0).
func private @_executable_layout_0_initializer() -> !hal.executable_layout { | |
%0 = hal.variable.load @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
%dev = hal.ex.shared_device : !hal.device | |
%executable_layout = hal.executable_layout.create %dev, push_constants = 0, set_layouts = [%0] : !hal.executable_layout | |
return %executable_layout : !hal.executable_layout | |
} | |
// Private global holding the descriptor set layout used with dispatch 1; its
// value is produced by the initializer function declared right below.
hal.variable @_descriptor_set_layout_1 init(@_descriptor_set_layout_1_initializer) : !hal.descriptor_set_layout attributes {sym_visibility = "private"} | |
// Creates a PushOnly layout on the shared device with two storage-buffer
// bindings: binding 0 is "Read", binding 1 is "Read|Write" (unlike layout 0,
// whose binding 1 is "Write|Discard").
func private @_descriptor_set_layout_1_initializer() -> !hal.descriptor_set_layout { | |
%dev = hal.ex.shared_device : !hal.device | |
%descriptor_set_layout = hal.descriptor_set_layout.create %dev, PushOnly, bindings = [#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Read|Write">] : !hal.descriptor_set_layout | |
return %descriptor_set_layout : !hal.descriptor_set_layout | |
} | |
// Private global holding the executable layout built from
// @_descriptor_set_layout_1; initialized by the function below.
hal.variable @_executable_layout_1 init(@_executable_layout_1_initializer) : !hal.executable_layout attributes {sym_visibility = "private"} | |
// Builds an executable layout with no push constants and a single set layout
// (the one loaded from @_descriptor_set_layout_1).
func private @_executable_layout_1_initializer() -> !hal.executable_layout { | |
%0 = hal.variable.load @_descriptor_set_layout_1 : !hal.descriptor_set_layout | |
%dev = hal.ex.shared_device : !hal.device | |
%executable_layout = hal.executable_layout.create %dev, push_constants = 0, set_layouts = [%0] : !hal.executable_layout | |
return %executable_layout : !hal.executable_layout | |
} | |
// Private global caching the executable for dispatch 0; initialized by the
// function below.
hal.variable @_executable_pad_test_dispatch_0 init(@_executable_pad_test_dispatch_0_initializer) : !hal.executable attributes {sym_visibility = "private"} | |
// Returns the @vulkan_spirv target of @pad_test_dispatch_0 when the shared
// device id matches "vulkan*", and a null executable otherwise.
func private @_executable_pad_test_dispatch_0_initializer() -> !hal.executable { | |
%dev = hal.ex.shared_device : !hal.device | |
// Device id is queried directly here.
%0 = hal.device.match.id %dev, pattern = ["vulkan*"] : (!hal.device) -> i1 | |
cond_br %0, ^bb1, ^bb2 | |
// Match: create the executable using @_executable_layout_0.
^bb1: // pred: ^bb0 | |
%1 = hal.variable.load @_executable_layout_0 : !hal.executable_layout | |
%exe = hal.executable.create %dev, @pad_test_dispatch_0::@vulkan_spirv, layouts = [%1] : !hal.executable | |
br ^bb5(%exe : !hal.executable) | |
// No match: the condition is the constant true, so control always reaches ^bb3.
^bb2: // pred: ^bb0 | |
%true = constant true | |
cond_br %true, ^bb3, ^bb4 | |
// Fallback result is a null executable.
^bb3: // pred: ^bb2 | |
%2 = iree.null : !hal.executable | |
br ^bb5(%2 : !hal.executable) | |
// Only reachable on the false edge of the constant-true branch above.
^bb4: // pred: ^bb2 | |
iree.unreachable | |
// Join block: %3 is either the created executable or null.
^bb5(%3: !hal.executable): // 2 preds: ^bb1, ^bb3 | |
return %3 : !hal.executable | |
} | |
// Private global caching the executable for dispatch 1; initialized by the
// function below.
hal.variable @_executable_pad_test_dispatch_1 init(@_executable_pad_test_dispatch_1_initializer) : !hal.executable attributes {sym_visibility = "private"} | |
// Returns the @vulkan_spirv target of @pad_test_dispatch_1 when the shared
// device id matches "vulkan*", and a null executable otherwise.
func private @_executable_pad_test_dispatch_1_initializer() -> !hal.executable { | |
%dev = hal.ex.shared_device : !hal.device | |
// Device id is queried directly here.
%0 = hal.device.match.id %dev, pattern = ["vulkan*"] : (!hal.device) -> i1 | |
cond_br %0, ^bb1, ^bb2 | |
// Match: create the executable using @_executable_layout_1.
^bb1: // pred: ^bb0 | |
%1 = hal.variable.load @_executable_layout_1 : !hal.executable_layout | |
%exe = hal.executable.create %dev, @pad_test_dispatch_1::@vulkan_spirv, layouts = [%1] : !hal.executable | |
br ^bb5(%exe : !hal.executable) | |
// No match: the condition is the constant true, so control always reaches ^bb3.
^bb2: // pred: ^bb0 | |
%true = constant true | |
cond_br %true, ^bb3, ^bb4 | |
// Fallback result is a null executable.
^bb3: // pred: ^bb2 | |
%2 = iree.null : !hal.executable | |
br ^bb5(%2 : !hal.executable) | |
// Only reachable on the false edge of the constant-true branch above.
^bb4: // pred: ^bb2 | |
iree.unreachable | |
// Join block: %3 is either the created executable or null.
^bb5(%3: !hal.executable): // 2 preds: ^bb1, ^bb3 | |
return %3 : !hal.executable | |
} | |
// Executable for dispatch 0: a SPIR-V kernel that reads one i32 from binding 0
// and stores it into each of the 27 elements of the buffer at binding 1.
hal.executable @pad_test_dispatch_0 attributes {sym_visibility = "private"} { | |
// Interface: binding 0 is a read-only storage buffer, binding 1 is
// write/discard.
hal.interface @legacy_io { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
// Entry point region: computes the workgroup count from the three index
// arguments and returns it as (x, 1, 1).
hal.executable.entry_point @pad_test_dispatch_0 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:i32>, !flow.dispatch.tensor<writeonly:3x9xi32>) -> ()} { | |
^bb0(%arg0: index, %arg1: index, %arg2: index): // no predecessors | |
%c1 = constant 1 : index | |
// %1 = %arg0 * %arg1 * %arg2.
%0 = muli %arg0, %arg1 : index | |
%1 = muli %0, %arg2 : index | |
%c32 = constant 32 : index | |
%c0 = constant 0 : index | |
%c1_0 = constant 1 : index | |
// Expanded ceiling division of %1 by 32 using signed ops:
//   %1 <= 0 : -((-%1) / 32)
//   %1 >  0 : ((%1 - 1) / 32) + 1
%2 = cmpi sle, %1, %c0 : index | |
%3 = subi %c0, %1 : index | |
%4 = subi %1, %c1_0 : index | |
%5 = select %2, %3, %4 : index | |
%6 = divi_signed %5, %c32 : index | |
%7 = subi %c0, %6 : index | |
%8 = addi %6, %c1_0 : index | |
%9 = select %2, %7, %8 : index | |
hal.return %9, %c1, %c1 : index, index, index | |
} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
// bind(0, 1): 27 x i32 destination array; bind(0, 0): 1 x i32 source array.
spv.GlobalVariable @__resource_var_183968000__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_183752544__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_0() "None" { | |
%0 = spv.Constant 27 : i32 | |
%1 = spv.Constant 32 : i32 | |
%2 = spv.Constant 0 : i32 | |
%3 = spv.Constant 9 : i32 | |
%4 = spv.mlir.addressof @__resource_var_183752544__ : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
%5 = spv.mlir.addressof @__resource_var_183968000__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
// %7 = element 0 of the 1-element source array (the value to broadcast).
%6 = spv.AccessChain %4[%2, %2] : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%7 = spv.Load "StorageBuffer" %6 : i32 | |
// %15 = WorkgroupId[0] * 32 + LocalInvocationId[0], a linear invocation index.
%8 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%9 = spv.Load "Input" %8 : vector<3xi32> | |
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32> | |
%11 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%12 = spv.Load "Input" %11 : vector<3xi32> | |
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32> | |
%14 = spv.IMul %10, %1 : i32 | |
%15 = spv.IAdd %14, %13 : i32 | |
// Guard: only invocations with index < 27 perform the store.
%16 = spv.SLessThan %15, %0 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %16, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
// %17 = %15 / 9 (signed).
%17 = spv.SDiv %15, %3 : i32 | |
// %23 = remainder of %15 by 9, built as |%15| umod |9| and negated when
// %15 differs from its absolute value (i.e. when %15 is negative).
%18 = spv.GLSL.SAbs %15 : i32 | |
%19 = spv.GLSL.SAbs %3 : i32 | |
%20 = spv.UMod %18, %19 : i32 | |
%21 = spv.IEqual %15, %18 : i32 | |
%22 = spv.SNegate %20 : i32 | |
%23 = spv.Select %21, %20, %22 : i1, i32 | |
// %25 = %17 * 9 + %23: destination element index.
%24 = spv.IMul %17, %3 : i32 | |
%25 = spv.IAdd %24, %23 : i32 | |
%26 = spv.AccessChain %5[%2, %25] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %26, %7 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
spv.EntryPoint "GLCompute" @pad_test_dispatch_0, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
// Local size of 32 along x matches the factor 32 used in the index math above.
spv.ExecutionMode @pad_test_dispatch_0 "LocalSize", 32, 1, 1 | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
} | |
} | |
// Executable for dispatch 1: a SPIR-V kernel that copies the 6 source elements
// (binding 0) into the 27-element buffer at binding 1, placing source element
// (row, col) of a 3-wide row at destination index row * 9 + col + 1.
hal.executable @pad_test_dispatch_1 attributes {sym_visibility = "private"} { | |
// Interface: binding 0 is a read-only storage buffer, binding 1 is read/write.
hal.interface @legacy_io { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
// Entry point region: computes the workgroup count from the three index
// arguments and returns it as (x, 1, 1).
hal.executable.entry_point @pad_test_dispatch_1 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:2x3xi32>, !flow.dispatch.tensor<readwrite:3x9xi32>) -> ()} { | |
^bb0(%arg0: index, %arg1: index, %arg2: index): // no predecessors | |
%c1 = constant 1 : index | |
// %1 = %arg0 * %arg1 * %arg2.
%0 = muli %arg0, %arg1 : index | |
%1 = muli %0, %arg2 : index | |
%c32 = constant 32 : index | |
%c0 = constant 0 : index | |
%c1_0 = constant 1 : index | |
// Expanded ceiling division of %1 by 32 using signed ops:
//   %1 <= 0 : -((-%1) / 32)
//   %1 >  0 : ((%1 - 1) / 32) + 1
%2 = cmpi sle, %1, %c0 : index | |
%3 = subi %c0, %1 : index | |
%4 = subi %1, %c1_0 : index | |
%5 = select %2, %3, %4 : index | |
%6 = divi_signed %5, %c32 : index | |
%7 = subi %c0, %6 : index | |
%8 = addi %6, %c1_0 : index | |
%9 = select %2, %7, %8 : index | |
hal.return %9, %c1, %c1 : index, index, index | |
} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
// bind(0, 1): 27 x i32 destination array; bind(0, 0): 6 x i32 source array.
spv.GlobalVariable @__resource_var_184152960__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_183752544__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_1() "None" { | |
%0 = spv.Constant 6 : i32 | |
%1 = spv.Constant 1 : i32 | |
%2 = spv.Constant 32 : i32 | |
%3 = spv.Constant 3 : i32 | |
%4 = spv.Constant 0 : i32 | |
%5 = spv.Constant 9 : i32 | |
%6 = spv.mlir.addressof @__resource_var_183752544__ : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer> | |
%7 = spv.mlir.addressof @__resource_var_184152960__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
// %15 = WorkgroupId[0] * 32 + LocalInvocationId[0], a linear invocation index.
%8 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%9 = spv.Load "Input" %8 : vector<3xi32> | |
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32> | |
%11 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%12 = spv.Load "Input" %11 : vector<3xi32> | |
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32> | |
%14 = spv.IMul %10, %2 : i32 | |
%15 = spv.IAdd %14, %13 : i32 | |
// Guard: only invocations with index < 6 perform the copy.
%16 = spv.SLessThan %15, %0 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %16, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
// %17 = %15 / 3 (signed): source row.
%17 = spv.SDiv %15, %3 : i32 | |
// %23 = remainder of %15 by 3 (source column), built as |%15| umod |3| and
// negated when %15 differs from its absolute value.
%18 = spv.GLSL.SAbs %15 : i32 | |
%19 = spv.GLSL.SAbs %3 : i32 | |
%20 = spv.UMod %18, %19 : i32 | |
%21 = spv.IEqual %15, %18 : i32 | |
%22 = spv.SNegate %20 : i32 | |
%23 = spv.Select %21, %20, %22 : i1, i32 | |
// Source index = row * 3 + col.
%24 = spv.IMul %17, %3 : i32 | |
%25 = spv.IAdd %24, %23 : i32 | |
%26 = spv.AccessChain %6[%4, %25] : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%27 = spv.Load "StorageBuffer" %26 : i32 | |
// Destination index = row * 9 + (col + 1): the column is shifted by one.
%28 = spv.IAdd %23, %1 : i32 | |
%29 = spv.IMul %17, %5 : i32 | |
%30 = spv.IAdd %29, %28 : i32 | |
%31 = spv.AccessChain %7[%4, %30] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %31, %27 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
spv.EntryPoint "GLCompute" @pad_test_dispatch_1, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
// Local size of 32 along x matches the factor 32 used in the index math above.
spv.ExecutionMode @pad_test_dispatch_1 "LocalSize", 32, 1, 1 | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
} | |
} | |
} | |
// Test body (exported as "pad_test$raw"): records two dispatches into one
// command buffer, submits it, and compares the resulting 3x9 buffer against a
// constant expected tensor.
func @pad_test() attributes {iree.module.export = "pad_test$raw", noinline} { | |
%c-1 = constant -1 : index | |
%c4 = constant 4 : index | |
%c24 = constant 24 : index | |
%c108 = constant 108 : index | |
%c1 = constant 1 : index | |
%c0 = constant 0 : index | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
%c16777248_i32 = constant 16777248 : i32 | |
%dev = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator %dev : !hal.allocator | |
// %mapped: the expected 3x9 result tensor.
%0 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%mapped = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %0[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
// %mapped_0: the scalar 0 bound as the input of dispatch 0.
%1 = iree.byte_buffer.constant : !iree.byte_buffer = dense<0> : tensor<i32> | |
%mapped_0 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %1[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
// %mapped_1: the 2x3 tensor bound as the input of dispatch 1.
%2 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%mapped_1 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %2[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%3 = iree.do_not_optimize(%mapped_1) : !hal.buffer | |
%4 = iree.do_not_optimize(%mapped_0) : !hal.buffer | |
// Output buffer of size 108 (27 x 4; presumably bytes for 3x9 i32 - confirm).
%buffer = hal.allocator.allocate %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %c108 : !hal.buffer | |
%cmd = hal.command_buffer.create %dev, OneShot, "Transfer|Dispatch" : !hal.command_buffer | |
hal.command_buffer.begin %cmd | |
// Dispatch 0 bindings: binding 0 = scalar buffer (0, 4), binding 1 = output
// buffer (0, 108). The tuples look like (buffer, offset, length) - confirm.
%5 = hal.variable.load @_executable_layout_0 : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %5, set = %c0, bindings = [%c0 = (%4, %c0, %c4), %c1 = (%buffer, %c0, %c108)] | |
%6 = hal.device.match.id %dev, pattern = ["vulkan*"] : (!hal.device) -> i1 | |
cond_br %6, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%c1_2 = constant 1 : index | |
// The x workgroup count is the constant 1 in this dump.
%c1_3 = constant 1 : index | |
// %7 is not used anywhere in this function.
%7 = hal.command_buffer.device %cmd : !hal.device | |
%8 = hal.variable.load @_executable_pad_test_dispatch_0 : !hal.executable | |
hal.command_buffer.dispatch %cmd, %8, entry_point = 0, workgroup_xyz = [%c1_3, %c1_2, %c1_2] | |
br ^bb3 | |
// Taken when the device id does not match "vulkan*".
^bb2: // pred: ^bb0 | |
iree.unreachable | |
^bb3: // pred: ^bb1 | |
// Barrier between dispatch 0 and dispatch 1, which share %buffer as binding 1.
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
// Dispatch 1 bindings: binding 0 = 2x3 input buffer (0, 24), binding 1 = the
// same output buffer (0, 108).
%9 = hal.variable.load @_executable_layout_1 : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %9, set = %c0, bindings = [%c0 = (%3, %c0, %c24), %c1 = (%buffer, %c0, %c108)] | |
%10 = hal.device.match.id %dev, pattern = ["vulkan*"] : (!hal.device) -> i1 | |
cond_br %10, ^bb4, ^bb5 | |
^bb4: // pred: ^bb3 | |
%c1_4 = constant 1 : index | |
%c1_5 = constant 1 : index | |
// %11 is not used anywhere in this function.
%11 = hal.command_buffer.device %cmd : !hal.device | |
%12 = hal.variable.load @_executable_pad_test_dispatch_1 : !hal.executable | |
hal.command_buffer.dispatch %cmd, %12, entry_point = 0, workgroup_xyz = [%c1_5, %c1_4, %c1_4] | |
br ^bb6 | |
// Taken when the device id does not match "vulkan*".
^bb5: // pred: ^bb3 | |
iree.unreachable | |
^bb6: // pred: ^bb4 | |
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
hal.command_buffer.end %cmd | |
hal.ex.submit_and_wait %dev, %cmd | |
// Wrap the computed buffer and the expected constant as [3, 9] views and
// compare them.
%view = hal.buffer_view.create %buffer, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
%view_6 = hal.buffer_view.create %mapped, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
check.expect_eq(%view, %view_6) : !hal.buffer_view | |
return | |
} | |
// Async export wrapper around @pad_test: waits on %arg0 until it reaches at
// least %arg1, runs the test body, then signals %arg2 with the value %arg3.
func @pad_test$async(%arg0: !hal.semaphore, %arg1: index, %arg2: !hal.semaphore, %arg3: index) attributes {iree.module.export = "pad_test$async"} { | |
// The await yields an i32 status that hal.check_success validates.
%0 = hal.semaphore.await %arg0, min_value = %arg1 : i32 | |
hal.check_success %0, "semaphore wait failed" | |
call @pad_test() : () -> () | |
// Signalled only after the call to @pad_test has returned.
hal.semaphore.signal %arg2, value = %arg3 | |
return | |
} | |
// Synchronous export stub (exported as "pad_test"): creates one semaphore,
// passes it to @pad_test$async as both the wait and the signal semaphore, and
// then waits for the signalled value before returning.
func @pad_test$sync() attributes {iree.abi.stub, iree.module.export = "pad_test", iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%c0 = constant 0 : index | |
%c1 = constant 1 : index | |
%dev = hal.ex.shared_device : !hal.device | |
// Semaphore starts at 0, which is also the wait value handed to the async call.
%semaphore = hal.semaphore.create %dev, initial_value = %c0 : !hal.semaphore | |
// Wait value = 0 (%c0), signal value = 1 (%c1), both on the same semaphore.
call @pad_test$async(%semaphore, %c0, %semaphore, %c1) : (!hal.semaphore, index, !hal.semaphore, index) -> () | |
// Wait for the value 1 that the async function signals.
%0 = hal.semaphore.await %semaphore, min_value = %c1 : i32 | |
hal.check_success %0, "semaphore wait failed" | |
return | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::MemoizeDeviceQueriesPass *** | |
module { | |
// Private i1 global holding the result of matching the shared device id
// against "vulkan*"; other initializers in this module load it instead of
// issuing the query themselves.
hal.variable @_device_match_id_0 init(@_device_match_id_0_initializer) : i1 attributes {sym_visibility = "private"} | |
// Performs the device id match and returns the boolean result.
func private @_device_match_id_0_initializer() -> i1 { | |
%dev = hal.ex.shared_device : !hal.device | |
%0 = hal.device.match.id %dev, pattern = ["vulkan*"] : (!hal.device) -> i1 | |
return %0 : i1 | |
} | |
// Private global holding the descriptor set layout used with dispatch 0; its
// value is produced by the initializer function declared right below.
hal.variable @_descriptor_set_layout_0 init(@_descriptor_set_layout_0_initializer) : !hal.descriptor_set_layout attributes {sym_visibility = "private"} | |
// Creates a PushOnly layout on the shared device with two storage-buffer
// bindings: binding 0 is "Read", binding 1 is "Write|Discard".
func private @_descriptor_set_layout_0_initializer() -> !hal.descriptor_set_layout { | |
%dev = hal.ex.shared_device : !hal.device | |
%descriptor_set_layout = hal.descriptor_set_layout.create %dev, PushOnly, bindings = [#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Write|Discard">] : !hal.descriptor_set_layout | |
return %descriptor_set_layout : !hal.descriptor_set_layout | |
} | |
// Private global holding the executable layout built from
// @_descriptor_set_layout_0; initialized by the function below.
hal.variable @_executable_layout_0 init(@_executable_layout_0_initializer) : !hal.executable_layout attributes {sym_visibility = "private"} | |
// Builds an executable layout with no push constants and a single set layout
// (the one loaded from @_descriptor_set_layout_0).
func private @_executable_layout_0_initializer() -> !hal.executable_layout { | |
%0 = hal.variable.load @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
%dev = hal.ex.shared_device : !hal.device | |
%executable_layout = hal.executable_layout.create %dev, push_constants = 0, set_layouts = [%0] : !hal.executable_layout | |
return %executable_layout : !hal.executable_layout | |
} | |
// Private global holding the descriptor set layout used with dispatch 1; its
// value is produced by the initializer function declared right below.
hal.variable @_descriptor_set_layout_1 init(@_descriptor_set_layout_1_initializer) : !hal.descriptor_set_layout attributes {sym_visibility = "private"} | |
// Creates a PushOnly layout on the shared device with two storage-buffer
// bindings: binding 0 is "Read", binding 1 is "Read|Write" (unlike layout 0,
// whose binding 1 is "Write|Discard").
func private @_descriptor_set_layout_1_initializer() -> !hal.descriptor_set_layout { | |
%dev = hal.ex.shared_device : !hal.device | |
%descriptor_set_layout = hal.descriptor_set_layout.create %dev, PushOnly, bindings = [#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Read|Write">] : !hal.descriptor_set_layout | |
return %descriptor_set_layout : !hal.descriptor_set_layout | |
} | |
// Private global holding the executable layout built from
// @_descriptor_set_layout_1; initialized by the function below.
hal.variable @_executable_layout_1 init(@_executable_layout_1_initializer) : !hal.executable_layout attributes {sym_visibility = "private"} | |
// Builds an executable layout with no push constants and a single set layout
// (the one loaded from @_descriptor_set_layout_1).
func private @_executable_layout_1_initializer() -> !hal.executable_layout { | |
%0 = hal.variable.load @_descriptor_set_layout_1 : !hal.descriptor_set_layout | |
%dev = hal.ex.shared_device : !hal.device | |
%executable_layout = hal.executable_layout.create %dev, push_constants = 0, set_layouts = [%0] : !hal.executable_layout | |
return %executable_layout : !hal.executable_layout | |
} | |
// Private global caching the executable for dispatch 0; initialized by the
// function below.
hal.variable @_executable_pad_test_dispatch_0 init(@_executable_pad_test_dispatch_0_initializer) : !hal.executable attributes {sym_visibility = "private"} | |
// Returns the @vulkan_spirv target of @pad_test_dispatch_0 when the memoized
// device match flag is true, and a null executable otherwise.
func private @_executable_pad_test_dispatch_0_initializer() -> !hal.executable { | |
%dev = hal.ex.shared_device : !hal.device | |
// The match result is read from the @_device_match_id_0 global rather than
// queried from the device here.
%0 = hal.variable.load @_device_match_id_0 : i1 | |
cond_br %0, ^bb1, ^bb2 | |
// Match: create the executable using @_executable_layout_0.
^bb1: // pred: ^bb0 | |
%1 = hal.variable.load @_executable_layout_0 : !hal.executable_layout | |
%exe = hal.executable.create %dev, @pad_test_dispatch_0::@vulkan_spirv, layouts = [%1] : !hal.executable | |
br ^bb5(%exe : !hal.executable) | |
// No match: the condition is the constant true, so control always reaches ^bb3.
^bb2: // pred: ^bb0 | |
%true = constant true | |
cond_br %true, ^bb3, ^bb4 | |
// Fallback result is a null executable.
^bb3: // pred: ^bb2 | |
%2 = iree.null : !hal.executable | |
br ^bb5(%2 : !hal.executable) | |
// Only reachable on the false edge of the constant-true branch above.
^bb4: // pred: ^bb2 | |
iree.unreachable | |
// Join block: %3 is either the created executable or null.
^bb5(%3: !hal.executable): // 2 preds: ^bb1, ^bb3 | |
return %3 : !hal.executable | |
} | |
// Private global caching the executable for dispatch 1; initialized by the
// function below.
hal.variable @_executable_pad_test_dispatch_1 init(@_executable_pad_test_dispatch_1_initializer) : !hal.executable attributes {sym_visibility = "private"} | |
// Returns the @vulkan_spirv target of @pad_test_dispatch_1 when the memoized
// device match flag is true, and a null executable otherwise.
func private @_executable_pad_test_dispatch_1_initializer() -> !hal.executable { | |
%dev = hal.ex.shared_device : !hal.device | |
// The match result is read from the @_device_match_id_0 global rather than
// queried from the device here.
%0 = hal.variable.load @_device_match_id_0 : i1 | |
cond_br %0, ^bb1, ^bb2 | |
// Match: create the executable using @_executable_layout_1.
^bb1: // pred: ^bb0 | |
%1 = hal.variable.load @_executable_layout_1 : !hal.executable_layout | |
%exe = hal.executable.create %dev, @pad_test_dispatch_1::@vulkan_spirv, layouts = [%1] : !hal.executable | |
br ^bb5(%exe : !hal.executable) | |
// No match: the condition is the constant true, so control always reaches ^bb3.
^bb2: // pred: ^bb0 | |
%true = constant true | |
cond_br %true, ^bb3, ^bb4 | |
// Fallback result is a null executable.
^bb3: // pred: ^bb2 | |
%2 = iree.null : !hal.executable | |
br ^bb5(%2 : !hal.executable) | |
// Only reachable on the false edge of the constant-true branch above.
^bb4: // pred: ^bb2 | |
iree.unreachable | |
// Join block: %3 is either the created executable or null.
^bb5(%3: !hal.executable): // 2 preds: ^bb1, ^bb3 | |
return %3 : !hal.executable | |
} | |
// Executable for dispatch 0: a SPIR-V kernel that reads one i32 from binding 0
// and stores it into each of the 27 elements of the buffer at binding 1.
hal.executable @pad_test_dispatch_0 attributes {sym_visibility = "private"} { | |
// Interface: binding 0 is a read-only storage buffer, binding 1 is
// write/discard.
hal.interface @legacy_io { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
// Entry point region: computes the workgroup count from the three index
// arguments and returns it as (x, 1, 1).
hal.executable.entry_point @pad_test_dispatch_0 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:i32>, !flow.dispatch.tensor<writeonly:3x9xi32>) -> ()} { | |
^bb0(%arg0: index, %arg1: index, %arg2: index): // no predecessors | |
%c1 = constant 1 : index | |
// %1 = %arg0 * %arg1 * %arg2.
%0 = muli %arg0, %arg1 : index | |
%1 = muli %0, %arg2 : index | |
%c32 = constant 32 : index | |
%c0 = constant 0 : index | |
%c1_0 = constant 1 : index | |
// Expanded ceiling division of %1 by 32 using signed ops:
//   %1 <= 0 : -((-%1) / 32)
//   %1 >  0 : ((%1 - 1) / 32) + 1
%2 = cmpi sle, %1, %c0 : index | |
%3 = subi %c0, %1 : index | |
%4 = subi %1, %c1_0 : index | |
%5 = select %2, %3, %4 : index | |
%6 = divi_signed %5, %c32 : index | |
%7 = subi %c0, %6 : index | |
%8 = addi %6, %c1_0 : index | |
%9 = select %2, %7, %8 : index | |
hal.return %9, %c1, %c1 : index, index, index | |
} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
// bind(0, 1): 27 x i32 destination array; bind(0, 0): 1 x i32 source array.
spv.GlobalVariable @__resource_var_183968000__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
spv.GlobalVariable @__resource_var_183752544__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
spv.func @pad_test_dispatch_0() "None" { | |
%0 = spv.Constant 27 : i32 | |
%1 = spv.Constant 32 : i32 | |
%2 = spv.Constant 0 : i32 | |
%3 = spv.Constant 9 : i32 | |
%4 = spv.mlir.addressof @__resource_var_183752544__ : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer> | |
%5 = spv.mlir.addressof @__resource_var_183968000__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
// %7 = element 0 of the 1-element source array (the value to broadcast).
%6 = spv.AccessChain %4[%2, %2] : !spv.ptr<!spv.struct<(!spv.array<1 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%7 = spv.Load "StorageBuffer" %6 : i32 | |
// %15 = WorkgroupId[0] * 32 + LocalInvocationId[0], a linear invocation index.
%8 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%9 = spv.Load "Input" %8 : vector<3xi32> | |
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32> | |
%11 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%12 = spv.Load "Input" %11 : vector<3xi32> | |
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32> | |
%14 = spv.IMul %10, %1 : i32 | |
%15 = spv.IAdd %14, %13 : i32 | |
// Guard: only invocations with index < 27 perform the store.
%16 = spv.SLessThan %15, %0 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %16, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
// %17 = %15 / 9 (signed).
%17 = spv.SDiv %15, %3 : i32 | |
// %23 = remainder of %15 by 9, built as |%15| umod |9| and negated when
// %15 differs from its absolute value (i.e. when %15 is negative).
%18 = spv.GLSL.SAbs %15 : i32 | |
%19 = spv.GLSL.SAbs %3 : i32 | |
%20 = spv.UMod %18, %19 : i32 | |
%21 = spv.IEqual %15, %18 : i32 | |
%22 = spv.SNegate %20 : i32 | |
%23 = spv.Select %21, %20, %22 : i1, i32 | |
// %25 = %17 * 9 + %23: destination element index.
%24 = spv.IMul %17, %3 : i32 | |
%25 = spv.IAdd %24, %23 : i32 | |
%26 = spv.AccessChain %5[%2, %25] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %26, %7 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
spv.EntryPoint "GLCompute" @pad_test_dispatch_0, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
// Local size of 32 along x matches the factor 32 used in the index math above.
spv.ExecutionMode @pad_test_dispatch_0 "LocalSize", 32, 1, 1 | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
} | |
} | |
} | |
// Executable @pad_test_dispatch_1: declares the @legacy_io binding interface,
// a Vulkan SPIR-V target whose entry point region computes the workgroup
// count, and the SPIR-V module holding the kernel body.
hal.executable @pad_test_dispatch_1 attributes {sym_visibility = "private"} { | |
// Binding interface: set 0 / binding 0 is read-only, set 0 / binding 1 is
// read/write.
hal.interface @legacy_io { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
hal.executable.target @vulkan_spirv, filter="vulkan*" { | |
hal.executable.entry_point @pad_test_dispatch_1 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (!flow.dispatch.tensor<readonly:2x3xi32>, !flow.dispatch.tensor<readwrite:3x9xi32>) -> ()} { | |
// Workgroup-count region. It multiplies the three index arguments together
// and returns ceil(product / 32) as the x count, with 1 for y and z.
// NOTE(review): the three arguments are presumably the workload sizes -- confirm.
^bb0(%arg0: index, %arg1: index, %arg2: index): // no predecessors | |
%c1 = constant 1 : index | |
%0 = muli %arg0, %arg1 : index | |
%1 = muli %0, %arg2 : index | |
%c32 = constant 32 : index | |
%c0 = constant 0 : index | |
%c1_0 = constant 1 : index | |
// Signed ceiling division of %1 by 32, expanded into compare/select form:
// for %1 <= 0 the result is -((0 - %1) / 32), otherwise ((%1 - 1) / 32) + 1.
%2 = cmpi sle, %1, %c0 : index | |
%3 = subi %c0, %1 : index | |
%4 = subi %1, %c1_0 : index | |
%5 = select %2, %3, %4 : index | |
%6 = divi_signed %5, %c32 : index | |
%7 = subi %c0, %6 : index | |
%8 = addi %6, %c1_0 : index | |
%9 = select %2, %7, %8 : index | |
hal.return %9, %c1, %c1 : index, index, index | |
} | |
module attributes {spv.target_env = #spv.target_env<#spv.vce<v1.3, [Shader, Float16, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, VariablePointers, VariablePointersStorageBuffer], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers]>, ARM:IntegratedGPU, {cooperative_matrix_properties_nv = [], max_compute_shared_memory_size = 32768 : i32, max_compute_workgroup_invocations = 512 : i32, max_compute_workgroup_size = dense<512> : vector<3xi32>, subgroup_size = 16 : i32}>} { | |
spv.module Logical GLSL450 requires #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> { | |
spv.GlobalVariable @__builtin_var_LocalInvocationId__ built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input> | |
spv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spv.ptr<vector<3xi32>, Input> | |
// Resource at (set 0, binding 1): a 27-element i32 array (the store target below).
spv.GlobalVariable @__resource_var_184152960__ bind(0, 1) : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
// Resource at (set 0, binding 0): a 6-element i32 array (the load source below).
spv.GlobalVariable @__resource_var_183752544__ bind(0, 0) : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer> | |
// Kernel body: each invocation with linear index i < 6 copies element i of the
// 6-element array into the 27-element array at offset (i / 3) * 9 + (i rem 3) + 1.
spv.func @pad_test_dispatch_1() "None" { | |
%0 = spv.Constant 6 : i32 | |
%1 = spv.Constant 1 : i32 | |
%2 = spv.Constant 32 : i32 | |
%3 = spv.Constant 3 : i32 | |
%4 = spv.Constant 0 : i32 | |
%5 = spv.Constant 9 : i32 | |
%6 = spv.mlir.addressof @__resource_var_183752544__ : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer> | |
%7 = spv.mlir.addressof @__resource_var_184152960__ : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer> | |
%8 = spv.mlir.addressof @__builtin_var_WorkgroupId__ : !spv.ptr<vector<3xi32>, Input> | |
%9 = spv.Load "Input" %8 : vector<3xi32> | |
%10 = spv.CompositeExtract %9[0 : i32] : vector<3xi32> | |
%11 = spv.mlir.addressof @__builtin_var_LocalInvocationId__ : !spv.ptr<vector<3xi32>, Input> | |
%12 = spv.Load "Input" %11 : vector<3xi32> | |
%13 = spv.CompositeExtract %12[0 : i32] : vector<3xi32> | |
// Linear invocation index: WorkgroupId.x * 32 + LocalInvocationId.x
// (32 matches the LocalSize x dimension declared at the end of the module).
%14 = spv.IMul %10, %2 : i32 | |
%15 = spv.IAdd %14, %13 : i32 | |
// Only invocations whose index is below 6 take the ^bb1 path.
%16 = spv.SLessThan %15, %0 : i32 | |
spv.mlir.selection { | |
spv.BranchConditional %16, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
// %17 = index / 3.
%17 = spv.SDiv %15, %3 : i32 | |
// %23 = remainder of index by 3, built as |index| umod |3| and negated when
// index differs from its absolute value (i.e. when index is negative).
%18 = spv.GLSL.SAbs %15 : i32 | |
%19 = spv.GLSL.SAbs %3 : i32 | |
%20 = spv.UMod %18, %19 : i32 | |
%21 = spv.IEqual %15, %18 : i32 | |
%22 = spv.SNegate %20 : i32 | |
%23 = spv.Select %21, %20, %22 : i1, i32 | |
// Source offset %25 = %17 * 3 + %23 into the 6-element array.
%24 = spv.IMul %17, %3 : i32 | |
%25 = spv.IAdd %24, %23 : i32 | |
%26 = spv.AccessChain %6[%4, %25] : !spv.ptr<!spv.struct<(!spv.array<6 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
%27 = spv.Load "StorageBuffer" %26 : i32 | |
// Destination offset %30 = %17 * 9 + (%23 + 1) into the 27-element array.
%28 = spv.IAdd %23, %1 : i32 | |
%29 = spv.IMul %17, %5 : i32 | |
%30 = spv.IAdd %29, %28 : i32 | |
%31 = spv.AccessChain %7[%4, %30] : !spv.ptr<!spv.struct<(!spv.array<27 x i32, stride=4> [0])>, StorageBuffer>, i32, i32 | |
spv.Store "StorageBuffer" %31, %27 : i32 | |
spv.Branch ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
spv.mlir.merge | |
} | |
spv.Return | |
} | |
spv.EntryPoint "GLCompute" @pad_test_dispatch_1, @__builtin_var_WorkgroupId__, @__builtin_var_LocalInvocationId__ | |
spv.ExecutionMode @pad_test_dispatch_1 "LocalSize", 32, 1, 1 | |
} | |
hal.interface @legacy_io attributes {sym_visibility = "private"} { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
} | |
} | |
} | |
// Host-side driver for the pad test (exported as "pad_test$raw"). It maps three
// constants into buffers, allocates an output buffer, records two dispatches
// into a one-shot command buffer, submits it and waits, then compares the
// output against the expected constant with check.expect_eq.
func @pad_test() attributes {iree.module.export = "pad_test$raw", noinline} { | |
%c-1 = constant -1 : index | |
%c4 = constant 4 : index | |
%c24 = constant 24 : index | |
%c108 = constant 108 : index | |
%c1 = constant 1 : index | |
%c0 = constant 0 : index | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
// NOTE(review): 16777248 is used as the buffer view element_type below;
// presumably the encoded id for i32 -- confirm.
%c16777248_i32 = constant 16777248 : i32 | |
%dev = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator %dev : !hal.allocator | |
// %mapped: the expected 3x9 result, compared against the output at the end.
%0 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%mapped = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %0[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
// %mapped_0: the scalar constant 0.
%1 = iree.byte_buffer.constant : !iree.byte_buffer = dense<0> : tensor<i32> | |
%mapped_0 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %1[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
// %mapped_1: the 2x3 input tensor.
%2 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%mapped_1 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %2[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%3 = iree.do_not_optimize(%mapped_1) : !hal.buffer | |
%4 = iree.do_not_optimize(%mapped_0) : !hal.buffer | |
// Output buffer of size 108 (presumably bytes: 3 * 9 i32 elements * 4 -- confirm).
%buffer = hal.allocator.allocate %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %c108 : !hal.buffer | |
%cmd = hal.command_buffer.create %dev, OneShot, "Transfer|Dispatch" : !hal.command_buffer | |
hal.command_buffer.begin %cmd | |
// First dispatch: binding 0 = scalar zero buffer (length 4), binding 1 = output.
%5 = hal.variable.load @_executable_layout_0 : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %5, set = %c0, bindings = [%c0 = (%4, %c0, %c4), %c1 = (%buffer, %c0, %c108)] | |
// The dispatch is only recorded when @_device_match_id_0 is true; the false
// edge leads to iree.unreachable.
%6 = hal.variable.load @_device_match_id_0 : i1 | |
cond_br %6, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%c1_2 = constant 1 : index | |
%c1_3 = constant 1 : index | |
%7 = hal.command_buffer.device %cmd : !hal.device | |
%8 = hal.variable.load @_executable_pad_test_dispatch_0 : !hal.executable | |
hal.command_buffer.dispatch %cmd, %8, entry_point = 0, workgroup_xyz = [%c1_3, %c1_2, %c1_2] | |
br ^bb3 | |
^bb2: // pred: ^bb0 | |
iree.unreachable | |
^bb3: // pred: ^bb1 | |
// Barrier between the two dispatches, which both bind %buffer at binding 1.
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
// Second dispatch: binding 0 = 2x3 input buffer (length 24), binding 1 = output.
%9 = hal.variable.load @_executable_layout_1 : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %9, set = %c0, bindings = [%c0 = (%3, %c0, %c24), %c1 = (%buffer, %c0, %c108)] | |
%10 = hal.variable.load @_device_match_id_0 : i1 | |
cond_br %10, ^bb4, ^bb5 | |
^bb4: // pred: ^bb3 | |
%c1_4 = constant 1 : index | |
%c1_5 = constant 1 : index | |
%11 = hal.command_buffer.device %cmd : !hal.device | |
%12 = hal.variable.load @_executable_pad_test_dispatch_1 : !hal.executable | |
hal.command_buffer.dispatch %cmd, %12, entry_point = 0, workgroup_xyz = [%c1_5, %c1_4, %c1_4] | |
br ^bb6 | |
^bb5: // pred: ^bb3 | |
iree.unreachable | |
^bb6: // pred: ^bb4 | |
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
hal.command_buffer.end %cmd | |
hal.ex.submit_and_wait %dev, %cmd | |
// Wrap the output and the expected constant as 3x9 views and compare them.
%view = hal.buffer_view.create %buffer, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
%view_6 = hal.buffer_view.create %mapped, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
check.expect_eq(%view, %view_6) : !hal.buffer_view | |
return | |
} | |
// Semaphore-ordered wrapper around @pad_test (exported as "pad_test$async").
// It waits until semaphore %arg0 reaches at least %arg1, checks that the wait
// succeeded, calls @pad_test, and then signals semaphore %arg2 with value %arg3.
func @pad_test$async(%arg0: !hal.semaphore, %arg1: index, %arg2: !hal.semaphore, %arg3: index) attributes {iree.module.export = "pad_test$async"} { | |
%0 = hal.semaphore.await %arg0, min_value = %arg1 : i32 | |
hal.check_success %0, "semaphore wait failed" | |
call @pad_test() : () -> () | |
hal.semaphore.signal %arg2, value = %arg3 | |
return | |
} | |
// Blocking entry point (exported as "pad_test"). It creates a semaphore with
// initial value 0, calls @pad_test$async using that one semaphore for both the
// wait (value 0) and the signal (value 1), then awaits value 1 and checks that
// the wait succeeded.
func @pad_test$sync() attributes {iree.abi.stub, iree.module.export = "pad_test", iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%c0 = constant 0 : index | |
%c1 = constant 1 : index | |
%dev = hal.ex.shared_device : !hal.device | |
%semaphore = hal.semaphore.create %dev, initial_value = %c0 : !hal.semaphore | |
call @pad_test$async(%semaphore, %c0, %semaphore, %c1) : (!hal.semaphore, index, !hal.semaphore, index) -> () | |
%0 = hal.semaphore.await %semaphore, min_value = %c1 : i32 | |
hal.check_success %0, "semaphore wait failed" | |
return | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Initializer returning an i1: the result of matching the shared device's id
// against the pattern "vulkan*".
func private @_device_match_id_0_initializer() -> i1 { | |
%dev = hal.ex.shared_device : !hal.device | |
%0 = hal.device.match.id %dev, pattern = ["vulkan*"] : (!hal.device) -> i1 | |
return %0 : i1 | |
} | |
// *** IR Dump After CSE *** | |
// Initializer returning an i1: the result of matching the shared device's id
// against the pattern "vulkan*".
func private @_device_match_id_0_initializer() -> i1 { | |
%dev = hal.ex.shared_device : !hal.device | |
%0 = hal.device.match.id %dev, pattern = ["vulkan*"] : (!hal.device) -> i1 | |
return %0 : i1 | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::CSEVariableLoadsPass *** | |
// Initializer returning an i1: the result of matching the shared device's id
// against the pattern "vulkan*".
func private @_device_match_id_0_initializer() -> i1 { | |
%dev = hal.ex.shared_device : !hal.device | |
%0 = hal.device.match.id %dev, pattern = ["vulkan*"] : (!hal.device) -> i1 | |
return %0 : i1 | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Initializer that creates a PushOnly descriptor set layout on the shared
// device with two StorageBuffer bindings: binding 0 with "Read" access and
// binding 1 with "Write|Discard" access.
func private @_descriptor_set_layout_0_initializer() -> !hal.descriptor_set_layout { | |
%dev = hal.ex.shared_device : !hal.device | |
%descriptor_set_layout = hal.descriptor_set_layout.create %dev, PushOnly, bindings = [#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Write|Discard">] : !hal.descriptor_set_layout | |
return %descriptor_set_layout : !hal.descriptor_set_layout | |
} | |
// *** IR Dump After CSE *** | |
// Initializer that creates a PushOnly descriptor set layout on the shared
// device with two StorageBuffer bindings: binding 0 with "Read" access and
// binding 1 with "Write|Discard" access.
func private @_descriptor_set_layout_0_initializer() -> !hal.descriptor_set_layout { | |
%dev = hal.ex.shared_device : !hal.device | |
%descriptor_set_layout = hal.descriptor_set_layout.create %dev, PushOnly, bindings = [#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Write|Discard">] : !hal.descriptor_set_layout | |
return %descriptor_set_layout : !hal.descriptor_set_layout | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::CSEVariableLoadsPass *** | |
// Initializer that creates a PushOnly descriptor set layout on the shared
// device with two StorageBuffer bindings: binding 0 with "Read" access and
// binding 1 with "Write|Discard" access.
func private @_descriptor_set_layout_0_initializer() -> !hal.descriptor_set_layout { | |
%dev = hal.ex.shared_device : !hal.device | |
%descriptor_set_layout = hal.descriptor_set_layout.create %dev, PushOnly, bindings = [#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Write|Discard">] : !hal.descriptor_set_layout | |
return %descriptor_set_layout : !hal.descriptor_set_layout | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Initializer that loads @_descriptor_set_layout_0 and creates an executable
// layout on the shared device with 0 push constants and that single set layout.
func private @_executable_layout_0_initializer() -> !hal.executable_layout { | |
%0 = hal.variable.load @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
%dev = hal.ex.shared_device : !hal.device | |
%executable_layout = hal.executable_layout.create %dev, push_constants = 0, set_layouts = [%0] : !hal.executable_layout | |
return %executable_layout : !hal.executable_layout | |
} | |
// *** IR Dump After CSE *** | |
// Initializer that loads @_descriptor_set_layout_0 and creates an executable
// layout on the shared device with 0 push constants and that single set layout.
func private @_executable_layout_0_initializer() -> !hal.executable_layout { | |
%0 = hal.variable.load @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
%dev = hal.ex.shared_device : !hal.device | |
%executable_layout = hal.executable_layout.create %dev, push_constants = 0, set_layouts = [%0] : !hal.executable_layout | |
return %executable_layout : !hal.executable_layout | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::CSEVariableLoadsPass *** | |
// Initializer that loads @_descriptor_set_layout_0 and creates an executable
// layout on the shared device with 0 push constants and that single set layout.
func private @_executable_layout_0_initializer() -> !hal.executable_layout { | |
%0 = hal.variable.load @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
%dev = hal.ex.shared_device : !hal.device | |
%executable_layout = hal.executable_layout.create %dev, push_constants = 0, set_layouts = [%0] : !hal.executable_layout | |
return %executable_layout : !hal.executable_layout | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Initializer that creates a PushOnly descriptor set layout on the shared
// device with two StorageBuffer bindings: binding 0 with "Read" access and
// binding 1 with "Read|Write" access.
func private @_descriptor_set_layout_1_initializer() -> !hal.descriptor_set_layout { | |
%dev = hal.ex.shared_device : !hal.device | |
%descriptor_set_layout = hal.descriptor_set_layout.create %dev, PushOnly, bindings = [#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Read|Write">] : !hal.descriptor_set_layout | |
return %descriptor_set_layout : !hal.descriptor_set_layout | |
} | |
// *** IR Dump After CSE *** | |
// Initializer that creates a PushOnly descriptor set layout on the shared
// device with two StorageBuffer bindings: binding 0 with "Read" access and
// binding 1 with "Read|Write" access.
func private @_descriptor_set_layout_1_initializer() -> !hal.descriptor_set_layout { | |
%dev = hal.ex.shared_device : !hal.device | |
%descriptor_set_layout = hal.descriptor_set_layout.create %dev, PushOnly, bindings = [#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Read|Write">] : !hal.descriptor_set_layout | |
return %descriptor_set_layout : !hal.descriptor_set_layout | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::CSEVariableLoadsPass *** | |
// Initializer that creates a PushOnly descriptor set layout on the shared
// device with two StorageBuffer bindings: binding 0 with "Read" access and
// binding 1 with "Read|Write" access.
func private @_descriptor_set_layout_1_initializer() -> !hal.descriptor_set_layout { | |
%dev = hal.ex.shared_device : !hal.device | |
%descriptor_set_layout = hal.descriptor_set_layout.create %dev, PushOnly, bindings = [#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Read|Write">] : !hal.descriptor_set_layout | |
return %descriptor_set_layout : !hal.descriptor_set_layout | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Initializer that loads @_descriptor_set_layout_1 and creates an executable
// layout on the shared device with 0 push constants and that single set layout.
func private @_executable_layout_1_initializer() -> !hal.executable_layout { | |
%0 = hal.variable.load @_descriptor_set_layout_1 : !hal.descriptor_set_layout | |
%dev = hal.ex.shared_device : !hal.device | |
%executable_layout = hal.executable_layout.create %dev, push_constants = 0, set_layouts = [%0] : !hal.executable_layout | |
return %executable_layout : !hal.executable_layout | |
} | |
// *** IR Dump After CSE *** | |
// Initializer that loads @_descriptor_set_layout_1 and creates an executable
// layout on the shared device with 0 push constants and that single set layout.
func private @_executable_layout_1_initializer() -> !hal.executable_layout { | |
%0 = hal.variable.load @_descriptor_set_layout_1 : !hal.descriptor_set_layout | |
%dev = hal.ex.shared_device : !hal.device | |
%executable_layout = hal.executable_layout.create %dev, push_constants = 0, set_layouts = [%0] : !hal.executable_layout | |
return %executable_layout : !hal.executable_layout | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::CSEVariableLoadsPass *** | |
// Initializer that loads @_descriptor_set_layout_1 and creates an executable
// layout on the shared device with 0 push constants and that single set layout.
func private @_executable_layout_1_initializer() -> !hal.executable_layout { | |
%0 = hal.variable.load @_descriptor_set_layout_1 : !hal.descriptor_set_layout | |
%dev = hal.ex.shared_device : !hal.device | |
%executable_layout = hal.executable_layout.create %dev, push_constants = 0, set_layouts = [%0] : !hal.executable_layout | |
return %executable_layout : !hal.executable_layout | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Initializer for the @pad_test_dispatch_0 executable handle. When
// @_device_match_id_0 is true it creates the executable from the
// @vulkan_spirv target using @_executable_layout_0; otherwise it returns
// iree.null.
func private @_executable_pad_test_dispatch_0_initializer() -> !hal.executable { | |
%dev = hal.ex.shared_device : !hal.device | |
%0 = hal.variable.load @_device_match_id_0 : i1 | |
cond_br %0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%1 = hal.variable.load @_executable_layout_0 : !hal.executable_layout | |
%exe = hal.executable.create %dev, @pad_test_dispatch_0::@vulkan_spirv, layouts = [%1] : !hal.executable | |
br ^bb3(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%2 = iree.null : !hal.executable | |
br ^bb3(%2 : !hal.executable) | |
^bb3(%3: !hal.executable): // 2 preds: ^bb1, ^bb2 | |
return %3 : !hal.executable | |
} | |
// *** IR Dump After CSE *** | |
// Initializer for the @pad_test_dispatch_0 executable handle. When
// @_device_match_id_0 is true it creates the executable from the
// @vulkan_spirv target using @_executable_layout_0; otherwise it returns
// iree.null.
func private @_executable_pad_test_dispatch_0_initializer() -> !hal.executable { | |
%dev = hal.ex.shared_device : !hal.device | |
%0 = hal.variable.load @_device_match_id_0 : i1 | |
cond_br %0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%1 = hal.variable.load @_executable_layout_0 : !hal.executable_layout | |
%exe = hal.executable.create %dev, @pad_test_dispatch_0::@vulkan_spirv, layouts = [%1] : !hal.executable | |
br ^bb3(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%2 = iree.null : !hal.executable | |
br ^bb3(%2 : !hal.executable) | |
^bb3(%3: !hal.executable): // 2 preds: ^bb1, ^bb2 | |
return %3 : !hal.executable | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::CSEVariableLoadsPass *** | |
// Initializer for the @pad_test_dispatch_0 executable handle. When
// @_device_match_id_0 is true it creates the executable from the
// @vulkan_spirv target using @_executable_layout_0; otherwise it returns
// iree.null.
func private @_executable_pad_test_dispatch_0_initializer() -> !hal.executable { | |
%dev = hal.ex.shared_device : !hal.device | |
%0 = hal.variable.load @_device_match_id_0 : i1 | |
cond_br %0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%1 = hal.variable.load @_executable_layout_0 : !hal.executable_layout | |
%exe = hal.executable.create %dev, @pad_test_dispatch_0::@vulkan_spirv, layouts = [%1] : !hal.executable | |
br ^bb3(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%2 = iree.null : !hal.executable | |
br ^bb3(%2 : !hal.executable) | |
^bb3(%3: !hal.executable): // 2 preds: ^bb1, ^bb2 | |
return %3 : !hal.executable | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Initializer for the @pad_test_dispatch_1 executable handle. When
// @_device_match_id_0 is true it creates the executable from the
// @vulkan_spirv target using @_executable_layout_1; otherwise it returns
// iree.null.
func private @_executable_pad_test_dispatch_1_initializer() -> !hal.executable { | |
%dev = hal.ex.shared_device : !hal.device | |
%0 = hal.variable.load @_device_match_id_0 : i1 | |
cond_br %0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%1 = hal.variable.load @_executable_layout_1 : !hal.executable_layout | |
%exe = hal.executable.create %dev, @pad_test_dispatch_1::@vulkan_spirv, layouts = [%1] : !hal.executable | |
br ^bb3(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%2 = iree.null : !hal.executable | |
br ^bb3(%2 : !hal.executable) | |
^bb3(%3: !hal.executable): // 2 preds: ^bb1, ^bb2 | |
return %3 : !hal.executable | |
} | |
// *** IR Dump After CSE *** | |
// Initializer for the @pad_test_dispatch_1 executable handle. When
// @_device_match_id_0 is true it creates the executable from the
// @vulkan_spirv target using @_executable_layout_1; otherwise it returns
// iree.null.
func private @_executable_pad_test_dispatch_1_initializer() -> !hal.executable { | |
%dev = hal.ex.shared_device : !hal.device | |
%0 = hal.variable.load @_device_match_id_0 : i1 | |
cond_br %0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%1 = hal.variable.load @_executable_layout_1 : !hal.executable_layout | |
%exe = hal.executable.create %dev, @pad_test_dispatch_1::@vulkan_spirv, layouts = [%1] : !hal.executable | |
br ^bb3(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%2 = iree.null : !hal.executable | |
br ^bb3(%2 : !hal.executable) | |
^bb3(%3: !hal.executable): // 2 preds: ^bb1, ^bb2 | |
return %3 : !hal.executable | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::CSEVariableLoadsPass *** | |
// Initializer for the @pad_test_dispatch_1 executable handle. When
// @_device_match_id_0 is true it creates the executable from the
// @vulkan_spirv target using @_executable_layout_1; otherwise it returns
// iree.null.
func private @_executable_pad_test_dispatch_1_initializer() -> !hal.executable { | |
%dev = hal.ex.shared_device : !hal.device | |
%0 = hal.variable.load @_device_match_id_0 : i1 | |
cond_br %0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%1 = hal.variable.load @_executable_layout_1 : !hal.executable_layout | |
%exe = hal.executable.create %dev, @pad_test_dispatch_1::@vulkan_spirv, layouts = [%1] : !hal.executable | |
br ^bb3(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%2 = iree.null : !hal.executable | |
br ^bb3(%2 : !hal.executable) | |
^bb3(%3: !hal.executable): // 2 preds: ^bb1, ^bb2 | |
return %3 : !hal.executable | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::SerializeExecutablesPass *** | |
hal.executable @pad_test_dispatch_0 attributes {sym_visibility = "private"} { | |
hal.interface @legacy_io { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
hal.executable.binary @vulkan_spirv attributes {data = dense<"0x080000005350564588FAFFFF08000000240000000100000004000000130000007061645F746573745F64697370617463685F300052010000030223070000010016000000300000000000000011000200010000000A000B005350565F4B48525F73746F726167655F6275666665725F73746F726167655F636C617373000000000B00060027000000474C534C2E7374642E343530000000000E00030000000000010000000F000A0005000000120000007061645F746573745F64697370617463685F3000050000000400000010000600120000001100000020000000010000000100000005000B00040000005F5F6275696C74696E5F7661725F4C6F63616C496E766F636174696F6E49645F5F00000005000900050000005F5F6275696C74696E5F7661725F576F726B67726F757049645F5F00050009000A0000005F5F7265736F757263655F7661725F3138333936383030305F5F0000050009000F0000005F5F7265736F757263655F7661725F3138333735323534345F5F000005000700120000007061645F746573745F64697370617463685F300047000400040000000B0000001B00000047000400050000000B0000001A000000470004000800000006000000040000004800050007000000000000002300000000000000470003000700000002000000470004000A0000002100000001000000470004000A0000002200000000000000470004000D0000000600000004000000480005000C000000000000002300000000000000470003000C00000002000000470004000F0000002100000000000000470004000F00000022000000000000001500040003000000200000000000000017000400020000000300000003000000200004000100000001000000020000003B0004000100000004000000010000003B0004000100000005000000010000002B00040003000000090000001B0000001C0004000800000003000000090000001E000300070000000800000020000400060000000C000000070000003B000400060000000A0000000C0000002B000400030000000E000000010000001C0004000D000000030000000E0000001E0003000C0000000D000000200004000B0000000C0000000C0000003B0004000B0000000F0000000C00000013000200110000002100030010000000110000002B0004000300000014000000200000002B0004000300000015000000000000002B00040003000000160000000900000020000400170000000C0000000300000014000200200000003600050011000000120000000000000010000000F8000200130000004100060017000000180000000F000000
15000000150000003D0004000300000019000000180000003D000400020000001A0000000500000051000500030000001B0000001A000000000000003D000400020000001C0000000400000051000500030000001D0000001C0000000000000084000500030000001E0000001B0000001400000080000500030000001F0000001E0000001D000000B100050020000000210000001F00000009000000F70003002400000000000000FA000400210000002300000024000000F8000200230000008700050003000000250000001F000000160000000C000600030000002600000027000000050000001F0000000C00060003000000280000002700000005000000160000008900050003000000290000002600000028000000AA000500200000002A0000001F000000260000007E000400030000002B00000029000000A9000600030000002C0000002A000000290000002B00000084000500030000002D000000250000001600000080000500030000002E0000002D0000002C00000041000600170000002F0000000A000000150000002E0000003E0003002F00000019000000F900020024000000F800020024000000FD0001003800010008000C0004000800"> : vector<1416xi8>, format = 1397773893 : i32} { | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::SerializeExecutablesPass *** | |
hal.executable @pad_test_dispatch_1 attributes {sym_visibility = "private"} { | |
hal.interface @legacy_io { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
hal.executable.binary @vulkan_spirv attributes {data = dense<"0x08000000535056452CFAFFFF08000000240000000100000004000000130000007061645F746573745F64697370617463685F310069010000030223070000010016000000350000000000000011000200010000000A000B005350565F4B48525F73746F726167655F6275666665725F73746F726167655F636C617373000000000B00060026000000474C534C2E7374642E343530000000000E00030000000000010000000F000A0005000000120000007061645F746573745F64697370617463685F3100050000000400000010000600120000001100000020000000010000000100000005000B00040000005F5F6275696C74696E5F7661725F4C6F63616C496E766F636174696F6E49645F5F00000005000900050000005F5F6275696C74696E5F7661725F576F726B67726F757049645F5F00050009000A0000005F5F7265736F757263655F7661725F3138343135323936305F5F0000050009000F0000005F5F7265736F757263655F7661725F3138333735323534345F5F000005000700120000007061645F746573745F64697370617463685F310047000400040000000B0000001B00000047000400050000000B0000001A000000470004000800000006000000040000004800050007000000000000002300000000000000470003000700000002000000470004000A0000002100000001000000470004000A0000002200000000000000470004000D0000000600000004000000480005000C000000000000002300000000000000470003000C00000002000000470004000F0000002100000000000000470004000F00000022000000000000001500040003000000200000000000000017000400020000000300000003000000200004000100000001000000020000003B0004000100000004000000010000003B0004000100000005000000010000002B00040003000000090000001B0000001C0004000800000003000000090000001E000300070000000800000020000400060000000C000000070000003B000400060000000A0000000C0000002B000400030000000E000000060000001C0004000D000000030000000E0000001E0003000C0000000D000000200004000B0000000C0000000C0000003B0004000B0000000F0000000C00000013000200110000002100030010000000110000002B0004000300000014000000010000002B0004000300000015000000200000002B0004000300000016000000030000002B0004000300000017000000000000002B000400030000001800000009000000140002001F000000200004002E0000000C00000003000000360005001100000012000000
0000000010000000F8000200130000003D00040002000000190000000500000051000500030000001A00000019000000000000003D000400020000001B0000000400000051000500030000001C0000001B0000000000000084000500030000001D0000001A0000001500000080000500030000001E0000001D0000001C000000B10005001F000000200000001E0000000E000000F70003002300000000000000FA000400200000002200000023000000F8000200220000008700050003000000240000001E000000160000000C000600030000002500000026000000050000001E0000000C00060003000000270000002600000005000000160000008900050003000000280000002500000027000000AA0005001F000000290000001E000000250000007E000400030000002A00000028000000A9000600030000002B00000029000000280000002A00000084000500030000002C000000240000001600000080000500030000002D0000002C0000002B000000410006002E0000002F0000000F000000170000002D0000003D00040003000000300000002F0000008000050003000000310000002B0000001400000084000500030000003200000024000000180000008000050003000000330000003200000031000000410006002E000000340000000A00000017000000330000003E0003003400000030000000F900020023000000F800020023000000FD0001003800010008000C0004000800"> : vector<1508xi8>, format = 1397773893 : i32} { | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Host-side driver for the pad test (exported as "pad_test$raw"). It maps three
// constants into buffers, allocates an output buffer, records two dispatches
// into a one-shot command buffer, submits it and waits, then compares the
// output against the expected constant with check.expect_eq.
func @pad_test() attributes {iree.module.export = "pad_test$raw", noinline} { | |
%c-1 = constant -1 : index | |
%c4 = constant 4 : index | |
%c24 = constant 24 : index | |
%c108 = constant 108 : index | |
%c0 = constant 0 : index | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
// NOTE(review): 16777248 is used as the buffer view element_type below;
// presumably the encoded id for i32 -- confirm.
%c16777248_i32 = constant 16777248 : i32 | |
%c1 = constant 1 : index | |
%dev = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator %dev : !hal.allocator | |
// %mapped: the expected 3x9 result, compared against the output at the end.
%0 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%mapped = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %0[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
// %mapped_0: the scalar constant 0.
%1 = iree.byte_buffer.constant : !iree.byte_buffer = dense<0> : tensor<i32> | |
%mapped_0 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %1[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
// %mapped_1: the 2x3 input tensor.
%2 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%mapped_1 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %2[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%3 = iree.do_not_optimize(%mapped_1) : !hal.buffer | |
%4 = iree.do_not_optimize(%mapped_0) : !hal.buffer | |
// Output buffer of size 108 (presumably bytes: 3 * 9 i32 elements * 4 -- confirm).
%buffer = hal.allocator.allocate %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %c108 : !hal.buffer | |
%cmd = hal.command_buffer.create %dev, OneShot, "Transfer|Dispatch" : !hal.command_buffer | |
hal.command_buffer.begin %cmd | |
// First dispatch: binding 0 = scalar zero buffer (length 4), binding 1 = output.
%5 = hal.variable.load @_executable_layout_0 : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %5, set = %c0, bindings = [%c0 = (%4, %c0, %c4), %c1 = (%buffer, %c0, %c108)] | |
// Both conditional branches in this function send the false edge to the
// shared ^bb2 block, which is iree.unreachable.
%6 = hal.variable.load @_device_match_id_0 : i1 | |
cond_br %6, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%7 = hal.variable.load @_executable_pad_test_dispatch_0 : !hal.executable | |
hal.command_buffer.dispatch %cmd, %7, entry_point = 0, workgroup_xyz = [%c1, %c1, %c1] | |
// Barrier between the two dispatches, which both bind %buffer at binding 1.
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
// Second dispatch: binding 0 = 2x3 input buffer (length 24), binding 1 = output.
%8 = hal.variable.load @_executable_layout_1 : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %8, set = %c0, bindings = [%c0 = (%3, %c0, %c24), %c1 = (%buffer, %c0, %c108)] | |
// @_device_match_id_0 is loaded a second time here for the second branch.
%9 = hal.variable.load @_device_match_id_0 : i1 | |
cond_br %9, ^bb3, ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
iree.unreachable | |
^bb3: // pred: ^bb1 | |
%10 = hal.variable.load @_executable_pad_test_dispatch_1 : !hal.executable | |
hal.command_buffer.dispatch %cmd, %10, entry_point = 0, workgroup_xyz = [%c1, %c1, %c1] | |
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
hal.command_buffer.end %cmd | |
hal.ex.submit_and_wait %dev, %cmd | |
// Wrap the output and the expected constant as 3x9 views and compare them.
%view = hal.buffer_view.create %buffer, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
%view_2 = hal.buffer_view.create %mapped, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
check.expect_eq(%view, %view_2) : !hal.buffer_view | |
return | |
} | |
// *** IR Dump After CSE *** | |
// Host-side driver for the pad test (exported as "pad_test$raw"). It maps three
// constants into buffers, allocates an output buffer, records two dispatches
// into a one-shot command buffer, submits it and waits, then compares the
// output against the expected constant with check.expect_eq.
func @pad_test() attributes {iree.module.export = "pad_test$raw", noinline} { | |
%c-1 = constant -1 : index | |
%c4 = constant 4 : index | |
%c24 = constant 24 : index | |
%c108 = constant 108 : index | |
%c0 = constant 0 : index | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
// NOTE(review): 16777248 is used as the buffer view element_type below;
// presumably the encoded id for i32 -- confirm.
%c16777248_i32 = constant 16777248 : i32 | |
%c1 = constant 1 : index | |
%dev = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator %dev : !hal.allocator | |
// %mapped: the expected 3x9 result, compared against the output at the end.
%0 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%mapped = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %0[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
// %mapped_0: the scalar constant 0.
%1 = iree.byte_buffer.constant : !iree.byte_buffer = dense<0> : tensor<i32> | |
%mapped_0 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %1[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
// %mapped_1: the 2x3 input tensor.
%2 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%mapped_1 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %2[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%3 = iree.do_not_optimize(%mapped_1) : !hal.buffer | |
%4 = iree.do_not_optimize(%mapped_0) : !hal.buffer | |
// Output buffer of size 108 (presumably bytes: 3 * 9 i32 elements * 4 -- confirm).
%buffer = hal.allocator.allocate %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %c108 : !hal.buffer | |
%cmd = hal.command_buffer.create %dev, OneShot, "Transfer|Dispatch" : !hal.command_buffer | |
hal.command_buffer.begin %cmd | |
// First dispatch: binding 0 = scalar zero buffer (length 4), binding 1 = output.
%5 = hal.variable.load @_executable_layout_0 : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %5, set = %c0, bindings = [%c0 = (%4, %c0, %c4), %c1 = (%buffer, %c0, %c108)] | |
// %6 guards both dispatches; either false edge lands on the shared ^bb2
// block, which is iree.unreachable.
%6 = hal.variable.load @_device_match_id_0 : i1 | |
cond_br %6, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%7 = hal.variable.load @_executable_pad_test_dispatch_0 : !hal.executable | |
hal.command_buffer.dispatch %cmd, %7, entry_point = 0, workgroup_xyz = [%c1, %c1, %c1] | |
// Barrier between the two dispatches, which both bind %buffer at binding 1.
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
// Second dispatch: binding 0 = 2x3 input buffer (length 24), binding 1 = output.
%8 = hal.variable.load @_executable_layout_1 : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %8, set = %c0, bindings = [%c0 = (%3, %c0, %c24), %c1 = (%buffer, %c0, %c108)] | |
// Branches on the same %6 value loaded in the entry block.
cond_br %6, ^bb3, ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
iree.unreachable | |
^bb3: // pred: ^bb1 | |
%9 = hal.variable.load @_executable_pad_test_dispatch_1 : !hal.executable | |
hal.command_buffer.dispatch %cmd, %9, entry_point = 0, workgroup_xyz = [%c1, %c1, %c1] | |
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
hal.command_buffer.end %cmd | |
hal.ex.submit_and_wait %dev, %cmd | |
// Wrap the output and the expected constant as 3x9 views and compare them.
%view = hal.buffer_view.create %buffer, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
%view_2 = hal.buffer_view.create %mapped, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
check.expect_eq(%view, %view_2) : !hal.buffer_view | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::CSEVariableLoadsPass *** | |
func @pad_test() attributes {iree.module.export = "pad_test$raw", noinline} { | |
%c-1 = constant -1 : index | |
%c4 = constant 4 : index | |
%c24 = constant 24 : index | |
%c108 = constant 108 : index | |
%c0 = constant 0 : index | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
%c16777248_i32 = constant 16777248 : i32 | |
%c1 = constant 1 : index | |
%dev = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator %dev : !hal.allocator | |
%0 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%mapped = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %0[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%1 = iree.byte_buffer.constant : !iree.byte_buffer = dense<0> : tensor<i32> | |
%mapped_0 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %1[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%2 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%mapped_1 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %2[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%3 = iree.do_not_optimize(%mapped_1) : !hal.buffer | |
%4 = iree.do_not_optimize(%mapped_0) : !hal.buffer | |
%buffer = hal.allocator.allocate %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %c108 : !hal.buffer | |
%cmd = hal.command_buffer.create %dev, OneShot, "Transfer|Dispatch" : !hal.command_buffer | |
hal.command_buffer.begin %cmd | |
%5 = hal.variable.load @_executable_layout_0 : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %5, set = %c0, bindings = [%c0 = (%4, %c0, %c4), %c1 = (%buffer, %c0, %c108)] | |
%6 = hal.variable.load @_device_match_id_0 : i1 | |
cond_br %6, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%7 = hal.variable.load @_executable_pad_test_dispatch_0 : !hal.executable | |
hal.command_buffer.dispatch %cmd, %7, entry_point = 0, workgroup_xyz = [%c1, %c1, %c1] | |
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
%8 = hal.variable.load @_executable_layout_1 : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %8, set = %c0, bindings = [%c0 = (%3, %c0, %c24), %c1 = (%buffer, %c0, %c108)] | |
cond_br %6, ^bb3, ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
iree.unreachable | |
^bb3: // pred: ^bb1 | |
%9 = hal.variable.load @_executable_pad_test_dispatch_1 : !hal.executable | |
hal.command_buffer.dispatch %cmd, %9, entry_point = 0, workgroup_xyz = [%c1, %c1, %c1] | |
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
hal.command_buffer.end %cmd | |
hal.ex.submit_and_wait %dev, %cmd | |
%view = hal.buffer_view.create %buffer, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
%view_2 = hal.buffer_view.create %mapped, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
check.expect_eq(%view, %view_2) : !hal.buffer_view | |
return | |
} | |
// *** IR Dump After Canonicalizer *** | |
func @pad_test$async(%arg0: !hal.semaphore, %arg1: index, %arg2: !hal.semaphore, %arg3: index) attributes {iree.module.export = "pad_test$async"} { | |
%0 = hal.semaphore.await %arg0, min_value = %arg1 : i32 | |
hal.check_success %0, "semaphore wait failed" | |
call @pad_test() : () -> () | |
hal.semaphore.signal %arg2, value = %arg3 | |
return | |
} | |
// *** IR Dump After CSE *** | |
func @pad_test$async(%arg0: !hal.semaphore, %arg1: index, %arg2: !hal.semaphore, %arg3: index) attributes {iree.module.export = "pad_test$async"} { | |
%0 = hal.semaphore.await %arg0, min_value = %arg1 : i32 | |
hal.check_success %0, "semaphore wait failed" | |
call @pad_test() : () -> () | |
hal.semaphore.signal %arg2, value = %arg3 | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::CSEVariableLoadsPass *** | |
func @pad_test$async(%arg0: !hal.semaphore, %arg1: index, %arg2: !hal.semaphore, %arg3: index) attributes {iree.module.export = "pad_test$async"} { | |
%0 = hal.semaphore.await %arg0, min_value = %arg1 : i32 | |
hal.check_success %0, "semaphore wait failed" | |
call @pad_test() : () -> () | |
hal.semaphore.signal %arg2, value = %arg3 | |
return | |
} | |
// *** IR Dump After Canonicalizer *** | |
func @pad_test$sync() attributes {iree.abi.stub, iree.module.export = "pad_test", iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%c0 = constant 0 : index | |
%c1 = constant 1 : index | |
%dev = hal.ex.shared_device : !hal.device | |
%semaphore = hal.semaphore.create %dev, initial_value = %c0 : !hal.semaphore | |
call @pad_test$async(%semaphore, %c0, %semaphore, %c1) : (!hal.semaphore, index, !hal.semaphore, index) -> () | |
%0 = hal.semaphore.await %semaphore, min_value = %c1 : i32 | |
hal.check_success %0, "semaphore wait failed" | |
return | |
} | |
// *** IR Dump After CSE *** | |
func @pad_test$sync() attributes {iree.abi.stub, iree.module.export = "pad_test", iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%c0 = constant 0 : index | |
%c1 = constant 1 : index | |
%dev = hal.ex.shared_device : !hal.device | |
%semaphore = hal.semaphore.create %dev, initial_value = %c0 : !hal.semaphore | |
call @pad_test$async(%semaphore, %c0, %semaphore, %c1) : (!hal.semaphore, index, !hal.semaphore, index) -> () | |
%0 = hal.semaphore.await %semaphore, min_value = %c1 : i32 | |
hal.check_success %0, "semaphore wait failed" | |
return | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::HAL::CSEVariableLoadsPass *** | |
func @pad_test$sync() attributes {iree.abi.stub, iree.module.export = "pad_test", iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%c0 = constant 0 : index | |
%c1 = constant 1 : index | |
%dev = hal.ex.shared_device : !hal.device | |
%semaphore = hal.semaphore.create %dev, initial_value = %c0 : !hal.semaphore | |
call @pad_test$async(%semaphore, %c0, %semaphore, %c1) : (!hal.semaphore, index, !hal.semaphore, index) -> () | |
%0 = hal.semaphore.await %semaphore, min_value = %c1 : i32 | |
hal.check_success %0, "semaphore wait failed" | |
return | |
} | |
// *** IR Dump After SymbolDCE *** | |
module { | |
hal.variable @_device_match_id_0 init(@_device_match_id_0_initializer) : i1 attributes {sym_visibility = "private"} | |
func private @_device_match_id_0_initializer() -> i1 { | |
%dev = hal.ex.shared_device : !hal.device | |
%0 = hal.device.match.id %dev, pattern = ["vulkan*"] : (!hal.device) -> i1 | |
return %0 : i1 | |
} | |
hal.variable @_descriptor_set_layout_0 init(@_descriptor_set_layout_0_initializer) : !hal.descriptor_set_layout attributes {sym_visibility = "private"} | |
func private @_descriptor_set_layout_0_initializer() -> !hal.descriptor_set_layout { | |
%dev = hal.ex.shared_device : !hal.device | |
%descriptor_set_layout = hal.descriptor_set_layout.create %dev, PushOnly, bindings = [#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Write|Discard">] : !hal.descriptor_set_layout | |
return %descriptor_set_layout : !hal.descriptor_set_layout | |
} | |
hal.variable @_executable_layout_0 init(@_executable_layout_0_initializer) : !hal.executable_layout attributes {sym_visibility = "private"} | |
func private @_executable_layout_0_initializer() -> !hal.executable_layout { | |
%0 = hal.variable.load @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
%dev = hal.ex.shared_device : !hal.device | |
%executable_layout = hal.executable_layout.create %dev, push_constants = 0, set_layouts = [%0] : !hal.executable_layout | |
return %executable_layout : !hal.executable_layout | |
} | |
hal.variable @_descriptor_set_layout_1 init(@_descriptor_set_layout_1_initializer) : !hal.descriptor_set_layout attributes {sym_visibility = "private"} | |
func private @_descriptor_set_layout_1_initializer() -> !hal.descriptor_set_layout { | |
%dev = hal.ex.shared_device : !hal.device | |
%descriptor_set_layout = hal.descriptor_set_layout.create %dev, PushOnly, bindings = [#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Read|Write">] : !hal.descriptor_set_layout | |
return %descriptor_set_layout : !hal.descriptor_set_layout | |
} | |
hal.variable @_executable_layout_1 init(@_executable_layout_1_initializer) : !hal.executable_layout attributes {sym_visibility = "private"} | |
func private @_executable_layout_1_initializer() -> !hal.executable_layout { | |
%0 = hal.variable.load @_descriptor_set_layout_1 : !hal.descriptor_set_layout | |
%dev = hal.ex.shared_device : !hal.device | |
%executable_layout = hal.executable_layout.create %dev, push_constants = 0, set_layouts = [%0] : !hal.executable_layout | |
return %executable_layout : !hal.executable_layout | |
} | |
hal.variable @_executable_pad_test_dispatch_0 init(@_executable_pad_test_dispatch_0_initializer) : !hal.executable attributes {sym_visibility = "private"} | |
func private @_executable_pad_test_dispatch_0_initializer() -> !hal.executable { | |
%dev = hal.ex.shared_device : !hal.device | |
%0 = hal.variable.load @_device_match_id_0 : i1 | |
cond_br %0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%1 = hal.variable.load @_executable_layout_0 : !hal.executable_layout | |
%exe = hal.executable.create %dev, @pad_test_dispatch_0::@vulkan_spirv, layouts = [%1] : !hal.executable | |
br ^bb3(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%2 = iree.null : !hal.executable | |
br ^bb3(%2 : !hal.executable) | |
^bb3(%3: !hal.executable): // 2 preds: ^bb1, ^bb2 | |
return %3 : !hal.executable | |
} | |
hal.variable @_executable_pad_test_dispatch_1 init(@_executable_pad_test_dispatch_1_initializer) : !hal.executable attributes {sym_visibility = "private"} | |
func private @_executable_pad_test_dispatch_1_initializer() -> !hal.executable { | |
%dev = hal.ex.shared_device : !hal.device | |
%0 = hal.variable.load @_device_match_id_0 : i1 | |
cond_br %0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%1 = hal.variable.load @_executable_layout_1 : !hal.executable_layout | |
%exe = hal.executable.create %dev, @pad_test_dispatch_1::@vulkan_spirv, layouts = [%1] : !hal.executable | |
br ^bb3(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%2 = iree.null : !hal.executable | |
br ^bb3(%2 : !hal.executable) | |
^bb3(%3: !hal.executable): // 2 preds: ^bb1, ^bb2 | |
return %3 : !hal.executable | |
} | |
hal.executable @pad_test_dispatch_0 attributes {sym_visibility = "private"} { | |
hal.interface @legacy_io { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
hal.executable.binary @vulkan_spirv attributes {data = dense<"0x080000005350564588FAFFFF08000000240000000100000004000000130000007061645F746573745F64697370617463685F300052010000030223070000010016000000300000000000000011000200010000000A000B005350565F4B48525F73746F726167655F6275666665725F73746F726167655F636C617373000000000B00060027000000474C534C2E7374642E343530000000000E00030000000000010000000F000A0005000000120000007061645F746573745F64697370617463685F3000050000000400000010000600120000001100000020000000010000000100000005000B00040000005F5F6275696C74696E5F7661725F4C6F63616C496E766F636174696F6E49645F5F00000005000900050000005F5F6275696C74696E5F7661725F576F726B67726F757049645F5F00050009000A0000005F5F7265736F757263655F7661725F3138333936383030305F5F0000050009000F0000005F5F7265736F757263655F7661725F3138333735323534345F5F000005000700120000007061645F746573745F64697370617463685F300047000400040000000B0000001B00000047000400050000000B0000001A000000470004000800000006000000040000004800050007000000000000002300000000000000470003000700000002000000470004000A0000002100000001000000470004000A0000002200000000000000470004000D0000000600000004000000480005000C000000000000002300000000000000470003000C00000002000000470004000F0000002100000000000000470004000F00000022000000000000001500040003000000200000000000000017000400020000000300000003000000200004000100000001000000020000003B0004000100000004000000010000003B0004000100000005000000010000002B00040003000000090000001B0000001C0004000800000003000000090000001E000300070000000800000020000400060000000C000000070000003B000400060000000A0000000C0000002B000400030000000E000000010000001C0004000D000000030000000E0000001E0003000C0000000D000000200004000B0000000C0000000C0000003B0004000B0000000F0000000C00000013000200110000002100030010000000110000002B0004000300000014000000200000002B0004000300000015000000000000002B00040003000000160000000900000020000400170000000C0000000300000014000200200000003600050011000000120000000000000010000000F8000200130000004100060017000000180000000F000000
15000000150000003D0004000300000019000000180000003D000400020000001A0000000500000051000500030000001B0000001A000000000000003D000400020000001C0000000400000051000500030000001D0000001C0000000000000084000500030000001E0000001B0000001400000080000500030000001F0000001E0000001D000000B100050020000000210000001F00000009000000F70003002400000000000000FA000400210000002300000024000000F8000200230000008700050003000000250000001F000000160000000C000600030000002600000027000000050000001F0000000C00060003000000280000002700000005000000160000008900050003000000290000002600000028000000AA000500200000002A0000001F000000260000007E000400030000002B00000029000000A9000600030000002C0000002A000000290000002B00000084000500030000002D000000250000001600000080000500030000002E0000002D0000002C00000041000600170000002F0000000A000000150000002E0000003E0003002F00000019000000F900020024000000F800020024000000FD0001003800010008000C0004000800"> : vector<1416xi8>, format = 1397773893 : i32} { | |
} | |
} | |
hal.executable @pad_test_dispatch_1 attributes {sym_visibility = "private"} { | |
hal.interface @legacy_io { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
hal.executable.binary @vulkan_spirv attributes {data = dense<"0x08000000535056452CFAFFFF08000000240000000100000004000000130000007061645F746573745F64697370617463685F310069010000030223070000010016000000350000000000000011000200010000000A000B005350565F4B48525F73746F726167655F6275666665725F73746F726167655F636C617373000000000B00060026000000474C534C2E7374642E343530000000000E00030000000000010000000F000A0005000000120000007061645F746573745F64697370617463685F3100050000000400000010000600120000001100000020000000010000000100000005000B00040000005F5F6275696C74696E5F7661725F4C6F63616C496E766F636174696F6E49645F5F00000005000900050000005F5F6275696C74696E5F7661725F576F726B67726F757049645F5F00050009000A0000005F5F7265736F757263655F7661725F3138343135323936305F5F0000050009000F0000005F5F7265736F757263655F7661725F3138333735323534345F5F000005000700120000007061645F746573745F64697370617463685F310047000400040000000B0000001B00000047000400050000000B0000001A000000470004000800000006000000040000004800050007000000000000002300000000000000470003000700000002000000470004000A0000002100000001000000470004000A0000002200000000000000470004000D0000000600000004000000480005000C000000000000002300000000000000470003000C00000002000000470004000F0000002100000000000000470004000F00000022000000000000001500040003000000200000000000000017000400020000000300000003000000200004000100000001000000020000003B0004000100000004000000010000003B0004000100000005000000010000002B00040003000000090000001B0000001C0004000800000003000000090000001E000300070000000800000020000400060000000C000000070000003B000400060000000A0000000C0000002B000400030000000E000000060000001C0004000D000000030000000E0000001E0003000C0000000D000000200004000B0000000C0000000C0000003B0004000B0000000F0000000C00000013000200110000002100030010000000110000002B0004000300000014000000010000002B0004000300000015000000200000002B0004000300000016000000030000002B0004000300000017000000000000002B000400030000001800000009000000140002001F000000200004002E0000000C00000003000000360005001100000012000000
0000000010000000F8000200130000003D00040002000000190000000500000051000500030000001A00000019000000000000003D000400020000001B0000000400000051000500030000001C0000001B0000000000000084000500030000001D0000001A0000001500000080000500030000001E0000001D0000001C000000B10005001F000000200000001E0000000E000000F70003002300000000000000FA000400200000002200000023000000F8000200220000008700050003000000240000001E000000160000000C000600030000002500000026000000050000001E0000000C00060003000000270000002600000005000000160000008900050003000000280000002500000027000000AA0005001F000000290000001E000000250000007E000400030000002A00000028000000A9000600030000002B00000029000000280000002A00000084000500030000002C000000240000001600000080000500030000002D0000002C0000002B000000410006002E0000002F0000000F000000170000002D0000003D00040003000000300000002F0000008000050003000000310000002B0000001400000084000500030000003200000024000000180000008000050003000000330000003200000031000000410006002E000000340000000A00000017000000330000003E0003003400000030000000F900020023000000F800020023000000FD0001003800010008000C0004000800"> : vector<1508xi8>, format = 1397773893 : i32} { | |
} | |
} | |
func @pad_test() attributes {iree.module.export = "pad_test$raw", noinline} { | |
%c-1 = constant -1 : index | |
%c4 = constant 4 : index | |
%c24 = constant 24 : index | |
%c108 = constant 108 : index | |
%c0 = constant 0 : index | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
%c16777248_i32 = constant 16777248 : i32 | |
%c1 = constant 1 : index | |
%dev = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator %dev : !hal.allocator | |
%0 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%mapped = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %0[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%1 = iree.byte_buffer.constant : !iree.byte_buffer = dense<0> : tensor<i32> | |
%mapped_0 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %1[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%2 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%mapped_1 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %2[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%3 = iree.do_not_optimize(%mapped_1) : !hal.buffer | |
%4 = iree.do_not_optimize(%mapped_0) : !hal.buffer | |
%buffer = hal.allocator.allocate %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %c108 : !hal.buffer | |
%cmd = hal.command_buffer.create %dev, OneShot, "Transfer|Dispatch" : !hal.command_buffer | |
hal.command_buffer.begin %cmd | |
%5 = hal.variable.load @_executable_layout_0 : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %5, set = %c0, bindings = [%c0 = (%4, %c0, %c4), %c1 = (%buffer, %c0, %c108)] | |
%6 = hal.variable.load @_device_match_id_0 : i1 | |
cond_br %6, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%7 = hal.variable.load @_executable_pad_test_dispatch_0 : !hal.executable | |
hal.command_buffer.dispatch %cmd, %7, entry_point = 0, workgroup_xyz = [%c1, %c1, %c1] | |
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
%8 = hal.variable.load @_executable_layout_1 : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %8, set = %c0, bindings = [%c0 = (%3, %c0, %c24), %c1 = (%buffer, %c0, %c108)] | |
cond_br %6, ^bb3, ^bb2 | |
^bb2: // 2 preds: ^bb0, ^bb1 | |
iree.unreachable | |
^bb3: // pred: ^bb1 | |
%9 = hal.variable.load @_executable_pad_test_dispatch_1 : !hal.executable | |
hal.command_buffer.dispatch %cmd, %9, entry_point = 0, workgroup_xyz = [%c1, %c1, %c1] | |
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
hal.command_buffer.end %cmd | |
hal.ex.submit_and_wait %dev, %cmd | |
%view = hal.buffer_view.create %buffer, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
%view_2 = hal.buffer_view.create %mapped, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
check.expect_eq(%view, %view_2) : !hal.buffer_view | |
return | |
} | |
func @pad_test$async(%arg0: !hal.semaphore, %arg1: index, %arg2: !hal.semaphore, %arg3: index) attributes {iree.module.export = "pad_test$async"} { | |
%0 = hal.semaphore.await %arg0, min_value = %arg1 : i32 | |
hal.check_success %0, "semaphore wait failed" | |
call @pad_test() : () -> () | |
hal.semaphore.signal %arg2, value = %arg3 | |
return | |
} | |
func @pad_test$sync() attributes {iree.abi.stub, iree.module.export = "pad_test", iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%c0 = constant 0 : index | |
%c1 = constant 1 : index | |
%dev = hal.ex.shared_device : !hal.device | |
%semaphore = hal.semaphore.create %dev, initial_value = %c0 : !hal.semaphore | |
call @pad_test$async(%semaphore, %c0, %semaphore, %c1) : (!hal.semaphore, index, !hal.semaphore, index) -> () | |
%0 = hal.semaphore.await %semaphore, min_value = %c1 : i32 | |
hal.check_success %0, "semaphore wait failed" | |
return | |
} | |
} | |
// *** IR Dump After Canonicalizer *** | |
func private @_device_match_id_0_initializer() -> i1 { | |
%dev = hal.ex.shared_device : !hal.device | |
%0 = hal.device.match.id %dev, pattern = ["vulkan*"] : (!hal.device) -> i1 | |
return %0 : i1 | |
} | |
// *** IR Dump After Canonicalizer *** | |
func private @_descriptor_set_layout_0_initializer() -> !hal.descriptor_set_layout { | |
%dev = hal.ex.shared_device : !hal.device | |
%descriptor_set_layout = hal.descriptor_set_layout.create %dev, PushOnly, bindings = [#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Write|Discard">] : !hal.descriptor_set_layout | |
return %descriptor_set_layout : !hal.descriptor_set_layout | |
} | |
// *** IR Dump After Canonicalizer *** | |
func private @_executable_layout_0_initializer() -> !hal.executable_layout { | |
%0 = hal.variable.load @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
%dev = hal.ex.shared_device : !hal.device | |
%executable_layout = hal.executable_layout.create %dev, push_constants = 0, set_layouts = [%0] : !hal.executable_layout | |
return %executable_layout : !hal.executable_layout | |
} | |
// *** IR Dump After Canonicalizer *** | |
func private @_descriptor_set_layout_1_initializer() -> !hal.descriptor_set_layout { | |
%dev = hal.ex.shared_device : !hal.device | |
%descriptor_set_layout = hal.descriptor_set_layout.create %dev, PushOnly, bindings = [#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Read|Write">] : !hal.descriptor_set_layout | |
return %descriptor_set_layout : !hal.descriptor_set_layout | |
} | |
// *** IR Dump After Canonicalizer *** | |
func private @_executable_layout_1_initializer() -> !hal.executable_layout { | |
%0 = hal.variable.load @_descriptor_set_layout_1 : !hal.descriptor_set_layout | |
%dev = hal.ex.shared_device : !hal.device | |
%executable_layout = hal.executable_layout.create %dev, push_constants = 0, set_layouts = [%0] : !hal.executable_layout | |
return %executable_layout : !hal.executable_layout | |
} | |
// *** IR Dump After Canonicalizer *** | |
func private @_executable_pad_test_dispatch_0_initializer() -> !hal.executable { | |
%dev = hal.ex.shared_device : !hal.device | |
%0 = hal.variable.load @_device_match_id_0 : i1 | |
cond_br %0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%1 = hal.variable.load @_executable_layout_0 : !hal.executable_layout | |
%exe = hal.executable.create %dev, @pad_test_dispatch_0::@vulkan_spirv, layouts = [%1] : !hal.executable | |
br ^bb3(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%2 = iree.null : !hal.executable | |
br ^bb3(%2 : !hal.executable) | |
^bb3(%3: !hal.executable): // 2 preds: ^bb1, ^bb2 | |
return %3 : !hal.executable | |
} | |
// *** IR Dump After Canonicalizer *** | |
func private @_executable_pad_test_dispatch_1_initializer() -> !hal.executable { | |
%dev = hal.ex.shared_device : !hal.device | |
%0 = hal.variable.load @_device_match_id_0 : i1 | |
cond_br %0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%1 = hal.variable.load @_executable_layout_1 : !hal.executable_layout | |
%exe = hal.executable.create %dev, @pad_test_dispatch_1::@vulkan_spirv, layouts = [%1] : !hal.executable | |
br ^bb3(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%2 = iree.null : !hal.executable | |
br ^bb3(%2 : !hal.executable) | |
^bb3(%3: !hal.executable): // 2 preds: ^bb1, ^bb2 | |
return %3 : !hal.executable | |
} | |
// *** IR Dump After Canonicalizer *** | |
func @pad_test() attributes {iree.module.export = "pad_test$raw", noinline} { | |
%c-1 = constant -1 : index | |
%c4 = constant 4 : index | |
%c24 = constant 24 : index | |
%c108 = constant 108 : index | |
%c0 = constant 0 : index | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
%c16777248_i32 = constant 16777248 : i32 | |
%c1 = constant 1 : index | |
%dev = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator %dev : !hal.allocator | |
%0 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%mapped = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %0[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%1 = iree.byte_buffer.constant : !iree.byte_buffer = dense<0> : tensor<i32> | |
%mapped_0 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %1[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%2 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%mapped_1 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %2[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%3 = iree.do_not_optimize(%mapped_1) : !hal.buffer | |
%4 = iree.do_not_optimize(%mapped_0) : !hal.buffer | |
%buffer = hal.allocator.allocate %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %c108 : !hal.buffer | |
%cmd = hal.command_buffer.create %dev, OneShot, "Transfer|Dispatch" : !hal.command_buffer | |
hal.command_buffer.begin %cmd | |
%5 = hal.variable.load @_executable_layout_0 : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %5, set = %c0, bindings = [%c0 = (%4, %c0, %c4), %c1 = (%buffer, %c0, %c108)] | |
%6 = hal.variable.load @_device_match_id_0 : i1 | |
cond_br %6, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%7 = hal.variable.load @_executable_pad_test_dispatch_0 : !hal.executable | |
hal.command_buffer.dispatch %cmd, %7, entry_point = 0, workgroup_xyz = [%c1, %c1, %c1] | |
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
%8 = hal.variable.load @_executable_layout_1 : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %8, set = %c0, bindings = [%c0 = (%3, %c0, %c24), %c1 = (%buffer, %c0, %c108)] | |
%9 = hal.variable.load @_executable_pad_test_dispatch_1 : !hal.executable | |
hal.command_buffer.dispatch %cmd, %9, entry_point = 0, workgroup_xyz = [%c1, %c1, %c1] | |
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
hal.command_buffer.end %cmd | |
hal.ex.submit_and_wait %dev, %cmd | |
%view = hal.buffer_view.create %buffer, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
%view_2 = hal.buffer_view.create %mapped, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
check.expect_eq(%view, %view_2) : !hal.buffer_view | |
return | |
^bb2: // pred: ^bb0 | |
iree.unreachable | |
} | |
// *** IR Dump After Canonicalizer *** | |
func @pad_test$async(%arg0: !hal.semaphore, %arg1: index, %arg2: !hal.semaphore, %arg3: index) attributes {iree.module.export = "pad_test$async"} { | |
%0 = hal.semaphore.await %arg0, min_value = %arg1 : i32 | |
hal.check_success %0, "semaphore wait failed" | |
call @pad_test() : () -> () | |
hal.semaphore.signal %arg2, value = %arg3 | |
return | |
} | |
// *** IR Dump After Canonicalizer *** | |
func @pad_test$sync() attributes {iree.abi.stub, iree.module.export = "pad_test", iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%c0 = constant 0 : index | |
%c1 = constant 1 : index | |
%dev = hal.ex.shared_device : !hal.device | |
%semaphore = hal.semaphore.create %dev, initial_value = %c0 : !hal.semaphore | |
call @pad_test$async(%semaphore, %c0, %semaphore, %c1) : (!hal.semaphore, index, !hal.semaphore, index) -> () | |
%0 = hal.semaphore.await %semaphore, min_value = %c1 : i32 | |
hal.check_success %0, "semaphore wait failed" | |
return | |
} | |
// *** IR Dump After Canonicalizer *** | |
module { | |
hal.variable @_device_match_id_0 init(@_device_match_id_0_initializer) : i1 attributes {sym_visibility = "private"} | |
func private @_device_match_id_0_initializer() -> i1 { | |
%dev = hal.ex.shared_device : !hal.device | |
%0 = hal.device.match.id %dev, pattern = ["vulkan*"] : (!hal.device) -> i1 | |
return %0 : i1 | |
} | |
hal.variable @_descriptor_set_layout_0 init(@_descriptor_set_layout_0_initializer) : !hal.descriptor_set_layout attributes {sym_visibility = "private"} | |
func private @_descriptor_set_layout_0_initializer() -> !hal.descriptor_set_layout { | |
%dev = hal.ex.shared_device : !hal.device | |
%descriptor_set_layout = hal.descriptor_set_layout.create %dev, PushOnly, bindings = [#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Write|Discard">] : !hal.descriptor_set_layout | |
return %descriptor_set_layout : !hal.descriptor_set_layout | |
} | |
hal.variable @_executable_layout_0 init(@_executable_layout_0_initializer) : !hal.executable_layout attributes {sym_visibility = "private"} | |
func private @_executable_layout_0_initializer() -> !hal.executable_layout { | |
%0 = hal.variable.load @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
%dev = hal.ex.shared_device : !hal.device | |
%executable_layout = hal.executable_layout.create %dev, push_constants = 0, set_layouts = [%0] : !hal.executable_layout | |
return %executable_layout : !hal.executable_layout | |
} | |
hal.variable @_descriptor_set_layout_1 init(@_descriptor_set_layout_1_initializer) : !hal.descriptor_set_layout attributes {sym_visibility = "private"} | |
func private @_descriptor_set_layout_1_initializer() -> !hal.descriptor_set_layout { | |
%dev = hal.ex.shared_device : !hal.device | |
%descriptor_set_layout = hal.descriptor_set_layout.create %dev, PushOnly, bindings = [#hal.descriptor_set_layout_binding<0, "StorageBuffer", "Read">, #hal.descriptor_set_layout_binding<1, "StorageBuffer", "Read|Write">] : !hal.descriptor_set_layout | |
return %descriptor_set_layout : !hal.descriptor_set_layout | |
} | |
hal.variable @_executable_layout_1 init(@_executable_layout_1_initializer) : !hal.executable_layout attributes {sym_visibility = "private"} | |
func private @_executable_layout_1_initializer() -> !hal.executable_layout { | |
%0 = hal.variable.load @_descriptor_set_layout_1 : !hal.descriptor_set_layout | |
%dev = hal.ex.shared_device : !hal.device | |
%executable_layout = hal.executable_layout.create %dev, push_constants = 0, set_layouts = [%0] : !hal.executable_layout | |
return %executable_layout : !hal.executable_layout | |
} | |
// Private module variable holding the executable for @pad_test_dispatch_0; populated by the
// initializer below.
hal.variable @_executable_pad_test_dispatch_0 init(@_executable_pad_test_dispatch_0_initializer) : !hal.executable attributes {sym_visibility = "private"} | |
// Initializer for @_executable_pad_test_dispatch_0. Branches on the i1 flag @_device_match_id_0:
//   ^bb1 (flag true):  creates the executable from @pad_test_dispatch_0::@vulkan_spirv using
//                      @_executable_layout_0.
//   ^bb2 (flag false): produces a null executable.
// Both paths merge in ^bb3, which returns the selected value.
func private @_executable_pad_test_dispatch_0_initializer() -> !hal.executable { | |
%dev = hal.ex.shared_device : !hal.device | |
%0 = hal.variable.load @_device_match_id_0 : i1 | |
cond_br %0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%1 = hal.variable.load @_executable_layout_0 : !hal.executable_layout | |
%exe = hal.executable.create %dev, @pad_test_dispatch_0::@vulkan_spirv, layouts = [%1] : !hal.executable | |
br ^bb3(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%2 = iree.null : !hal.executable | |
br ^bb3(%2 : !hal.executable) | |
^bb3(%3: !hal.executable): // 2 preds: ^bb1, ^bb2 | |
return %3 : !hal.executable | |
} | |
// Private module variable holding the executable for @pad_test_dispatch_1; populated by the
// initializer below.
hal.variable @_executable_pad_test_dispatch_1 init(@_executable_pad_test_dispatch_1_initializer) : !hal.executable attributes {sym_visibility = "private"} | |
// Initializer for @_executable_pad_test_dispatch_1. Branches on the i1 flag @_device_match_id_0:
//   ^bb1 (flag true):  creates the executable from @pad_test_dispatch_1::@vulkan_spirv using
//                      @_executable_layout_1.
//   ^bb2 (flag false): produces a null executable.
// Both paths merge in ^bb3, which returns the selected value.
func private @_executable_pad_test_dispatch_1_initializer() -> !hal.executable { | |
%dev = hal.ex.shared_device : !hal.device | |
%0 = hal.variable.load @_device_match_id_0 : i1 | |
cond_br %0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%1 = hal.variable.load @_executable_layout_1 : !hal.executable_layout | |
%exe = hal.executable.create %dev, @pad_test_dispatch_1::@vulkan_spirv, layouts = [%1] : !hal.executable | |
br ^bb3(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%2 = iree.null : !hal.executable | |
br ^bb3(%2 : !hal.executable) | |
^bb3(%3: !hal.executable): // 2 preds: ^bb1, ^bb2 | |
return %3 : !hal.executable | |
} | |
hal.executable @pad_test_dispatch_0 attributes {sym_visibility = "private"} { | |
// Binding interface of this executable: two StorageBuffer bindings in set 0.
// @ro0 (binding 0) is declared "Read"; @wo1 (binding 1) is declared "Write|Discard".
hal.interface @legacy_io { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @wo1, set=0, binding=1, type="StorageBuffer", access="Write|Discard" | |
} | |
hal.executable.binary @vulkan_spirv attributes {data = dense<"0x080000005350564588FAFFFF08000000240000000100000004000000130000007061645F746573745F64697370617463685F300052010000030223070000010016000000300000000000000011000200010000000A000B005350565F4B48525F73746F726167655F6275666665725F73746F726167655F636C617373000000000B00060027000000474C534C2E7374642E343530000000000E00030000000000010000000F000A0005000000120000007061645F746573745F64697370617463685F3000050000000400000010000600120000001100000020000000010000000100000005000B00040000005F5F6275696C74696E5F7661725F4C6F63616C496E766F636174696F6E49645F5F00000005000900050000005F5F6275696C74696E5F7661725F576F726B67726F757049645F5F00050009000A0000005F5F7265736F757263655F7661725F3138333936383030305F5F0000050009000F0000005F5F7265736F757263655F7661725F3138333735323534345F5F000005000700120000007061645F746573745F64697370617463685F300047000400040000000B0000001B00000047000400050000000B0000001A000000470004000800000006000000040000004800050007000000000000002300000000000000470003000700000002000000470004000A0000002100000001000000470004000A0000002200000000000000470004000D0000000600000004000000480005000C000000000000002300000000000000470003000C00000002000000470004000F0000002100000000000000470004000F00000022000000000000001500040003000000200000000000000017000400020000000300000003000000200004000100000001000000020000003B0004000100000004000000010000003B0004000100000005000000010000002B00040003000000090000001B0000001C0004000800000003000000090000001E000300070000000800000020000400060000000C000000070000003B000400060000000A0000000C0000002B000400030000000E000000010000001C0004000D000000030000000E0000001E0003000C0000000D000000200004000B0000000C0000000C0000003B0004000B0000000F0000000C00000013000200110000002100030010000000110000002B0004000300000014000000200000002B0004000300000015000000000000002B00040003000000160000000900000020000400170000000C0000000300000014000200200000003600050011000000120000000000000010000000F8000200130000004100060017000000180000000F000000
15000000150000003D0004000300000019000000180000003D000400020000001A0000000500000051000500030000001B0000001A000000000000003D000400020000001C0000000400000051000500030000001D0000001C0000000000000084000500030000001E0000001B0000001400000080000500030000001F0000001E0000001D000000B100050020000000210000001F00000009000000F70003002400000000000000FA000400210000002300000024000000F8000200230000008700050003000000250000001F000000160000000C000600030000002600000027000000050000001F0000000C00060003000000280000002700000005000000160000008900050003000000290000002600000028000000AA000500200000002A0000001F000000260000007E000400030000002B00000029000000A9000600030000002C0000002A000000290000002B00000084000500030000002D000000250000001600000080000500030000002E0000002D0000002C00000041000600170000002F0000000A000000150000002E0000003E0003002F00000019000000F900020024000000F800020024000000FD0001003800010008000C0004000800"> : vector<1416xi8>, format = 1397773893 : i32} { | |
} | |
} | |
hal.executable @pad_test_dispatch_1 attributes {sym_visibility = "private"} { | |
// Binding interface of this executable: two StorageBuffer bindings in set 0.
// @ro0 (binding 0) is declared "Read"; @rw1 (binding 1) is declared "Read|Write".
hal.interface @legacy_io { | |
hal.interface.binding @ro0, set=0, binding=0, type="StorageBuffer", access="Read" | |
hal.interface.binding @rw1, set=0, binding=1, type="StorageBuffer", access="Read|Write" | |
} | |
hal.executable.binary @vulkan_spirv attributes {data = dense<"0x08000000535056452CFAFFFF08000000240000000100000004000000130000007061645F746573745F64697370617463685F310069010000030223070000010016000000350000000000000011000200010000000A000B005350565F4B48525F73746F726167655F6275666665725F73746F726167655F636C617373000000000B00060026000000474C534C2E7374642E343530000000000E00030000000000010000000F000A0005000000120000007061645F746573745F64697370617463685F3100050000000400000010000600120000001100000020000000010000000100000005000B00040000005F5F6275696C74696E5F7661725F4C6F63616C496E766F636174696F6E49645F5F00000005000900050000005F5F6275696C74696E5F7661725F576F726B67726F757049645F5F00050009000A0000005F5F7265736F757263655F7661725F3138343135323936305F5F0000050009000F0000005F5F7265736F757263655F7661725F3138333735323534345F5F000005000700120000007061645F746573745F64697370617463685F310047000400040000000B0000001B00000047000400050000000B0000001A000000470004000800000006000000040000004800050007000000000000002300000000000000470003000700000002000000470004000A0000002100000001000000470004000A0000002200000000000000470004000D0000000600000004000000480005000C000000000000002300000000000000470003000C00000002000000470004000F0000002100000000000000470004000F00000022000000000000001500040003000000200000000000000017000400020000000300000003000000200004000100000001000000020000003B0004000100000004000000010000003B0004000100000005000000010000002B00040003000000090000001B0000001C0004000800000003000000090000001E000300070000000800000020000400060000000C000000070000003B000400060000000A0000000C0000002B000400030000000E000000060000001C0004000D000000030000000E0000001E0003000C0000000D000000200004000B0000000C0000000C0000003B0004000B0000000F0000000C00000013000200110000002100030010000000110000002B0004000300000014000000010000002B0004000300000015000000200000002B0004000300000016000000030000002B0004000300000017000000000000002B000400030000001800000009000000140002001F000000200004002E0000000C00000003000000360005001100000012000000
0000000010000000F8000200130000003D00040002000000190000000500000051000500030000001A00000019000000000000003D000400020000001B0000000400000051000500030000001C0000001B0000000000000084000500030000001D0000001A0000001500000080000500030000001E0000001D0000001C000000B10005001F000000200000001E0000000E000000F70003002300000000000000FA000400200000002200000023000000F8000200220000008700050003000000240000001E000000160000000C000600030000002500000026000000050000001E0000000C00060003000000270000002600000005000000160000008900050003000000280000002500000027000000AA0005001F000000290000001E000000250000007E000400030000002A00000028000000A9000600030000002B00000029000000280000002A00000084000500030000002C000000240000001600000080000500030000002D0000002C0000002B000000410006002E0000002F0000000F000000170000002D0000003D00040003000000300000002F0000008000050003000000310000002B0000001400000084000500030000003200000024000000180000008000050003000000330000003200000031000000410006002E000000340000000A00000017000000330000003E0003003400000030000000F900020023000000F800020023000000FD0001003800010008000C0004000800"> : vector<1508xi8>, format = 1397773893 : i32} { | |
} | |
} | |
// Exported (as "pad_test$raw") test body at the HAL level. It maps three constant buffers,
// allocates a 108-byte result buffer, records two dispatches into a one-shot command buffer,
// submits it and waits, then compares the result buffer with the expected 3x9 constant.
func @pad_test() attributes {iree.module.export = "pad_test$raw", noinline} { | |
// Byte sizes used for bindings below: 4 for the tensor<i32> constant, 24 for the
// tensor<2x3xi32> constant, 108 for the 3x9 result (consistent with 4-byte elements).
// %c-1 is passed as the length operand of hal.allocator.map; presumably "whole buffer" -- TODO confirm.
%c-1 = constant -1 : index | |
%c4 = constant 4 : index | |
%c24 = constant 24 : index | |
%c108 = constant 108 : index | |
%c0 = constant 0 : index | |
%c3 = constant 3 : index | |
%c9 = constant 9 : index | |
// Element-type code handed to hal.buffer_view.create for both views below.
%c16777248_i32 = constant 16777248 : i32 | |
%c1 = constant 1 : index | |
%dev = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator %dev : !hal.allocator | |
// %mapped: expected padded result (3x9), only used for the final comparison.
%0 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%mapped = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %0[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
// %mapped_0: scalar constant 0 (bound as the input of the first dispatch).
%1 = iree.byte_buffer.constant : !iree.byte_buffer = dense<0> : tensor<i32> | |
%mapped_0 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %1[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
// %mapped_1: the 2x3 source tensor (bound as the input of the second dispatch).
%2 = iree.byte_buffer.constant : !iree.byte_buffer = dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%mapped_1 = hal.allocator.map %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %2[%c0, %c-1] : !iree.byte_buffer -> !hal.buffer | |
%3 = iree.do_not_optimize(%mapped_1) : !hal.buffer | |
%4 = iree.do_not_optimize(%mapped_0) : !hal.buffer | |
// Result buffer shared by both dispatches as binding 1.
%buffer = hal.allocator.allocate %allocator, "HostVisible|DeviceVisible|DeviceLocal", "Constant|Transfer|Mapping|Dispatch", %c108 : !hal.buffer | |
%cmd = hal.command_buffer.create %dev, OneShot, "Transfer|Dispatch" : !hal.command_buffer | |
hal.command_buffer.begin %cmd | |
// Bindings for dispatch 0: binding 0 = scalar constant (4 bytes), binding 1 = result (108 bytes).
// Note this push is recorded before the device-match branch.
%5 = hal.variable.load @_executable_layout_0 : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %5, set = %c0, bindings = [%c0 = (%4, %c0, %c4), %c1 = (%buffer, %c0, %c108)] | |
%6 = hal.variable.load @_device_match_id_0 : i1 | |
cond_br %6, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
// Dispatch 0 with a 1x1x1 workgroup count, followed by an execution barrier.
%7 = hal.variable.load @_executable_pad_test_dispatch_0 : !hal.executable | |
hal.command_buffer.dispatch %cmd, %7, entry_point = 0, workgroup_xyz = [%c1, %c1, %c1] | |
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
// Bindings for dispatch 1: binding 0 = 2x3 source (24 bytes), binding 1 = same result buffer.
%8 = hal.variable.load @_executable_layout_1 : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set %cmd, %8, set = %c0, bindings = [%c0 = (%3, %c0, %c24), %c1 = (%buffer, %c0, %c108)] | |
%9 = hal.variable.load @_executable_pad_test_dispatch_1 : !hal.executable | |
hal.command_buffer.dispatch %cmd, %9, entry_point = 0, workgroup_xyz = [%c1, %c1, %c1] | |
hal.command_buffer.execution_barrier %cmd, "Dispatch|CommandRetire", "CommandIssue|Dispatch", "None" | |
hal.command_buffer.end %cmd | |
hal.ex.submit_and_wait %dev, %cmd | |
// Wrap the result and the expected buffer as 3x9 views and check them for equality.
%view = hal.buffer_view.create %buffer, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
%view_2 = hal.buffer_view.create %mapped, element_type = %c16777248_i32, shape = [%c3, %c9] : !hal.buffer_view | |
check.expect_eq(%view, %view_2) : !hal.buffer_view | |
return | |
^bb2: // pred: ^bb0 | |
// Taken when @_device_match_id_0 is false; marked unreachable.
iree.unreachable | |
} | |
// Exported semaphore-driven wrapper around @pad_test.
//   %arg0 / %arg1: semaphore and minimum value to await before running.
//   %arg2 / %arg3: semaphore and value to signal after @pad_test returns.
// The await status is checked with hal.check_success ("semaphore wait failed").
func @pad_test$async(%arg0: !hal.semaphore, %arg1: index, %arg2: !hal.semaphore, %arg3: index) attributes {iree.module.export = "pad_test$async"} { | |
%0 = hal.semaphore.await %arg0, min_value = %arg1 : i32 | |
hal.check_success %0, "semaphore wait failed" | |
call @pad_test() : () -> () | |
hal.semaphore.signal %arg2, value = %arg3 | |
return | |
} | |
// Synchronous entry point exported as "pad_test". Creates a semaphore with initial value 0,
// calls @pad_test$async with it as both the wait semaphore (min value 0) and the signal
// semaphore (value 1), then awaits value 1 and checks the await status.
func @pad_test$sync() attributes {iree.abi.stub, iree.module.export = "pad_test", iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%c0 = constant 0 : index | |
%c1 = constant 1 : index | |
%dev = hal.ex.shared_device : !hal.device | |
%semaphore = hal.semaphore.create %dev, initial_value = %c0 : !hal.semaphore | |
call @pad_test$async(%semaphore, %c0, %semaphore, %c1) : (!hal.semaphore, index, !hal.semaphore, index) -> () | |
%0 = hal.semaphore.await %semaphore, min_value = %c1 : i32 | |
hal.check_success %0, "semaphore wait failed" | |
return | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::VM::ConversionPass *** | |
module { | |
vm.module @module { | |
// i32 global holding the result of the device ID match; populated by the initializer below.
vm.global.i32 @_device_match_id_0 init(@_device_match_id_0_initializer) : i32 | |
// Pattern bytes passed to hal.device.match.id. Decoded as ASCII they spell "vulkan*"
// (118='v', 117='u', 108='l', 107='k', 97='a', 110='n', 42='*').
vm.rodata @_utf8_vulkan_7197BF52A22CAFD7 dense<[118, 117, 108, 107, 97, 110, 42]> : vector<7xi8> | |
// Initializer for @_device_match_id_0: calls @hal.device.match.id with the shared device and
// the pattern rodata above, and returns the i32 result unchanged.
vm.func private @_device_match_id_0_initializer() -> i32 { | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%_utf8_vulkan_7197BF52A22CAFD7 = vm.const.ref.rodata @_utf8_vulkan_7197BF52A22CAFD7 : !vm.ref<!iree.byte_buffer> | |
%0 = vm.call @hal.device.match.id(%ref, %_utf8_vulkan_7197BF52A22CAFD7) : (!vm.ref<!hal.device>, !vm.ref<!iree.byte_buffer>) -> i32 | |
vm.return %0 : i32 | |
} | |
// Ref global holding descriptor set layout 0; populated by the initializer below.
vm.global.ref @_descriptor_set_layout_0 init(@_descriptor_set_layout_0_initializer) : !vm.ref<!hal.descriptor_set_layout> | |
// Initializer for @_descriptor_set_layout_0: calls @hal.descriptor_set_layout.create with the
// shared device, the integer 1, and two (i32, i32, i32) tuples: (0, 7, 1) and (1, 7, 6).
// NOTE(review): by comparison with the HAL-dialect form of this initializer earlier in the
// dump, the leading 1 appears to encode PushOnly and each tuple appears to be
// (binding, type, access) with 7 = "StorageBuffer", 1 = "Read", 6 = "Write|Discard" -- confirm
// against the HAL enum definitions.
vm.func private @_descriptor_set_layout_0_initializer() -> !vm.ref<!hal.descriptor_set_layout> { | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%c1 = vm.const.i32 1 : i32 | |
%zero = vm.const.i32.zero : i32 | |
%c7 = vm.const.i32 7 : i32 | |
%c1_0 = vm.const.i32 1 : i32 | |
%c1_1 = vm.const.i32 1 : i32 | |
%c7_2 = vm.const.i32 7 : i32 | |
%c6 = vm.const.i32 6 : i32 | |
%ref_3 = vm.call.variadic @hal.descriptor_set_layout.create(%ref, %c1, [(%zero, %c7, %c1_0), (%c1_1, %c7_2, %c6)]) : (!vm.ref<!hal.device>, i32, tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> | |
vm.return %ref_3 : !vm.ref<!hal.descriptor_set_layout> | |
} | |
// Ref global holding executable layout 0; populated by the initializer below.
vm.global.ref @_executable_layout_0 init(@_executable_layout_0_initializer) : !vm.ref<!hal.executable_layout> | |
// Initializer for @_executable_layout_0: loads @_descriptor_set_layout_0 and calls
// @hal.executable_layout.create with the shared device, the integer 0, and that one set layout.
// The 0 lines up with push_constants = 0 in the HAL-dialect form of this function.
vm.func private @_executable_layout_0_initializer() -> !vm.ref<!hal.executable_layout> { | |
%_descriptor_set_layout_0 = vm.global.load.ref @_descriptor_set_layout_0 : !vm.ref<!hal.descriptor_set_layout> | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%zero = vm.const.i32.zero : i32 | |
%ref_0 = vm.call.variadic @hal.executable_layout.create(%ref, %zero, [%_descriptor_set_layout_0]) : (!vm.ref<!hal.device>, i32, !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> | |
vm.return %ref_0 : !vm.ref<!hal.executable_layout> | |
} | |
// Ref global holding descriptor set layout 1; populated by the initializer below.
vm.global.ref @_descriptor_set_layout_1 init(@_descriptor_set_layout_1_initializer) : !vm.ref<!hal.descriptor_set_layout> | |
// Initializer for @_descriptor_set_layout_1: calls @hal.descriptor_set_layout.create with the
// shared device, the integer 1, and two (i32, i32, i32) tuples: (0, 7, 1) and (1, 7, 3).
// NOTE(review): by comparison with the HAL-dialect form of this initializer earlier in the
// dump, the leading 1 appears to encode PushOnly and each tuple appears to be
// (binding, type, access) with 7 = "StorageBuffer", 1 = "Read", 3 = "Read|Write" -- confirm
// against the HAL enum definitions.
vm.func private @_descriptor_set_layout_1_initializer() -> !vm.ref<!hal.descriptor_set_layout> { | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%c1 = vm.const.i32 1 : i32 | |
%zero = vm.const.i32.zero : i32 | |
%c7 = vm.const.i32 7 : i32 | |
%c1_0 = vm.const.i32 1 : i32 | |
%c1_1 = vm.const.i32 1 : i32 | |
%c7_2 = vm.const.i32 7 : i32 | |
%c3 = vm.const.i32 3 : i32 | |
%ref_3 = vm.call.variadic @hal.descriptor_set_layout.create(%ref, %c1, [(%zero, %c7, %c1_0), (%c1_1, %c7_2, %c3)]) : (!vm.ref<!hal.device>, i32, tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> | |
vm.return %ref_3 : !vm.ref<!hal.descriptor_set_layout> | |
} | |
// Ref global holding executable layout 1; populated by the initializer below.
vm.global.ref @_executable_layout_1 init(@_executable_layout_1_initializer) : !vm.ref<!hal.executable_layout> | |
// Initializer for @_executable_layout_1: loads @_descriptor_set_layout_1 and calls
// @hal.executable_layout.create with the shared device, the integer 0, and that one set layout.
// The 0 lines up with push_constants = 0 in the HAL-dialect form of this function.
vm.func private @_executable_layout_1_initializer() -> !vm.ref<!hal.executable_layout> { | |
%_descriptor_set_layout_1 = vm.global.load.ref @_descriptor_set_layout_1 : !vm.ref<!hal.descriptor_set_layout> | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%zero = vm.const.i32.zero : i32 | |
%ref_0 = vm.call.variadic @hal.executable_layout.create(%ref, %zero, [%_descriptor_set_layout_1]) : (!vm.ref<!hal.device>, i32, !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> | |
vm.return %ref_0 : !vm.ref<!hal.executable_layout> | |
} | |
vm.global.ref @_executable_pad_test_dispatch_0 init(@_executable_pad_test_dispatch_0_initializer) : !vm.ref<!hal.executable> | |
vm.rodata @_pad_test_dispatch_0_vulkan_spirv_binary_spirv dense<"0x080000005350564588FAFFFF08000000240000000100000004000000130000007061645F746573745F64697370617463685F300052010000030223070000010016000000300000000000000011000200010000000A000B005350565F4B48525F73746F726167655F6275666665725F73746F726167655F636C617373000000000B00060027000000474C534C2E7374642E343530000000000E00030000000000010000000F000A0005000000120000007061645F746573745F64697370617463685F3000050000000400000010000600120000001100000020000000010000000100000005000B00040000005F5F6275696C74696E5F7661725F4C6F63616C496E766F636174696F6E49645F5F00000005000900050000005F5F6275696C74696E5F7661725F576F726B67726F757049645F5F00050009000A0000005F5F7265736F757263655F7661725F3138333936383030305F5F0000050009000F0000005F5F7265736F757263655F7661725F3138333735323534345F5F000005000700120000007061645F746573745F64697370617463685F300047000400040000000B0000001B00000047000400050000000B0000001A000000470004000800000006000000040000004800050007000000000000002300000000000000470003000700000002000000470004000A0000002100000001000000470004000A0000002200000000000000470004000D0000000600000004000000480005000C000000000000002300000000000000470003000C00000002000000470004000F0000002100000000000000470004000F00000022000000000000001500040003000000200000000000000017000400020000000300000003000000200004000100000001000000020000003B0004000100000004000000010000003B0004000100000005000000010000002B00040003000000090000001B0000001C0004000800000003000000090000001E000300070000000800000020000400060000000C000000070000003B000400060000000A0000000C0000002B000400030000000E000000010000001C0004000D000000030000000E0000001E0003000C0000000D000000200004000B0000000C0000000C0000003B0004000B0000000F0000000C00000013000200110000002100030010000000110000002B0004000300000014000000200000002B0004000300000015000000000000002B00040003000000160000000900000020000400170000000C0000000300000014000200200000003600050011000000120000000000000010000000F8000200130000004100060017000000180000000F000
00015000000150000003D0004000300000019000000180000003D000400020000001A0000000500000051000500030000001B0000001A000000000000003D000400020000001C0000000400000051000500030000001D0000001C0000000000000084000500030000001E0000001B0000001400000080000500030000001F0000001E0000001D000000B100050020000000210000001F00000009000000F70003002400000000000000FA000400210000002300000024000000F8000200230000008700050003000000250000001F000000160000000C000600030000002600000027000000050000001F0000000C00060003000000280000002700000005000000160000008900050003000000290000002600000028000000AA000500200000002A0000001F000000260000007E000400030000002B00000029000000A9000600030000002C0000002A000000290000002B00000084000500030000002D000000250000001600000080000500030000002E0000002D0000002C00000041000600170000002F0000000A000000150000002E0000003E0003002F00000019000000F900020024000000F800020024000000FD0001003800010008000C0004000800"> : vector<1416xi8> | |
// Initializer for @_executable_pad_test_dispatch_0. Branches on the i32 global
// @_device_match_id_0:
//   ^bb1 (non-zero): calls @hal.executable.create with the shared device, the format code,
//                    the SPIR-V rodata blob and @_executable_layout_0.
//   ^bb2 (zero):     produces a null executable ref.
// Both paths merge in ^bb3, which returns the selected ref.
vm.func private @_executable_pad_test_dispatch_0_initializer() -> !vm.ref<!hal.executable> { | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%_device_match_id_0 = vm.global.load.i32 @_device_match_id_0 : i32 | |
vm.cond_br %_device_match_id_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
// 1397773893 == 0x53505645, whose bytes are the ASCII characters 'S','P','V','E'; it is the
// same value as the `format` attribute on the HAL-level executable binary.
%c1397773893 = vm.const.i32 1397773893 : i32 | |
%_pad_test_dispatch_0_vulkan_spirv_binary_spirv = vm.const.ref.rodata @_pad_test_dispatch_0_vulkan_spirv_binary_spirv : !vm.ref<!iree.byte_buffer> | |
%ref_0 = vm.call.variadic @hal.executable.create(%ref, %c1397773893, %_pad_test_dispatch_0_vulkan_spirv_binary_spirv, [%_executable_layout_0]) : (!vm.ref<!hal.device>, i32, !vm.ref<!iree.byte_buffer>, !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> | |
vm.br ^bb3(%ref_0 : !vm.ref<!hal.executable>) | |
^bb2: // pred: ^bb0 | |
%null = vm.const.ref.zero : !vm.ref<!hal.executable> | |
vm.br ^bb3(%null : !vm.ref<!hal.executable>) | |
^bb3(%0: !vm.ref<!hal.executable>): // 2 preds: ^bb1, ^bb2 | |
vm.return %0 : !vm.ref<!hal.executable> | |
} | |
vm.global.ref @_executable_pad_test_dispatch_1 init(@_executable_pad_test_dispatch_1_initializer) : !vm.ref<!hal.executable> | |
vm.rodata @_pad_test_dispatch_1_vulkan_spirv_binary_spirv dense<"0x08000000535056452CFAFFFF08000000240000000100000004000000130000007061645F746573745F64697370617463685F310069010000030223070000010016000000350000000000000011000200010000000A000B005350565F4B48525F73746F726167655F6275666665725F73746F726167655F636C617373000000000B00060026000000474C534C2E7374642E343530000000000E00030000000000010000000F000A0005000000120000007061645F746573745F64697370617463685F3100050000000400000010000600120000001100000020000000010000000100000005000B00040000005F5F6275696C74696E5F7661725F4C6F63616C496E766F636174696F6E49645F5F00000005000900050000005F5F6275696C74696E5F7661725F576F726B67726F757049645F5F00050009000A0000005F5F7265736F757263655F7661725F3138343135323936305F5F0000050009000F0000005F5F7265736F757263655F7661725F3138333735323534345F5F000005000700120000007061645F746573745F64697370617463685F310047000400040000000B0000001B00000047000400050000000B0000001A000000470004000800000006000000040000004800050007000000000000002300000000000000470003000700000002000000470004000A0000002100000001000000470004000A0000002200000000000000470004000D0000000600000004000000480005000C000000000000002300000000000000470003000C00000002000000470004000F0000002100000000000000470004000F00000022000000000000001500040003000000200000000000000017000400020000000300000003000000200004000100000001000000020000003B0004000100000004000000010000003B0004000100000005000000010000002B00040003000000090000001B0000001C0004000800000003000000090000001E000300070000000800000020000400060000000C000000070000003B000400060000000A0000000C0000002B000400030000000E000000060000001C0004000D000000030000000E0000001E0003000C0000000D000000200004000B0000000C0000000C0000003B0004000B0000000F0000000C00000013000200110000002100030010000000110000002B0004000300000014000000010000002B0004000300000015000000200000002B0004000300000016000000030000002B0004000300000017000000000000002B000400030000001800000009000000140002001F000000200004002E0000000C00000003000000360005001100000012000
0000000000010000000F8000200130000003D00040002000000190000000500000051000500030000001A00000019000000000000003D000400020000001B0000000400000051000500030000001C0000001B0000000000000084000500030000001D0000001A0000001500000080000500030000001E0000001D0000001C000000B10005001F000000200000001E0000000E000000F70003002300000000000000FA000400200000002200000023000000F8000200220000008700050003000000240000001E000000160000000C000600030000002500000026000000050000001E0000000C00060003000000270000002600000005000000160000008900050003000000280000002500000027000000AA0005001F000000290000001E000000250000007E000400030000002A00000028000000A9000600030000002B00000029000000280000002A00000084000500030000002C000000240000001600000080000500030000002D0000002C0000002B000000410006002E0000002F0000000F000000170000002D0000003D00040003000000300000002F0000008000050003000000310000002B0000001400000084000500030000003200000024000000180000008000050003000000330000003200000031000000410006002E000000340000000A00000017000000330000003E0003003400000030000000F900020023000000F800020023000000FD0001003800010008000C0004000800"> : vector<1508xi8> | |
// Initializer for @_executable_pad_test_dispatch_1. Branches on the i32 global
// @_device_match_id_0:
//   ^bb1 (non-zero): calls @hal.executable.create with the shared device, the format code,
//                    the SPIR-V rodata blob and @_executable_layout_1.
//   ^bb2 (zero):     produces a null executable ref.
// Both paths merge in ^bb3, which returns the selected ref.
vm.func private @_executable_pad_test_dispatch_1_initializer() -> !vm.ref<!hal.executable> { | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%_device_match_id_0 = vm.global.load.i32 @_device_match_id_0 : i32 | |
vm.cond_br %_device_match_id_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_1 = vm.global.load.ref @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
// 1397773893 == 0x53505645, whose bytes are the ASCII characters 'S','P','V','E'; it is the
// same value as the `format` attribute on the HAL-level executable binary.
%c1397773893 = vm.const.i32 1397773893 : i32 | |
%_pad_test_dispatch_1_vulkan_spirv_binary_spirv = vm.const.ref.rodata @_pad_test_dispatch_1_vulkan_spirv_binary_spirv : !vm.ref<!iree.byte_buffer> | |
%ref_0 = vm.call.variadic @hal.executable.create(%ref, %c1397773893, %_pad_test_dispatch_1_vulkan_spirv_binary_spirv, [%_executable_layout_1]) : (!vm.ref<!hal.device>, i32, !vm.ref<!iree.byte_buffer>, !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> | |
vm.br ^bb3(%ref_0 : !vm.ref<!hal.executable>) | |
^bb2: // pred: ^bb0 | |
%null = vm.const.ref.zero : !vm.ref<!hal.executable> | |
vm.br ^bb3(%null : !vm.ref<!hal.executable>) | |
^bb3(%0: !vm.ref<!hal.executable>): // 2 preds: ^bb1, ^bb2 | |
vm.return %0 : !vm.ref<!hal.executable> | |
} | |
// VM-dialect form of the test body (exported below as "pad_test$raw"). HAL ops have become
// vm.call / vm.call.variadic to imported @hal.* functions, and HAL enum/bitfield attributes
// have become i32 constants. The sequence is: wrap three constant byte buffers, allocate a
// 108-byte result buffer, record two dispatches with barriers, submit and wait, then compare
// the result with the expected 3x9 constant via @check.expect_eq.
vm.func @pad_test() attributes {noinline} { | |
// Byte lengths / shape dims reused below (4, 24, 108 bytes; 3 x 9 view shape). %c-1 is passed
// as the length argument of wrap.byte_buffer; presumably "whole buffer" -- TODO confirm.
%c-1 = vm.const.i32 -1 : i32 | |
%c4 = vm.const.i32 4 : i32 | |
%c24 = vm.const.i32 24 : i32 | |
%c108 = vm.const.i32 108 : i32 | |
%zero = vm.const.i32.zero : i32 | |
%c3 = vm.const.i32 3 : i32 | |
%c9 = vm.const.i32 9 : i32 | |
// Element-type code passed to both @hal.buffer_view.create calls.
%c16777248 = vm.const.i32 16777248 : i32 | |
%c1 = vm.const.i32 1 : i32 | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_0 = vm.call @hal.device.allocator(%ref) : (!vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> | |
// %ref_2: expected padded 3x9 result, wrapped as a HAL buffer.
// NOTE(review): 50 and 15 take the place of the memory-type string
// "HostVisible|DeviceVisible|DeviceLocal" and the usage string
// "Constant|Transfer|Mapping|Dispatch" from the HAL-dialect form -- confirm the bit values.
%ref_1 = vm.rodata.inline : !vm.ref<!iree.byte_buffer> = dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
%c50 = vm.const.i32 50 : i32 | |
%c15 = vm.const.i32 15 : i32 | |
%ref_2 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50, %c15, %ref_1, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
// %ref_6: scalar constant 0 (input binding of the first dispatch).
%ref_3 = vm.rodata.inline : !vm.ref<!iree.byte_buffer> = dense<0> : tensor<i32> | |
%c50_4 = vm.const.i32 50 : i32 | |
%c15_5 = vm.const.i32 15 : i32 | |
%ref_6 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50_4, %c15_5, %ref_3, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
// %ref_10: the 2x3 source tensor (input binding of the second dispatch).
%ref_7 = vm.rodata.inline : !vm.ref<!iree.byte_buffer> = dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
%c50_8 = vm.const.i32 50 : i32 | |
%c15_9 = vm.const.i32 15 : i32 | |
%ref_10 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50_8, %c15_9, %ref_7, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
%0 = iree.do_not_optimize(%ref_10) : !vm.ref<!hal.buffer> | |
%1 = iree.do_not_optimize(%ref_6) : !vm.ref<!hal.buffer> | |
// %ref_13: 108-byte result buffer written by both dispatches.
%c50_11 = vm.const.i32 50 : i32 | |
%c15_12 = vm.const.i32 15 : i32 | |
%ref_13 = vm.call @hal.allocator.allocate(%ref_0, %c50_11, %c15_12, %c108) : (!vm.ref<!hal.allocator>, i32, i32, i32) -> !vm.ref<!hal.buffer> | |
// Command buffer creation; 1 and 3 take the place of OneShot and "Transfer|Dispatch" from the
// HAL-dialect form.
%c1_14 = vm.const.i32 1 : i32 | |
%c3_15 = vm.const.i32 3 : i32 | |
%ref_16 = vm.call @hal.command_buffer.create(%ref, %c1_14, %c3_15) : (!vm.ref<!hal.device>, i32, i32) -> !vm.ref<!hal.command_buffer> | |
vm.call @hal.command_buffer.begin(%ref_16) : (!vm.ref<!hal.command_buffer>) -> () | |
// Bindings for dispatch 0, as (binding, buffer, offset, length) tuples:
// (0, scalar constant, 0, 4) and (1, result buffer, 0, 108). Recorded before the branch.
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
vm.call.variadic @hal.command_buffer.push_descriptor_set(%ref_16, %_executable_layout_0, %zero, [(%zero, %1, %zero, %c4), (%c1, %ref_13, %zero, %c108)]) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable_layout>, i32, tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) | |
%_device_match_id_0 = vm.global.load.i32 @_device_match_id_0 : i32 | |
vm.cond_br %_device_match_id_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
// Dispatch 0: entry point 0, workgroup count 1x1x1.
%_executable_pad_test_dispatch_0 = vm.global.load.ref @_executable_pad_test_dispatch_0 : !vm.ref<!hal.executable> | |
%zero_17 = vm.const.i32.zero : i32 | |
vm.call @hal.command_buffer.dispatch(%ref_16, %_executable_pad_test_dispatch_0, %zero_17, %c1, %c1, %c1) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32) -> () | |
// Execution barrier; 20, 5 and 0 take the place of "Dispatch|CommandRetire",
// "CommandIssue|Dispatch" and "None" from the HAL-dialect form.
%c20 = vm.const.i32 20 : i32 | |
%c5 = vm.const.i32 5 : i32 | |
%zero_18 = vm.const.i32.zero : i32 | |
vm.call @hal.command_buffer.execution_barrier(%ref_16, %c20, %c5, %zero_18) : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> () | |
// Bindings for dispatch 1: (0, 2x3 source, 0, 24) and (1, result buffer, 0, 108).
%_executable_layout_1 = vm.global.load.ref @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
vm.call.variadic @hal.command_buffer.push_descriptor_set(%ref_16, %_executable_layout_1, %zero, [(%zero, %0, %zero, %c24), (%c1, %ref_13, %zero, %c108)]) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable_layout>, i32, tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) | |
// Dispatch 1: entry point 0, workgroup count 1x1x1, followed by the same barrier as above.
%_executable_pad_test_dispatch_1 = vm.global.load.ref @_executable_pad_test_dispatch_1 : !vm.ref<!hal.executable> | |
%zero_19 = vm.const.i32.zero : i32 | |
vm.call @hal.command_buffer.dispatch(%ref_16, %_executable_pad_test_dispatch_1, %zero_19, %c1, %c1, %c1) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32) -> () | |
%c20_20 = vm.const.i32 20 : i32 | |
%c5_21 = vm.const.i32 5 : i32 | |
%zero_22 = vm.const.i32.zero : i32 | |
vm.call @hal.command_buffer.execution_barrier(%ref_16, %c20_20, %c5_21, %zero_22) : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.end(%ref_16) : (!vm.ref<!hal.command_buffer>) -> () | |
vm.call @hal.ex.submit_and_wait(%ref, %ref_16) : (!vm.ref<!hal.device>, !vm.ref<!hal.command_buffer>) -> () | |
// Wrap result and expected buffers as 3x9 views and compare them.
%ref_23 = vm.call.variadic @hal.buffer_view.create(%ref_13, %c16777248, [%c3, %c9]) : (!vm.ref<!hal.buffer>, i32, i32 ...) -> !vm.ref<!hal.buffer_view> | |
%ref_24 = vm.call.variadic @hal.buffer_view.create(%ref_2, %c16777248, [%c3, %c9]) : (!vm.ref<!hal.buffer>, i32, i32 ...) -> !vm.ref<!hal.buffer_view> | |
vm.call @check.expect_eq(%ref_23, %ref_24) : (!vm.ref<!hal.buffer_view>, !vm.ref<!hal.buffer_view>) -> () | |
vm.return | |
^bb2: // pred: ^bb0 | |
// Reached when @_device_match_id_0 is zero: fail with status code 2 (this replaces
// iree.unreachable from the HAL-dialect form).
%c2 = vm.const.i32 2 : i32 | |
vm.fail %c2, "unreachable location reached" | |
} | |
// Export @pad_test under the external name "pad_test$raw".
vm.export @pad_test as("pad_test$raw") | |
// Semaphore-driven wrapper around @pad_test.
//   %arg0 / %arg1: semaphore and value passed to @hal.semaphore.await before running.
//   %arg2 / %arg3: semaphore and value passed to @hal.semaphore.signal afterwards.
// vm.cond_fail aborts with "semaphore wait failed" when the await result is non-zero.
vm.func @pad_test$async(%arg0: !vm.ref<!hal.semaphore>, %arg1: i32, %arg2: !vm.ref<!hal.semaphore>, %arg3: i32) { | |
%0 = vm.call @hal.semaphore.await(%arg0, %arg1) : (!vm.ref<!hal.semaphore>, i32) -> i32 | |
vm.cond_fail %0, "semaphore wait failed" | |
vm.call @pad_test() : () -> () | |
vm.call @hal.semaphore.signal(%arg2, %arg3) : (!vm.ref<!hal.semaphore>, i32) -> () | |
vm.return | |
} | |
// Export under its own name.
vm.export @pad_test$async | |
// Synchronous entry point (exported below as "pad_test"). Creates a semaphore with initial
// value 0, calls @pad_test$async using it as both the wait semaphore (value 0) and the signal
// semaphore (value 1), then awaits value 1 and fails with "semaphore wait failed" if the
// await result is non-zero.
vm.func @pad_test$sync() attributes {iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%zero = vm.const.i32.zero : i32 | |
%c1 = vm.const.i32 1 : i32 | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_0 = vm.call @hal.semaphore.create(%ref, %zero) : (!vm.ref<!hal.device>, i32) -> !vm.ref<!hal.semaphore> | |
vm.call @pad_test$async(%ref_0, %zero, %ref_0, %c1) : (!vm.ref<!hal.semaphore>, i32, !vm.ref<!hal.semaphore>, i32) -> () | |
%0 = vm.call @hal.semaphore.await(%ref_0, %c1) : (!vm.ref<!hal.semaphore>, i32) -> i32 | |
vm.cond_fail %0, "semaphore wait failed" | |
vm.return | |
} | |
// Export @pad_test$sync under the external name "pad_test".
vm.export @pad_test$sync as("pad_test") | |
vm.import @check.expect_true(%operand : i32) attributes {sym_visibility = "private"} | |
vm.import @check.expect_false(%operand : i32) attributes {sym_visibility = "private"} | |
vm.import @check.expect_all_true(%operand : !vm.ref<!hal.buffer_view>) attributes {sym_visibility = "private"} | |
vm.import @check.expect_eq(%lhs : !vm.ref<!hal.buffer_view>, %rhs : !vm.ref<!hal.buffer_view>) attributes {sym_visibility = "private"} | |
vm.import @check.expect_almost_eq(%lhs : !vm.ref<!hal.buffer_view>, %rhs : !vm.ref<!hal.buffer_view>) attributes {sym_visibility = "private"} | |
vm.import @hal.ex.shared_device() -> !vm.ref<!hal.device> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.ex.submit_and_wait(%device : !vm.ref<!hal.device>, %command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.allocate(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %allocation_size : i32) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.wrap.byte_buffer(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %source : !vm.ref<!iree.byte_buffer>, %offset : i32, %length : i32) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.allocator(%buffer : !vm.ref<!hal.buffer>) -> !vm.ref<!hal.allocator> attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.subspan(%source_buffer : !vm.ref<!hal.buffer>, %source_offset : i32, %length : i32) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.fill(%target_buffer : !vm.ref<!hal.buffer>, %target_offset : i32, %length : i32, %pattern : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.load(%source_buffer : !vm.ref<!hal.buffer>, %source_offset : i32, %length : i32) -> i32 attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.store(%value : i32, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i32, %length : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer_view.create(%buffer : !vm.ref<!hal.buffer>, %element_type : i32, %shape : i32 ...) -> !vm.ref<!hal.buffer_view> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.buffer(%buffer_view : !vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.byte_length(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.element_type(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.rank(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.dim(%buffer_view : !vm.ref<!hal.buffer_view>, %index : i32) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.trace(%key : !vm.ref<!iree.byte_buffer>, %operands : !vm.ref<!hal.buffer_view> ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.create(%device : !vm.ref<!hal.device>, %modes : i32, %command_categories : i32) -> !vm.ref<!hal.command_buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.begin(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.end(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.execution_barrier(%command_buffer : !vm.ref<!hal.command_buffer>, %source_stage_mask : i32, %target_stage_mask : i32, %flags : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.fill_buffer(%command_buffer : !vm.ref<!hal.command_buffer>, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i32, %length : i32, %pattern : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.copy_buffer(%command_buffer : !vm.ref<!hal.command_buffer>, %source_buffer : !vm.ref<!hal.buffer>, %source_offset : i32, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i32, %length : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.push_constants(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %offset : i32, %values : i32 ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.push_descriptor_set(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %set : i32, %bindings : tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.bind_descriptor_set(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %set : i32, %descriptor_set : !vm.ref<!hal.descriptor_set>, %dynamic_offsets : i32 ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.dispatch(%command_buffer : !vm.ref<!hal.command_buffer>, %executable : !vm.ref<!hal.executable>, %entry_point : i32, %workgroup_x : i32, %workgroup_y : i32, %workgroup_z : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.dispatch.indirect(%command_buffer : !vm.ref<!hal.command_buffer>, %executable : !vm.ref<!hal.executable>, %entry_point : i32, %workgroups_buffer : !vm.ref<!hal.buffer>, %workgroups_offset : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.descriptor_set.create(%device : !vm.ref<!hal.device>, %set_layout : !vm.ref<!hal.descriptor_set_layout>, %bindings : tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) -> !vm.ref<!hal.descriptor_set> attributes {sym_visibility = "private"} | |
vm.import @hal.descriptor_set_layout.create(%device : !vm.ref<!hal.device>, %usage_type : i32, %bindings : tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.device.allocator(%device : !vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.device.match.id(%device : !vm.ref<!hal.device>, %pattern : !vm.ref<!iree.byte_buffer>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.executable.create(%device : !vm.ref<!hal.device>, %executable_format : i32, %executable_data : !vm.ref<!iree.byte_buffer>, %executable_layouts : !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.executable_layout.create(%device : !vm.ref<!hal.device>, %push_constants : i32, %set_layouts : !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.semaphore.create(%device : !vm.ref<!hal.device>, %initial_value : i32) -> !vm.ref<!hal.semaphore> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.semaphore.query(%semaphore : !vm.ref<!hal.semaphore>) -> (i32, i32) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.signal(%semaphore : !vm.ref<!hal.semaphore>, %new_value : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.fail(%semaphore : !vm.ref<!hal.semaphore>, %status : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.await(%semaphore : !vm.ref<!hal.semaphore>, %min_value : i32) -> i32 attributes {sym_visibility = "private"} | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::VM::HoistInlinedRodataPass *** | |
vm.module @module { | |
vm.global.i32 @_device_match_id_0 init(@_device_match_id_0_initializer) : i32 | |
vm.rodata @_utf8_vulkan_7197BF52A22CAFD7 dense<[118, 117, 108, 107, 97, 110, 42]> : vector<7xi8> | |
vm.func private @_device_match_id_0_initializer() -> i32 { | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%_utf8_vulkan_7197BF52A22CAFD7 = vm.const.ref.rodata @_utf8_vulkan_7197BF52A22CAFD7 : !vm.ref<!iree.byte_buffer> | |
%0 = vm.call @hal.device.match.id(%ref, %_utf8_vulkan_7197BF52A22CAFD7) : (!vm.ref<!hal.device>, !vm.ref<!iree.byte_buffer>) -> i32 | |
vm.return %0 : i32 | |
} | |
vm.global.ref @_descriptor_set_layout_0 init(@_descriptor_set_layout_0_initializer) : !vm.ref<!hal.descriptor_set_layout> | |
vm.func private @_descriptor_set_layout_0_initializer() -> !vm.ref<!hal.descriptor_set_layout> { | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%c1 = vm.const.i32 1 : i32 | |
%zero = vm.const.i32.zero : i32 | |
%c7 = vm.const.i32 7 : i32 | |
%c1_0 = vm.const.i32 1 : i32 | |
%c1_1 = vm.const.i32 1 : i32 | |
%c7_2 = vm.const.i32 7 : i32 | |
%c6 = vm.const.i32 6 : i32 | |
%ref_3 = vm.call.variadic @hal.descriptor_set_layout.create(%ref, %c1, [(%zero, %c7, %c1_0), (%c1_1, %c7_2, %c6)]) : (!vm.ref<!hal.device>, i32, tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> | |
vm.return %ref_3 : !vm.ref<!hal.descriptor_set_layout> | |
} | |
vm.global.ref @_executable_layout_0 init(@_executable_layout_0_initializer) : !vm.ref<!hal.executable_layout> | |
vm.func private @_executable_layout_0_initializer() -> !vm.ref<!hal.executable_layout> { | |
%_descriptor_set_layout_0 = vm.global.load.ref @_descriptor_set_layout_0 : !vm.ref<!hal.descriptor_set_layout> | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%zero = vm.const.i32.zero : i32 | |
%ref_0 = vm.call.variadic @hal.executable_layout.create(%ref, %zero, [%_descriptor_set_layout_0]) : (!vm.ref<!hal.device>, i32, !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> | |
vm.return %ref_0 : !vm.ref<!hal.executable_layout> | |
} | |
vm.global.ref @_descriptor_set_layout_1 init(@_descriptor_set_layout_1_initializer) : !vm.ref<!hal.descriptor_set_layout> | |
vm.func private @_descriptor_set_layout_1_initializer() -> !vm.ref<!hal.descriptor_set_layout> { | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%c1 = vm.const.i32 1 : i32 | |
%zero = vm.const.i32.zero : i32 | |
%c7 = vm.const.i32 7 : i32 | |
%c1_0 = vm.const.i32 1 : i32 | |
%c1_1 = vm.const.i32 1 : i32 | |
%c7_2 = vm.const.i32 7 : i32 | |
%c3 = vm.const.i32 3 : i32 | |
%ref_3 = vm.call.variadic @hal.descriptor_set_layout.create(%ref, %c1, [(%zero, %c7, %c1_0), (%c1_1, %c7_2, %c3)]) : (!vm.ref<!hal.device>, i32, tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> | |
vm.return %ref_3 : !vm.ref<!hal.descriptor_set_layout> | |
} | |
vm.global.ref @_executable_layout_1 init(@_executable_layout_1_initializer) : !vm.ref<!hal.executable_layout> | |
vm.func private @_executable_layout_1_initializer() -> !vm.ref<!hal.executable_layout> { | |
%_descriptor_set_layout_1 = vm.global.load.ref @_descriptor_set_layout_1 : !vm.ref<!hal.descriptor_set_layout> | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%zero = vm.const.i32.zero : i32 | |
%ref_0 = vm.call.variadic @hal.executable_layout.create(%ref, %zero, [%_descriptor_set_layout_1]) : (!vm.ref<!hal.device>, i32, !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> | |
vm.return %ref_0 : !vm.ref<!hal.executable_layout> | |
} | |
vm.global.ref @_executable_pad_test_dispatch_0 init(@_executable_pad_test_dispatch_0_initializer) : !vm.ref<!hal.executable> | |
vm.rodata @_pad_test_dispatch_0_vulkan_spirv_binary_spirv dense<"0x080000005350564588FAFFFF08000000240000000100000004000000130000007061645F746573745F64697370617463685F300052010000030223070000010016000000300000000000000011000200010000000A000B005350565F4B48525F73746F726167655F6275666665725F73746F726167655F636C617373000000000B00060027000000474C534C2E7374642E343530000000000E00030000000000010000000F000A0005000000120000007061645F746573745F64697370617463685F3000050000000400000010000600120000001100000020000000010000000100000005000B00040000005F5F6275696C74696E5F7661725F4C6F63616C496E766F636174696F6E49645F5F00000005000900050000005F5F6275696C74696E5F7661725F576F726B67726F757049645F5F00050009000A0000005F5F7265736F757263655F7661725F3138333936383030305F5F0000050009000F0000005F5F7265736F757263655F7661725F3138333735323534345F5F000005000700120000007061645F746573745F64697370617463685F300047000400040000000B0000001B00000047000400050000000B0000001A000000470004000800000006000000040000004800050007000000000000002300000000000000470003000700000002000000470004000A0000002100000001000000470004000A0000002200000000000000470004000D0000000600000004000000480005000C000000000000002300000000000000470003000C00000002000000470004000F0000002100000000000000470004000F00000022000000000000001500040003000000200000000000000017000400020000000300000003000000200004000100000001000000020000003B0004000100000004000000010000003B0004000100000005000000010000002B00040003000000090000001B0000001C0004000800000003000000090000001E000300070000000800000020000400060000000C000000070000003B000400060000000A0000000C0000002B000400030000000E000000010000001C0004000D000000030000000E0000001E0003000C0000000D000000200004000B0000000C0000000C0000003B0004000B0000000F0000000C00000013000200110000002100030010000000110000002B0004000300000014000000200000002B0004000300000015000000000000002B00040003000000160000000900000020000400170000000C0000000300000014000200200000003600050011000000120000000000000010000000F8000200130000004100060017000000180000000F000
00015000000150000003D0004000300000019000000180000003D000400020000001A0000000500000051000500030000001B0000001A000000000000003D000400020000001C0000000400000051000500030000001D0000001C0000000000000084000500030000001E0000001B0000001400000080000500030000001F0000001E0000001D000000B100050020000000210000001F00000009000000F70003002400000000000000FA000400210000002300000024000000F8000200230000008700050003000000250000001F000000160000000C000600030000002600000027000000050000001F0000000C00060003000000280000002700000005000000160000008900050003000000290000002600000028000000AA000500200000002A0000001F000000260000007E000400030000002B00000029000000A9000600030000002C0000002A000000290000002B00000084000500030000002D000000250000001600000080000500030000002E0000002D0000002C00000041000600170000002F0000000A000000150000002E0000003E0003002F00000019000000F900020024000000F800020024000000FD0001003800010008000C0004000800"> : vector<1416xi8> | |
vm.func private @_executable_pad_test_dispatch_0_initializer() -> !vm.ref<!hal.executable> { | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%_device_match_id_0 = vm.global.load.i32 @_device_match_id_0 : i32 | |
vm.cond_br %_device_match_id_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%c1397773893 = vm.const.i32 1397773893 : i32 | |
%_pad_test_dispatch_0_vulkan_spirv_binary_spirv = vm.const.ref.rodata @_pad_test_dispatch_0_vulkan_spirv_binary_spirv : !vm.ref<!iree.byte_buffer> | |
%ref_0 = vm.call.variadic @hal.executable.create(%ref, %c1397773893, %_pad_test_dispatch_0_vulkan_spirv_binary_spirv, [%_executable_layout_0]) : (!vm.ref<!hal.device>, i32, !vm.ref<!iree.byte_buffer>, !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> | |
vm.br ^bb3(%ref_0 : !vm.ref<!hal.executable>) | |
^bb2: // pred: ^bb0 | |
%null = vm.const.ref.zero : !vm.ref<!hal.executable> | |
vm.br ^bb3(%null : !vm.ref<!hal.executable>) | |
^bb3(%0: !vm.ref<!hal.executable>): // 2 preds: ^bb1, ^bb2 | |
vm.return %0 : !vm.ref<!hal.executable> | |
} | |
vm.global.ref @_executable_pad_test_dispatch_1 init(@_executable_pad_test_dispatch_1_initializer) : !vm.ref<!hal.executable> | |
vm.rodata @_pad_test_dispatch_1_vulkan_spirv_binary_spirv dense<"0x08000000535056452CFAFFFF08000000240000000100000004000000130000007061645F746573745F64697370617463685F310069010000030223070000010016000000350000000000000011000200010000000A000B005350565F4B48525F73746F726167655F6275666665725F73746F726167655F636C617373000000000B00060026000000474C534C2E7374642E343530000000000E00030000000000010000000F000A0005000000120000007061645F746573745F64697370617463685F3100050000000400000010000600120000001100000020000000010000000100000005000B00040000005F5F6275696C74696E5F7661725F4C6F63616C496E766F636174696F6E49645F5F00000005000900050000005F5F6275696C74696E5F7661725F576F726B67726F757049645F5F00050009000A0000005F5F7265736F757263655F7661725F3138343135323936305F5F0000050009000F0000005F5F7265736F757263655F7661725F3138333735323534345F5F000005000700120000007061645F746573745F64697370617463685F310047000400040000000B0000001B00000047000400050000000B0000001A000000470004000800000006000000040000004800050007000000000000002300000000000000470003000700000002000000470004000A0000002100000001000000470004000A0000002200000000000000470004000D0000000600000004000000480005000C000000000000002300000000000000470003000C00000002000000470004000F0000002100000000000000470004000F00000022000000000000001500040003000000200000000000000017000400020000000300000003000000200004000100000001000000020000003B0004000100000004000000010000003B0004000100000005000000010000002B00040003000000090000001B0000001C0004000800000003000000090000001E000300070000000800000020000400060000000C000000070000003B000400060000000A0000000C0000002B000400030000000E000000060000001C0004000D000000030000000E0000001E0003000C0000000D000000200004000B0000000C0000000C0000003B0004000B0000000F0000000C00000013000200110000002100030010000000110000002B0004000300000014000000010000002B0004000300000015000000200000002B0004000300000016000000030000002B0004000300000017000000000000002B000400030000001800000009000000140002001F000000200004002E0000000C00000003000000360005001100000012000
0000000000010000000F8000200130000003D00040002000000190000000500000051000500030000001A00000019000000000000003D000400020000001B0000000400000051000500030000001C0000001B0000000000000084000500030000001D0000001A0000001500000080000500030000001E0000001D0000001C000000B10005001F000000200000001E0000000E000000F70003002300000000000000FA000400200000002200000023000000F8000200220000008700050003000000240000001E000000160000000C000600030000002500000026000000050000001E0000000C00060003000000270000002600000005000000160000008900050003000000280000002500000027000000AA0005001F000000290000001E000000250000007E000400030000002A00000028000000A9000600030000002B00000029000000280000002A00000084000500030000002C000000240000001600000080000500030000002D0000002C0000002B000000410006002E0000002F0000000F000000170000002D0000003D00040003000000300000002F0000008000050003000000310000002B0000001400000084000500030000003200000024000000180000008000050003000000330000003200000031000000410006002E000000340000000A00000017000000330000003E0003003400000030000000F900020023000000F800020023000000FD0001003800010008000C0004000800"> : vector<1508xi8> | |
vm.func private @_executable_pad_test_dispatch_1_initializer() -> !vm.ref<!hal.executable> { | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%_device_match_id_0 = vm.global.load.i32 @_device_match_id_0 : i32 | |
vm.cond_br %_device_match_id_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_1 = vm.global.load.ref @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
%c1397773893 = vm.const.i32 1397773893 : i32 | |
%_pad_test_dispatch_1_vulkan_spirv_binary_spirv = vm.const.ref.rodata @_pad_test_dispatch_1_vulkan_spirv_binary_spirv : !vm.ref<!iree.byte_buffer> | |
%ref_0 = vm.call.variadic @hal.executable.create(%ref, %c1397773893, %_pad_test_dispatch_1_vulkan_spirv_binary_spirv, [%_executable_layout_1]) : (!vm.ref<!hal.device>, i32, !vm.ref<!iree.byte_buffer>, !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> | |
vm.br ^bb3(%ref_0 : !vm.ref<!hal.executable>) | |
^bb2: // pred: ^bb0 | |
%null = vm.const.ref.zero : !vm.ref<!hal.executable> | |
vm.br ^bb3(%null : !vm.ref<!hal.executable>) | |
^bb3(%0: !vm.ref<!hal.executable>): // 2 preds: ^bb1, ^bb2 | |
vm.return %0 : !vm.ref<!hal.executable> | |
} | |
vm.rodata @pad_test_const dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
vm.rodata @pad_test_const_0 dense<0> : tensor<i32> | |
vm.rodata @pad_test_const_1 dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
vm.func @pad_test() attributes {noinline} { | |
%c-1 = vm.const.i32 -1 : i32 | |
%c4 = vm.const.i32 4 : i32 | |
%c24 = vm.const.i32 24 : i32 | |
%c108 = vm.const.i32 108 : i32 | |
%zero = vm.const.i32.zero : i32 | |
%c3 = vm.const.i32 3 : i32 | |
%c9 = vm.const.i32 9 : i32 | |
%c16777248 = vm.const.i32 16777248 : i32 | |
%c1 = vm.const.i32 1 : i32 | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_0 = vm.call @hal.device.allocator(%ref) : (!vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> | |
%pad_test_const = vm.const.ref.rodata @pad_test_const : !vm.ref<!iree.byte_buffer> | |
%c50 = vm.const.i32 50 : i32 | |
%c15 = vm.const.i32 15 : i32 | |
%ref_1 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50, %c15, %pad_test_const, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
%pad_test_const_0 = vm.const.ref.rodata @pad_test_const_0 : !vm.ref<!iree.byte_buffer> | |
%c50_2 = vm.const.i32 50 : i32 | |
%c15_3 = vm.const.i32 15 : i32 | |
%ref_4 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50_2, %c15_3, %pad_test_const_0, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
%pad_test_const_1 = vm.const.ref.rodata @pad_test_const_1 : !vm.ref<!iree.byte_buffer> | |
%c50_5 = vm.const.i32 50 : i32 | |
%c15_6 = vm.const.i32 15 : i32 | |
%ref_7 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50_5, %c15_6, %pad_test_const_1, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
%0 = iree.do_not_optimize(%ref_7) : !vm.ref<!hal.buffer> | |
%1 = iree.do_not_optimize(%ref_4) : !vm.ref<!hal.buffer> | |
%c50_8 = vm.const.i32 50 : i32 | |
%c15_9 = vm.const.i32 15 : i32 | |
%ref_10 = vm.call @hal.allocator.allocate(%ref_0, %c50_8, %c15_9, %c108) : (!vm.ref<!hal.allocator>, i32, i32, i32) -> !vm.ref<!hal.buffer> | |
%c1_11 = vm.const.i32 1 : i32 | |
%c3_12 = vm.const.i32 3 : i32 | |
%ref_13 = vm.call @hal.command_buffer.create(%ref, %c1_11, %c3_12) : (!vm.ref<!hal.device>, i32, i32) -> !vm.ref<!hal.command_buffer> | |
vm.call @hal.command_buffer.begin(%ref_13) : (!vm.ref<!hal.command_buffer>) -> () | |
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
vm.call.variadic @hal.command_buffer.push_descriptor_set(%ref_13, %_executable_layout_0, %zero, [(%zero, %1, %zero, %c4), (%c1, %ref_10, %zero, %c108)]) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable_layout>, i32, tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) | |
%_device_match_id_0 = vm.global.load.i32 @_device_match_id_0 : i32 | |
vm.cond_br %_device_match_id_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_pad_test_dispatch_0 = vm.global.load.ref @_executable_pad_test_dispatch_0 : !vm.ref<!hal.executable> | |
%zero_14 = vm.const.i32.zero : i32 | |
vm.call @hal.command_buffer.dispatch(%ref_13, %_executable_pad_test_dispatch_0, %zero_14, %c1, %c1, %c1) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32) -> () | |
%c20 = vm.const.i32 20 : i32 | |
%c5 = vm.const.i32 5 : i32 | |
%zero_15 = vm.const.i32.zero : i32 | |
vm.call @hal.command_buffer.execution_barrier(%ref_13, %c20, %c5, %zero_15) : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> () | |
%_executable_layout_1 = vm.global.load.ref @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
vm.call.variadic @hal.command_buffer.push_descriptor_set(%ref_13, %_executable_layout_1, %zero, [(%zero, %0, %zero, %c24), (%c1, %ref_10, %zero, %c108)]) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable_layout>, i32, tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) | |
%_executable_pad_test_dispatch_1 = vm.global.load.ref @_executable_pad_test_dispatch_1 : !vm.ref<!hal.executable> | |
%zero_16 = vm.const.i32.zero : i32 | |
vm.call @hal.command_buffer.dispatch(%ref_13, %_executable_pad_test_dispatch_1, %zero_16, %c1, %c1, %c1) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32) -> () | |
%c20_17 = vm.const.i32 20 : i32 | |
%c5_18 = vm.const.i32 5 : i32 | |
%zero_19 = vm.const.i32.zero : i32 | |
vm.call @hal.command_buffer.execution_barrier(%ref_13, %c20_17, %c5_18, %zero_19) : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.end(%ref_13) : (!vm.ref<!hal.command_buffer>) -> () | |
vm.call @hal.ex.submit_and_wait(%ref, %ref_13) : (!vm.ref<!hal.device>, !vm.ref<!hal.command_buffer>) -> () | |
%ref_20 = vm.call.variadic @hal.buffer_view.create(%ref_10, %c16777248, [%c3, %c9]) : (!vm.ref<!hal.buffer>, i32, i32 ...) -> !vm.ref<!hal.buffer_view> | |
%ref_21 = vm.call.variadic @hal.buffer_view.create(%ref_1, %c16777248, [%c3, %c9]) : (!vm.ref<!hal.buffer>, i32, i32 ...) -> !vm.ref<!hal.buffer_view> | |
vm.call @check.expect_eq(%ref_20, %ref_21) : (!vm.ref<!hal.buffer_view>, !vm.ref<!hal.buffer_view>) -> () | |
vm.return | |
^bb2: // pred: ^bb0 | |
%c2 = vm.const.i32 2 : i32 | |
vm.fail %c2, "unreachable location reached" | |
} | |
vm.export @pad_test as("pad_test$raw") | |
vm.func @pad_test$async(%arg0: !vm.ref<!hal.semaphore>, %arg1: i32, %arg2: !vm.ref<!hal.semaphore>, %arg3: i32) { | |
%0 = vm.call @hal.semaphore.await(%arg0, %arg1) : (!vm.ref<!hal.semaphore>, i32) -> i32 | |
vm.cond_fail %0, "semaphore wait failed" | |
vm.call @pad_test() : () -> () | |
vm.call @hal.semaphore.signal(%arg2, %arg3) : (!vm.ref<!hal.semaphore>, i32) -> () | |
vm.return | |
} | |
vm.export @pad_test$async | |
vm.func @pad_test$sync() attributes {iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%zero = vm.const.i32.zero : i32 | |
%c1 = vm.const.i32 1 : i32 | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_0 = vm.call @hal.semaphore.create(%ref, %zero) : (!vm.ref<!hal.device>, i32) -> !vm.ref<!hal.semaphore> | |
vm.call @pad_test$async(%ref_0, %zero, %ref_0, %c1) : (!vm.ref<!hal.semaphore>, i32, !vm.ref<!hal.semaphore>, i32) -> () | |
%0 = vm.call @hal.semaphore.await(%ref_0, %c1) : (!vm.ref<!hal.semaphore>, i32) -> i32 | |
vm.cond_fail %0, "semaphore wait failed" | |
vm.return | |
} | |
vm.export @pad_test$sync as("pad_test") | |
vm.import @check.expect_true(%operand : i32) attributes {sym_visibility = "private"} | |
vm.import @check.expect_false(%operand : i32) attributes {sym_visibility = "private"} | |
vm.import @check.expect_all_true(%operand : !vm.ref<!hal.buffer_view>) attributes {sym_visibility = "private"} | |
vm.import @check.expect_eq(%lhs : !vm.ref<!hal.buffer_view>, %rhs : !vm.ref<!hal.buffer_view>) attributes {sym_visibility = "private"} | |
vm.import @check.expect_almost_eq(%lhs : !vm.ref<!hal.buffer_view>, %rhs : !vm.ref<!hal.buffer_view>) attributes {sym_visibility = "private"} | |
vm.import @hal.ex.shared_device() -> !vm.ref<!hal.device> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.ex.submit_and_wait(%device : !vm.ref<!hal.device>, %command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.allocate(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %allocation_size : i32) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.wrap.byte_buffer(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %source : !vm.ref<!iree.byte_buffer>, %offset : i32, %length : i32) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.allocator(%buffer : !vm.ref<!hal.buffer>) -> !vm.ref<!hal.allocator> attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.subspan(%source_buffer : !vm.ref<!hal.buffer>, %source_offset : i32, %length : i32) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.fill(%target_buffer : !vm.ref<!hal.buffer>, %target_offset : i32, %length : i32, %pattern : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.load(%source_buffer : !vm.ref<!hal.buffer>, %source_offset : i32, %length : i32) -> i32 attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.store(%value : i32, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i32, %length : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer_view.create(%buffer : !vm.ref<!hal.buffer>, %element_type : i32, %shape : i32 ...) -> !vm.ref<!hal.buffer_view> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.buffer(%buffer_view : !vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.byte_length(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.element_type(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.rank(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.dim(%buffer_view : !vm.ref<!hal.buffer_view>, %index : i32) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.trace(%key : !vm.ref<!iree.byte_buffer>, %operands : !vm.ref<!hal.buffer_view> ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.create(%device : !vm.ref<!hal.device>, %modes : i32, %command_categories : i32) -> !vm.ref<!hal.command_buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.begin(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.end(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.execution_barrier(%command_buffer : !vm.ref<!hal.command_buffer>, %source_stage_mask : i32, %target_stage_mask : i32, %flags : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.fill_buffer(%command_buffer : !vm.ref<!hal.command_buffer>, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i32, %length : i32, %pattern : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.copy_buffer(%command_buffer : !vm.ref<!hal.command_buffer>, %source_buffer : !vm.ref<!hal.buffer>, %source_offset : i32, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i32, %length : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.push_constants(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %offset : i32, %values : i32 ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.push_descriptor_set(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %set : i32, %bindings : tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.bind_descriptor_set(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %set : i32, %descriptor_set : !vm.ref<!hal.descriptor_set>, %dynamic_offsets : i32 ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.dispatch(%command_buffer : !vm.ref<!hal.command_buffer>, %executable : !vm.ref<!hal.executable>, %entry_point : i32, %workgroup_x : i32, %workgroup_y : i32, %workgroup_z : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.dispatch.indirect(%command_buffer : !vm.ref<!hal.command_buffer>, %executable : !vm.ref<!hal.executable>, %entry_point : i32, %workgroups_buffer : !vm.ref<!hal.buffer>, %workgroups_offset : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.descriptor_set.create(%device : !vm.ref<!hal.device>, %set_layout : !vm.ref<!hal.descriptor_set_layout>, %bindings : tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) -> !vm.ref<!hal.descriptor_set> attributes {sym_visibility = "private"} | |
vm.import @hal.descriptor_set_layout.create(%device : !vm.ref<!hal.device>, %usage_type : i32, %bindings : tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.device.allocator(%device : !vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.device.match.id(%device : !vm.ref<!hal.device>, %pattern : !vm.ref<!iree.byte_buffer>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.executable.create(%device : !vm.ref<!hal.device>, %executable_format : i32, %executable_data : !vm.ref<!iree.byte_buffer>, %executable_layouts : !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.executable_layout.create(%device : !vm.ref<!hal.device>, %push_constants : i32, %set_layouts : !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.semaphore.create(%device : !vm.ref<!hal.device>, %initial_value : i32) -> !vm.ref<!hal.semaphore> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.semaphore.query(%semaphore : !vm.ref<!hal.semaphore>) -> (i32, i32) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.signal(%semaphore : !vm.ref<!hal.semaphore>, %new_value : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.fail(%semaphore : !vm.ref<!hal.semaphore>, %status : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.await(%semaphore : !vm.ref<!hal.semaphore>, %min_value : i32) -> i32 attributes {sym_visibility = "private"} | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::VM::GlobalInitializationPass *** | |
vm.module @module { | |
vm.global.i32 @_device_match_id_0 mutable : i32 | |
vm.rodata @_utf8_vulkan_7197BF52A22CAFD7 dense<[118, 117, 108, 107, 97, 110, 42]> : vector<7xi8> | |
vm.func private @_device_match_id_0_initializer() -> i32 { | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%_utf8_vulkan_7197BF52A22CAFD7 = vm.const.ref.rodata @_utf8_vulkan_7197BF52A22CAFD7 : !vm.ref<!iree.byte_buffer> | |
%0 = vm.call @hal.device.match.id(%ref, %_utf8_vulkan_7197BF52A22CAFD7) : (!vm.ref<!hal.device>, !vm.ref<!iree.byte_buffer>) -> i32 | |
vm.return %0 : i32 | |
} | |
vm.global.ref @_descriptor_set_layout_0 mutable : !vm.ref<!hal.descriptor_set_layout> | |
vm.func private @_descriptor_set_layout_0_initializer() -> !vm.ref<!hal.descriptor_set_layout> { | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%c1 = vm.const.i32 1 : i32 | |
%zero = vm.const.i32.zero : i32 | |
%c7 = vm.const.i32 7 : i32 | |
%c1_0 = vm.const.i32 1 : i32 | |
%c1_1 = vm.const.i32 1 : i32 | |
%c7_2 = vm.const.i32 7 : i32 | |
%c6 = vm.const.i32 6 : i32 | |
%ref_3 = vm.call.variadic @hal.descriptor_set_layout.create(%ref, %c1, [(%zero, %c7, %c1_0), (%c1_1, %c7_2, %c6)]) : (!vm.ref<!hal.device>, i32, tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> | |
vm.return %ref_3 : !vm.ref<!hal.descriptor_set_layout> | |
} | |
vm.global.ref @_executable_layout_0 mutable : !vm.ref<!hal.executable_layout> | |
vm.func private @_executable_layout_0_initializer() -> !vm.ref<!hal.executable_layout> { | |
%_descriptor_set_layout_0 = vm.global.load.ref @_descriptor_set_layout_0 : !vm.ref<!hal.descriptor_set_layout> | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%zero = vm.const.i32.zero : i32 | |
%ref_0 = vm.call.variadic @hal.executable_layout.create(%ref, %zero, [%_descriptor_set_layout_0]) : (!vm.ref<!hal.device>, i32, !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> | |
vm.return %ref_0 : !vm.ref<!hal.executable_layout> | |
} | |
vm.global.ref @_descriptor_set_layout_1 mutable : !vm.ref<!hal.descriptor_set_layout> | |
vm.func private @_descriptor_set_layout_1_initializer() -> !vm.ref<!hal.descriptor_set_layout> { | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%c1 = vm.const.i32 1 : i32 | |
%zero = vm.const.i32.zero : i32 | |
%c7 = vm.const.i32 7 : i32 | |
%c1_0 = vm.const.i32 1 : i32 | |
%c1_1 = vm.const.i32 1 : i32 | |
%c7_2 = vm.const.i32 7 : i32 | |
%c3 = vm.const.i32 3 : i32 | |
%ref_3 = vm.call.variadic @hal.descriptor_set_layout.create(%ref, %c1, [(%zero, %c7, %c1_0), (%c1_1, %c7_2, %c3)]) : (!vm.ref<!hal.device>, i32, tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> | |
vm.return %ref_3 : !vm.ref<!hal.descriptor_set_layout> | |
} | |
vm.global.ref @_executable_layout_1 mutable : !vm.ref<!hal.executable_layout> | |
vm.func private @_executable_layout_1_initializer() -> !vm.ref<!hal.executable_layout> { | |
%_descriptor_set_layout_1 = vm.global.load.ref @_descriptor_set_layout_1 : !vm.ref<!hal.descriptor_set_layout> | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%zero = vm.const.i32.zero : i32 | |
%ref_0 = vm.call.variadic @hal.executable_layout.create(%ref, %zero, [%_descriptor_set_layout_1]) : (!vm.ref<!hal.device>, i32, !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> | |
vm.return %ref_0 : !vm.ref<!hal.executable_layout> | |
} | |
vm.global.ref @_executable_pad_test_dispatch_0 mutable : !vm.ref<!hal.executable> | |
vm.rodata @_pad_test_dispatch_0_vulkan_spirv_binary_spirv dense<"0x080000005350564588FAFFFF08000000240000000100000004000000130000007061645F746573745F64697370617463685F300052010000030223070000010016000000300000000000000011000200010000000A000B005350565F4B48525F73746F726167655F6275666665725F73746F726167655F636C617373000000000B00060027000000474C534C2E7374642E343530000000000E00030000000000010000000F000A0005000000120000007061645F746573745F64697370617463685F3000050000000400000010000600120000001100000020000000010000000100000005000B00040000005F5F6275696C74696E5F7661725F4C6F63616C496E766F636174696F6E49645F5F00000005000900050000005F5F6275696C74696E5F7661725F576F726B67726F757049645F5F00050009000A0000005F5F7265736F757263655F7661725F3138333936383030305F5F0000050009000F0000005F5F7265736F757263655F7661725F3138333735323534345F5F000005000700120000007061645F746573745F64697370617463685F300047000400040000000B0000001B00000047000400050000000B0000001A000000470004000800000006000000040000004800050007000000000000002300000000000000470003000700000002000000470004000A0000002100000001000000470004000A0000002200000000000000470004000D0000000600000004000000480005000C000000000000002300000000000000470003000C00000002000000470004000F0000002100000000000000470004000F00000022000000000000001500040003000000200000000000000017000400020000000300000003000000200004000100000001000000020000003B0004000100000004000000010000003B0004000100000005000000010000002B00040003000000090000001B0000001C0004000800000003000000090000001E000300070000000800000020000400060000000C000000070000003B000400060000000A0000000C0000002B000400030000000E000000010000001C0004000D000000030000000E0000001E0003000C0000000D000000200004000B0000000C0000000C0000003B0004000B0000000F0000000C00000013000200110000002100030010000000110000002B0004000300000014000000200000002B0004000300000015000000000000002B00040003000000160000000900000020000400170000000C0000000300000014000200200000003600050011000000120000000000000010000000F8000200130000004100060017000000180000000F000
00015000000150000003D0004000300000019000000180000003D000400020000001A0000000500000051000500030000001B0000001A000000000000003D000400020000001C0000000400000051000500030000001D0000001C0000000000000084000500030000001E0000001B0000001400000080000500030000001F0000001E0000001D000000B100050020000000210000001F00000009000000F70003002400000000000000FA000400210000002300000024000000F8000200230000008700050003000000250000001F000000160000000C000600030000002600000027000000050000001F0000000C00060003000000280000002700000005000000160000008900050003000000290000002600000028000000AA000500200000002A0000001F000000260000007E000400030000002B00000029000000A9000600030000002C0000002A000000290000002B00000084000500030000002D000000250000001600000080000500030000002E0000002D0000002C00000041000600170000002F0000000A000000150000002E0000003E0003002F00000019000000F900020024000000F800020024000000FD0001003800010008000C0004000800"> : vector<1416xi8> | |
vm.func private @_executable_pad_test_dispatch_0_initializer() -> !vm.ref<!hal.executable> { | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%_device_match_id_0 = vm.global.load.i32 @_device_match_id_0 : i32 | |
vm.cond_br %_device_match_id_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%c1397773893 = vm.const.i32 1397773893 : i32 | |
%_pad_test_dispatch_0_vulkan_spirv_binary_spirv = vm.const.ref.rodata @_pad_test_dispatch_0_vulkan_spirv_binary_spirv : !vm.ref<!iree.byte_buffer> | |
%ref_0 = vm.call.variadic @hal.executable.create(%ref, %c1397773893, %_pad_test_dispatch_0_vulkan_spirv_binary_spirv, [%_executable_layout_0]) : (!vm.ref<!hal.device>, i32, !vm.ref<!iree.byte_buffer>, !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> | |
vm.br ^bb3(%ref_0 : !vm.ref<!hal.executable>) | |
^bb2: // pred: ^bb0 | |
%null = vm.const.ref.zero : !vm.ref<!hal.executable> | |
vm.br ^bb3(%null : !vm.ref<!hal.executable>) | |
^bb3(%0: !vm.ref<!hal.executable>): // 2 preds: ^bb1, ^bb2 | |
vm.return %0 : !vm.ref<!hal.executable> | |
} | |
vm.global.ref @_executable_pad_test_dispatch_1 mutable : !vm.ref<!hal.executable> | |
vm.rodata @_pad_test_dispatch_1_vulkan_spirv_binary_spirv dense<"0x08000000535056452CFAFFFF08000000240000000100000004000000130000007061645F746573745F64697370617463685F310069010000030223070000010016000000350000000000000011000200010000000A000B005350565F4B48525F73746F726167655F6275666665725F73746F726167655F636C617373000000000B00060026000000474C534C2E7374642E343530000000000E00030000000000010000000F000A0005000000120000007061645F746573745F64697370617463685F3100050000000400000010000600120000001100000020000000010000000100000005000B00040000005F5F6275696C74696E5F7661725F4C6F63616C496E766F636174696F6E49645F5F00000005000900050000005F5F6275696C74696E5F7661725F576F726B67726F757049645F5F00050009000A0000005F5F7265736F757263655F7661725F3138343135323936305F5F0000050009000F0000005F5F7265736F757263655F7661725F3138333735323534345F5F000005000700120000007061645F746573745F64697370617463685F310047000400040000000B0000001B00000047000400050000000B0000001A000000470004000800000006000000040000004800050007000000000000002300000000000000470003000700000002000000470004000A0000002100000001000000470004000A0000002200000000000000470004000D0000000600000004000000480005000C000000000000002300000000000000470003000C00000002000000470004000F0000002100000000000000470004000F00000022000000000000001500040003000000200000000000000017000400020000000300000003000000200004000100000001000000020000003B0004000100000004000000010000003B0004000100000005000000010000002B00040003000000090000001B0000001C0004000800000003000000090000001E000300070000000800000020000400060000000C000000070000003B000400060000000A0000000C0000002B000400030000000E000000060000001C0004000D000000030000000E0000001E0003000C0000000D000000200004000B0000000C0000000C0000003B0004000B0000000F0000000C00000013000200110000002100030010000000110000002B0004000300000014000000010000002B0004000300000015000000200000002B0004000300000016000000030000002B0004000300000017000000000000002B000400030000001800000009000000140002001F000000200004002E0000000C00000003000000360005001100000012000
0000000000010000000F8000200130000003D00040002000000190000000500000051000500030000001A00000019000000000000003D000400020000001B0000000400000051000500030000001C0000001B0000000000000084000500030000001D0000001A0000001500000080000500030000001E0000001D0000001C000000B10005001F000000200000001E0000000E000000F70003002300000000000000FA000400200000002200000023000000F8000200220000008700050003000000240000001E000000160000000C000600030000002500000026000000050000001E0000000C00060003000000270000002600000005000000160000008900050003000000280000002500000027000000AA0005001F000000290000001E000000250000007E000400030000002A00000028000000A9000600030000002B00000029000000280000002A00000084000500030000002C000000240000001600000080000500030000002D0000002C0000002B000000410006002E0000002F0000000F000000170000002D0000003D00040003000000300000002F0000008000050003000000310000002B0000001400000084000500030000003200000024000000180000008000050003000000330000003200000031000000410006002E000000340000000A00000017000000330000003E0003003400000030000000F900020023000000F800020023000000FD0001003800010008000C0004000800"> : vector<1508xi8> | |
vm.func private @_executable_pad_test_dispatch_1_initializer() -> !vm.ref<!hal.executable> { | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%_device_match_id_0 = vm.global.load.i32 @_device_match_id_0 : i32 | |
vm.cond_br %_device_match_id_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_1 = vm.global.load.ref @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
%c1397773893 = vm.const.i32 1397773893 : i32 | |
%_pad_test_dispatch_1_vulkan_spirv_binary_spirv = vm.const.ref.rodata @_pad_test_dispatch_1_vulkan_spirv_binary_spirv : !vm.ref<!iree.byte_buffer> | |
%ref_0 = vm.call.variadic @hal.executable.create(%ref, %c1397773893, %_pad_test_dispatch_1_vulkan_spirv_binary_spirv, [%_executable_layout_1]) : (!vm.ref<!hal.device>, i32, !vm.ref<!iree.byte_buffer>, !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> | |
vm.br ^bb3(%ref_0 : !vm.ref<!hal.executable>) | |
^bb2: // pred: ^bb0 | |
%null = vm.const.ref.zero : !vm.ref<!hal.executable> | |
vm.br ^bb3(%null : !vm.ref<!hal.executable>) | |
^bb3(%0: !vm.ref<!hal.executable>): // 2 preds: ^bb1, ^bb2 | |
vm.return %0 : !vm.ref<!hal.executable> | |
} | |
vm.rodata @pad_test_const dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
vm.rodata @pad_test_const_0 dense<0> : tensor<i32> | |
vm.rodata @pad_test_const_1 dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
vm.func @pad_test() attributes {noinline} { | |
%c-1 = vm.const.i32 -1 : i32 | |
%c4 = vm.const.i32 4 : i32 | |
%c24 = vm.const.i32 24 : i32 | |
%c108 = vm.const.i32 108 : i32 | |
%zero = vm.const.i32.zero : i32 | |
%c3 = vm.const.i32 3 : i32 | |
%c9 = vm.const.i32 9 : i32 | |
%c16777248 = vm.const.i32 16777248 : i32 | |
%c1 = vm.const.i32 1 : i32 | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_0 = vm.call @hal.device.allocator(%ref) : (!vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> | |
%pad_test_const = vm.const.ref.rodata @pad_test_const : !vm.ref<!iree.byte_buffer> | |
%c50 = vm.const.i32 50 : i32 | |
%c15 = vm.const.i32 15 : i32 | |
%ref_1 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50, %c15, %pad_test_const, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
%pad_test_const_0 = vm.const.ref.rodata @pad_test_const_0 : !vm.ref<!iree.byte_buffer> | |
%c50_2 = vm.const.i32 50 : i32 | |
%c15_3 = vm.const.i32 15 : i32 | |
%ref_4 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50_2, %c15_3, %pad_test_const_0, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
%pad_test_const_1 = vm.const.ref.rodata @pad_test_const_1 : !vm.ref<!iree.byte_buffer> | |
%c50_5 = vm.const.i32 50 : i32 | |
%c15_6 = vm.const.i32 15 : i32 | |
%ref_7 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50_5, %c15_6, %pad_test_const_1, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
%0 = iree.do_not_optimize(%ref_7) : !vm.ref<!hal.buffer> | |
%1 = iree.do_not_optimize(%ref_4) : !vm.ref<!hal.buffer> | |
%c50_8 = vm.const.i32 50 : i32 | |
%c15_9 = vm.const.i32 15 : i32 | |
%ref_10 = vm.call @hal.allocator.allocate(%ref_0, %c50_8, %c15_9, %c108) : (!vm.ref<!hal.allocator>, i32, i32, i32) -> !vm.ref<!hal.buffer> | |
%c1_11 = vm.const.i32 1 : i32 | |
%c3_12 = vm.const.i32 3 : i32 | |
%ref_13 = vm.call @hal.command_buffer.create(%ref, %c1_11, %c3_12) : (!vm.ref<!hal.device>, i32, i32) -> !vm.ref<!hal.command_buffer> | |
vm.call @hal.command_buffer.begin(%ref_13) : (!vm.ref<!hal.command_buffer>) -> () | |
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
vm.call.variadic @hal.command_buffer.push_descriptor_set(%ref_13, %_executable_layout_0, %zero, [(%zero, %1, %zero, %c4), (%c1, %ref_10, %zero, %c108)]) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable_layout>, i32, tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) | |
%_device_match_id_0 = vm.global.load.i32 @_device_match_id_0 : i32 | |
vm.cond_br %_device_match_id_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_pad_test_dispatch_0 = vm.global.load.ref @_executable_pad_test_dispatch_0 : !vm.ref<!hal.executable> | |
%zero_14 = vm.const.i32.zero : i32 | |
vm.call @hal.command_buffer.dispatch(%ref_13, %_executable_pad_test_dispatch_0, %zero_14, %c1, %c1, %c1) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32) -> () | |
%c20 = vm.const.i32 20 : i32 | |
%c5 = vm.const.i32 5 : i32 | |
%zero_15 = vm.const.i32.zero : i32 | |
vm.call @hal.command_buffer.execution_barrier(%ref_13, %c20, %c5, %zero_15) : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> () | |
%_executable_layout_1 = vm.global.load.ref @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
vm.call.variadic @hal.command_buffer.push_descriptor_set(%ref_13, %_executable_layout_1, %zero, [(%zero, %0, %zero, %c24), (%c1, %ref_10, %zero, %c108)]) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable_layout>, i32, tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) | |
%_executable_pad_test_dispatch_1 = vm.global.load.ref @_executable_pad_test_dispatch_1 : !vm.ref<!hal.executable> | |
%zero_16 = vm.const.i32.zero : i32 | |
vm.call @hal.command_buffer.dispatch(%ref_13, %_executable_pad_test_dispatch_1, %zero_16, %c1, %c1, %c1) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32) -> () | |
%c20_17 = vm.const.i32 20 : i32 | |
%c5_18 = vm.const.i32 5 : i32 | |
%zero_19 = vm.const.i32.zero : i32 | |
vm.call @hal.command_buffer.execution_barrier(%ref_13, %c20_17, %c5_18, %zero_19) : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.end(%ref_13) : (!vm.ref<!hal.command_buffer>) -> () | |
vm.call @hal.ex.submit_and_wait(%ref, %ref_13) : (!vm.ref<!hal.device>, !vm.ref<!hal.command_buffer>) -> () | |
%ref_20 = vm.call.variadic @hal.buffer_view.create(%ref_10, %c16777248, [%c3, %c9]) : (!vm.ref<!hal.buffer>, i32, i32 ...) -> !vm.ref<!hal.buffer_view> | |
%ref_21 = vm.call.variadic @hal.buffer_view.create(%ref_1, %c16777248, [%c3, %c9]) : (!vm.ref<!hal.buffer>, i32, i32 ...) -> !vm.ref<!hal.buffer_view> | |
vm.call @check.expect_eq(%ref_20, %ref_21) : (!vm.ref<!hal.buffer_view>, !vm.ref<!hal.buffer_view>) -> () | |
vm.return | |
^bb2: // pred: ^bb0 | |
%c2 = vm.const.i32 2 : i32 | |
vm.fail %c2, "unreachable location reached" | |
} | |
vm.export @pad_test as("pad_test$raw") | |
vm.func @pad_test$async(%arg0: !vm.ref<!hal.semaphore>, %arg1: i32, %arg2: !vm.ref<!hal.semaphore>, %arg3: i32) { | |
%0 = vm.call @hal.semaphore.await(%arg0, %arg1) : (!vm.ref<!hal.semaphore>, i32) -> i32 | |
vm.cond_fail %0, "semaphore wait failed" | |
vm.call @pad_test() : () -> () | |
vm.call @hal.semaphore.signal(%arg2, %arg3) : (!vm.ref<!hal.semaphore>, i32) -> () | |
vm.return | |
} | |
vm.export @pad_test$async | |
vm.func @pad_test$sync() attributes {iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%zero = vm.const.i32.zero : i32 | |
%c1 = vm.const.i32 1 : i32 | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_0 = vm.call @hal.semaphore.create(%ref, %zero) : (!vm.ref<!hal.device>, i32) -> !vm.ref<!hal.semaphore> | |
vm.call @pad_test$async(%ref_0, %zero, %ref_0, %c1) : (!vm.ref<!hal.semaphore>, i32, !vm.ref<!hal.semaphore>, i32) -> () | |
%0 = vm.call @hal.semaphore.await(%ref_0, %c1) : (!vm.ref<!hal.semaphore>, i32) -> i32 | |
vm.cond_fail %0, "semaphore wait failed" | |
vm.return | |
} | |
vm.export @pad_test$sync as("pad_test") | |
vm.import @check.expect_true(%operand : i32) attributes {sym_visibility = "private"} | |
vm.import @check.expect_false(%operand : i32) attributes {sym_visibility = "private"} | |
vm.import @check.expect_all_true(%operand : !vm.ref<!hal.buffer_view>) attributes {sym_visibility = "private"} | |
vm.import @check.expect_eq(%lhs : !vm.ref<!hal.buffer_view>, %rhs : !vm.ref<!hal.buffer_view>) attributes {sym_visibility = "private"} | |
vm.import @check.expect_almost_eq(%lhs : !vm.ref<!hal.buffer_view>, %rhs : !vm.ref<!hal.buffer_view>) attributes {sym_visibility = "private"} | |
vm.import @hal.ex.shared_device() -> !vm.ref<!hal.device> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.ex.submit_and_wait(%device : !vm.ref<!hal.device>, %command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.allocate(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %allocation_size : i32) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.wrap.byte_buffer(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %source : !vm.ref<!iree.byte_buffer>, %offset : i32, %length : i32) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.allocator(%buffer : !vm.ref<!hal.buffer>) -> !vm.ref<!hal.allocator> attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.subspan(%source_buffer : !vm.ref<!hal.buffer>, %source_offset : i32, %length : i32) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.fill(%target_buffer : !vm.ref<!hal.buffer>, %target_offset : i32, %length : i32, %pattern : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.load(%source_buffer : !vm.ref<!hal.buffer>, %source_offset : i32, %length : i32) -> i32 attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.store(%value : i32, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i32, %length : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer_view.create(%buffer : !vm.ref<!hal.buffer>, %element_type : i32, %shape : i32 ...) -> !vm.ref<!hal.buffer_view> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.buffer(%buffer_view : !vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.byte_length(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.element_type(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.rank(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.dim(%buffer_view : !vm.ref<!hal.buffer_view>, %index : i32) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.trace(%key : !vm.ref<!iree.byte_buffer>, %operands : !vm.ref<!hal.buffer_view> ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.create(%device : !vm.ref<!hal.device>, %modes : i32, %command_categories : i32) -> !vm.ref<!hal.command_buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.begin(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.end(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.execution_barrier(%command_buffer : !vm.ref<!hal.command_buffer>, %source_stage_mask : i32, %target_stage_mask : i32, %flags : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.fill_buffer(%command_buffer : !vm.ref<!hal.command_buffer>, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i32, %length : i32, %pattern : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.copy_buffer(%command_buffer : !vm.ref<!hal.command_buffer>, %source_buffer : !vm.ref<!hal.buffer>, %source_offset : i32, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i32, %length : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.push_constants(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %offset : i32, %values : i32 ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.push_descriptor_set(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %set : i32, %bindings : tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.bind_descriptor_set(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %set : i32, %descriptor_set : !vm.ref<!hal.descriptor_set>, %dynamic_offsets : i32 ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.dispatch(%command_buffer : !vm.ref<!hal.command_buffer>, %executable : !vm.ref<!hal.executable>, %entry_point : i32, %workgroup_x : i32, %workgroup_y : i32, %workgroup_z : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.dispatch.indirect(%command_buffer : !vm.ref<!hal.command_buffer>, %executable : !vm.ref<!hal.executable>, %entry_point : i32, %workgroups_buffer : !vm.ref<!hal.buffer>, %workgroups_offset : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.descriptor_set.create(%device : !vm.ref<!hal.device>, %set_layout : !vm.ref<!hal.descriptor_set_layout>, %bindings : tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) -> !vm.ref<!hal.descriptor_set> attributes {sym_visibility = "private"} | |
vm.import @hal.descriptor_set_layout.create(%device : !vm.ref<!hal.device>, %usage_type : i32, %bindings : tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.device.allocator(%device : !vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.device.match.id(%device : !vm.ref<!hal.device>, %pattern : !vm.ref<!iree.byte_buffer>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.executable.create(%device : !vm.ref<!hal.device>, %executable_format : i32, %executable_data : !vm.ref<!iree.byte_buffer>, %executable_layouts : !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.executable_layout.create(%device : !vm.ref<!hal.device>, %push_constants : i32, %set_layouts : !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.semaphore.create(%device : !vm.ref<!hal.device>, %initial_value : i32) -> !vm.ref<!hal.semaphore> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.semaphore.query(%semaphore : !vm.ref<!hal.semaphore>) -> (i32, i32) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.signal(%semaphore : !vm.ref<!hal.semaphore>, %new_value : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.fail(%semaphore : !vm.ref<!hal.semaphore>, %status : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.await(%semaphore : !vm.ref<!hal.semaphore>, %min_value : i32) -> i32 attributes {sym_visibility = "private"} | |
vm.func @__init() { | |
%0 = vm.call @_device_match_id_0_initializer() : () -> i32 | |
vm.global.store.i32 %0, @_device_match_id_0 : i32 | |
%ref = vm.call @_descriptor_set_layout_0_initializer() : () -> !vm.ref<!hal.descriptor_set_layout> | |
vm.global.store.ref %ref, @_descriptor_set_layout_0 : !vm.ref<!hal.descriptor_set_layout> | |
%ref_0 = vm.call @_executable_layout_0_initializer() : () -> !vm.ref<!hal.executable_layout> | |
vm.global.store.ref %ref_0, @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%ref_1 = vm.call @_descriptor_set_layout_1_initializer() : () -> !vm.ref<!hal.descriptor_set_layout> | |
vm.global.store.ref %ref_1, @_descriptor_set_layout_1 : !vm.ref<!hal.descriptor_set_layout> | |
%ref_2 = vm.call @_executable_layout_1_initializer() : () -> !vm.ref<!hal.executable_layout> | |
vm.global.store.ref %ref_2, @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
%ref_3 = vm.call @_executable_pad_test_dispatch_0_initializer() : () -> !vm.ref<!hal.executable> | |
vm.global.store.ref %ref_3, @_executable_pad_test_dispatch_0 : !vm.ref<!hal.executable> | |
%ref_4 = vm.call @_executable_pad_test_dispatch_1_initializer() : () -> !vm.ref<!hal.executable> | |
vm.global.store.ref %ref_4, @_executable_pad_test_dispatch_1 : !vm.ref<!hal.executable> | |
vm.return | |
} | |
vm.export @__init | |
} | |
// *** IR Dump After Canonicalizer *** | |
vm.func @__init() { | |
%0 = vm.call @_device_match_id_0_initializer() : () -> i32 | |
vm.global.store.i32 %0, @_device_match_id_0 : i32 | |
%ref = vm.call @_descriptor_set_layout_0_initializer() : () -> !vm.ref<!hal.descriptor_set_layout> | |
vm.global.store.ref %ref, @_descriptor_set_layout_0 : !vm.ref<!hal.descriptor_set_layout> | |
%ref_0 = vm.call @_executable_layout_0_initializer() : () -> !vm.ref<!hal.executable_layout> | |
vm.global.store.ref %ref_0, @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%ref_1 = vm.call @_descriptor_set_layout_1_initializer() : () -> !vm.ref<!hal.descriptor_set_layout> | |
vm.global.store.ref %ref_1, @_descriptor_set_layout_1 : !vm.ref<!hal.descriptor_set_layout> | |
%ref_2 = vm.call @_executable_layout_1_initializer() : () -> !vm.ref<!hal.executable_layout> | |
vm.global.store.ref %ref_2, @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
%ref_3 = vm.call @_executable_pad_test_dispatch_0_initializer() : () -> !vm.ref<!hal.executable> | |
vm.global.store.ref %ref_3, @_executable_pad_test_dispatch_0 : !vm.ref<!hal.executable> | |
%ref_4 = vm.call @_executable_pad_test_dispatch_1_initializer() : () -> !vm.ref<!hal.executable> | |
vm.global.store.ref %ref_4, @_executable_pad_test_dispatch_1 : !vm.ref<!hal.executable> | |
vm.return | |
} | |
// *** IR Dump After Canonicalizer *** | |
vm.func @pad_test$sync() attributes {iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%zero = vm.const.i32.zero : i32 | |
%c1 = vm.const.i32 1 : i32 | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_0 = vm.call @hal.semaphore.create(%ref, %zero) : (!vm.ref<!hal.device>, i32) -> !vm.ref<!hal.semaphore> | |
vm.call @pad_test$async(%ref_0, %zero, %ref_0, %c1) : (!vm.ref<!hal.semaphore>, i32, !vm.ref<!hal.semaphore>, i32) -> () | |
%0 = vm.call @hal.semaphore.await(%ref_0, %c1) : (!vm.ref<!hal.semaphore>, i32) -> i32 | |
vm.cond_br %0, ^bb2(%0 : i32), ^bb1 | |
^bb1: // pred: ^bb0 | |
vm.return | |
^bb2(%1: i32): // pred: ^bb0 | |
vm.fail %1, "semaphore wait failed" | |
} | |
// *** IR Dump After Canonicalizer *** | |
vm.func @pad_test$async(%arg0: !vm.ref<!hal.semaphore>, %arg1: i32, %arg2: !vm.ref<!hal.semaphore>, %arg3: i32) { | |
%0 = vm.call @hal.semaphore.await(%arg0, %arg1) : (!vm.ref<!hal.semaphore>, i32) -> i32 | |
vm.cond_br %0, ^bb2(%0 : i32), ^bb1 | |
^bb1: // pred: ^bb0 | |
vm.call @pad_test() : () -> () | |
vm.call @hal.semaphore.signal(%arg2, %arg3) : (!vm.ref<!hal.semaphore>, i32) -> () | |
vm.return | |
^bb2(%1: i32): // pred: ^bb0 | |
vm.fail %1, "semaphore wait failed" | |
} | |
// *** IR Dump After Canonicalizer *** | |
vm.func @pad_test() attributes {noinline} { | |
%c-1 = vm.const.i32 -1 : i32 | |
%c4 = vm.const.i32 4 : i32 | |
%c24 = vm.const.i32 24 : i32 | |
%c108 = vm.const.i32 108 : i32 | |
%c9 = vm.const.i32 9 : i32 | |
%c16777248 = vm.const.i32 16777248 : i32 | |
%c50 = vm.const.i32 50 : i32 | |
%c15 = vm.const.i32 15 : i32 | |
%c1 = vm.const.i32 1 : i32 | |
%c3 = vm.const.i32 3 : i32 | |
%c20 = vm.const.i32 20 : i32 | |
%c5 = vm.const.i32 5 : i32 | |
%zero = vm.const.i32.zero : i32 | |
%c2 = vm.const.i32 2 : i32 | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_0 = vm.call @hal.device.allocator(%ref) : (!vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> | |
%pad_test_const = vm.const.ref.rodata @pad_test_const : !vm.ref<!iree.byte_buffer> | |
%ref_1 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50, %c15, %pad_test_const, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
%pad_test_const_0 = vm.const.ref.rodata @pad_test_const_0 : !vm.ref<!iree.byte_buffer> | |
%ref_2 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50, %c15, %pad_test_const_0, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
%pad_test_const_1 = vm.const.ref.rodata @pad_test_const_1 : !vm.ref<!iree.byte_buffer> | |
%ref_3 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50, %c15, %pad_test_const_1, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
%0 = iree.do_not_optimize(%ref_3) : !vm.ref<!hal.buffer> | |
%1 = iree.do_not_optimize(%ref_2) : !vm.ref<!hal.buffer> | |
%ref_4 = vm.call @hal.allocator.allocate(%ref_0, %c50, %c15, %c108) : (!vm.ref<!hal.allocator>, i32, i32, i32) -> !vm.ref<!hal.buffer> | |
%ref_5 = vm.call @hal.command_buffer.create(%ref, %c1, %c3) : (!vm.ref<!hal.device>, i32, i32) -> !vm.ref<!hal.command_buffer> | |
vm.call @hal.command_buffer.begin(%ref_5) : (!vm.ref<!hal.command_buffer>) -> () | |
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
vm.call.variadic @hal.command_buffer.push_descriptor_set(%ref_5, %_executable_layout_0, %zero, [(%zero, %1, %zero, %c4), (%c1, %ref_4, %zero, %c108)]) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable_layout>, i32, tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) | |
%_device_match_id_0 = vm.global.load.i32 @_device_match_id_0 : i32 | |
vm.cond_br %_device_match_id_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_pad_test_dispatch_0 = vm.global.load.ref @_executable_pad_test_dispatch_0 : !vm.ref<!hal.executable> | |
vm.call @hal.command_buffer.dispatch(%ref_5, %_executable_pad_test_dispatch_0, %zero, %c1, %c1, %c1) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.execution_barrier(%ref_5, %c20, %c5, %zero) : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> () | |
%_executable_layout_1 = vm.global.load.ref @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
vm.call.variadic @hal.command_buffer.push_descriptor_set(%ref_5, %_executable_layout_1, %zero, [(%zero, %0, %zero, %c24), (%c1, %ref_4, %zero, %c108)]) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable_layout>, i32, tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) | |
%_executable_pad_test_dispatch_1 = vm.global.load.ref @_executable_pad_test_dispatch_1 : !vm.ref<!hal.executable> | |
vm.call @hal.command_buffer.dispatch(%ref_5, %_executable_pad_test_dispatch_1, %zero, %c1, %c1, %c1) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.execution_barrier(%ref_5, %c20, %c5, %zero) : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.end(%ref_5) : (!vm.ref<!hal.command_buffer>) -> () | |
vm.call @hal.ex.submit_and_wait(%ref, %ref_5) : (!vm.ref<!hal.device>, !vm.ref<!hal.command_buffer>) -> () | |
%ref_6 = vm.call.variadic @hal.buffer_view.create(%ref_4, %c16777248, [%c3, %c9]) : (!vm.ref<!hal.buffer>, i32, i32 ...) -> !vm.ref<!hal.buffer_view> | |
%ref_7 = vm.call.variadic @hal.buffer_view.create(%ref_1, %c16777248, [%c3, %c9]) : (!vm.ref<!hal.buffer>, i32, i32 ...) -> !vm.ref<!hal.buffer_view> | |
vm.call @check.expect_eq(%ref_6, %ref_7) : (!vm.ref<!hal.buffer_view>, !vm.ref<!hal.buffer_view>) -> () | |
vm.return | |
^bb2: // pred: ^bb0 | |
vm.fail %c2, "unreachable location reached" | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Builds the !hal.executable for pad_test dispatch 1.
// Returns the executable created from the embedded SPIR-V rodata when the
// @_device_match_id_0 global is non-zero, and a null ref otherwise.
vm.func private @_executable_pad_test_dispatch_1_initializer() -> !vm.ref<!hal.executable> { | |
// 1397773893 == 0x53505645, i.e. the ASCII bytes 'S','P','V','E'.
// NOTE(review): presumably the executable format tag -- confirm against the
// hal.executable.create import declaration.
%c1397773893 = vm.const.i32 1397773893 : i32 | |
%null = vm.const.ref.zero : !vm.ref<!hal.executable> | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
// Branch on the cached device-match flag.
%_device_match_id_0 = vm.global.load.i32 @_device_match_id_0 : i32 | |
vm.cond_br %_device_match_id_0, ^bb1, ^bb2 | |
// Matched: create the executable from the rodata blob using executable layout 1.
^bb1: // pred: ^bb0 | |
%_executable_layout_1 = vm.global.load.ref @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
%_pad_test_dispatch_1_vulkan_spirv_binary_spirv = vm.const.ref.rodata @_pad_test_dispatch_1_vulkan_spirv_binary_spirv : !vm.ref<!iree.byte_buffer> | |
%ref_0 = vm.call.variadic @hal.executable.create(%ref, %c1397773893, %_pad_test_dispatch_1_vulkan_spirv_binary_spirv, [%_executable_layout_1]) : (!vm.ref<!hal.device>, i32, !vm.ref<!iree.byte_buffer>, !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> | |
vm.br ^bb3(%ref_0 : !vm.ref<!hal.executable>) | |
// Not matched: forward the null executable ref.
^bb2: // pred: ^bb0 | |
vm.br ^bb3(%null : !vm.ref<!hal.executable>) | |
// Join point: %0 is either the created executable or null.
^bb3(%0: !vm.ref<!hal.executable>): // 2 preds: ^bb1, ^bb2 | |
vm.return %0 : !vm.ref<!hal.executable> | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Builds the !hal.executable for pad_test dispatch 0.
// Returns the executable created from the embedded SPIR-V rodata when the
// @_device_match_id_0 global is non-zero, and a null ref otherwise.
vm.func private @_executable_pad_test_dispatch_0_initializer() -> !vm.ref<!hal.executable> { | |
// 1397773893 == 0x53505645, i.e. the ASCII bytes 'S','P','V','E'.
// NOTE(review): presumably the executable format tag -- confirm against the
// hal.executable.create import declaration.
%c1397773893 = vm.const.i32 1397773893 : i32 | |
%null = vm.const.ref.zero : !vm.ref<!hal.executable> | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
// Branch on the cached device-match flag.
%_device_match_id_0 = vm.global.load.i32 @_device_match_id_0 : i32 | |
vm.cond_br %_device_match_id_0, ^bb1, ^bb2 | |
// Matched: create the executable from the rodata blob using executable layout 0.
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%_pad_test_dispatch_0_vulkan_spirv_binary_spirv = vm.const.ref.rodata @_pad_test_dispatch_0_vulkan_spirv_binary_spirv : !vm.ref<!iree.byte_buffer> | |
%ref_0 = vm.call.variadic @hal.executable.create(%ref, %c1397773893, %_pad_test_dispatch_0_vulkan_spirv_binary_spirv, [%_executable_layout_0]) : (!vm.ref<!hal.device>, i32, !vm.ref<!iree.byte_buffer>, !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> | |
vm.br ^bb3(%ref_0 : !vm.ref<!hal.executable>) | |
// Not matched: forward the null executable ref.
^bb2: // pred: ^bb0 | |
vm.br ^bb3(%null : !vm.ref<!hal.executable>) | |
// Join point: %0 is either the created executable or null.
^bb3(%0: !vm.ref<!hal.executable>): // 2 preds: ^bb1, ^bb2 | |
vm.return %0 : !vm.ref<!hal.executable> | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Creates and returns an executable layout on the shared device that contains
// the single descriptor set layout held in the @_descriptor_set_layout_1 global.
vm.func private @_executable_layout_1_initializer() -> !vm.ref<!hal.executable_layout> { | |
%zero = vm.const.i32.zero : i32 | |
%_descriptor_set_layout_1 = vm.global.load.ref @_descriptor_set_layout_1 : !vm.ref<!hal.descriptor_set_layout> | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
// NOTE(review): the %zero operand is presumably the push-constant count --
// confirm against the hal.executable_layout.create import declaration.
%ref_0 = vm.call.variadic @hal.executable_layout.create(%ref, %zero, [%_descriptor_set_layout_1]) : (!vm.ref<!hal.device>, i32, !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> | |
vm.return %ref_0 : !vm.ref<!hal.executable_layout> | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Creates and returns a descriptor set layout on the shared device with two
// binding tuples: (0, 7, 1) and (1, 7, 3).
// NOTE(review): the tuple fields look like (binding ordinal, descriptor type,
// access flags) -- confirm against the hal.descriptor_set_layout.create import.
vm.func private @_descriptor_set_layout_1_initializer() -> !vm.ref<!hal.descriptor_set_layout> { | |
%zero = vm.const.i32.zero : i32 | |
%c1 = vm.const.i32 1 : i32 | |
%c7 = vm.const.i32 7 : i32 | |
%c3 = vm.const.i32 3 : i32 | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
// The scalar %c1 after the device is a separate i32 argument, not a binding.
%ref_0 = vm.call.variadic @hal.descriptor_set_layout.create(%ref, %c1, [(%zero, %c7, %c1), (%c1, %c7, %c3)]) : (!vm.ref<!hal.device>, i32, tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> | |
vm.return %ref_0 : !vm.ref<!hal.descriptor_set_layout> | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Creates and returns an executable layout on the shared device that contains
// the single descriptor set layout held in the @_descriptor_set_layout_0 global.
vm.func private @_executable_layout_0_initializer() -> !vm.ref<!hal.executable_layout> { | |
%zero = vm.const.i32.zero : i32 | |
%_descriptor_set_layout_0 = vm.global.load.ref @_descriptor_set_layout_0 : !vm.ref<!hal.descriptor_set_layout> | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
// NOTE(review): the %zero operand is presumably the push-constant count --
// confirm against the hal.executable_layout.create import declaration.
%ref_0 = vm.call.variadic @hal.executable_layout.create(%ref, %zero, [%_descriptor_set_layout_0]) : (!vm.ref<!hal.device>, i32, !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> | |
vm.return %ref_0 : !vm.ref<!hal.executable_layout> | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Creates and returns a descriptor set layout on the shared device with two
// binding tuples: (0, 7, 1) and (1, 7, 6).
// NOTE(review): the tuple fields look like (binding ordinal, descriptor type,
// access flags) -- confirm against the hal.descriptor_set_layout.create import.
vm.func private @_descriptor_set_layout_0_initializer() -> !vm.ref<!hal.descriptor_set_layout> { | |
%zero = vm.const.i32.zero : i32 | |
%c1 = vm.const.i32 1 : i32 | |
%c7 = vm.const.i32 7 : i32 | |
%c6 = vm.const.i32 6 : i32 | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
// The scalar %c1 after the device is a separate i32 argument, not a binding.
%ref_0 = vm.call.variadic @hal.descriptor_set_layout.create(%ref, %c1, [(%zero, %c7, %c1), (%c1, %c7, %c6)]) : (!vm.ref<!hal.device>, i32, tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> | |
vm.return %ref_0 : !vm.ref<!hal.descriptor_set_layout> | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Asks the shared device whether its id matches the pattern stored in the
// @_utf8_vulkan_7197BF52A22CAFD7 rodata and returns the i32 result.
// The module declares that rodata as the bytes [118, 117, 108, 107, 97, 110, 42],
// which is the ASCII string "vulkan*".
vm.func private @_device_match_id_0_initializer() -> i32 { | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%_utf8_vulkan_7197BF52A22CAFD7 = vm.const.ref.rodata @_utf8_vulkan_7197BF52A22CAFD7 : !vm.ref<!iree.byte_buffer> | |
%0 = vm.call @hal.device.match.id(%ref, %_utf8_vulkan_7197BF52A22CAFD7) : (!vm.ref<!hal.device>, !vm.ref<!iree.byte_buffer>) -> i32 | |
vm.return %0 : i32 | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Module initializer. Populates, in order, the globals:
//   @_device_match_id_0, @_descriptor_set_layout_0, @_executable_layout_0,
//   @_descriptor_set_layout_1, @_executable_layout_1,
//   @_executable_pad_test_dispatch_0, @_executable_pad_test_dispatch_1.
// Each step has the same shape as the corresponding standalone *_initializer
// function in this dump, followed by a store to the global.
vm.func @__init() { | |
%c6 = vm.const.i32 6 : i32 | |
%c1 = vm.const.i32 1 : i32 | |
%c7 = vm.const.i32 7 : i32 | |
%c3 = vm.const.i32 3 : i32 | |
%zero = vm.const.i32.zero : i32 | |
// 1397773893 == 0x53505645, i.e. the ASCII bytes 'S','P','V','E'.
%c1397773893 = vm.const.i32 1397773893 : i32 | |
%null = vm.const.ref.zero : !vm.ref<!hal.executable> | |
// Step 1: cache the device id match result in @_device_match_id_0.
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%_utf8_vulkan_7197BF52A22CAFD7 = vm.const.ref.rodata @_utf8_vulkan_7197BF52A22CAFD7 : !vm.ref<!iree.byte_buffer> | |
%0 = vm.call @hal.device.match.id(%ref, %_utf8_vulkan_7197BF52A22CAFD7) : (!vm.ref<!hal.device>, !vm.ref<!iree.byte_buffer>) -> i32 | |
vm.global.store.i32 %0, @_device_match_id_0 : i32 | |
// Step 2: descriptor set layout 0 with binding tuples (0, 7, 1) and (1, 7, 6).
%ref_0 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_1 = vm.call.variadic @hal.descriptor_set_layout.create(%ref_0, %c1, [(%zero, %c7, %c1), (%c1, %c7, %c6)]) : (!vm.ref<!hal.device>, i32, tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> | |
vm.global.store.ref %ref_1, @_descriptor_set_layout_0 : !vm.ref<!hal.descriptor_set_layout> | |
// Step 3: executable layout 0, built from the global just stored (it is
// reloaded rather than reusing %ref_1).
%_descriptor_set_layout_0 = vm.global.load.ref @_descriptor_set_layout_0 : !vm.ref<!hal.descriptor_set_layout> | |
%ref_2 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_3 = vm.call.variadic @hal.executable_layout.create(%ref_2, %zero, [%_descriptor_set_layout_0]) : (!vm.ref<!hal.device>, i32, !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> | |
vm.global.store.ref %ref_3, @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
// Step 4: descriptor set layout 1 with binding tuples (0, 7, 1) and (1, 7, 3).
%ref_4 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_5 = vm.call.variadic @hal.descriptor_set_layout.create(%ref_4, %c1, [(%zero, %c7, %c1), (%c1, %c7, %c3)]) : (!vm.ref<!hal.device>, i32, tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> | |
vm.global.store.ref %ref_5, @_descriptor_set_layout_1 : !vm.ref<!hal.descriptor_set_layout> | |
// Step 5: executable layout 1, built from descriptor set layout 1.
%_descriptor_set_layout_1 = vm.global.load.ref @_descriptor_set_layout_1 : !vm.ref<!hal.descriptor_set_layout> | |
%ref_6 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_7 = vm.call.variadic @hal.executable_layout.create(%ref_6, %zero, [%_descriptor_set_layout_1]) : (!vm.ref<!hal.device>, i32, !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> | |
vm.global.store.ref %ref_7, @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
// Step 6: executable for dispatch 0; created only when the device-match flag
// is non-zero, otherwise null is stored.
%ref_8 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%_device_match_id_0 = vm.global.load.i32 @_device_match_id_0 : i32 | |
vm.cond_br %_device_match_id_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%_pad_test_dispatch_0_vulkan_spirv_binary_spirv = vm.const.ref.rodata @_pad_test_dispatch_0_vulkan_spirv_binary_spirv : !vm.ref<!iree.byte_buffer> | |
%ref_9 = vm.call.variadic @hal.executable.create(%ref_8, %c1397773893, %_pad_test_dispatch_0_vulkan_spirv_binary_spirv, [%_executable_layout_0]) : (!vm.ref<!hal.device>, i32, !vm.ref<!iree.byte_buffer>, !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> | |
vm.br ^bb3(%ref_9 : !vm.ref<!hal.executable>) | |
^bb2: // pred: ^bb0 | |
vm.br ^bb3(%null : !vm.ref<!hal.executable>) | |
^bb3(%1: !vm.ref<!hal.executable>): // 2 preds: ^bb1, ^bb2 | |
vm.global.store.ref %1, @_executable_pad_test_dispatch_0 : !vm.ref<!hal.executable> | |
// Step 7: executable for dispatch 1; same conditional pattern as step 6,
// with the match flag reloaded from the global.
%ref_10 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%_device_match_id_0_11 = vm.global.load.i32 @_device_match_id_0 : i32 | |
vm.cond_br %_device_match_id_0_11, ^bb4, ^bb5 | |
^bb4: // pred: ^bb3 | |
%_executable_layout_1 = vm.global.load.ref @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
%_pad_test_dispatch_1_vulkan_spirv_binary_spirv = vm.const.ref.rodata @_pad_test_dispatch_1_vulkan_spirv_binary_spirv : !vm.ref<!iree.byte_buffer> | |
%ref_12 = vm.call.variadic @hal.executable.create(%ref_10, %c1397773893, %_pad_test_dispatch_1_vulkan_spirv_binary_spirv, [%_executable_layout_1]) : (!vm.ref<!hal.device>, i32, !vm.ref<!iree.byte_buffer>, !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> | |
vm.br ^bb6(%ref_12 : !vm.ref<!hal.executable>) | |
^bb5: // pred: ^bb3 | |
vm.br ^bb6(%null : !vm.ref<!hal.executable>) | |
^bb6(%2: !vm.ref<!hal.executable>): // 2 preds: ^bb4, ^bb5 | |
vm.global.store.ref %2, @_executable_pad_test_dispatch_1 : !vm.ref<!hal.executable> | |
vm.return | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Synchronous wrapper around @pad_test.
// Creates a semaphore with initial value 0, awaits value 0, calls @pad_test,
// signals the semaphore to 1 and awaits value 1. If either await returns a
// non-zero i32, the function fails with that value and the message
// "semaphore wait failed".
vm.func @pad_test$sync() attributes {iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%zero = vm.const.i32.zero : i32 | |
%c1 = vm.const.i32 1 : i32 | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_0 = vm.call @hal.semaphore.create(%ref, %zero) : (!vm.ref<!hal.device>, i32) -> !vm.ref<!hal.semaphore> | |
// First await; a non-zero result is routed to the failure block.
%0 = vm.call @hal.semaphore.await(%ref_0, %zero) : (!vm.ref<!hal.semaphore>, i32) -> i32 | |
vm.cond_br %0, ^bb2(%0 : i32), ^bb1 | |
// Run the test body, then signal and await completion value 1.
^bb1: // pred: ^bb0 | |
vm.call @pad_test() : () -> () | |
vm.call @hal.semaphore.signal(%ref_0, %c1) : (!vm.ref<!hal.semaphore>, i32) -> () | |
%1 = vm.call @hal.semaphore.await(%ref_0, %c1) : (!vm.ref<!hal.semaphore>, i32) -> i32 | |
vm.cond_br %1, ^bb2(%1 : i32), ^bb3 | |
// Shared failure block; %2 is whichever await result was non-zero.
^bb2(%2: i32): // 2 preds: ^bb0, ^bb1 | |
vm.fail %2, "semaphore wait failed" | |
^bb3: // pred: ^bb1 | |
vm.return | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Semaphore-driven wrapper around @pad_test.
//   %arg0 / %arg1: semaphore and value awaited before running.
//   %arg2 / %arg3: semaphore and value signaled after @pad_test returns.
// If the await returns a non-zero i32, the function fails with that value and
// the message "semaphore wait failed" without calling @pad_test.
vm.func @pad_test$async(%arg0: !vm.ref<!hal.semaphore>, %arg1: i32, %arg2: !vm.ref<!hal.semaphore>, %arg3: i32) { | |
%0 = vm.call @hal.semaphore.await(%arg0, %arg1) : (!vm.ref<!hal.semaphore>, i32) -> i32 | |
vm.cond_br %0, ^bb2(%0 : i32), ^bb1 | |
^bb1: // pred: ^bb0 | |
vm.call @pad_test() : () -> () | |
vm.call @hal.semaphore.signal(%arg2, %arg3) : (!vm.ref<!hal.semaphore>, i32) -> () | |
vm.return | |
^bb2(%1: i32): // pred: ^bb0 | |
vm.fail %1, "semaphore wait failed" | |
} | |
// *** IR Dump After Canonicalizer *** | |
// Test body: records two dispatches into one command buffer, submits it and
// waits, then compares the 108-byte result buffer against the @pad_test_const
// rodata with check.expect_eq, both viewed with shape [3, 9].
// Fails with code 2 ("unreachable location reached") when the
// @_device_match_id_0 global is zero.
vm.func @pad_test() attributes {noinline} { | |
%c-1 = vm.const.i32 -1 : i32 | |
%c4 = vm.const.i32 4 : i32 | |
%c24 = vm.const.i32 24 : i32 | |
%c108 = vm.const.i32 108 : i32 | |
%c9 = vm.const.i32 9 : i32 | |
// 16777248 == 0x01000020.
// NOTE(review): presumably the encoded 32-bit integer element type -- confirm.
%c16777248 = vm.const.i32 16777248 : i32 | |
%c50 = vm.const.i32 50 : i32 | |
%c15 = vm.const.i32 15 : i32 | |
%c1 = vm.const.i32 1 : i32 | |
%c3 = vm.const.i32 3 : i32 | |
%c20 = vm.const.i32 20 : i32 | |
%c5 = vm.const.i32 5 : i32 | |
%zero = vm.const.i32.zero : i32 | |
%c2 = vm.const.i32 2 : i32 | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_0 = vm.call @hal.device.allocator(%ref) : (!vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> | |
// Wrap the three rodata constants as HAL buffers (memory_types = 50,
// buffer_usage = 15, offset = 0, length = -1).
// NOTE(review): length -1 presumably means "whole buffer" -- confirm.
// %ref_1 wraps @pad_test_const and is the expected side of the final comparison.
%pad_test_const = vm.const.ref.rodata @pad_test_const : !vm.ref<!iree.byte_buffer> | |
%ref_1 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50, %c15, %pad_test_const, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
%pad_test_const_0 = vm.const.ref.rodata @pad_test_const_0 : !vm.ref<!iree.byte_buffer> | |
%ref_2 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50, %c15, %pad_test_const_0, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
%pad_test_const_1 = vm.const.ref.rodata @pad_test_const_1 : !vm.ref<!iree.byte_buffer> | |
%ref_3 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50, %c15, %pad_test_const_1, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
// %0 is the buffer over @pad_test_const_1 and %1 the buffer over
// @pad_test_const_0; both pass through iree.do_not_optimize before use.
%0 = iree.do_not_optimize(%ref_3) : !vm.ref<!hal.buffer> | |
%1 = iree.do_not_optimize(%ref_2) : !vm.ref<!hal.buffer> | |
// Result buffer: allocation_size = 108 bytes, bound at ordinal 1 in both dispatches.
%ref_4 = vm.call @hal.allocator.allocate(%ref_0, %c50, %c15, %c108) : (!vm.ref<!hal.allocator>, i32, i32, i32) -> !vm.ref<!hal.buffer> | |
// Command buffer with modes = 1 and command_categories = 3.
%ref_5 = vm.call @hal.command_buffer.create(%ref, %c1, %c3) : (!vm.ref<!hal.device>, i32, i32) -> !vm.ref<!hal.command_buffer> | |
vm.call @hal.command_buffer.begin(%ref_5) : (!vm.ref<!hal.command_buffer>) -> () | |
// Bindings for dispatch 0: ordinal 0 -> %1 (0, 4), ordinal 1 -> %ref_4 (0, 108).
// NOTE(review): the trailing pair is presumably (offset, length) in bytes -- confirm.
// This push is recorded before the device-match branch below.
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
vm.call.variadic @hal.command_buffer.push_descriptor_set(%ref_5, %_executable_layout_0, %zero, [(%zero, %1, %zero, %c4), (%c1, %ref_4, %zero, %c108)]) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable_layout>, i32, tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) | |
%_device_match_id_0 = vm.global.load.i32 @_device_match_id_0 : i32 | |
vm.cond_br %_device_match_id_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
// Dispatch 0.
// NOTE(review): (%zero, %c1, %c1, %c1) is presumably entry point 0 with a
// 1x1x1 workgroup count -- confirm against the dispatch import declaration.
%_executable_pad_test_dispatch_0 = vm.global.load.ref @_executable_pad_test_dispatch_0 : !vm.ref<!hal.executable> | |
vm.call @hal.command_buffer.dispatch(%ref_5, %_executable_pad_test_dispatch_0, %zero, %c1, %c1, %c1) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32) -> () | |
// Barrier between the dispatches: source_stage_mask = 20, target_stage_mask = 5, flags = 0.
vm.call @hal.command_buffer.execution_barrier(%ref_5, %c20, %c5, %zero) : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> () | |
// Bindings for dispatch 1: ordinal 0 -> %0 (0, 24), ordinal 1 -> %ref_4 (0, 108).
%_executable_layout_1 = vm.global.load.ref @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
vm.call.variadic @hal.command_buffer.push_descriptor_set(%ref_5, %_executable_layout_1, %zero, [(%zero, %0, %zero, %c24), (%c1, %ref_4, %zero, %c108)]) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable_layout>, i32, tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) | |
%_executable_pad_test_dispatch_1 = vm.global.load.ref @_executable_pad_test_dispatch_1 : !vm.ref<!hal.executable> | |
vm.call @hal.command_buffer.dispatch(%ref_5, %_executable_pad_test_dispatch_1, %zero, %c1, %c1, %c1) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.execution_barrier(%ref_5, %c20, %c5, %zero) : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.end(%ref_5) : (!vm.ref<!hal.command_buffer>) -> () | |
vm.call @hal.ex.submit_and_wait(%ref, %ref_5) : (!vm.ref<!hal.device>, !vm.ref<!hal.command_buffer>) -> () | |
// View the result (%ref_4) and the expected constant (%ref_1) with shape
// [3, 9] and compare them.
%ref_6 = vm.call.variadic @hal.buffer_view.create(%ref_4, %c16777248, [%c3, %c9]) : (!vm.ref<!hal.buffer>, i32, i32 ...) -> !vm.ref<!hal.buffer_view> | |
%ref_7 = vm.call.variadic @hal.buffer_view.create(%ref_1, %c16777248, [%c3, %c9]) : (!vm.ref<!hal.buffer>, i32, i32 ...) -> !vm.ref<!hal.buffer_view> | |
vm.call @check.expect_eq(%ref_6, %ref_7) : (!vm.ref<!hal.buffer_view>, !vm.ref<!hal.buffer_view>) -> () | |
vm.return | |
// Device-match flag was zero: fail with code 2.
^bb2: // pred: ^bb0 | |
vm.fail %c2, "unreachable location reached" | |
} | |
// *** IR Dump After Inliner *** | |
module { | |
vm.module @module { | |
vm.global.i32 @_device_match_id_0 mutable : i32 | |
vm.rodata @_utf8_vulkan_7197BF52A22CAFD7 dense<[118, 117, 108, 107, 97, 110, 42]> : vector<7xi8> | |
vm.global.ref @_descriptor_set_layout_0 mutable : !vm.ref<!hal.descriptor_set_layout> | |
vm.global.ref @_executable_layout_0 mutable : !vm.ref<!hal.executable_layout> | |
vm.global.ref @_descriptor_set_layout_1 mutable : !vm.ref<!hal.descriptor_set_layout> | |
vm.global.ref @_executable_layout_1 mutable : !vm.ref<!hal.executable_layout> | |
vm.global.ref @_executable_pad_test_dispatch_0 mutable : !vm.ref<!hal.executable> | |
vm.rodata @_pad_test_dispatch_0_vulkan_spirv_binary_spirv dense<"0x080000005350564588FAFFFF08000000240000000100000004000000130000007061645F746573745F64697370617463685F300052010000030223070000010016000000300000000000000011000200010000000A000B005350565F4B48525F73746F726167655F6275666665725F73746F726167655F636C617373000000000B00060027000000474C534C2E7374642E343530000000000E00030000000000010000000F000A0005000000120000007061645F746573745F64697370617463685F3000050000000400000010000600120000001100000020000000010000000100000005000B00040000005F5F6275696C74696E5F7661725F4C6F63616C496E766F636174696F6E49645F5F00000005000900050000005F5F6275696C74696E5F7661725F576F726B67726F757049645F5F00050009000A0000005F5F7265736F757263655F7661725F3138333936383030305F5F0000050009000F0000005F5F7265736F757263655F7661725F3138333735323534345F5F000005000700120000007061645F746573745F64697370617463685F300047000400040000000B0000001B00000047000400050000000B0000001A000000470004000800000006000000040000004800050007000000000000002300000000000000470003000700000002000000470004000A0000002100000001000000470004000A0000002200000000000000470004000D0000000600000004000000480005000C000000000000002300000000000000470003000C00000002000000470004000F0000002100000000000000470004000F00000022000000000000001500040003000000200000000000000017000400020000000300000003000000200004000100000001000000020000003B0004000100000004000000010000003B0004000100000005000000010000002B00040003000000090000001B0000001C0004000800000003000000090000001E000300070000000800000020000400060000000C000000070000003B000400060000000A0000000C0000002B000400030000000E000000010000001C0004000D000000030000000E0000001E0003000C0000000D000000200004000B0000000C0000000C0000003B0004000B0000000F0000000C00000013000200110000002100030010000000110000002B0004000300000014000000200000002B0004000300000015000000000000002B00040003000000160000000900000020000400170000000C0000000300000014000200200000003600050011000000120000000000000010000000F8000200130000004100060017000000180000000F000
00015000000150000003D0004000300000019000000180000003D000400020000001A0000000500000051000500030000001B0000001A000000000000003D000400020000001C0000000400000051000500030000001D0000001C0000000000000084000500030000001E0000001B0000001400000080000500030000001F0000001E0000001D000000B100050020000000210000001F00000009000000F70003002400000000000000FA000400210000002300000024000000F8000200230000008700050003000000250000001F000000160000000C000600030000002600000027000000050000001F0000000C00060003000000280000002700000005000000160000008900050003000000290000002600000028000000AA000500200000002A0000001F000000260000007E000400030000002B00000029000000A9000600030000002C0000002A000000290000002B00000084000500030000002D000000250000001600000080000500030000002E0000002D0000002C00000041000600170000002F0000000A000000150000002E0000003E0003002F00000019000000F900020024000000F800020024000000FD0001003800010008000C0004000800"> : vector<1416xi8> | |
vm.global.ref @_executable_pad_test_dispatch_1 mutable : !vm.ref<!hal.executable> | |
vm.rodata @_pad_test_dispatch_1_vulkan_spirv_binary_spirv dense<"0x08000000535056452CFAFFFF08000000240000000100000004000000130000007061645F746573745F64697370617463685F310069010000030223070000010016000000350000000000000011000200010000000A000B005350565F4B48525F73746F726167655F6275666665725F73746F726167655F636C617373000000000B00060026000000474C534C2E7374642E343530000000000E00030000000000010000000F000A0005000000120000007061645F746573745F64697370617463685F3100050000000400000010000600120000001100000020000000010000000100000005000B00040000005F5F6275696C74696E5F7661725F4C6F63616C496E766F636174696F6E49645F5F00000005000900050000005F5F6275696C74696E5F7661725F576F726B67726F757049645F5F00050009000A0000005F5F7265736F757263655F7661725F3138343135323936305F5F0000050009000F0000005F5F7265736F757263655F7661725F3138333735323534345F5F000005000700120000007061645F746573745F64697370617463685F310047000400040000000B0000001B00000047000400050000000B0000001A000000470004000800000006000000040000004800050007000000000000002300000000000000470003000700000002000000470004000A0000002100000001000000470004000A0000002200000000000000470004000D0000000600000004000000480005000C000000000000002300000000000000470003000C00000002000000470004000F0000002100000000000000470004000F00000022000000000000001500040003000000200000000000000017000400020000000300000003000000200004000100000001000000020000003B0004000100000004000000010000003B0004000100000005000000010000002B00040003000000090000001B0000001C0004000800000003000000090000001E000300070000000800000020000400060000000C000000070000003B000400060000000A0000000C0000002B000400030000000E000000060000001C0004000D000000030000000E0000001E0003000C0000000D000000200004000B0000000C0000000C0000003B0004000B0000000F0000000C00000013000200110000002100030010000000110000002B0004000300000014000000010000002B0004000300000015000000200000002B0004000300000016000000030000002B0004000300000017000000000000002B000400030000001800000009000000140002001F000000200004002E0000000C00000003000000360005001100000012000
0000000000010000000F8000200130000003D00040002000000190000000500000051000500030000001A00000019000000000000003D000400020000001B0000000400000051000500030000001C0000001B0000000000000084000500030000001D0000001A0000001500000080000500030000001E0000001D0000001C000000B10005001F000000200000001E0000000E000000F70003002300000000000000FA000400200000002200000023000000F8000200220000008700050003000000240000001E000000160000000C000600030000002500000026000000050000001E0000000C00060003000000270000002600000005000000160000008900050003000000280000002500000027000000AA0005001F000000290000001E000000250000007E000400030000002A00000028000000A9000600030000002B00000029000000280000002A00000084000500030000002C000000240000001600000080000500030000002D0000002C0000002B000000410006002E0000002F0000000F000000170000002D0000003D00040003000000300000002F0000008000050003000000310000002B0000001400000084000500030000003200000024000000180000008000050003000000330000003200000031000000410006002E000000340000000A00000017000000330000003E0003003400000030000000F900020023000000F800020023000000FD0001003800010008000C0004000800"> : vector<1508xi8> | |
vm.rodata @pad_test_const dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
vm.rodata @pad_test_const_0 dense<0> : tensor<i32> | |
vm.rodata @pad_test_const_1 dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
// Test body: records two dispatches into one command buffer, submits it and
// waits, then compares the 108-byte result buffer against the @pad_test_const
// rodata (declared above as tensor<3x9xi32>) with check.expect_eq.
// Fails with code 2 ("unreachable location reached") when the
// @_device_match_id_0 global is zero.
vm.func @pad_test() attributes {noinline} { | |
%c-1 = vm.const.i32 -1 : i32 | |
%c4 = vm.const.i32 4 : i32 | |
%c24 = vm.const.i32 24 : i32 | |
%c108 = vm.const.i32 108 : i32 | |
%c9 = vm.const.i32 9 : i32 | |
// 16777248 == 0x01000020.
// NOTE(review): presumably the encoded 32-bit integer element type -- confirm.
%c16777248 = vm.const.i32 16777248 : i32 | |
%c50 = vm.const.i32 50 : i32 | |
%c15 = vm.const.i32 15 : i32 | |
%c1 = vm.const.i32 1 : i32 | |
%c3 = vm.const.i32 3 : i32 | |
%c20 = vm.const.i32 20 : i32 | |
%c5 = vm.const.i32 5 : i32 | |
%zero = vm.const.i32.zero : i32 | |
%c2 = vm.const.i32 2 : i32 | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_0 = vm.call @hal.device.allocator(%ref) : (!vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> | |
// Wrap the three rodata constants as HAL buffers (memory_types = 50,
// buffer_usage = 15, offset = 0, length = -1).
// NOTE(review): length -1 presumably means "whole buffer" -- confirm.
// %ref_1 wraps @pad_test_const and is the expected side of the final comparison.
%pad_test_const = vm.const.ref.rodata @pad_test_const : !vm.ref<!iree.byte_buffer> | |
%ref_1 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50, %c15, %pad_test_const, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
%pad_test_const_0 = vm.const.ref.rodata @pad_test_const_0 : !vm.ref<!iree.byte_buffer> | |
%ref_2 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50, %c15, %pad_test_const_0, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
%pad_test_const_1 = vm.const.ref.rodata @pad_test_const_1 : !vm.ref<!iree.byte_buffer> | |
%ref_3 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50, %c15, %pad_test_const_1, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
// %0 is the buffer over @pad_test_const_1 (tensor<2x3xi32>) and %1 the buffer
// over @pad_test_const_0 (tensor<i32>); both pass through iree.do_not_optimize.
%0 = iree.do_not_optimize(%ref_3) : !vm.ref<!hal.buffer> | |
%1 = iree.do_not_optimize(%ref_2) : !vm.ref<!hal.buffer> | |
// Result buffer: allocation_size = 108 bytes (3 * 9 * 4), bound at ordinal 1
// in both dispatches.
%ref_4 = vm.call @hal.allocator.allocate(%ref_0, %c50, %c15, %c108) : (!vm.ref<!hal.allocator>, i32, i32, i32) -> !vm.ref<!hal.buffer> | |
// Command buffer with modes = 1 and command_categories = 3.
%ref_5 = vm.call @hal.command_buffer.create(%ref, %c1, %c3) : (!vm.ref<!hal.device>, i32, i32) -> !vm.ref<!hal.command_buffer> | |
vm.call @hal.command_buffer.begin(%ref_5) : (!vm.ref<!hal.command_buffer>) -> () | |
// Bindings for dispatch 0: ordinal 0 -> %1 (0, 4), ordinal 1 -> %ref_4 (0, 108).
// NOTE(review): the trailing pair is presumably (offset, length) in bytes -- confirm.
// This push is recorded before the device-match branch below.
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
vm.call.variadic @hal.command_buffer.push_descriptor_set(%ref_5, %_executable_layout_0, %zero, [(%zero, %1, %zero, %c4), (%c1, %ref_4, %zero, %c108)]) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable_layout>, i32, tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) | |
%_device_match_id_0 = vm.global.load.i32 @_device_match_id_0 : i32 | |
vm.cond_br %_device_match_id_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
// Dispatch 0.
// NOTE(review): (%zero, %c1, %c1, %c1) is presumably entry point 0 with a
// 1x1x1 workgroup count -- confirm against the dispatch import declaration.
%_executable_pad_test_dispatch_0 = vm.global.load.ref @_executable_pad_test_dispatch_0 : !vm.ref<!hal.executable> | |
vm.call @hal.command_buffer.dispatch(%ref_5, %_executable_pad_test_dispatch_0, %zero, %c1, %c1, %c1) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32) -> () | |
// Barrier between the dispatches: source_stage_mask = 20, target_stage_mask = 5, flags = 0.
vm.call @hal.command_buffer.execution_barrier(%ref_5, %c20, %c5, %zero) : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> () | |
// Bindings for dispatch 1: ordinal 0 -> %0 (0, 24), ordinal 1 -> %ref_4 (0, 108).
%_executable_layout_1 = vm.global.load.ref @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
vm.call.variadic @hal.command_buffer.push_descriptor_set(%ref_5, %_executable_layout_1, %zero, [(%zero, %0, %zero, %c24), (%c1, %ref_4, %zero, %c108)]) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable_layout>, i32, tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) | |
%_executable_pad_test_dispatch_1 = vm.global.load.ref @_executable_pad_test_dispatch_1 : !vm.ref<!hal.executable> | |
vm.call @hal.command_buffer.dispatch(%ref_5, %_executable_pad_test_dispatch_1, %zero, %c1, %c1, %c1) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.execution_barrier(%ref_5, %c20, %c5, %zero) : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.end(%ref_5) : (!vm.ref<!hal.command_buffer>) -> () | |
vm.call @hal.ex.submit_and_wait(%ref, %ref_5) : (!vm.ref<!hal.device>, !vm.ref<!hal.command_buffer>) -> () | |
// View the result (%ref_4) and the expected constant (%ref_1) with shape
// [3, 9] and compare them.
%ref_6 = vm.call.variadic @hal.buffer_view.create(%ref_4, %c16777248, [%c3, %c9]) : (!vm.ref<!hal.buffer>, i32, i32 ...) -> !vm.ref<!hal.buffer_view> | |
%ref_7 = vm.call.variadic @hal.buffer_view.create(%ref_1, %c16777248, [%c3, %c9]) : (!vm.ref<!hal.buffer>, i32, i32 ...) -> !vm.ref<!hal.buffer_view> | |
vm.call @check.expect_eq(%ref_6, %ref_7) : (!vm.ref<!hal.buffer_view>, !vm.ref<!hal.buffer_view>) -> () | |
vm.return | |
// Device-match flag was zero: fail with code 2.
^bb2: // pred: ^bb0 | |
vm.fail %c2, "unreachable location reached" | |
} | |
vm.export @pad_test as("pad_test$raw") | |
// Semaphore-driven wrapper around @pad_test.
//   %arg0 / %arg1: semaphore and value awaited before running.
//   %arg2 / %arg3: semaphore and value signaled after @pad_test returns.
// If the await returns a non-zero i32, the function fails with that value and
// the message "semaphore wait failed" without calling @pad_test.
vm.func @pad_test$async(%arg0: !vm.ref<!hal.semaphore>, %arg1: i32, %arg2: !vm.ref<!hal.semaphore>, %arg3: i32) { | |
%0 = vm.call @hal.semaphore.await(%arg0, %arg1) : (!vm.ref<!hal.semaphore>, i32) -> i32 | |
vm.cond_br %0, ^bb2(%0 : i32), ^bb1 | |
^bb1: // pred: ^bb0 | |
vm.call @pad_test() : () -> () | |
vm.call @hal.semaphore.signal(%arg2, %arg3) : (!vm.ref<!hal.semaphore>, i32) -> () | |
vm.return | |
^bb2(%1: i32): // pred: ^bb0 | |
vm.fail %1, "semaphore wait failed" | |
} | |
vm.export @pad_test$async | |
// Synchronous wrapper around @pad_test.
// Creates a semaphore with initial value 0, awaits value 0, calls @pad_test,
// signals the semaphore to 1 and awaits value 1. If either await returns a
// non-zero i32, the function fails with that value and the message
// "semaphore wait failed".
vm.func @pad_test$sync() attributes {iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%zero = vm.const.i32.zero : i32 | |
%c1 = vm.const.i32 1 : i32 | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_0 = vm.call @hal.semaphore.create(%ref, %zero) : (!vm.ref<!hal.device>, i32) -> !vm.ref<!hal.semaphore> | |
// First await; a non-zero result is routed to the failure block.
%0 = vm.call @hal.semaphore.await(%ref_0, %zero) : (!vm.ref<!hal.semaphore>, i32) -> i32 | |
vm.cond_br %0, ^bb2(%0 : i32), ^bb1 | |
// Run the test body, then signal and await completion value 1.
^bb1: // pred: ^bb0 | |
vm.call @pad_test() : () -> () | |
vm.call @hal.semaphore.signal(%ref_0, %c1) : (!vm.ref<!hal.semaphore>, i32) -> () | |
%1 = vm.call @hal.semaphore.await(%ref_0, %c1) : (!vm.ref<!hal.semaphore>, i32) -> i32 | |
vm.cond_br %1, ^bb2(%1 : i32), ^bb3 | |
// Shared failure block; %2 is whichever await result was non-zero.
^bb2(%2: i32): // 2 preds: ^bb0, ^bb1 | |
vm.fail %2, "semaphore wait failed" | |
^bb3: // pred: ^bb1 | |
vm.return | |
} | |
vm.export @pad_test$sync as("pad_test") | |
vm.import @check.expect_true(%operand : i32) attributes {sym_visibility = "private"} | |
vm.import @check.expect_false(%operand : i32) attributes {sym_visibility = "private"} | |
vm.import @check.expect_all_true(%operand : !vm.ref<!hal.buffer_view>) attributes {sym_visibility = "private"} | |
vm.import @check.expect_eq(%lhs : !vm.ref<!hal.buffer_view>, %rhs : !vm.ref<!hal.buffer_view>) attributes {sym_visibility = "private"} | |
vm.import @check.expect_almost_eq(%lhs : !vm.ref<!hal.buffer_view>, %rhs : !vm.ref<!hal.buffer_view>) attributes {sym_visibility = "private"} | |
vm.import @hal.ex.shared_device() -> !vm.ref<!hal.device> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.ex.submit_and_wait(%device : !vm.ref<!hal.device>, %command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.allocate(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %allocation_size : i32) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.wrap.byte_buffer(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %source : !vm.ref<!iree.byte_buffer>, %offset : i32, %length : i32) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.allocator(%buffer : !vm.ref<!hal.buffer>) -> !vm.ref<!hal.allocator> attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.subspan(%source_buffer : !vm.ref<!hal.buffer>, %source_offset : i32, %length : i32) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.fill(%target_buffer : !vm.ref<!hal.buffer>, %target_offset : i32, %length : i32, %pattern : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.load(%source_buffer : !vm.ref<!hal.buffer>, %source_offset : i32, %length : i32) -> i32 attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.store(%value : i32, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i32, %length : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer_view.create(%buffer : !vm.ref<!hal.buffer>, %element_type : i32, %shape : i32 ...) -> !vm.ref<!hal.buffer_view> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.buffer(%buffer_view : !vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.byte_length(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.element_type(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.rank(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.dim(%buffer_view : !vm.ref<!hal.buffer_view>, %index : i32) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.trace(%key : !vm.ref<!iree.byte_buffer>, %operands : !vm.ref<!hal.buffer_view> ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.create(%device : !vm.ref<!hal.device>, %modes : i32, %command_categories : i32) -> !vm.ref<!hal.command_buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.begin(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.end(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.execution_barrier(%command_buffer : !vm.ref<!hal.command_buffer>, %source_stage_mask : i32, %target_stage_mask : i32, %flags : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.fill_buffer(%command_buffer : !vm.ref<!hal.command_buffer>, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i32, %length : i32, %pattern : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.copy_buffer(%command_buffer : !vm.ref<!hal.command_buffer>, %source_buffer : !vm.ref<!hal.buffer>, %source_offset : i32, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i32, %length : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.push_constants(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %offset : i32, %values : i32 ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.push_descriptor_set(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %set : i32, %bindings : tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.bind_descriptor_set(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %set : i32, %descriptor_set : !vm.ref<!hal.descriptor_set>, %dynamic_offsets : i32 ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.dispatch(%command_buffer : !vm.ref<!hal.command_buffer>, %executable : !vm.ref<!hal.executable>, %entry_point : i32, %workgroup_x : i32, %workgroup_y : i32, %workgroup_z : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.dispatch.indirect(%command_buffer : !vm.ref<!hal.command_buffer>, %executable : !vm.ref<!hal.executable>, %entry_point : i32, %workgroups_buffer : !vm.ref<!hal.buffer>, %workgroups_offset : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.descriptor_set.create(%device : !vm.ref<!hal.device>, %set_layout : !vm.ref<!hal.descriptor_set_layout>, %bindings : tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) -> !vm.ref<!hal.descriptor_set> attributes {sym_visibility = "private"} | |
vm.import @hal.descriptor_set_layout.create(%device : !vm.ref<!hal.device>, %usage_type : i32, %bindings : tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.device.allocator(%device : !vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.device.match.id(%device : !vm.ref<!hal.device>, %pattern : !vm.ref<!iree.byte_buffer>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.executable.create(%device : !vm.ref<!hal.device>, %executable_format : i32, %executable_data : !vm.ref<!iree.byte_buffer>, %executable_layouts : !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.executable_layout.create(%device : !vm.ref<!hal.device>, %push_constants : i32, %set_layouts : !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.semaphore.create(%device : !vm.ref<!hal.device>, %initial_value : i32) -> !vm.ref<!hal.semaphore> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.semaphore.query(%semaphore : !vm.ref<!hal.semaphore>) -> (i32, i32) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.signal(%semaphore : !vm.ref<!hal.semaphore>, %new_value : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.fail(%semaphore : !vm.ref<!hal.semaphore>, %status : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.await(%semaphore : !vm.ref<!hal.semaphore>, %min_value : i32) -> i32 attributes {sym_visibility = "private"} | |
// Module initializer: fills the module-level globals that the dispatch code
// reads later. In order it
//   1. asks the shared device whether it matches the pattern held in the
//      @_utf8_vulkan_7197BF52A22CAFD7 rodata and caches the i32 answer,
//   2. creates two descriptor set layouts and one executable layout per
//      descriptor set layout,
//   3. creates one executable per SPIR-V rodata blob when the cached match
//      value is non-zero, and stores a null executable ref otherwise.
// Takes no arguments and returns nothing; all results are written to globals.
vm.func @__init() { | |
%c6 = vm.const.i32 6 : i32 | |
%c1 = vm.const.i32 1 : i32 | |
%c7 = vm.const.i32 7 : i32 | |
%c3 = vm.const.i32 3 : i32 | |
%zero = vm.const.i32.zero : i32 | |
// 1397773893 == 0x53505645 (ASCII bytes 'S','P','V','E'); it is passed below
// as the %executable_format operand of hal.executable.create.
%c1397773893 = vm.const.i32 1397773893 : i32 | |
// Null executable ref used on the "device did not match" paths.
%null = vm.const.ref.zero : !vm.ref<!hal.executable> | |
// --- Step 1: cache the device match result in @_device_match_id_0.
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%_utf8_vulkan_7197BF52A22CAFD7 = vm.const.ref.rodata @_utf8_vulkan_7197BF52A22CAFD7 : !vm.ref<!iree.byte_buffer> | |
%0 = vm.call @hal.device.match.id(%ref, %_utf8_vulkan_7197BF52A22CAFD7) : (!vm.ref<!hal.device>, !vm.ref<!iree.byte_buffer>) -> i32 | |
vm.global.store.i32 %0, @_device_match_id_0 : i32 | |
// --- Step 2a: descriptor set layout 0 (usage_type 1, two binding tuples)
// and the executable layout built from it (push_constants operand is 0).
// NOTE(review): the meaning of the three i32 fields in each binding tuple is
// not named by the import signature — confirm against the HAL module.
%ref_0 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_1 = vm.call.variadic @hal.descriptor_set_layout.create(%ref_0, %c1, [(%zero, %c7, %c1), (%c1, %c7, %c6)]) : (!vm.ref<!hal.device>, i32, tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> | |
vm.global.store.ref %ref_1, @_descriptor_set_layout_0 : !vm.ref<!hal.descriptor_set_layout> | |
// The layout is reloaded from the global that was just stored.
%_descriptor_set_layout_0 = vm.global.load.ref @_descriptor_set_layout_0 : !vm.ref<!hal.descriptor_set_layout> | |
%ref_2 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_3 = vm.call.variadic @hal.executable_layout.create(%ref_2, %zero, [%_descriptor_set_layout_0]) : (!vm.ref<!hal.device>, i32, !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> | |
vm.global.store.ref %ref_3, @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
// --- Step 2b: descriptor set layout 1 and executable layout 1. Identical to
// step 2a except that the last field of the second binding tuple is 3, not 6.
%ref_4 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_5 = vm.call.variadic @hal.descriptor_set_layout.create(%ref_4, %c1, [(%zero, %c7, %c1), (%c1, %c7, %c3)]) : (!vm.ref<!hal.device>, i32, tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> | |
vm.global.store.ref %ref_5, @_descriptor_set_layout_1 : !vm.ref<!hal.descriptor_set_layout> | |
%_descriptor_set_layout_1 = vm.global.load.ref @_descriptor_set_layout_1 : !vm.ref<!hal.descriptor_set_layout> | |
%ref_6 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_7 = vm.call.variadic @hal.executable_layout.create(%ref_6, %zero, [%_descriptor_set_layout_1]) : (!vm.ref<!hal.device>, i32, !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> | |
vm.global.store.ref %ref_7, @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
// --- Step 3a: executable for dispatch 0, created only when the cached
// device match value is non-zero.
%ref_8 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%_device_match_id_0 = vm.global.load.i32 @_device_match_id_0 : i32 | |
vm.cond_br %_device_match_id_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%_pad_test_dispatch_0_vulkan_spirv_binary_spirv = vm.const.ref.rodata @_pad_test_dispatch_0_vulkan_spirv_binary_spirv : !vm.ref<!iree.byte_buffer> | |
%ref_9 = vm.call.variadic @hal.executable.create(%ref_8, %c1397773893, %_pad_test_dispatch_0_vulkan_spirv_binary_spirv, [%_executable_layout_0]) : (!vm.ref<!hal.device>, i32, !vm.ref<!iree.byte_buffer>, !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> | |
vm.br ^bb3(%ref_9 : !vm.ref<!hal.executable>) | |
^bb2: // pred: ^bb0 | |
vm.br ^bb3(%null : !vm.ref<!hal.executable>) | |
// ^bb3 receives either the created executable or the null ref.
^bb3(%1: !vm.ref<!hal.executable>): // 2 preds: ^bb1, ^bb2 | |
vm.global.store.ref %1, @_executable_pad_test_dispatch_0 : !vm.ref<!hal.executable> | |
// --- Step 3b: same pattern for dispatch 1, using executable layout 1.
%ref_10 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%_device_match_id_0_11 = vm.global.load.i32 @_device_match_id_0 : i32 | |
vm.cond_br %_device_match_id_0_11, ^bb4, ^bb5 | |
^bb4: // pred: ^bb3 | |
%_executable_layout_1 = vm.global.load.ref @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
%_pad_test_dispatch_1_vulkan_spirv_binary_spirv = vm.const.ref.rodata @_pad_test_dispatch_1_vulkan_spirv_binary_spirv : !vm.ref<!iree.byte_buffer> | |
%ref_12 = vm.call.variadic @hal.executable.create(%ref_10, %c1397773893, %_pad_test_dispatch_1_vulkan_spirv_binary_spirv, [%_executable_layout_1]) : (!vm.ref<!hal.device>, i32, !vm.ref<!iree.byte_buffer>, !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> | |
vm.br ^bb6(%ref_12 : !vm.ref<!hal.executable>) | |
^bb5: // pred: ^bb3 | |
vm.br ^bb6(%null : !vm.ref<!hal.executable>) | |
^bb6(%2: !vm.ref<!hal.executable>): // 2 preds: ^bb4, ^bb5 | |
vm.global.store.ref %2, @_executable_pad_test_dispatch_1 : !vm.ref<!hal.executable> | |
vm.return | |
} | |
vm.export @__init | |
} | |
} | |
// *** IR Dump After CSE *** | |
module { | |
vm.module @module { | |
vm.global.i32 @_device_match_id_0 mutable : i32 | |
vm.rodata @_utf8_vulkan_7197BF52A22CAFD7 dense<[118, 117, 108, 107, 97, 110, 42]> : vector<7xi8> | |
vm.global.ref @_descriptor_set_layout_0 mutable : !vm.ref<!hal.descriptor_set_layout> | |
vm.global.ref @_executable_layout_0 mutable : !vm.ref<!hal.executable_layout> | |
vm.global.ref @_descriptor_set_layout_1 mutable : !vm.ref<!hal.descriptor_set_layout> | |
vm.global.ref @_executable_layout_1 mutable : !vm.ref<!hal.executable_layout> | |
vm.global.ref @_executable_pad_test_dispatch_0 mutable : !vm.ref<!hal.executable> | |
vm.rodata @_pad_test_dispatch_0_vulkan_spirv_binary_spirv dense<"0x080000005350564588FAFFFF08000000240000000100000004000000130000007061645F746573745F64697370617463685F300052010000030223070000010016000000300000000000000011000200010000000A000B005350565F4B48525F73746F726167655F6275666665725F73746F726167655F636C617373000000000B00060027000000474C534C2E7374642E343530000000000E00030000000000010000000F000A0005000000120000007061645F746573745F64697370617463685F3000050000000400000010000600120000001100000020000000010000000100000005000B00040000005F5F6275696C74696E5F7661725F4C6F63616C496E766F636174696F6E49645F5F00000005000900050000005F5F6275696C74696E5F7661725F576F726B67726F757049645F5F00050009000A0000005F5F7265736F757263655F7661725F3138333936383030305F5F0000050009000F0000005F5F7265736F757263655F7661725F3138333735323534345F5F000005000700120000007061645F746573745F64697370617463685F300047000400040000000B0000001B00000047000400050000000B0000001A000000470004000800000006000000040000004800050007000000000000002300000000000000470003000700000002000000470004000A0000002100000001000000470004000A0000002200000000000000470004000D0000000600000004000000480005000C000000000000002300000000000000470003000C00000002000000470004000F0000002100000000000000470004000F00000022000000000000001500040003000000200000000000000017000400020000000300000003000000200004000100000001000000020000003B0004000100000004000000010000003B0004000100000005000000010000002B00040003000000090000001B0000001C0004000800000003000000090000001E000300070000000800000020000400060000000C000000070000003B000400060000000A0000000C0000002B000400030000000E000000010000001C0004000D000000030000000E0000001E0003000C0000000D000000200004000B0000000C0000000C0000003B0004000B0000000F0000000C00000013000200110000002100030010000000110000002B0004000300000014000000200000002B0004000300000015000000000000002B00040003000000160000000900000020000400170000000C0000000300000014000200200000003600050011000000120000000000000010000000F8000200130000004100060017000000180000000F000
00015000000150000003D0004000300000019000000180000003D000400020000001A0000000500000051000500030000001B0000001A000000000000003D000400020000001C0000000400000051000500030000001D0000001C0000000000000084000500030000001E0000001B0000001400000080000500030000001F0000001E0000001D000000B100050020000000210000001F00000009000000F70003002400000000000000FA000400210000002300000024000000F8000200230000008700050003000000250000001F000000160000000C000600030000002600000027000000050000001F0000000C00060003000000280000002700000005000000160000008900050003000000290000002600000028000000AA000500200000002A0000001F000000260000007E000400030000002B00000029000000A9000600030000002C0000002A000000290000002B00000084000500030000002D000000250000001600000080000500030000002E0000002D0000002C00000041000600170000002F0000000A000000150000002E0000003E0003002F00000019000000F900020024000000F800020024000000FD0001003800010008000C0004000800"> : vector<1416xi8> | |
vm.global.ref @_executable_pad_test_dispatch_1 mutable : !vm.ref<!hal.executable> | |
vm.rodata @_pad_test_dispatch_1_vulkan_spirv_binary_spirv dense<"0x08000000535056452CFAFFFF08000000240000000100000004000000130000007061645F746573745F64697370617463685F310069010000030223070000010016000000350000000000000011000200010000000A000B005350565F4B48525F73746F726167655F6275666665725F73746F726167655F636C617373000000000B00060026000000474C534C2E7374642E343530000000000E00030000000000010000000F000A0005000000120000007061645F746573745F64697370617463685F3100050000000400000010000600120000001100000020000000010000000100000005000B00040000005F5F6275696C74696E5F7661725F4C6F63616C496E766F636174696F6E49645F5F00000005000900050000005F5F6275696C74696E5F7661725F576F726B67726F757049645F5F00050009000A0000005F5F7265736F757263655F7661725F3138343135323936305F5F0000050009000F0000005F5F7265736F757263655F7661725F3138333735323534345F5F000005000700120000007061645F746573745F64697370617463685F310047000400040000000B0000001B00000047000400050000000B0000001A000000470004000800000006000000040000004800050007000000000000002300000000000000470003000700000002000000470004000A0000002100000001000000470004000A0000002200000000000000470004000D0000000600000004000000480005000C000000000000002300000000000000470003000C00000002000000470004000F0000002100000000000000470004000F00000022000000000000001500040003000000200000000000000017000400020000000300000003000000200004000100000001000000020000003B0004000100000004000000010000003B0004000100000005000000010000002B00040003000000090000001B0000001C0004000800000003000000090000001E000300070000000800000020000400060000000C000000070000003B000400060000000A0000000C0000002B000400030000000E000000060000001C0004000D000000030000000E0000001E0003000C0000000D000000200004000B0000000C0000000C0000003B0004000B0000000F0000000C00000013000200110000002100030010000000110000002B0004000300000014000000010000002B0004000300000015000000200000002B0004000300000016000000030000002B0004000300000017000000000000002B000400030000001800000009000000140002001F000000200004002E0000000C00000003000000360005001100000012000
0000000000010000000F8000200130000003D00040002000000190000000500000051000500030000001A00000019000000000000003D000400020000001B0000000400000051000500030000001C0000001B0000000000000084000500030000001D0000001A0000001500000080000500030000001E0000001D0000001C000000B10005001F000000200000001E0000000E000000F70003002300000000000000FA000400200000002200000023000000F8000200220000008700050003000000240000001E000000160000000C000600030000002500000026000000050000001E0000000C00060003000000270000002600000005000000160000008900050003000000280000002500000027000000AA0005001F000000290000001E000000250000007E000400030000002A00000028000000A9000600030000002B00000029000000280000002A00000084000500030000002C000000240000001600000080000500030000002D0000002C0000002B000000410006002E0000002F0000000F000000170000002D0000003D00040003000000300000002F0000008000050003000000310000002B0000001400000084000500030000003200000024000000180000008000050003000000330000003200000031000000410006002E000000340000000A00000017000000330000003E0003003400000030000000F900020023000000F800020023000000FD0001003800010008000C0004000800"> : vector<1508xi8> | |
vm.rodata @pad_test_const dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
vm.rodata @pad_test_const_0 dense<0> : tensor<i32> | |
vm.rodata @pad_test_const_1 dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
// Raw test body for pad_test. It wraps the three @pad_test_const* rodata
// blobs as HAL buffers, allocates a 108-byte result buffer, records two
// dispatches (each followed by an execution barrier) into one command buffer,
// submits it and waits, then compares the result against @pad_test_const via
// check.expect_eq. Takes no arguments and returns nothing. If the cached
// device match value is zero it fails with status 2 instead.
vm.func @pad_test() attributes {noinline} { | |
%c-1 = vm.const.i32 -1 : i32 | |
%c4 = vm.const.i32 4 : i32 | |
%c24 = vm.const.i32 24 : i32 | |
%c108 = vm.const.i32 108 : i32 | |
%c9 = vm.const.i32 9 : i32 | |
%c16777248 = vm.const.i32 16777248 : i32 | |
%c50 = vm.const.i32 50 : i32 | |
%c15 = vm.const.i32 15 : i32 | |
%c1 = vm.const.i32 1 : i32 | |
%c3 = vm.const.i32 3 : i32 | |
%c20 = vm.const.i32 20 : i32 | |
%c5 = vm.const.i32 5 : i32 | |
%zero = vm.const.i32.zero : i32 | |
%c2 = vm.const.i32 2 : i32 | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_0 = vm.call @hal.device.allocator(%ref) : (!vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> | |
// Wrap each rodata constant as a buffer: memory_types = 50, buffer_usage = 15,
// offset = 0, length = -1.
// NOTE(review): length -1 presumably means "whole byte buffer" — confirm.
// %ref_1 <- @pad_test_const   (3x9 tensor, used as the expected result)
// %ref_2 <- @pad_test_const_0 (scalar dense<0>)
// %ref_3 <- @pad_test_const_1 (2x3 tensor [[1, 2, 3], [4, 5, 6]])
%pad_test_const = vm.const.ref.rodata @pad_test_const : !vm.ref<!iree.byte_buffer> | |
%ref_1 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50, %c15, %pad_test_const, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
%pad_test_const_0 = vm.const.ref.rodata @pad_test_const_0 : !vm.ref<!iree.byte_buffer> | |
%ref_2 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50, %c15, %pad_test_const_0, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
%pad_test_const_1 = vm.const.ref.rodata @pad_test_const_1 : !vm.ref<!iree.byte_buffer> | |
%ref_3 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50, %c15, %pad_test_const_1, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
// The 2x3 buffer (%0) and the scalar buffer (%1) are passed through
// iree.do_not_optimize before being bound to the dispatches.
%0 = iree.do_not_optimize(%ref_3) : !vm.ref<!hal.buffer> | |
%1 = iree.do_not_optimize(%ref_2) : !vm.ref<!hal.buffer> | |
// Result buffer with allocation_size 108 (= 3 * 9 * 4, presumably the byte
// size of a 3x9xi32 tensor — confirm).
%ref_4 = vm.call @hal.allocator.allocate(%ref_0, %c50, %c15, %c108) : (!vm.ref<!hal.allocator>, i32, i32, i32) -> !vm.ref<!hal.buffer> | |
// Command buffer created with modes = 1 and command_categories = 3.
%ref_5 = vm.call @hal.command_buffer.create(%ref, %c1, %c3) : (!vm.ref<!hal.device>, i32, i32) -> !vm.ref<!hal.command_buffer> | |
vm.call @hal.command_buffer.begin(%ref_5) : (!vm.ref<!hal.command_buffer>) -> () | |
// Bindings for dispatch 0 (set 0): the scalar buffer with fields (0, 0, 4)
// and the result buffer with fields (1, 0, 108). These are pushed before the
// device-match branch below.
// NOTE(review): tuple fields are presumably (binding, buffer, offset, length);
// the import signature does not name them — confirm.
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
vm.call.variadic @hal.command_buffer.push_descriptor_set(%ref_5, %_executable_layout_0, %zero, [(%zero, %1, %zero, %c4), (%c1, %ref_4, %zero, %c108)]) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable_layout>, i32, tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) | |
// Non-zero cached device match -> record and run the dispatches (^bb1);
// zero -> fail (^bb2).
%_device_match_id_0 = vm.global.load.i32 @_device_match_id_0 : i32 | |
vm.cond_br %_device_match_id_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
// Dispatch 0: entry_point 0, workgroup counts (1, 1, 1).
%_executable_pad_test_dispatch_0 = vm.global.load.ref @_executable_pad_test_dispatch_0 : !vm.ref<!hal.executable> | |
vm.call @hal.command_buffer.dispatch(%ref_5, %_executable_pad_test_dispatch_0, %zero, %c1, %c1, %c1) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32) -> () | |
// Barrier between the two dispatches: source_stage_mask = 20,
// target_stage_mask = 5, flags = 0.
vm.call @hal.command_buffer.execution_barrier(%ref_5, %c20, %c5, %zero) : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> () | |
// Bindings for dispatch 1 (set 0): the 2x3 buffer with fields (0, 0, 24) and
// the same result buffer with fields (1, 0, 108).
%_executable_layout_1 = vm.global.load.ref @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
vm.call.variadic @hal.command_buffer.push_descriptor_set(%ref_5, %_executable_layout_1, %zero, [(%zero, %0, %zero, %c24), (%c1, %ref_4, %zero, %c108)]) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable_layout>, i32, tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) | |
// Dispatch 1: entry_point 0, workgroup counts (1, 1, 1), then a second
// barrier with the same masks.
%_executable_pad_test_dispatch_1 = vm.global.load.ref @_executable_pad_test_dispatch_1 : !vm.ref<!hal.executable> | |
vm.call @hal.command_buffer.dispatch(%ref_5, %_executable_pad_test_dispatch_1, %zero, %c1, %c1, %c1) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.execution_barrier(%ref_5, %c20, %c5, %zero) : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.end(%ref_5) : (!vm.ref<!hal.command_buffer>) -> () | |
vm.call @hal.ex.submit_and_wait(%ref, %ref_5) : (!vm.ref<!hal.device>, !vm.ref<!hal.command_buffer>) -> () | |
// Build shape-[3, 9] buffer views over the computed result (%ref_6) and the
// expected constant (%ref_7), both with element_type 16777248 (0x01000020),
// and compare them.
%ref_6 = vm.call.variadic @hal.buffer_view.create(%ref_4, %c16777248, [%c3, %c9]) : (!vm.ref<!hal.buffer>, i32, i32 ...) -> !vm.ref<!hal.buffer_view> | |
%ref_7 = vm.call.variadic @hal.buffer_view.create(%ref_1, %c16777248, [%c3, %c9]) : (!vm.ref<!hal.buffer>, i32, i32 ...) -> !vm.ref<!hal.buffer_view> | |
vm.call @check.expect_eq(%ref_6, %ref_7) : (!vm.ref<!hal.buffer_view>, !vm.ref<!hal.buffer_view>) -> () | |
vm.return | |
^bb2: // pred: ^bb0 | |
// Taken when the cached device match value is zero.
vm.fail %c2, "unreachable location reached" | |
} | |
vm.export @pad_test as("pad_test$raw") | |
// Semaphore-driven wrapper around @pad_test.
//   %arg0 / %arg1: semaphore to await and the min_value to await it at.
//   %arg2 / %arg3: semaphore to signal and the new_value to signal it with
//                  after @pad_test returns.
// A non-zero result from the await is forwarded to vm.fail as the status.
vm.func @pad_test$async(%arg0: !vm.ref<!hal.semaphore>, %arg1: i32, %arg2: !vm.ref<!hal.semaphore>, %arg3: i32) { | |
%0 = vm.call @hal.semaphore.await(%arg0, %arg1) : (!vm.ref<!hal.semaphore>, i32) -> i32 | |
// Non-zero await result -> failure block; zero -> run the test body.
vm.cond_br %0, ^bb2(%0 : i32), ^bb1 | |
^bb1: // pred: ^bb0 | |
vm.call @pad_test() : () -> () | |
vm.call @hal.semaphore.signal(%arg2, %arg3) : (!vm.ref<!hal.semaphore>, i32) -> () | |
vm.return | |
^bb2(%1: i32): // pred: ^bb0 | |
vm.fail %1, "semaphore wait failed" | |
} | |
vm.export @pad_test$async | |
// Synchronous wrapper around @pad_test. It creates its own semaphore with
// initial_value 0, awaits it at 0, runs @pad_test, signals the semaphore to 1
// and awaits it at 1 before returning. A non-zero result from either await is
// forwarded to vm.fail as the status. Takes no arguments and returns nothing.
vm.func @pad_test$sync() attributes {iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%zero = vm.const.i32.zero : i32 | |
%c1 = vm.const.i32 1 : i32 | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_0 = vm.call @hal.semaphore.create(%ref, %zero) : (!vm.ref<!hal.device>, i32) -> !vm.ref<!hal.semaphore> | |
// First await uses min_value 0, the same value the semaphore was created with.
%0 = vm.call @hal.semaphore.await(%ref_0, %zero) : (!vm.ref<!hal.semaphore>, i32) -> i32 | |
vm.cond_br %0, ^bb2(%0 : i32), ^bb1 | |
^bb1: // pred: ^bb0 | |
vm.call @pad_test() : () -> () | |
// Signal to 1, then await 1 before returning.
vm.call @hal.semaphore.signal(%ref_0, %c1) : (!vm.ref<!hal.semaphore>, i32) -> () | |
%1 = vm.call @hal.semaphore.await(%ref_0, %c1) : (!vm.ref<!hal.semaphore>, i32) -> i32 | |
vm.cond_br %1, ^bb2(%1 : i32), ^bb3 | |
// Shared failure block for both awaits; %2 carries the failing await's result.
^bb2(%2: i32): // 2 preds: ^bb0, ^bb1 | |
vm.fail %2, "semaphore wait failed" | |
^bb3: // pred: ^bb1 | |
vm.return | |
} | |
vm.export @pad_test$sync as("pad_test") | |
vm.import @check.expect_true(%operand : i32) attributes {sym_visibility = "private"} | |
vm.import @check.expect_false(%operand : i32) attributes {sym_visibility = "private"} | |
vm.import @check.expect_all_true(%operand : !vm.ref<!hal.buffer_view>) attributes {sym_visibility = "private"} | |
vm.import @check.expect_eq(%lhs : !vm.ref<!hal.buffer_view>, %rhs : !vm.ref<!hal.buffer_view>) attributes {sym_visibility = "private"} | |
vm.import @check.expect_almost_eq(%lhs : !vm.ref<!hal.buffer_view>, %rhs : !vm.ref<!hal.buffer_view>) attributes {sym_visibility = "private"} | |
vm.import @hal.ex.shared_device() -> !vm.ref<!hal.device> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.ex.submit_and_wait(%device : !vm.ref<!hal.device>, %command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.allocate(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %allocation_size : i32) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.wrap.byte_buffer(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %source : !vm.ref<!iree.byte_buffer>, %offset : i32, %length : i32) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.allocator(%buffer : !vm.ref<!hal.buffer>) -> !vm.ref<!hal.allocator> attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.subspan(%source_buffer : !vm.ref<!hal.buffer>, %source_offset : i32, %length : i32) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.fill(%target_buffer : !vm.ref<!hal.buffer>, %target_offset : i32, %length : i32, %pattern : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.load(%source_buffer : !vm.ref<!hal.buffer>, %source_offset : i32, %length : i32) -> i32 attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.store(%value : i32, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i32, %length : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer_view.create(%buffer : !vm.ref<!hal.buffer>, %element_type : i32, %shape : i32 ...) -> !vm.ref<!hal.buffer_view> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.buffer(%buffer_view : !vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.byte_length(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.element_type(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.rank(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.dim(%buffer_view : !vm.ref<!hal.buffer_view>, %index : i32) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.trace(%key : !vm.ref<!iree.byte_buffer>, %operands : !vm.ref<!hal.buffer_view> ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.create(%device : !vm.ref<!hal.device>, %modes : i32, %command_categories : i32) -> !vm.ref<!hal.command_buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.begin(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.end(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.execution_barrier(%command_buffer : !vm.ref<!hal.command_buffer>, %source_stage_mask : i32, %target_stage_mask : i32, %flags : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.fill_buffer(%command_buffer : !vm.ref<!hal.command_buffer>, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i32, %length : i32, %pattern : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.copy_buffer(%command_buffer : !vm.ref<!hal.command_buffer>, %source_buffer : !vm.ref<!hal.buffer>, %source_offset : i32, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i32, %length : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.push_constants(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %offset : i32, %values : i32 ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.push_descriptor_set(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %set : i32, %bindings : tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.bind_descriptor_set(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %set : i32, %descriptor_set : !vm.ref<!hal.descriptor_set>, %dynamic_offsets : i32 ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.dispatch(%command_buffer : !vm.ref<!hal.command_buffer>, %executable : !vm.ref<!hal.executable>, %entry_point : i32, %workgroup_x : i32, %workgroup_y : i32, %workgroup_z : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.dispatch.indirect(%command_buffer : !vm.ref<!hal.command_buffer>, %executable : !vm.ref<!hal.executable>, %entry_point : i32, %workgroups_buffer : !vm.ref<!hal.buffer>, %workgroups_offset : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.descriptor_set.create(%device : !vm.ref<!hal.device>, %set_layout : !vm.ref<!hal.descriptor_set_layout>, %bindings : tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) -> !vm.ref<!hal.descriptor_set> attributes {sym_visibility = "private"} | |
vm.import @hal.descriptor_set_layout.create(%device : !vm.ref<!hal.device>, %usage_type : i32, %bindings : tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.device.allocator(%device : !vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.device.match.id(%device : !vm.ref<!hal.device>, %pattern : !vm.ref<!iree.byte_buffer>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.executable.create(%device : !vm.ref<!hal.device>, %executable_format : i32, %executable_data : !vm.ref<!iree.byte_buffer>, %executable_layouts : !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.executable_layout.create(%device : !vm.ref<!hal.device>, %push_constants : i32, %set_layouts : !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.semaphore.create(%device : !vm.ref<!hal.device>, %initial_value : i32) -> !vm.ref<!hal.semaphore> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.semaphore.query(%semaphore : !vm.ref<!hal.semaphore>) -> (i32, i32) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.signal(%semaphore : !vm.ref<!hal.semaphore>, %new_value : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.fail(%semaphore : !vm.ref<!hal.semaphore>, %status : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.await(%semaphore : !vm.ref<!hal.semaphore>, %min_value : i32) -> i32 attributes {sym_visibility = "private"} | |
// Module initializer: fills the module-level globals read by @pad_test.
// In order it
//   1. asks the shared device whether it matches the pattern held in the
//      @_utf8_vulkan_7197BF52A22CAFD7 rodata (bytes 118, 117, 108, 107, 97,
//      110, 42, i.e. ASCII "vulkan*") and caches the i32 answer,
//   2. creates two descriptor set layouts and one executable layout per
//      descriptor set layout,
//   3. creates one executable per SPIR-V rodata blob when the cached match
//      value is non-zero, and stores a null executable ref otherwise.
// Takes no arguments and returns nothing; all results are written to globals.
vm.func @__init() { | |
%c6 = vm.const.i32 6 : i32 | |
%c1 = vm.const.i32 1 : i32 | |
%c7 = vm.const.i32 7 : i32 | |
%c3 = vm.const.i32 3 : i32 | |
%zero = vm.const.i32.zero : i32 | |
// 1397773893 == 0x53505645 (ASCII bytes 'S','P','V','E'); it is passed below
// as the %executable_format operand of hal.executable.create.
%c1397773893 = vm.const.i32 1397773893 : i32 | |
// Null executable ref used on the "device did not match" paths.
%null = vm.const.ref.zero : !vm.ref<!hal.executable> | |
// --- Step 1: cache the device match result in @_device_match_id_0.
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%_utf8_vulkan_7197BF52A22CAFD7 = vm.const.ref.rodata @_utf8_vulkan_7197BF52A22CAFD7 : !vm.ref<!iree.byte_buffer> | |
%0 = vm.call @hal.device.match.id(%ref, %_utf8_vulkan_7197BF52A22CAFD7) : (!vm.ref<!hal.device>, !vm.ref<!iree.byte_buffer>) -> i32 | |
vm.global.store.i32 %0, @_device_match_id_0 : i32 | |
// --- Step 2a: descriptor set layout 0 (usage_type 1, two binding tuples)
// and the executable layout built from it (push_constants operand is 0).
// NOTE(review): the meaning of the three i32 fields in each binding tuple is
// not named by the import signature — confirm against the HAL module.
%ref_0 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_1 = vm.call.variadic @hal.descriptor_set_layout.create(%ref_0, %c1, [(%zero, %c7, %c1), (%c1, %c7, %c6)]) : (!vm.ref<!hal.device>, i32, tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> | |
vm.global.store.ref %ref_1, @_descriptor_set_layout_0 : !vm.ref<!hal.descriptor_set_layout> | |
// The layout is reloaded from the global that was just stored.
%_descriptor_set_layout_0 = vm.global.load.ref @_descriptor_set_layout_0 : !vm.ref<!hal.descriptor_set_layout> | |
%ref_2 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_3 = vm.call.variadic @hal.executable_layout.create(%ref_2, %zero, [%_descriptor_set_layout_0]) : (!vm.ref<!hal.device>, i32, !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> | |
vm.global.store.ref %ref_3, @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
// --- Step 2b: descriptor set layout 1 and executable layout 1. Identical to
// step 2a except that the last field of the second binding tuple is 3, not 6.
%ref_4 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_5 = vm.call.variadic @hal.descriptor_set_layout.create(%ref_4, %c1, [(%zero, %c7, %c1), (%c1, %c7, %c3)]) : (!vm.ref<!hal.device>, i32, tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> | |
vm.global.store.ref %ref_5, @_descriptor_set_layout_1 : !vm.ref<!hal.descriptor_set_layout> | |
%_descriptor_set_layout_1 = vm.global.load.ref @_descriptor_set_layout_1 : !vm.ref<!hal.descriptor_set_layout> | |
%ref_6 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_7 = vm.call.variadic @hal.executable_layout.create(%ref_6, %zero, [%_descriptor_set_layout_1]) : (!vm.ref<!hal.device>, i32, !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> | |
vm.global.store.ref %ref_7, @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
// --- Step 3a: executable for dispatch 0, created only when the cached
// device match value is non-zero.
%ref_8 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%_device_match_id_0 = vm.global.load.i32 @_device_match_id_0 : i32 | |
vm.cond_br %_device_match_id_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%_pad_test_dispatch_0_vulkan_spirv_binary_spirv = vm.const.ref.rodata @_pad_test_dispatch_0_vulkan_spirv_binary_spirv : !vm.ref<!iree.byte_buffer> | |
%ref_9 = vm.call.variadic @hal.executable.create(%ref_8, %c1397773893, %_pad_test_dispatch_0_vulkan_spirv_binary_spirv, [%_executable_layout_0]) : (!vm.ref<!hal.device>, i32, !vm.ref<!iree.byte_buffer>, !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> | |
vm.br ^bb3(%ref_9 : !vm.ref<!hal.executable>) | |
^bb2: // pred: ^bb0 | |
vm.br ^bb3(%null : !vm.ref<!hal.executable>) | |
// ^bb3 receives either the created executable or the null ref.
^bb3(%1: !vm.ref<!hal.executable>): // 2 preds: ^bb1, ^bb2 | |
vm.global.store.ref %1, @_executable_pad_test_dispatch_0 : !vm.ref<!hal.executable> | |
// --- Step 3b: same pattern for dispatch 1, using executable layout 1.
%ref_10 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%_device_match_id_0_11 = vm.global.load.i32 @_device_match_id_0 : i32 | |
vm.cond_br %_device_match_id_0_11, ^bb4, ^bb5 | |
^bb4: // pred: ^bb3 | |
%_executable_layout_1 = vm.global.load.ref @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
%_pad_test_dispatch_1_vulkan_spirv_binary_spirv = vm.const.ref.rodata @_pad_test_dispatch_1_vulkan_spirv_binary_spirv : !vm.ref<!iree.byte_buffer> | |
%ref_12 = vm.call.variadic @hal.executable.create(%ref_10, %c1397773893, %_pad_test_dispatch_1_vulkan_spirv_binary_spirv, [%_executable_layout_1]) : (!vm.ref<!hal.device>, i32, !vm.ref<!iree.byte_buffer>, !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> | |
vm.br ^bb6(%ref_12 : !vm.ref<!hal.executable>) | |
^bb5: // pred: ^bb3 | |
vm.br ^bb6(%null : !vm.ref<!hal.executable>) | |
^bb6(%2: !vm.ref<!hal.executable>): // 2 preds: ^bb4, ^bb5 | |
vm.global.store.ref %2, @_executable_pad_test_dispatch_1 : !vm.ref<!hal.executable> | |
vm.return | |
} | |
vm.export @__init | |
} | |
} | |
// *** IR Dump After SymbolDCE *** | |
module { | |
vm.module @module { | |
vm.global.i32 @_device_match_id_0 mutable : i32 | |
vm.rodata @_utf8_vulkan_7197BF52A22CAFD7 dense<[118, 117, 108, 107, 97, 110, 42]> : vector<7xi8> | |
vm.global.ref @_descriptor_set_layout_0 mutable : !vm.ref<!hal.descriptor_set_layout> | |
vm.global.ref @_executable_layout_0 mutable : !vm.ref<!hal.executable_layout> | |
vm.global.ref @_descriptor_set_layout_1 mutable : !vm.ref<!hal.descriptor_set_layout> | |
vm.global.ref @_executable_layout_1 mutable : !vm.ref<!hal.executable_layout> | |
vm.global.ref @_executable_pad_test_dispatch_0 mutable : !vm.ref<!hal.executable> | |
vm.rodata @_pad_test_dispatch_0_vulkan_spirv_binary_spirv dense<"0x080000005350564588FAFFFF08000000240000000100000004000000130000007061645F746573745F64697370617463685F300052010000030223070000010016000000300000000000000011000200010000000A000B005350565F4B48525F73746F726167655F6275666665725F73746F726167655F636C617373000000000B00060027000000474C534C2E7374642E343530000000000E00030000000000010000000F000A0005000000120000007061645F746573745F64697370617463685F3000050000000400000010000600120000001100000020000000010000000100000005000B00040000005F5F6275696C74696E5F7661725F4C6F63616C496E766F636174696F6E49645F5F00000005000900050000005F5F6275696C74696E5F7661725F576F726B67726F757049645F5F00050009000A0000005F5F7265736F757263655F7661725F3138333936383030305F5F0000050009000F0000005F5F7265736F757263655F7661725F3138333735323534345F5F000005000700120000007061645F746573745F64697370617463685F300047000400040000000B0000001B00000047000400050000000B0000001A000000470004000800000006000000040000004800050007000000000000002300000000000000470003000700000002000000470004000A0000002100000001000000470004000A0000002200000000000000470004000D0000000600000004000000480005000C000000000000002300000000000000470003000C00000002000000470004000F0000002100000000000000470004000F00000022000000000000001500040003000000200000000000000017000400020000000300000003000000200004000100000001000000020000003B0004000100000004000000010000003B0004000100000005000000010000002B00040003000000090000001B0000001C0004000800000003000000090000001E000300070000000800000020000400060000000C000000070000003B000400060000000A0000000C0000002B000400030000000E000000010000001C0004000D000000030000000E0000001E0003000C0000000D000000200004000B0000000C0000000C0000003B0004000B0000000F0000000C00000013000200110000002100030010000000110000002B0004000300000014000000200000002B0004000300000015000000000000002B00040003000000160000000900000020000400170000000C0000000300000014000200200000003600050011000000120000000000000010000000F8000200130000004100060017000000180000000F000
00015000000150000003D0004000300000019000000180000003D000400020000001A0000000500000051000500030000001B0000001A000000000000003D000400020000001C0000000400000051000500030000001D0000001C0000000000000084000500030000001E0000001B0000001400000080000500030000001F0000001E0000001D000000B100050020000000210000001F00000009000000F70003002400000000000000FA000400210000002300000024000000F8000200230000008700050003000000250000001F000000160000000C000600030000002600000027000000050000001F0000000C00060003000000280000002700000005000000160000008900050003000000290000002600000028000000AA000500200000002A0000001F000000260000007E000400030000002B00000029000000A9000600030000002C0000002A000000290000002B00000084000500030000002D000000250000001600000080000500030000002E0000002D0000002C00000041000600170000002F0000000A000000150000002E0000003E0003002F00000019000000F900020024000000F800020024000000FD0001003800010008000C0004000800"> : vector<1416xi8> | |
vm.global.ref @_executable_pad_test_dispatch_1 mutable : !vm.ref<!hal.executable> | |
vm.rodata @_pad_test_dispatch_1_vulkan_spirv_binary_spirv dense<"0x08000000535056452CFAFFFF08000000240000000100000004000000130000007061645F746573745F64697370617463685F310069010000030223070000010016000000350000000000000011000200010000000A000B005350565F4B48525F73746F726167655F6275666665725F73746F726167655F636C617373000000000B00060026000000474C534C2E7374642E343530000000000E00030000000000010000000F000A0005000000120000007061645F746573745F64697370617463685F3100050000000400000010000600120000001100000020000000010000000100000005000B00040000005F5F6275696C74696E5F7661725F4C6F63616C496E766F636174696F6E49645F5F00000005000900050000005F5F6275696C74696E5F7661725F576F726B67726F757049645F5F00050009000A0000005F5F7265736F757263655F7661725F3138343135323936305F5F0000050009000F0000005F5F7265736F757263655F7661725F3138333735323534345F5F000005000700120000007061645F746573745F64697370617463685F310047000400040000000B0000001B00000047000400050000000B0000001A000000470004000800000006000000040000004800050007000000000000002300000000000000470003000700000002000000470004000A0000002100000001000000470004000A0000002200000000000000470004000D0000000600000004000000480005000C000000000000002300000000000000470003000C00000002000000470004000F0000002100000000000000470004000F00000022000000000000001500040003000000200000000000000017000400020000000300000003000000200004000100000001000000020000003B0004000100000004000000010000003B0004000100000005000000010000002B00040003000000090000001B0000001C0004000800000003000000090000001E000300070000000800000020000400060000000C000000070000003B000400060000000A0000000C0000002B000400030000000E000000060000001C0004000D000000030000000E0000001E0003000C0000000D000000200004000B0000000C0000000C0000003B0004000B0000000F0000000C00000013000200110000002100030010000000110000002B0004000300000014000000010000002B0004000300000015000000200000002B0004000300000016000000030000002B0004000300000017000000000000002B000400030000001800000009000000140002001F000000200004002E0000000C00000003000000360005001100000012000
0000000000010000000F8000200130000003D00040002000000190000000500000051000500030000001A00000019000000000000003D000400020000001B0000000400000051000500030000001C0000001B0000000000000084000500030000001D0000001A0000001500000080000500030000001E0000001D0000001C000000B10005001F000000200000001E0000000E000000F70003002300000000000000FA000400200000002200000023000000F8000200220000008700050003000000240000001E000000160000000C000600030000002500000026000000050000001E0000000C00060003000000270000002600000005000000160000008900050003000000280000002500000027000000AA0005001F000000290000001E000000250000007E000400030000002A00000028000000A9000600030000002B00000029000000280000002A00000084000500030000002C000000240000001600000080000500030000002D0000002C0000002B000000410006002E0000002F0000000F000000170000002D0000003D00040003000000300000002F0000008000050003000000310000002B0000001400000084000500030000003200000024000000180000008000050003000000330000003200000031000000410006002E000000340000000A00000017000000330000003E0003003400000030000000F900020023000000F800020023000000FD0001003800010008000C0004000800"> : vector<1508xi8> | |
vm.rodata @pad_test_const dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
vm.rodata @pad_test_const_0 dense<0> : tensor<i32> | |
vm.rodata @pad_test_const_1 dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
vm.func @pad_test() attributes {noinline} { | |
%c-1 = vm.const.i32 -1 : i32 | |
%c4 = vm.const.i32 4 : i32 | |
%c24 = vm.const.i32 24 : i32 | |
%c108 = vm.const.i32 108 : i32 | |
%c9 = vm.const.i32 9 : i32 | |
%c16777248 = vm.const.i32 16777248 : i32 | |
%c50 = vm.const.i32 50 : i32 | |
%c15 = vm.const.i32 15 : i32 | |
%c1 = vm.const.i32 1 : i32 | |
%c3 = vm.const.i32 3 : i32 | |
%c20 = vm.const.i32 20 : i32 | |
%c5 = vm.const.i32 5 : i32 | |
%zero = vm.const.i32.zero : i32 | |
%c2 = vm.const.i32 2 : i32 | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_0 = vm.call @hal.device.allocator(%ref) : (!vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> | |
%pad_test_const = vm.const.ref.rodata @pad_test_const : !vm.ref<!iree.byte_buffer> | |
%ref_1 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50, %c15, %pad_test_const, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
%pad_test_const_0 = vm.const.ref.rodata @pad_test_const_0 : !vm.ref<!iree.byte_buffer> | |
%ref_2 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50, %c15, %pad_test_const_0, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
%pad_test_const_1 = vm.const.ref.rodata @pad_test_const_1 : !vm.ref<!iree.byte_buffer> | |
%ref_3 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50, %c15, %pad_test_const_1, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
%0 = iree.do_not_optimize(%ref_3) : !vm.ref<!hal.buffer> | |
%1 = iree.do_not_optimize(%ref_2) : !vm.ref<!hal.buffer> | |
%ref_4 = vm.call @hal.allocator.allocate(%ref_0, %c50, %c15, %c108) : (!vm.ref<!hal.allocator>, i32, i32, i32) -> !vm.ref<!hal.buffer> | |
%ref_5 = vm.call @hal.command_buffer.create(%ref, %c1, %c3) : (!vm.ref<!hal.device>, i32, i32) -> !vm.ref<!hal.command_buffer> | |
vm.call @hal.command_buffer.begin(%ref_5) : (!vm.ref<!hal.command_buffer>) -> () | |
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
vm.call.variadic @hal.command_buffer.push_descriptor_set(%ref_5, %_executable_layout_0, %zero, [(%zero, %1, %zero, %c4), (%c1, %ref_4, %zero, %c108)]) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable_layout>, i32, tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) | |
%_device_match_id_0 = vm.global.load.i32 @_device_match_id_0 : i32 | |
vm.cond_br %_device_match_id_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_pad_test_dispatch_0 = vm.global.load.ref @_executable_pad_test_dispatch_0 : !vm.ref<!hal.executable> | |
vm.call @hal.command_buffer.dispatch(%ref_5, %_executable_pad_test_dispatch_0, %zero, %c1, %c1, %c1) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.execution_barrier(%ref_5, %c20, %c5, %zero) : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> () | |
%_executable_layout_1 = vm.global.load.ref @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
vm.call.variadic @hal.command_buffer.push_descriptor_set(%ref_5, %_executable_layout_1, %zero, [(%zero, %0, %zero, %c24), (%c1, %ref_4, %zero, %c108)]) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable_layout>, i32, tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) | |
%_executable_pad_test_dispatch_1 = vm.global.load.ref @_executable_pad_test_dispatch_1 : !vm.ref<!hal.executable> | |
vm.call @hal.command_buffer.dispatch(%ref_5, %_executable_pad_test_dispatch_1, %zero, %c1, %c1, %c1) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.execution_barrier(%ref_5, %c20, %c5, %zero) : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.end(%ref_5) : (!vm.ref<!hal.command_buffer>) -> () | |
vm.call @hal.ex.submit_and_wait(%ref, %ref_5) : (!vm.ref<!hal.device>, !vm.ref<!hal.command_buffer>) -> () | |
%ref_6 = vm.call.variadic @hal.buffer_view.create(%ref_4, %c16777248, [%c3, %c9]) : (!vm.ref<!hal.buffer>, i32, i32 ...) -> !vm.ref<!hal.buffer_view> | |
%ref_7 = vm.call.variadic @hal.buffer_view.create(%ref_1, %c16777248, [%c3, %c9]) : (!vm.ref<!hal.buffer>, i32, i32 ...) -> !vm.ref<!hal.buffer_view> | |
vm.call @check.expect_eq(%ref_6, %ref_7) : (!vm.ref<!hal.buffer_view>, !vm.ref<!hal.buffer_view>) -> () | |
vm.return | |
^bb2: // pred: ^bb0 | |
vm.fail %c2, "unreachable location reached" | |
} | |
vm.export @pad_test as("pad_test$raw") | |
vm.func @pad_test$async(%arg0: !vm.ref<!hal.semaphore>, %arg1: i32, %arg2: !vm.ref<!hal.semaphore>, %arg3: i32) { | |
%0 = vm.call @hal.semaphore.await(%arg0, %arg1) : (!vm.ref<!hal.semaphore>, i32) -> i32 | |
vm.cond_br %0, ^bb2(%0 : i32), ^bb1 | |
^bb1: // pred: ^bb0 | |
vm.call @pad_test() : () -> () | |
vm.call @hal.semaphore.signal(%arg2, %arg3) : (!vm.ref<!hal.semaphore>, i32) -> () | |
vm.return | |
^bb2(%1: i32): // pred: ^bb0 | |
vm.fail %1, "semaphore wait failed" | |
} | |
vm.export @pad_test$async | |
vm.func @pad_test$sync() attributes {iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%zero = vm.const.i32.zero : i32 | |
%c1 = vm.const.i32 1 : i32 | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_0 = vm.call @hal.semaphore.create(%ref, %zero) : (!vm.ref<!hal.device>, i32) -> !vm.ref<!hal.semaphore> | |
%0 = vm.call @hal.semaphore.await(%ref_0, %zero) : (!vm.ref<!hal.semaphore>, i32) -> i32 | |
vm.cond_br %0, ^bb2(%0 : i32), ^bb1 | |
^bb1: // pred: ^bb0 | |
vm.call @pad_test() : () -> () | |
vm.call @hal.semaphore.signal(%ref_0, %c1) : (!vm.ref<!hal.semaphore>, i32) -> () | |
%1 = vm.call @hal.semaphore.await(%ref_0, %c1) : (!vm.ref<!hal.semaphore>, i32) -> i32 | |
vm.cond_br %1, ^bb2(%1 : i32), ^bb3 | |
^bb2(%2: i32): // 2 preds: ^bb0, ^bb1 | |
vm.fail %2, "semaphore wait failed" | |
^bb3: // pred: ^bb1 | |
vm.return | |
} | |
vm.export @pad_test$sync as("pad_test") | |
vm.import @check.expect_eq(%lhs : !vm.ref<!hal.buffer_view>, %rhs : !vm.ref<!hal.buffer_view>) attributes {sym_visibility = "private"} | |
vm.import @hal.ex.shared_device() -> !vm.ref<!hal.device> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.ex.submit_and_wait(%device : !vm.ref<!hal.device>, %command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.allocate(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %allocation_size : i32) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.wrap.byte_buffer(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %source : !vm.ref<!iree.byte_buffer>, %offset : i32, %length : i32) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.buffer_view.create(%buffer : !vm.ref<!hal.buffer>, %element_type : i32, %shape : i32 ...) -> !vm.ref<!hal.buffer_view> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.command_buffer.create(%device : !vm.ref<!hal.device>, %modes : i32, %command_categories : i32) -> !vm.ref<!hal.command_buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.begin(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.end(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.execution_barrier(%command_buffer : !vm.ref<!hal.command_buffer>, %source_stage_mask : i32, %target_stage_mask : i32, %flags : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.push_descriptor_set(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %set : i32, %bindings : tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.dispatch(%command_buffer : !vm.ref<!hal.command_buffer>, %executable : !vm.ref<!hal.executable>, %entry_point : i32, %workgroup_x : i32, %workgroup_y : i32, %workgroup_z : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.descriptor_set_layout.create(%device : !vm.ref<!hal.device>, %usage_type : i32, %bindings : tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.device.allocator(%device : !vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.device.match.id(%device : !vm.ref<!hal.device>, %pattern : !vm.ref<!iree.byte_buffer>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.executable.create(%device : !vm.ref<!hal.device>, %executable_format : i32, %executable_data : !vm.ref<!iree.byte_buffer>, %executable_layouts : !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.executable_layout.create(%device : !vm.ref<!hal.device>, %push_constants : i32, %set_layouts : !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.semaphore.create(%device : !vm.ref<!hal.device>, %initial_value : i32) -> !vm.ref<!hal.semaphore> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.semaphore.signal(%semaphore : !vm.ref<!hal.semaphore>, %new_value : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.await(%semaphore : !vm.ref<!hal.semaphore>, %min_value : i32) -> i32 attributes {sym_visibility = "private"} | |
vm.func @__init() { | |
%c6 = vm.const.i32 6 : i32 | |
%c1 = vm.const.i32 1 : i32 | |
%c7 = vm.const.i32 7 : i32 | |
%c3 = vm.const.i32 3 : i32 | |
%zero = vm.const.i32.zero : i32 | |
%c1397773893 = vm.const.i32 1397773893 : i32 | |
%null = vm.const.ref.zero : !vm.ref<!hal.executable> | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%_utf8_vulkan_7197BF52A22CAFD7 = vm.const.ref.rodata @_utf8_vulkan_7197BF52A22CAFD7 : !vm.ref<!iree.byte_buffer> | |
%0 = vm.call @hal.device.match.id(%ref, %_utf8_vulkan_7197BF52A22CAFD7) : (!vm.ref<!hal.device>, !vm.ref<!iree.byte_buffer>) -> i32 | |
vm.global.store.i32 %0, @_device_match_id_0 : i32 | |
%ref_0 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_1 = vm.call.variadic @hal.descriptor_set_layout.create(%ref_0, %c1, [(%zero, %c7, %c1), (%c1, %c7, %c6)]) : (!vm.ref<!hal.device>, i32, tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> | |
vm.global.store.ref %ref_1, @_descriptor_set_layout_0 : !vm.ref<!hal.descriptor_set_layout> | |
%_descriptor_set_layout_0 = vm.global.load.ref @_descriptor_set_layout_0 : !vm.ref<!hal.descriptor_set_layout> | |
%ref_2 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_3 = vm.call.variadic @hal.executable_layout.create(%ref_2, %zero, [%_descriptor_set_layout_0]) : (!vm.ref<!hal.device>, i32, !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> | |
vm.global.store.ref %ref_3, @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%ref_4 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_5 = vm.call.variadic @hal.descriptor_set_layout.create(%ref_4, %c1, [(%zero, %c7, %c1), (%c1, %c7, %c3)]) : (!vm.ref<!hal.device>, i32, tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> | |
vm.global.store.ref %ref_5, @_descriptor_set_layout_1 : !vm.ref<!hal.descriptor_set_layout> | |
%_descriptor_set_layout_1 = vm.global.load.ref @_descriptor_set_layout_1 : !vm.ref<!hal.descriptor_set_layout> | |
%ref_6 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_7 = vm.call.variadic @hal.executable_layout.create(%ref_6, %zero, [%_descriptor_set_layout_1]) : (!vm.ref<!hal.device>, i32, !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> | |
vm.global.store.ref %ref_7, @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
%ref_8 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%_device_match_id_0 = vm.global.load.i32 @_device_match_id_0 : i32 | |
vm.cond_br %_device_match_id_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%_pad_test_dispatch_0_vulkan_spirv_binary_spirv = vm.const.ref.rodata @_pad_test_dispatch_0_vulkan_spirv_binary_spirv : !vm.ref<!iree.byte_buffer> | |
%ref_9 = vm.call.variadic @hal.executable.create(%ref_8, %c1397773893, %_pad_test_dispatch_0_vulkan_spirv_binary_spirv, [%_executable_layout_0]) : (!vm.ref<!hal.device>, i32, !vm.ref<!iree.byte_buffer>, !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> | |
vm.br ^bb3(%ref_9 : !vm.ref<!hal.executable>) | |
^bb2: // pred: ^bb0 | |
vm.br ^bb3(%null : !vm.ref<!hal.executable>) | |
^bb3(%1: !vm.ref<!hal.executable>): // 2 preds: ^bb1, ^bb2 | |
vm.global.store.ref %1, @_executable_pad_test_dispatch_0 : !vm.ref<!hal.executable> | |
%ref_10 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%_device_match_id_0_11 = vm.global.load.i32 @_device_match_id_0 : i32 | |
vm.cond_br %_device_match_id_0_11, ^bb4, ^bb5 | |
^bb4: // pred: ^bb3 | |
%_executable_layout_1 = vm.global.load.ref @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
%_pad_test_dispatch_1_vulkan_spirv_binary_spirv = vm.const.ref.rodata @_pad_test_dispatch_1_vulkan_spirv_binary_spirv : !vm.ref<!iree.byte_buffer> | |
%ref_12 = vm.call.variadic @hal.executable.create(%ref_10, %c1397773893, %_pad_test_dispatch_1_vulkan_spirv_binary_spirv, [%_executable_layout_1]) : (!vm.ref<!hal.device>, i32, !vm.ref<!iree.byte_buffer>, !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> | |
vm.br ^bb6(%ref_12 : !vm.ref<!hal.executable>) | |
^bb5: // pred: ^bb3 | |
vm.br ^bb6(%null : !vm.ref<!hal.executable>) | |
^bb6(%2: !vm.ref<!hal.executable>): // 2 preds: ^bb4, ^bb5 | |
vm.global.store.ref %2, @_executable_pad_test_dispatch_1 : !vm.ref<!hal.executable> | |
vm.return | |
} | |
vm.export @__init | |
} | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::VM::SinkDefiningOpsPass *** | |
vm.module @module { | |
vm.global.i32 @_device_match_id_0 mutable : i32 | |
vm.rodata @_utf8_vulkan_7197BF52A22CAFD7 dense<[118, 117, 108, 107, 97, 110, 42]> : vector<7xi8> | |
vm.global.ref @_descriptor_set_layout_0 mutable : !vm.ref<!hal.descriptor_set_layout> | |
vm.global.ref @_executable_layout_0 mutable : !vm.ref<!hal.executable_layout> | |
vm.global.ref @_descriptor_set_layout_1 mutable : !vm.ref<!hal.descriptor_set_layout> | |
vm.global.ref @_executable_layout_1 mutable : !vm.ref<!hal.executable_layout> | |
vm.global.ref @_executable_pad_test_dispatch_0 mutable : !vm.ref<!hal.executable> | |
vm.rodata @_pad_test_dispatch_0_vulkan_spirv_binary_spirv dense<"0x080000005350564588FAFFFF08000000240000000100000004000000130000007061645F746573745F64697370617463685F300052010000030223070000010016000000300000000000000011000200010000000A000B005350565F4B48525F73746F726167655F6275666665725F73746F726167655F636C617373000000000B00060027000000474C534C2E7374642E343530000000000E00030000000000010000000F000A0005000000120000007061645F746573745F64697370617463685F3000050000000400000010000600120000001100000020000000010000000100000005000B00040000005F5F6275696C74696E5F7661725F4C6F63616C496E766F636174696F6E49645F5F00000005000900050000005F5F6275696C74696E5F7661725F576F726B67726F757049645F5F00050009000A0000005F5F7265736F757263655F7661725F3138333936383030305F5F0000050009000F0000005F5F7265736F757263655F7661725F3138333735323534345F5F000005000700120000007061645F746573745F64697370617463685F300047000400040000000B0000001B00000047000400050000000B0000001A000000470004000800000006000000040000004800050007000000000000002300000000000000470003000700000002000000470004000A0000002100000001000000470004000A0000002200000000000000470004000D0000000600000004000000480005000C000000000000002300000000000000470003000C00000002000000470004000F0000002100000000000000470004000F00000022000000000000001500040003000000200000000000000017000400020000000300000003000000200004000100000001000000020000003B0004000100000004000000010000003B0004000100000005000000010000002B00040003000000090000001B0000001C0004000800000003000000090000001E000300070000000800000020000400060000000C000000070000003B000400060000000A0000000C0000002B000400030000000E000000010000001C0004000D000000030000000E0000001E0003000C0000000D000000200004000B0000000C0000000C0000003B0004000B0000000F0000000C00000013000200110000002100030010000000110000002B0004000300000014000000200000002B0004000300000015000000000000002B00040003000000160000000900000020000400170000000C0000000300000014000200200000003600050011000000120000000000000010000000F8000200130000004100060017000000180000000F000
00015000000150000003D0004000300000019000000180000003D000400020000001A0000000500000051000500030000001B0000001A000000000000003D000400020000001C0000000400000051000500030000001D0000001C0000000000000084000500030000001E0000001B0000001400000080000500030000001F0000001E0000001D000000B100050020000000210000001F00000009000000F70003002400000000000000FA000400210000002300000024000000F8000200230000008700050003000000250000001F000000160000000C000600030000002600000027000000050000001F0000000C00060003000000280000002700000005000000160000008900050003000000290000002600000028000000AA000500200000002A0000001F000000260000007E000400030000002B00000029000000A9000600030000002C0000002A000000290000002B00000084000500030000002D000000250000001600000080000500030000002E0000002D0000002C00000041000600170000002F0000000A000000150000002E0000003E0003002F00000019000000F900020024000000F800020024000000FD0001003800010008000C0004000800"> : vector<1416xi8> | |
vm.global.ref @_executable_pad_test_dispatch_1 mutable : !vm.ref<!hal.executable> | |
vm.rodata @_pad_test_dispatch_1_vulkan_spirv_binary_spirv dense<"0x08000000535056452CFAFFFF08000000240000000100000004000000130000007061645F746573745F64697370617463685F310069010000030223070000010016000000350000000000000011000200010000000A000B005350565F4B48525F73746F726167655F6275666665725F73746F726167655F636C617373000000000B00060026000000474C534C2E7374642E343530000000000E00030000000000010000000F000A0005000000120000007061645F746573745F64697370617463685F3100050000000400000010000600120000001100000020000000010000000100000005000B00040000005F5F6275696C74696E5F7661725F4C6F63616C496E766F636174696F6E49645F5F00000005000900050000005F5F6275696C74696E5F7661725F576F726B67726F757049645F5F00050009000A0000005F5F7265736F757263655F7661725F3138343135323936305F5F0000050009000F0000005F5F7265736F757263655F7661725F3138333735323534345F5F000005000700120000007061645F746573745F64697370617463685F310047000400040000000B0000001B00000047000400050000000B0000001A000000470004000800000006000000040000004800050007000000000000002300000000000000470003000700000002000000470004000A0000002100000001000000470004000A0000002200000000000000470004000D0000000600000004000000480005000C000000000000002300000000000000470003000C00000002000000470004000F0000002100000000000000470004000F00000022000000000000001500040003000000200000000000000017000400020000000300000003000000200004000100000001000000020000003B0004000100000004000000010000003B0004000100000005000000010000002B00040003000000090000001B0000001C0004000800000003000000090000001E000300070000000800000020000400060000000C000000070000003B000400060000000A0000000C0000002B000400030000000E000000060000001C0004000D000000030000000E0000001E0003000C0000000D000000200004000B0000000C0000000C0000003B0004000B0000000F0000000C00000013000200110000002100030010000000110000002B0004000300000014000000010000002B0004000300000015000000200000002B0004000300000016000000030000002B0004000300000017000000000000002B000400030000001800000009000000140002001F000000200004002E0000000C00000003000000360005001100000012000
0000000000010000000F8000200130000003D00040002000000190000000500000051000500030000001A00000019000000000000003D000400020000001B0000000400000051000500030000001C0000001B0000000000000084000500030000001D0000001A0000001500000080000500030000001E0000001D0000001C000000B10005001F000000200000001E0000000E000000F70003002300000000000000FA000400200000002200000023000000F8000200220000008700050003000000240000001E000000160000000C000600030000002500000026000000050000001E0000000C00060003000000270000002600000005000000160000008900050003000000280000002500000027000000AA0005001F000000290000001E000000250000007E000400030000002A00000028000000A9000600030000002B00000029000000280000002A00000084000500030000002C000000240000001600000080000500030000002D0000002C0000002B000000410006002E0000002F0000000F000000170000002D0000003D00040003000000300000002F0000008000050003000000310000002B0000001400000084000500030000003200000024000000180000008000050003000000330000003200000031000000410006002E000000340000000A00000017000000330000003E0003003400000030000000F900020023000000F800020023000000FD0001003800010008000C0004000800"> : vector<1508xi8> | |
vm.rodata @pad_test_const dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
vm.rodata @pad_test_const_0 dense<0> : tensor<i32> | |
vm.rodata @pad_test_const_1 dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
vm.func @pad_test() attributes {noinline} { | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_0 = vm.call @hal.device.allocator(%ref) : (!vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> | |
%c-1 = vm.const.i32 -1 : i32 | |
%c50 = vm.const.i32 50 : i32 | |
%c15 = vm.const.i32 15 : i32 | |
%zero = vm.const.i32.zero : i32 | |
%pad_test_const = vm.const.ref.rodata @pad_test_const : !vm.ref<!iree.byte_buffer> | |
%ref_1 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50, %c15, %pad_test_const, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
%pad_test_const_0 = vm.const.ref.rodata @pad_test_const_0 : !vm.ref<!iree.byte_buffer> | |
%ref_2 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50, %c15, %pad_test_const_0, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
%pad_test_const_1 = vm.const.ref.rodata @pad_test_const_1 : !vm.ref<!iree.byte_buffer> | |
%ref_3 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50, %c15, %pad_test_const_1, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
%0 = iree.do_not_optimize(%ref_3) : !vm.ref<!hal.buffer> | |
%1 = iree.do_not_optimize(%ref_2) : !vm.ref<!hal.buffer> | |
%c108 = vm.const.i32 108 : i32 | |
%ref_4 = vm.call @hal.allocator.allocate(%ref_0, %c50, %c15, %c108) : (!vm.ref<!hal.allocator>, i32, i32, i32) -> !vm.ref<!hal.buffer> | |
%c1 = vm.const.i32 1 : i32 | |
%c3 = vm.const.i32 3 : i32 | |
%ref_5 = vm.call @hal.command_buffer.create(%ref, %c1, %c3) : (!vm.ref<!hal.device>, i32, i32) -> !vm.ref<!hal.command_buffer> | |
vm.call @hal.command_buffer.begin(%ref_5) : (!vm.ref<!hal.command_buffer>) -> () | |
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%c4 = vm.const.i32 4 : i32 | |
vm.call.variadic @hal.command_buffer.push_descriptor_set(%ref_5, %_executable_layout_0, %zero, [(%zero, %1, %zero, %c4), (%c1, %ref_4, %zero, %c108)]) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable_layout>, i32, tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) | |
%_device_match_id_0 = vm.global.load.i32 @_device_match_id_0 : i32 | |
vm.cond_br %_device_match_id_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_pad_test_dispatch_0 = vm.global.load.ref @_executable_pad_test_dispatch_0 : !vm.ref<!hal.executable> | |
vm.call @hal.command_buffer.dispatch(%ref_5, %_executable_pad_test_dispatch_0, %zero, %c1, %c1, %c1) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32) -> () | |
%c20 = vm.const.i32 20 : i32 | |
%c5 = vm.const.i32 5 : i32 | |
vm.call @hal.command_buffer.execution_barrier(%ref_5, %c20, %c5, %zero) : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> () | |
%_executable_layout_1 = vm.global.load.ref @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
%c24 = vm.const.i32 24 : i32 | |
vm.call.variadic @hal.command_buffer.push_descriptor_set(%ref_5, %_executable_layout_1, %zero, [(%zero, %0, %zero, %c24), (%c1, %ref_4, %zero, %c108)]) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable_layout>, i32, tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) | |
%_executable_pad_test_dispatch_1 = vm.global.load.ref @_executable_pad_test_dispatch_1 : !vm.ref<!hal.executable> | |
vm.call @hal.command_buffer.dispatch(%ref_5, %_executable_pad_test_dispatch_1, %zero, %c1, %c1, %c1) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.execution_barrier(%ref_5, %c20, %c5, %zero) : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.end(%ref_5) : (!vm.ref<!hal.command_buffer>) -> () | |
vm.call @hal.ex.submit_and_wait(%ref, %ref_5) : (!vm.ref<!hal.device>, !vm.ref<!hal.command_buffer>) -> () | |
%c9 = vm.const.i32 9 : i32 | |
%c16777248 = vm.const.i32 16777248 : i32 | |
%ref_6 = vm.call.variadic @hal.buffer_view.create(%ref_4, %c16777248, [%c3, %c9]) : (!vm.ref<!hal.buffer>, i32, i32 ...) -> !vm.ref<!hal.buffer_view> | |
%ref_7 = vm.call.variadic @hal.buffer_view.create(%ref_1, %c16777248, [%c3, %c9]) : (!vm.ref<!hal.buffer>, i32, i32 ...) -> !vm.ref<!hal.buffer_view> | |
vm.call @check.expect_eq(%ref_6, %ref_7) : (!vm.ref<!hal.buffer_view>, !vm.ref<!hal.buffer_view>) -> () | |
vm.return | |
^bb2: // pred: ^bb0 | |
%c2 = vm.const.i32 2 : i32 | |
vm.fail %c2, "unreachable location reached" | |
} | |
vm.export @pad_test as("pad_test$raw") | |
vm.func @pad_test$async(%arg0: !vm.ref<!hal.semaphore>, %arg1: i32, %arg2: !vm.ref<!hal.semaphore>, %arg3: i32) { | |
%0 = vm.call @hal.semaphore.await(%arg0, %arg1) : (!vm.ref<!hal.semaphore>, i32) -> i32 | |
vm.cond_br %0, ^bb2(%0 : i32), ^bb1 | |
^bb1: // pred: ^bb0 | |
vm.call @pad_test() : () -> () | |
vm.call @hal.semaphore.signal(%arg2, %arg3) : (!vm.ref<!hal.semaphore>, i32) -> () | |
vm.return | |
^bb2(%1: i32): // pred: ^bb0 | |
vm.fail %1, "semaphore wait failed" | |
} | |
vm.export @pad_test$async | |
vm.func @pad_test$sync() attributes {iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%zero = vm.const.i32.zero : i32 | |
%ref_0 = vm.call @hal.semaphore.create(%ref, %zero) : (!vm.ref<!hal.device>, i32) -> !vm.ref<!hal.semaphore> | |
%0 = vm.call @hal.semaphore.await(%ref_0, %zero) : (!vm.ref<!hal.semaphore>, i32) -> i32 | |
vm.cond_br %0, ^bb2(%0 : i32), ^bb1 | |
^bb1: // pred: ^bb0 | |
vm.call @pad_test() : () -> () | |
%c1 = vm.const.i32 1 : i32 | |
vm.call @hal.semaphore.signal(%ref_0, %c1) : (!vm.ref<!hal.semaphore>, i32) -> () | |
%1 = vm.call @hal.semaphore.await(%ref_0, %c1) : (!vm.ref<!hal.semaphore>, i32) -> i32 | |
vm.cond_br %1, ^bb2(%1 : i32), ^bb3 | |
^bb2(%2: i32): // 2 preds: ^bb0, ^bb1 | |
vm.fail %2, "semaphore wait failed" | |
^bb3: // pred: ^bb1 | |
vm.return | |
} | |
vm.export @pad_test$sync as("pad_test") | |
vm.import @check.expect_eq(%lhs : !vm.ref<!hal.buffer_view>, %rhs : !vm.ref<!hal.buffer_view>) attributes {sym_visibility = "private"} | |
vm.import @hal.ex.shared_device() -> !vm.ref<!hal.device> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.ex.submit_and_wait(%device : !vm.ref<!hal.device>, %command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.allocate(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %allocation_size : i32) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.wrap.byte_buffer(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %source : !vm.ref<!iree.byte_buffer>, %offset : i32, %length : i32) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.buffer_view.create(%buffer : !vm.ref<!hal.buffer>, %element_type : i32, %shape : i32 ...) -> !vm.ref<!hal.buffer_view> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.command_buffer.create(%device : !vm.ref<!hal.device>, %modes : i32, %command_categories : i32) -> !vm.ref<!hal.command_buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.begin(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.end(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.execution_barrier(%command_buffer : !vm.ref<!hal.command_buffer>, %source_stage_mask : i32, %target_stage_mask : i32, %flags : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.push_descriptor_set(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %set : i32, %bindings : tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.dispatch(%command_buffer : !vm.ref<!hal.command_buffer>, %executable : !vm.ref<!hal.executable>, %entry_point : i32, %workgroup_x : i32, %workgroup_y : i32, %workgroup_z : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.descriptor_set_layout.create(%device : !vm.ref<!hal.device>, %usage_type : i32, %bindings : tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.device.allocator(%device : !vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.device.match.id(%device : !vm.ref<!hal.device>, %pattern : !vm.ref<!iree.byte_buffer>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.executable.create(%device : !vm.ref<!hal.device>, %executable_format : i32, %executable_data : !vm.ref<!iree.byte_buffer>, %executable_layouts : !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.executable_layout.create(%device : !vm.ref<!hal.device>, %push_constants : i32, %set_layouts : !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.semaphore.create(%device : !vm.ref<!hal.device>, %initial_value : i32) -> !vm.ref<!hal.semaphore> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.semaphore.signal(%semaphore : !vm.ref<!hal.semaphore>, %new_value : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.await(%semaphore : !vm.ref<!hal.semaphore>, %min_value : i32) -> i32 attributes {sym_visibility = "private"} | |
vm.func @__init() { | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%_utf8_vulkan_7197BF52A22CAFD7 = vm.const.ref.rodata @_utf8_vulkan_7197BF52A22CAFD7 : !vm.ref<!iree.byte_buffer> | |
%0 = vm.call @hal.device.match.id(%ref, %_utf8_vulkan_7197BF52A22CAFD7) : (!vm.ref<!hal.device>, !vm.ref<!iree.byte_buffer>) -> i32 | |
vm.global.store.i32 %0, @_device_match_id_0 : i32 | |
%ref_0 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%c6 = vm.const.i32 6 : i32 | |
%c1 = vm.const.i32 1 : i32 | |
%c7 = vm.const.i32 7 : i32 | |
%zero = vm.const.i32.zero : i32 | |
%ref_1 = vm.call.variadic @hal.descriptor_set_layout.create(%ref_0, %c1, [(%zero, %c7, %c1), (%c1, %c7, %c6)]) : (!vm.ref<!hal.device>, i32, tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> | |
vm.global.store.ref %ref_1, @_descriptor_set_layout_0 : !vm.ref<!hal.descriptor_set_layout> | |
%_descriptor_set_layout_0 = vm.global.load.ref @_descriptor_set_layout_0 : !vm.ref<!hal.descriptor_set_layout> | |
%ref_2 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_3 = vm.call.variadic @hal.executable_layout.create(%ref_2, %zero, [%_descriptor_set_layout_0]) : (!vm.ref<!hal.device>, i32, !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> | |
vm.global.store.ref %ref_3, @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%ref_4 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%c3 = vm.const.i32 3 : i32 | |
%ref_5 = vm.call.variadic @hal.descriptor_set_layout.create(%ref_4, %c1, [(%zero, %c7, %c1), (%c1, %c7, %c3)]) : (!vm.ref<!hal.device>, i32, tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> | |
vm.global.store.ref %ref_5, @_descriptor_set_layout_1 : !vm.ref<!hal.descriptor_set_layout> | |
%_descriptor_set_layout_1 = vm.global.load.ref @_descriptor_set_layout_1 : !vm.ref<!hal.descriptor_set_layout> | |
%ref_6 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_7 = vm.call.variadic @hal.executable_layout.create(%ref_6, %zero, [%_descriptor_set_layout_1]) : (!vm.ref<!hal.device>, i32, !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> | |
vm.global.store.ref %ref_7, @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
%ref_8 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%_device_match_id_0 = vm.global.load.i32 @_device_match_id_0 : i32 | |
%c1397773893 = vm.const.i32 1397773893 : i32 | |
%null = vm.const.ref.zero : !vm.ref<!hal.executable> | |
vm.cond_br %_device_match_id_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%_pad_test_dispatch_0_vulkan_spirv_binary_spirv = vm.const.ref.rodata @_pad_test_dispatch_0_vulkan_spirv_binary_spirv : !vm.ref<!iree.byte_buffer> | |
%ref_9 = vm.call.variadic @hal.executable.create(%ref_8, %c1397773893, %_pad_test_dispatch_0_vulkan_spirv_binary_spirv, [%_executable_layout_0]) : (!vm.ref<!hal.device>, i32, !vm.ref<!iree.byte_buffer>, !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> | |
vm.br ^bb3(%ref_9 : !vm.ref<!hal.executable>) | |
^bb2: // pred: ^bb0 | |
vm.br ^bb3(%null : !vm.ref<!hal.executable>) | |
^bb3(%1: !vm.ref<!hal.executable>): // 2 preds: ^bb1, ^bb2 | |
vm.global.store.ref %1, @_executable_pad_test_dispatch_0 : !vm.ref<!hal.executable> | |
%ref_10 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%_device_match_id_0_11 = vm.global.load.i32 @_device_match_id_0 : i32 | |
vm.cond_br %_device_match_id_0_11, ^bb4, ^bb5 | |
^bb4: // pred: ^bb3 | |
%_executable_layout_1 = vm.global.load.ref @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
%_pad_test_dispatch_1_vulkan_spirv_binary_spirv = vm.const.ref.rodata @_pad_test_dispatch_1_vulkan_spirv_binary_spirv : !vm.ref<!iree.byte_buffer> | |
%ref_12 = vm.call.variadic @hal.executable.create(%ref_10, %c1397773893, %_pad_test_dispatch_1_vulkan_spirv_binary_spirv, [%_executable_layout_1]) : (!vm.ref<!hal.device>, i32, !vm.ref<!iree.byte_buffer>, !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> | |
vm.br ^bb6(%ref_12 : !vm.ref<!hal.executable>) | |
^bb5: // pred: ^bb3 | |
vm.br ^bb6(%null : !vm.ref<!hal.executable>) | |
^bb6(%2: !vm.ref<!hal.executable>): // 2 preds: ^bb4, ^bb5 | |
vm.global.store.ref %2, @_executable_pad_test_dispatch_1 : !vm.ref<!hal.executable> | |
vm.return | |
} | |
vm.export @__init | |
} | |
// *** IR Dump After mlir::iree_compiler::IREE::DropCompilerHintsPass *** | |
module { | |
vm.module @module { | |
// Module-level state. Every global below is written by @__init.
// Result of hal.device.match.id against the pattern below; used as the branch
// condition that guards executable creation (@__init) and dispatch (@pad_test).
vm.global.i32 @_device_match_id_0 mutable : i32 | |
// Device-id match pattern: the seven bytes are the ASCII string "vulkan*".
vm.rodata @_utf8_vulkan_7197BF52A22CAFD7 dense<[118, 117, 108, 107, 97, 110, 42]> : vector<7xi8> | |
// Descriptor-set layout / executable layout pair used for dispatch 0.
vm.global.ref @_descriptor_set_layout_0 mutable : !vm.ref<!hal.descriptor_set_layout> | |
vm.global.ref @_executable_layout_0 mutable : !vm.ref<!hal.executable_layout> | |
// Descriptor-set layout / executable layout pair used for dispatch 1.
vm.global.ref @_descriptor_set_layout_1 mutable : !vm.ref<!hal.descriptor_set_layout> | |
vm.global.ref @_executable_layout_1 mutable : !vm.ref<!hal.executable_layout> | |
// Executable for dispatch 0; @__init stores a null ref here when the device does not match.
vm.global.ref @_executable_pad_test_dispatch_0 mutable : !vm.ref<!hal.executable> | |
vm.rodata @_pad_test_dispatch_0_vulkan_spirv_binary_spirv dense<"0x080000005350564588FAFFFF08000000240000000100000004000000130000007061645F746573745F64697370617463685F300052010000030223070000010016000000300000000000000011000200010000000A000B005350565F4B48525F73746F726167655F6275666665725F73746F726167655F636C617373000000000B00060027000000474C534C2E7374642E343530000000000E00030000000000010000000F000A0005000000120000007061645F746573745F64697370617463685F3000050000000400000010000600120000001100000020000000010000000100000005000B00040000005F5F6275696C74696E5F7661725F4C6F63616C496E766F636174696F6E49645F5F00000005000900050000005F5F6275696C74696E5F7661725F576F726B67726F757049645F5F00050009000A0000005F5F7265736F757263655F7661725F3138333936383030305F5F0000050009000F0000005F5F7265736F757263655F7661725F3138333735323534345F5F000005000700120000007061645F746573745F64697370617463685F300047000400040000000B0000001B00000047000400050000000B0000001A000000470004000800000006000000040000004800050007000000000000002300000000000000470003000700000002000000470004000A0000002100000001000000470004000A0000002200000000000000470004000D0000000600000004000000480005000C000000000000002300000000000000470003000C00000002000000470004000F0000002100000000000000470004000F00000022000000000000001500040003000000200000000000000017000400020000000300000003000000200004000100000001000000020000003B0004000100000004000000010000003B0004000100000005000000010000002B00040003000000090000001B0000001C0004000800000003000000090000001E000300070000000800000020000400060000000C000000070000003B000400060000000A0000000C0000002B000400030000000E000000010000001C0004000D000000030000000E0000001E0003000C0000000D000000200004000B0000000C0000000C0000003B0004000B0000000F0000000C00000013000200110000002100030010000000110000002B0004000300000014000000200000002B0004000300000015000000000000002B00040003000000160000000900000020000400170000000C0000000300000014000200200000003600050011000000120000000000000010000000F8000200130000004100060017000000180000000F000
00015000000150000003D0004000300000019000000180000003D000400020000001A0000000500000051000500030000001B0000001A000000000000003D000400020000001C0000000400000051000500030000001D0000001C0000000000000084000500030000001E0000001B0000001400000080000500030000001F0000001E0000001D000000B100050020000000210000001F00000009000000F70003002400000000000000FA000400210000002300000024000000F8000200230000008700050003000000250000001F000000160000000C000600030000002600000027000000050000001F0000000C00060003000000280000002700000005000000160000008900050003000000290000002600000028000000AA000500200000002A0000001F000000260000007E000400030000002B00000029000000A9000600030000002C0000002A000000290000002B00000084000500030000002D000000250000001600000080000500030000002E0000002D0000002C00000041000600170000002F0000000A000000150000002E0000003E0003002F00000019000000F900020024000000F800020024000000FD0001003800010008000C0004000800"> : vector<1416xi8> | |
vm.global.ref @_executable_pad_test_dispatch_1 mutable : !vm.ref<!hal.executable> | |
vm.rodata @_pad_test_dispatch_1_vulkan_spirv_binary_spirv dense<"0x08000000535056452CFAFFFF08000000240000000100000004000000130000007061645F746573745F64697370617463685F310069010000030223070000010016000000350000000000000011000200010000000A000B005350565F4B48525F73746F726167655F6275666665725F73746F726167655F636C617373000000000B00060026000000474C534C2E7374642E343530000000000E00030000000000010000000F000A0005000000120000007061645F746573745F64697370617463685F3100050000000400000010000600120000001100000020000000010000000100000005000B00040000005F5F6275696C74696E5F7661725F4C6F63616C496E766F636174696F6E49645F5F00000005000900050000005F5F6275696C74696E5F7661725F576F726B67726F757049645F5F00050009000A0000005F5F7265736F757263655F7661725F3138343135323936305F5F0000050009000F0000005F5F7265736F757263655F7661725F3138333735323534345F5F000005000700120000007061645F746573745F64697370617463685F310047000400040000000B0000001B00000047000400050000000B0000001A000000470004000800000006000000040000004800050007000000000000002300000000000000470003000700000002000000470004000A0000002100000001000000470004000A0000002200000000000000470004000D0000000600000004000000480005000C000000000000002300000000000000470003000C00000002000000470004000F0000002100000000000000470004000F00000022000000000000001500040003000000200000000000000017000400020000000300000003000000200004000100000001000000020000003B0004000100000004000000010000003B0004000100000005000000010000002B00040003000000090000001B0000001C0004000800000003000000090000001E000300070000000800000020000400060000000C000000070000003B000400060000000A0000000C0000002B000400030000000E000000060000001C0004000D000000030000000E0000001E0003000C0000000D000000200004000B0000000C0000000C0000003B0004000B0000000F0000000C00000013000200110000002100030010000000110000002B0004000300000014000000010000002B0004000300000015000000200000002B0004000300000016000000030000002B0004000300000017000000000000002B000400030000001800000009000000140002001F000000200004002E0000000C00000003000000360005001100000012000
0000000000010000000F8000200130000003D00040002000000190000000500000051000500030000001A00000019000000000000003D000400020000001B0000000400000051000500030000001C0000001B0000000000000084000500030000001D0000001A0000001500000080000500030000001E0000001D0000001C000000B10005001F000000200000001E0000000E000000F70003002300000000000000FA000400200000002200000023000000F8000200220000008700050003000000240000001E000000160000000C000600030000002500000026000000050000001E0000000C00060003000000270000002600000005000000160000008900050003000000280000002500000027000000AA0005001F000000290000001E000000250000007E000400030000002A00000028000000A9000600030000002B00000029000000280000002A00000084000500030000002C000000240000001600000080000500030000002D0000002C0000002B000000410006002E0000002F0000000F000000170000002D0000003D00040003000000300000002F0000008000050003000000310000002B0000001400000084000500030000003200000024000000180000008000050003000000330000003200000031000000410006002E000000340000000A00000017000000330000003E0003003400000030000000F900020023000000F800020023000000FD0001003800010008000C0004000800"> : vector<1508xi8> | |
// Expected 3x9 result; @pad_test wraps it in a buffer view and passes it as the
// right-hand side of check.expect_eq.
vm.rodata @pad_test_const dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
// Scalar i32 zero; bound (4 bytes) as the first buffer of dispatch 0.
// Presumably the padding value of the original mhlo.pad - confirm against earlier dumps.
vm.rodata @pad_test_const_0 dense<0> : tensor<i32> | |
// 2x3 i32 input; bound (24 bytes) as the first buffer of dispatch 1.
vm.rodata @pad_test_const_1 dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
// Raw test body (exported as "pad_test$raw").
// Wraps the three rodata constants as HAL buffers, allocates a 108-byte output
// buffer, records two dispatches (each followed by an execution barrier) into one
// command buffer, submits it and waits, then compares the output against the
// expected constant with check.expect_eq.
// Fails with status 2 if the device-match flag loaded from @_device_match_id_0 is zero.
vm.func @pad_test() attributes {noinline} { | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_0 = vm.call @hal.device.allocator(%ref) : (!vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> | |
// %c50 / %c15 are passed as the memory_types / buffer_usage arguments of every
// allocator call below; %zero / %c-1 are the offset / length passed when wrapping rodata.
%c-1 = vm.const.i32 -1 : i32 | |
%c50 = vm.const.i32 50 : i32 | |
%c15 = vm.const.i32 15 : i32 | |
%zero = vm.const.i32.zero : i32 | |
// %ref_1: expected 3x9 result, only used for the final comparison.
%pad_test_const = vm.const.ref.rodata @pad_test_const : !vm.ref<!iree.byte_buffer> | |
%ref_1 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50, %c15, %pad_test_const, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
// %ref_2: scalar i32 constant, bound to dispatch 0.
%pad_test_const_0 = vm.const.ref.rodata @pad_test_const_0 : !vm.ref<!iree.byte_buffer> | |
%ref_2 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50, %c15, %pad_test_const_0, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
// %ref_3: 2x3 i32 input constant, bound to dispatch 1.
%pad_test_const_1 = vm.const.ref.rodata @pad_test_const_1 : !vm.ref<!iree.byte_buffer> | |
%ref_3 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50, %c15, %pad_test_const_1, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
// %ref_4: output buffer shared by both dispatches; 108 bytes = 3 * 9 * 4 (a 3x9 i32 result).
%c108 = vm.const.i32 108 : i32 | |
%ref_4 = vm.call @hal.allocator.allocate(%ref_0, %c50, %c15, %c108) : (!vm.ref<!hal.allocator>, i32, i32, i32) -> !vm.ref<!hal.buffer> | |
// Command buffer created with modes = 1 and command_categories = 3.
%c1 = vm.const.i32 1 : i32 | |
%c3 = vm.const.i32 3 : i32 | |
%ref_5 = vm.call @hal.command_buffer.create(%ref, %c1, %c3) : (!vm.ref<!hal.device>, i32, i32) -> !vm.ref<!hal.command_buffer> | |
vm.call @hal.command_buffer.begin(%ref_5) : (!vm.ref<!hal.command_buffer>) -> () | |
// Bindings for dispatch 0 in set 0: (0, %ref_2, 0, 4) and (1, %ref_4, 0, 108).
// Tuple fields are presumably (binding, buffer, offset, length) - confirm against the HAL import.
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%c4 = vm.const.i32 4 : i32 | |
vm.call.variadic @hal.command_buffer.push_descriptor_set(%ref_5, %_executable_layout_0, %zero, [(%zero, %ref_2, %zero, %c4), (%c1, %ref_4, %zero, %c108)]) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable_layout>, i32, tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) | |
// NOTE(review): the branch happens after recording has begun; on the ^bb2 path the
// command buffer is never ended or submitted.
%_device_match_id_0 = vm.global.load.i32 @_device_match_id_0 : i32 | |
vm.cond_br %_device_match_id_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
// Dispatch 0: entry point 0 with workgroup counts 1 x 1 x 1.
%_executable_pad_test_dispatch_0 = vm.global.load.ref @_executable_pad_test_dispatch_0 : !vm.ref<!hal.executable> | |
vm.call @hal.command_buffer.dispatch(%ref_5, %_executable_pad_test_dispatch_0, %zero, %c1, %c1, %c1) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32) -> () | |
// Barrier between the two dispatches: source_stage_mask = 20, target_stage_mask = 5, flags = 0.
%c20 = vm.const.i32 20 : i32 | |
%c5 = vm.const.i32 5 : i32 | |
vm.call @hal.command_buffer.execution_barrier(%ref_5, %c20, %c5, %zero) : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> () | |
// Bindings for dispatch 1 in set 0: (0, %ref_3, 0, 24) and (1, %ref_4, 0, 108);
// 24 bytes = 2 * 3 * 4 (the 2x3 i32 input).
%_executable_layout_1 = vm.global.load.ref @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
%c24 = vm.const.i32 24 : i32 | |
vm.call.variadic @hal.command_buffer.push_descriptor_set(%ref_5, %_executable_layout_1, %zero, [(%zero, %ref_3, %zero, %c24), (%c1, %ref_4, %zero, %c108)]) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable_layout>, i32, tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) | |
// Dispatch 1: entry point 0 with workgroup counts 1 x 1 x 1, followed by the same barrier.
%_executable_pad_test_dispatch_1 = vm.global.load.ref @_executable_pad_test_dispatch_1 : !vm.ref<!hal.executable> | |
vm.call @hal.command_buffer.dispatch(%ref_5, %_executable_pad_test_dispatch_1, %zero, %c1, %c1, %c1) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.execution_barrier(%ref_5, %c20, %c5, %zero) : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.end(%ref_5) : (!vm.ref<!hal.command_buffer>) -> () | |
vm.call @hal.ex.submit_and_wait(%ref, %ref_5) : (!vm.ref<!hal.device>, !vm.ref<!hal.command_buffer>) -> () | |
// View both the computed output (%ref_4) and the expected constant (%ref_1) as
// shape [3, 9] with element type code 16777248 (0x01000020), then compare them.
// Presumably that code denotes a 32-bit integer element - confirm against the HAL encoding.
%c9 = vm.const.i32 9 : i32 | |
%c16777248 = vm.const.i32 16777248 : i32 | |
%ref_6 = vm.call.variadic @hal.buffer_view.create(%ref_4, %c16777248, [%c3, %c9]) : (!vm.ref<!hal.buffer>, i32, i32 ...) -> !vm.ref<!hal.buffer_view> | |
%ref_7 = vm.call.variadic @hal.buffer_view.create(%ref_1, %c16777248, [%c3, %c9]) : (!vm.ref<!hal.buffer>, i32, i32 ...) -> !vm.ref<!hal.buffer_view> | |
vm.call @check.expect_eq(%ref_6, %ref_7) : (!vm.ref<!hal.buffer_view>, !vm.ref<!hal.buffer_view>) -> () | |
vm.return | |
^bb2: // pred: ^bb0 | |
// Taken when the device-match flag is zero: fail with status 2.
%c2 = vm.const.i32 2 : i32 | |
vm.fail %c2, "unreachable location reached" | |
} | |
vm.export @pad_test as("pad_test$raw") | |
// Semaphore-driven wrapper around @pad_test.
//   %arg0 / %arg1: semaphore and minimum value to await before running.
//   %arg2 / %arg3: semaphore and value to signal after @pad_test returns.
// A nonzero result from hal.semaphore.await is forwarded to vm.fail as the status.
vm.func @pad_test$async(%arg0: !vm.ref<!hal.semaphore>, %arg1: i32, %arg2: !vm.ref<!hal.semaphore>, %arg3: i32) { | |
%0 = vm.call @hal.semaphore.await(%arg0, %arg1) : (!vm.ref<!hal.semaphore>, i32) -> i32 | |
// Nonzero await result -> failure block; zero -> run the test.
vm.cond_br %0, ^bb2(%0 : i32), ^bb1 | |
^bb1: // pred: ^bb0 | |
vm.call @pad_test() : () -> () | |
vm.call @hal.semaphore.signal(%arg2, %arg3) : (!vm.ref<!hal.semaphore>, i32) -> () | |
vm.return | |
^bb2(%1: i32): // pred: ^bb0 | |
vm.fail %1, "semaphore wait failed" | |
} | |
vm.export @pad_test$async | |
// Synchronous wrapper, exported under the public name "pad_test".
// Creates a semaphore with initial value 0, awaits value 0, runs @pad_test,
// signals the semaphore to 1 and awaits 1. A nonzero result from either await
// is forwarded to vm.fail as the status.
vm.func @pad_test$sync() attributes {iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%zero = vm.const.i32.zero : i32 | |
%ref_0 = vm.call @hal.semaphore.create(%ref, %zero) : (!vm.ref<!hal.device>, i32) -> !vm.ref<!hal.semaphore> | |
// First wait targets the semaphore's own initial value (0).
%0 = vm.call @hal.semaphore.await(%ref_0, %zero) : (!vm.ref<!hal.semaphore>, i32) -> i32 | |
vm.cond_br %0, ^bb2(%0 : i32), ^bb1 | |
^bb1: // pred: ^bb0 | |
vm.call @pad_test() : () -> () | |
// Signal completion (value 1) and wait for that same value before returning.
%c1 = vm.const.i32 1 : i32 | |
vm.call @hal.semaphore.signal(%ref_0, %c1) : (!vm.ref<!hal.semaphore>, i32) -> () | |
%1 = vm.call @hal.semaphore.await(%ref_0, %c1) : (!vm.ref<!hal.semaphore>, i32) -> i32 | |
vm.cond_br %1, ^bb2(%1 : i32), ^bb3 | |
^bb2(%2: i32): // 2 preds: ^bb0, ^bb1 | |
// Shared failure block for both awaits; %2 carries the nonzero await result.
vm.fail %2, "semaphore wait failed" | |
^bb3: // pred: ^bb1 | |
vm.return | |
} | |
vm.export @pad_test$sync as("pad_test") | |
vm.import @check.expect_eq(%lhs : !vm.ref<!hal.buffer_view>, %rhs : !vm.ref<!hal.buffer_view>) attributes {sym_visibility = "private"} | |
vm.import @hal.ex.shared_device() -> !vm.ref<!hal.device> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.ex.submit_and_wait(%device : !vm.ref<!hal.device>, %command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.allocate(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %allocation_size : i32) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.wrap.byte_buffer(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %source : !vm.ref<!iree.byte_buffer>, %offset : i32, %length : i32) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.buffer_view.create(%buffer : !vm.ref<!hal.buffer>, %element_type : i32, %shape : i32 ...) -> !vm.ref<!hal.buffer_view> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.command_buffer.create(%device : !vm.ref<!hal.device>, %modes : i32, %command_categories : i32) -> !vm.ref<!hal.command_buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.begin(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.end(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.execution_barrier(%command_buffer : !vm.ref<!hal.command_buffer>, %source_stage_mask : i32, %target_stage_mask : i32, %flags : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.push_descriptor_set(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %set : i32, %bindings : tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.dispatch(%command_buffer : !vm.ref<!hal.command_buffer>, %executable : !vm.ref<!hal.executable>, %entry_point : i32, %workgroup_x : i32, %workgroup_y : i32, %workgroup_z : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.descriptor_set_layout.create(%device : !vm.ref<!hal.device>, %usage_type : i32, %bindings : tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.device.allocator(%device : !vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.device.match.id(%device : !vm.ref<!hal.device>, %pattern : !vm.ref<!iree.byte_buffer>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.executable.create(%device : !vm.ref<!hal.device>, %executable_format : i32, %executable_data : !vm.ref<!iree.byte_buffer>, %executable_layouts : !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.executable_layout.create(%device : !vm.ref<!hal.device>, %push_constants : i32, %set_layouts : !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.semaphore.create(%device : !vm.ref<!hal.device>, %initial_value : i32) -> !vm.ref<!hal.semaphore> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.semaphore.signal(%semaphore : !vm.ref<!hal.semaphore>, %new_value : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.await(%semaphore : !vm.ref<!hal.semaphore>, %min_value : i32) -> i32 attributes {sym_visibility = "private"} | |
// Module initializer: fills in every module-level global.
//   1. Matches the shared device against the "vulkan*" pattern and stores the result
//      in @_device_match_id_0.
//   2. Creates descriptor-set layout + executable layout 0 and 1.
//   3. For each of the two dispatches, creates the executable from its embedded
//      SPIR-V rodata when the match flag is nonzero, otherwise stores a null ref.
vm.func @__init() { | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%_utf8_vulkan_7197BF52A22CAFD7 = vm.const.ref.rodata @_utf8_vulkan_7197BF52A22CAFD7 : !vm.ref<!iree.byte_buffer> | |
%0 = vm.call @hal.device.match.id(%ref, %_utf8_vulkan_7197BF52A22CAFD7) : (!vm.ref<!hal.device>, !vm.ref<!iree.byte_buffer>) -> i32 | |
vm.global.store.i32 %0, @_device_match_id_0 : i32 | |
%ref_0 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%c6 = vm.const.i32 6 : i32 | |
%c1 = vm.const.i32 1 : i32 | |
%c7 = vm.const.i32 7 : i32 | |
%zero = vm.const.i32.zero : i32 | |
// Descriptor-set layout 0: usage_type = 1, bindings (0, 7, 1) and (1, 7, 6).
// The meaning of the three tuple fields is not visible in this dump - confirm against the HAL import.
%ref_1 = vm.call.variadic @hal.descriptor_set_layout.create(%ref_0, %c1, [(%zero, %c7, %c1), (%c1, %c7, %c6)]) : (!vm.ref<!hal.device>, i32, tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> | |
vm.global.store.ref %ref_1, @_descriptor_set_layout_0 : !vm.ref<!hal.descriptor_set_layout> | |
%_descriptor_set_layout_0 = vm.global.load.ref @_descriptor_set_layout_0 : !vm.ref<!hal.descriptor_set_layout> | |
%ref_2 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
// Executable layout 0: push_constants = 0, one set layout.
%ref_3 = vm.call.variadic @hal.executable_layout.create(%ref_2, %zero, [%_descriptor_set_layout_0]) : (!vm.ref<!hal.device>, i32, !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> | |
vm.global.store.ref %ref_3, @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%ref_4 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%c3 = vm.const.i32 3 : i32 | |
// Descriptor-set layout 1: same as layout 0 except the last field of binding 1 is 3 instead of 6.
%ref_5 = vm.call.variadic @hal.descriptor_set_layout.create(%ref_4, %c1, [(%zero, %c7, %c1), (%c1, %c7, %c3)]) : (!vm.ref<!hal.device>, i32, tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> | |
vm.global.store.ref %ref_5, @_descriptor_set_layout_1 : !vm.ref<!hal.descriptor_set_layout> | |
%_descriptor_set_layout_1 = vm.global.load.ref @_descriptor_set_layout_1 : !vm.ref<!hal.descriptor_set_layout> | |
%ref_6 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
// Executable layout 1: push_constants = 0, one set layout.
%ref_7 = vm.call.variadic @hal.executable_layout.create(%ref_6, %zero, [%_descriptor_set_layout_1]) : (!vm.ref<!hal.device>, i32, !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> | |
vm.global.store.ref %ref_7, @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
%ref_8 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%_device_match_id_0 = vm.global.load.i32 @_device_match_id_0 : i32 | |
// Executable format 1397773893 = 0x53505645, i.e. the ASCII bytes "SPVE".
%c1397773893 = vm.const.i32 1397773893 : i32 | |
// Null executable stored when the device does not match.
%null = vm.const.ref.zero : !vm.ref<!hal.executable> | |
vm.cond_br %_device_match_id_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
// Device matched: build the dispatch 0 executable from its SPIR-V rodata and layout 0.
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%_pad_test_dispatch_0_vulkan_spirv_binary_spirv = vm.const.ref.rodata @_pad_test_dispatch_0_vulkan_spirv_binary_spirv : !vm.ref<!iree.byte_buffer> | |
%ref_9 = vm.call.variadic @hal.executable.create(%ref_8, %c1397773893, %_pad_test_dispatch_0_vulkan_spirv_binary_spirv, [%_executable_layout_0]) : (!vm.ref<!hal.device>, i32, !vm.ref<!iree.byte_buffer>, !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> | |
vm.br ^bb3(%ref_9 : !vm.ref<!hal.executable>) | |
^bb2: // pred: ^bb0 | |
vm.br ^bb3(%null : !vm.ref<!hal.executable>) | |
^bb3(%1: !vm.ref<!hal.executable>): // 2 preds: ^bb1, ^bb2 | |
vm.global.store.ref %1, @_executable_pad_test_dispatch_0 : !vm.ref<!hal.executable> | |
%ref_10 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
// The match flag is reloaded and the same create-or-null pattern is repeated for dispatch 1.
%_device_match_id_0_11 = vm.global.load.i32 @_device_match_id_0 : i32 | |
vm.cond_br %_device_match_id_0_11, ^bb4, ^bb5 | |
^bb4: // pred: ^bb3 | |
%_executable_layout_1 = vm.global.load.ref @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
%_pad_test_dispatch_1_vulkan_spirv_binary_spirv = vm.const.ref.rodata @_pad_test_dispatch_1_vulkan_spirv_binary_spirv : !vm.ref<!iree.byte_buffer> | |
%ref_12 = vm.call.variadic @hal.executable.create(%ref_10, %c1397773893, %_pad_test_dispatch_1_vulkan_spirv_binary_spirv, [%_executable_layout_1]) : (!vm.ref<!hal.device>, i32, !vm.ref<!iree.byte_buffer>, !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> | |
vm.br ^bb6(%ref_12 : !vm.ref<!hal.executable>) | |
^bb5: // pred: ^bb3 | |
vm.br ^bb6(%null : !vm.ref<!hal.executable>) | |
^bb6(%2: !vm.ref<!hal.executable>): // 2 preds: ^bb4, ^bb5 | |
vm.global.store.ref %2, @_executable_pad_test_dispatch_1 : !vm.ref<!hal.executable> | |
vm.return | |
} | |
vm.export @__init | |
} | |
} | |
module { | |
vm.module @module { | |
vm.global.i32 @_device_match_id_0 mutable : i32 | |
vm.rodata @_utf8_vulkan_7197BF52A22CAFD7 dense<[118, 117, 108, 107, 97, 110, 42]> : vector<7xi8> | |
vm.global.ref @_descriptor_set_layout_0 mutable : !vm.ref<!hal.descriptor_set_layout> | |
vm.global.ref @_executable_layout_0 mutable : !vm.ref<!hal.executable_layout> | |
vm.global.ref @_descriptor_set_layout_1 mutable : !vm.ref<!hal.descriptor_set_layout> | |
vm.global.ref @_executable_layout_1 mutable : !vm.ref<!hal.executable_layout> | |
vm.global.ref @_executable_pad_test_dispatch_0 mutable : !vm.ref<!hal.executable> | |
vm.rodata @_pad_test_dispatch_0_vulkan_spirv_binary_spirv dense<"0x080000005350564588FAFFFF08000000240000000100000004000000130000007061645F746573745F64697370617463685F300052010000030223070000010016000000300000000000000011000200010000000A000B005350565F4B48525F73746F726167655F6275666665725F73746F726167655F636C617373000000000B00060027000000474C534C2E7374642E343530000000000E00030000000000010000000F000A0005000000120000007061645F746573745F64697370617463685F3000050000000400000010000600120000001100000020000000010000000100000005000B00040000005F5F6275696C74696E5F7661725F4C6F63616C496E766F636174696F6E49645F5F00000005000900050000005F5F6275696C74696E5F7661725F576F726B67726F757049645F5F00050009000A0000005F5F7265736F757263655F7661725F3138333936383030305F5F0000050009000F0000005F5F7265736F757263655F7661725F3138333735323534345F5F000005000700120000007061645F746573745F64697370617463685F300047000400040000000B0000001B00000047000400050000000B0000001A000000470004000800000006000000040000004800050007000000000000002300000000000000470003000700000002000000470004000A0000002100000001000000470004000A0000002200000000000000470004000D0000000600000004000000480005000C000000000000002300000000000000470003000C00000002000000470004000F0000002100000000000000470004000F00000022000000000000001500040003000000200000000000000017000400020000000300000003000000200004000100000001000000020000003B0004000100000004000000010000003B0004000100000005000000010000002B00040003000000090000001B0000001C0004000800000003000000090000001E000300070000000800000020000400060000000C000000070000003B000400060000000A0000000C0000002B000400030000000E000000010000001C0004000D000000030000000E0000001E0003000C0000000D000000200004000B0000000C0000000C0000003B0004000B0000000F0000000C00000013000200110000002100030010000000110000002B0004000300000014000000200000002B0004000300000015000000000000002B00040003000000160000000900000020000400170000000C0000000300000014000200200000003600050011000000120000000000000010000000F8000200130000004100060017000000180000000F000
00015000000150000003D0004000300000019000000180000003D000400020000001A0000000500000051000500030000001B0000001A000000000000003D000400020000001C0000000400000051000500030000001D0000001C0000000000000084000500030000001E0000001B0000001400000080000500030000001F0000001E0000001D000000B100050020000000210000001F00000009000000F70003002400000000000000FA000400210000002300000024000000F8000200230000008700050003000000250000001F000000160000000C000600030000002600000027000000050000001F0000000C00060003000000280000002700000005000000160000008900050003000000290000002600000028000000AA000500200000002A0000001F000000260000007E000400030000002B00000029000000A9000600030000002C0000002A000000290000002B00000084000500030000002D000000250000001600000080000500030000002E0000002D0000002C00000041000600170000002F0000000A000000150000002E0000003E0003002F00000019000000F900020024000000F800020024000000FD0001003800010008000C0004000800"> : vector<1416xi8> | |
vm.global.ref @_executable_pad_test_dispatch_1 mutable : !vm.ref<!hal.executable> | |
vm.rodata @_pad_test_dispatch_1_vulkan_spirv_binary_spirv dense<"0x08000000535056452CFAFFFF08000000240000000100000004000000130000007061645F746573745F64697370617463685F310069010000030223070000010016000000350000000000000011000200010000000A000B005350565F4B48525F73746F726167655F6275666665725F73746F726167655F636C617373000000000B00060026000000474C534C2E7374642E343530000000000E00030000000000010000000F000A0005000000120000007061645F746573745F64697370617463685F3100050000000400000010000600120000001100000020000000010000000100000005000B00040000005F5F6275696C74696E5F7661725F4C6F63616C496E766F636174696F6E49645F5F00000005000900050000005F5F6275696C74696E5F7661725F576F726B67726F757049645F5F00050009000A0000005F5F7265736F757263655F7661725F3138343135323936305F5F0000050009000F0000005F5F7265736F757263655F7661725F3138333735323534345F5F000005000700120000007061645F746573745F64697370617463685F310047000400040000000B0000001B00000047000400050000000B0000001A000000470004000800000006000000040000004800050007000000000000002300000000000000470003000700000002000000470004000A0000002100000001000000470004000A0000002200000000000000470004000D0000000600000004000000480005000C000000000000002300000000000000470003000C00000002000000470004000F0000002100000000000000470004000F00000022000000000000001500040003000000200000000000000017000400020000000300000003000000200004000100000001000000020000003B0004000100000004000000010000003B0004000100000005000000010000002B00040003000000090000001B0000001C0004000800000003000000090000001E000300070000000800000020000400060000000C000000070000003B000400060000000A0000000C0000002B000400030000000E000000060000001C0004000D000000030000000E0000001E0003000C0000000D000000200004000B0000000C0000000C0000003B0004000B0000000F0000000C00000013000200110000002100030010000000110000002B0004000300000014000000010000002B0004000300000015000000200000002B0004000300000016000000030000002B0004000300000017000000000000002B000400030000001800000009000000140002001F000000200004002E0000000C00000003000000360005001100000012000
0000000000010000000F8000200130000003D00040002000000190000000500000051000500030000001A00000019000000000000003D000400020000001B0000000400000051000500030000001C0000001B0000000000000084000500030000001D0000001A0000001500000080000500030000001E0000001D0000001C000000B10005001F000000200000001E0000000E000000F70003002300000000000000FA000400200000002200000023000000F8000200220000008700050003000000240000001E000000160000000C000600030000002500000026000000050000001E0000000C00060003000000270000002600000005000000160000008900050003000000280000002500000027000000AA0005001F000000290000001E000000250000007E000400030000002A00000028000000A9000600030000002B00000029000000280000002A00000084000500030000002C000000240000001600000080000500030000002D0000002C0000002B000000410006002E0000002F0000000F000000170000002D0000003D00040003000000300000002F0000008000050003000000310000002B0000001400000084000500030000003200000024000000180000008000050003000000330000003200000031000000410006002E000000340000000A00000017000000330000003E0003003400000030000000F900020023000000F800020023000000FD0001003800010008000C0004000800"> : vector<1508xi8> | |
vm.rodata @pad_test_const dense<[[0, 1, 2, 3, 0, 0, 0, 0, 0], [0, 4, 5, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]> : tensor<3x9xi32> | |
vm.rodata @pad_test_const_0 dense<0> : tensor<i32> | |
vm.rodata @pad_test_const_1 dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> | |
// @pad_test: records and submits a command buffer containing two dispatches
// (@_executable_pad_test_dispatch_0 then @_executable_pad_test_dispatch_1),
// both of which bind the same freshly allocated 108-byte buffer, and then
// compares that buffer against the @pad_test_const rodata via
// @check.expect_eq. Takes no arguments and returns nothing; fails with
// status 2 when @_device_match_id_0 is zero.
vm.func @pad_test() attributes {noinline} { | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%ref_0 = vm.call @hal.device.allocator(%ref) : (!vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> | |
// %c-1 is passed as the %length argument of every wrap.byte_buffer call
// below. NOTE(review): presumably -1 means "whole buffer" -- confirm.
%c-1 = vm.const.i32 -1 : i32 | |
// %c50 / %c15 are passed as %memory_types / %buffer_usage to all wrap and
// allocate calls; the meaning of these bitfield values is not visible here.
%c50 = vm.const.i32 50 : i32 | |
%c15 = vm.const.i32 15 : i32 | |
%zero = vm.const.i32.zero : i32 | |
// %ref_1 wraps @pad_test_const (declared as tensor<3x9xi32>); it is only
// used as the right-hand side of the final check.expect_eq.
%pad_test_const = vm.const.ref.rodata @pad_test_const : !vm.ref<!iree.byte_buffer> | |
%ref_1 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50, %c15, %pad_test_const, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
// %ref_2 wraps @pad_test_const_0 (declared as tensor<i32>, value 0); bound
// for dispatch 0.
%pad_test_const_0 = vm.const.ref.rodata @pad_test_const_0 : !vm.ref<!iree.byte_buffer> | |
%ref_2 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50, %c15, %pad_test_const_0, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
// %ref_3 wraps @pad_test_const_1 (declared as tensor<2x3xi32>); bound for
// dispatch 1.
%pad_test_const_1 = vm.const.ref.rodata @pad_test_const_1 : !vm.ref<!iree.byte_buffer> | |
%ref_3 = vm.call @hal.allocator.wrap.byte_buffer(%ref_0, %c50, %c15, %pad_test_const_1, %zero, %c-1) : (!vm.ref<!hal.allocator>, i32, i32, !vm.ref<!iree.byte_buffer>, i32, i32) -> !vm.ref<!hal.buffer> | |
// %ref_4 is the buffer both dispatches bind and that is checked at the end.
// 108 == 3 * 9 * 4, i.e. the byte size of a 3x9 tensor of 32-bit elements.
%c108 = vm.const.i32 108 : i32 | |
%ref_4 = vm.call @hal.allocator.allocate(%ref_0, %c50, %c15, %c108) : (!vm.ref<!hal.allocator>, i32, i32, i32) -> !vm.ref<!hal.buffer> | |
%c1 = vm.const.i32 1 : i32 | |
%c3 = vm.const.i32 3 : i32 | |
// Command buffer created with %modes = 1 and %command_categories = 3.
%ref_5 = vm.call @hal.command_buffer.create(%ref, %c1, %c3) : (!vm.ref<!hal.device>, i32, i32) -> !vm.ref<!hal.command_buffer> | |
vm.call @hal.command_buffer.begin(%ref_5) : (!vm.ref<!hal.command_buffer>) -> () | |
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%c4 = vm.const.i32 4 : i32 | |
// Set 0 for dispatch 0: (0, %ref_2, 0, 4) and (1, %ref_4, 0, 108).
// NOTE(review): tuple fields are presumably (binding, buffer, offset,
// length); the import only gives the tuple type -- confirm.
vm.call.variadic @hal.command_buffer.push_descriptor_set(%ref_5, %_executable_layout_0, %zero, [(%zero, %ref_2, %zero, %c4), (%c1, %ref_4, %zero, %c108)]) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable_layout>, i32, tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) | |
// Branch on the device-match flag stored by @__init. Note that the command
// buffer has already been begun and had a descriptor set pushed before this
// check; the ^bb2 path fails without calling command_buffer.end.
%_device_match_id_0 = vm.global.load.i32 @_device_match_id_0 : i32 | |
vm.cond_br %_device_match_id_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
// Dispatch 0: entry point 0, workgroup counts 1 x 1 x 1.
%_executable_pad_test_dispatch_0 = vm.global.load.ref @_executable_pad_test_dispatch_0 : !vm.ref<!hal.executable> | |
vm.call @hal.command_buffer.dispatch(%ref_5, %_executable_pad_test_dispatch_0, %zero, %c1, %c1, %c1) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32) -> () | |
// Barrier between the two dispatches: %source_stage_mask = 20,
// %target_stage_mask = 5, %flags = 0 (stage bit meanings not visible here).
%c20 = vm.const.i32 20 : i32 | |
%c5 = vm.const.i32 5 : i32 | |
vm.call @hal.command_buffer.execution_barrier(%ref_5, %c20, %c5, %zero) : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> () | |
%_executable_layout_1 = vm.global.load.ref @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
// 24 == 2 * 3 * 4, the byte size of the tensor<2x3xi32> rodata in %ref_3.
%c24 = vm.const.i32 24 : i32 | |
// Set 0 for dispatch 1: (0, %ref_3, 0, 24) and (1, %ref_4, 0, 108) -- the
// same %ref_4 buffer that dispatch 0 was bound to.
vm.call.variadic @hal.command_buffer.push_descriptor_set(%ref_5, %_executable_layout_1, %zero, [(%zero, %ref_3, %zero, %c24), (%c1, %ref_4, %zero, %c108)]) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable_layout>, i32, tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) | |
// Dispatch 1: entry point 0, workgroup counts 1 x 1 x 1.
%_executable_pad_test_dispatch_1 = vm.global.load.ref @_executable_pad_test_dispatch_1 : !vm.ref<!hal.executable> | |
vm.call @hal.command_buffer.dispatch(%ref_5, %_executable_pad_test_dispatch_1, %zero, %c1, %c1, %c1) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.execution_barrier(%ref_5, %c20, %c5, %zero) : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.end(%ref_5) : (!vm.ref<!hal.command_buffer>) -> () | |
vm.call @hal.ex.submit_and_wait(%ref, %ref_5) : (!vm.ref<!hal.device>, !vm.ref<!hal.command_buffer>) -> () | |
// Build two buffer views with shape [3, 9] and %element_type = 16777248
// (0x01000020) over the computed buffer (%ref_4) and the expected constant
// (%ref_1), then compare them.
%c9 = vm.const.i32 9 : i32 | |
%c16777248 = vm.const.i32 16777248 : i32 | |
%ref_6 = vm.call.variadic @hal.buffer_view.create(%ref_4, %c16777248, [%c3, %c9]) : (!vm.ref<!hal.buffer>, i32, i32 ...) -> !vm.ref<!hal.buffer_view> | |
%ref_7 = vm.call.variadic @hal.buffer_view.create(%ref_1, %c16777248, [%c3, %c9]) : (!vm.ref<!hal.buffer>, i32, i32 ...) -> !vm.ref<!hal.buffer_view> | |
vm.call @check.expect_eq(%ref_6, %ref_7) : (!vm.ref<!hal.buffer_view>, !vm.ref<!hal.buffer_view>) -> () | |
vm.return | |
^bb2: // pred: ^bb0 | |
// Taken when @_device_match_id_0 is zero: fail with status code 2.
%c2 = vm.const.i32 2 : i32 | |
vm.fail %c2, "unreachable location reached" | |
} | |
vm.export @pad_test as("pad_test$raw") | |
// @pad_test$async: semaphore-bracketed wrapper around @pad_test.
//   %arg0 / %arg1: semaphore and minimum value to await before running.
//   %arg2 / %arg3: semaphore and new value to signal after @pad_test returns.
// Fails with the await's result code if that result is non-zero.
vm.func @pad_test$async(%arg0: !vm.ref<!hal.semaphore>, %arg1: i32, %arg2: !vm.ref<!hal.semaphore>, %arg3: i32) { | |
%0 = vm.call @hal.semaphore.await(%arg0, %arg1) : (!vm.ref<!hal.semaphore>, i32) -> i32 | |
// A non-zero await result is forwarded to the failure block as its status.
vm.cond_br %0, ^bb2(%0 : i32), ^bb1 | |
^bb1: // pred: ^bb0 | |
vm.call @pad_test() : () -> () | |
vm.call @hal.semaphore.signal(%arg2, %arg3) : (!vm.ref<!hal.semaphore>, i32) -> () | |
vm.return | |
^bb2(%1: i32): // pred: ^bb0 | |
vm.fail %1, "semaphore wait failed" | |
} | |
vm.export @pad_test$async | |
// @pad_test$sync: synchronous wrapper around @pad_test (exported under the
// name "pad_test"). Creates a semaphore on the shared device with initial
// value 0, awaits it at 0, runs @pad_test, signals the semaphore to 1 and
// awaits that value. Either await returning non-zero routes to ^bb2, which
// fails with that result as the status code.
vm.func @pad_test$sync() attributes {iree.reflection = {f = "I1!R1!", fv = "1"}} { | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%zero = vm.const.i32.zero : i32 | |
%ref_0 = vm.call @hal.semaphore.create(%ref, %zero) : (!vm.ref<!hal.device>, i32) -> !vm.ref<!hal.semaphore> | |
// Await the initial value (0) the semaphore was just created with.
%0 = vm.call @hal.semaphore.await(%ref_0, %zero) : (!vm.ref<!hal.semaphore>, i32) -> i32 | |
vm.cond_br %0, ^bb2(%0 : i32), ^bb1 | |
^bb1: // pred: ^bb0 | |
vm.call @pad_test() : () -> () | |
%c1 = vm.const.i32 1 : i32 | |
// Signal to 1 and immediately await 1 on the same semaphore.
vm.call @hal.semaphore.signal(%ref_0, %c1) : (!vm.ref<!hal.semaphore>, i32) -> () | |
%1 = vm.call @hal.semaphore.await(%ref_0, %c1) : (!vm.ref<!hal.semaphore>, i32) -> i32 | |
vm.cond_br %1, ^bb2(%1 : i32), ^bb3 | |
^bb2(%2: i32): // 2 preds: ^bb0, ^bb1 | |
// Shared failure block for both awaits; %2 carries the non-zero result.
vm.fail %2, "semaphore wait failed" | |
^bb3: // pred: ^bb1 | |
vm.return | |
} | |
vm.export @pad_test$sync as("pad_test") | |
vm.import @check.expect_eq(%lhs : !vm.ref<!hal.buffer_view>, %rhs : !vm.ref<!hal.buffer_view>) attributes {sym_visibility = "private"} | |
vm.import @hal.ex.shared_device() -> !vm.ref<!hal.device> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.ex.submit_and_wait(%device : !vm.ref<!hal.device>, %command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.allocate(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %allocation_size : i32) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.wrap.byte_buffer(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %source : !vm.ref<!iree.byte_buffer>, %offset : i32, %length : i32) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.buffer_view.create(%buffer : !vm.ref<!hal.buffer>, %element_type : i32, %shape : i32 ...) -> !vm.ref<!hal.buffer_view> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.command_buffer.create(%device : !vm.ref<!hal.device>, %modes : i32, %command_categories : i32) -> !vm.ref<!hal.command_buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.begin(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.end(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.execution_barrier(%command_buffer : !vm.ref<!hal.command_buffer>, %source_stage_mask : i32, %target_stage_mask : i32, %flags : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.push_descriptor_set(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %set : i32, %bindings : tuple<i32, !vm.ref<!hal.buffer>, i32, i32> ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.dispatch(%command_buffer : !vm.ref<!hal.command_buffer>, %executable : !vm.ref<!hal.executable>, %entry_point : i32, %workgroup_x : i32, %workgroup_y : i32, %workgroup_z : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.descriptor_set_layout.create(%device : !vm.ref<!hal.device>, %usage_type : i32, %bindings : tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.device.allocator(%device : !vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.device.match.id(%device : !vm.ref<!hal.device>, %pattern : !vm.ref<!iree.byte_buffer>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.executable.create(%device : !vm.ref<!hal.device>, %executable_format : i32, %executable_data : !vm.ref<!iree.byte_buffer>, %executable_layouts : !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.executable_layout.create(%device : !vm.ref<!hal.device>, %push_constants : i32, %set_layouts : !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.semaphore.create(%device : !vm.ref<!hal.device>, %initial_value : i32) -> !vm.ref<!hal.semaphore> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.semaphore.signal(%semaphore : !vm.ref<!hal.semaphore>, %new_value : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.await(%semaphore : !vm.ref<!hal.semaphore>, %min_value : i32) -> i32 attributes {sym_visibility = "private"} | |
// @__init: populates the module's mutable globals.
//   1. @_device_match_id_0        <- result of hal.device.match.id against
//                                    the @_utf8_vulkan_... pattern rodata.
//   2. @_descriptor_set_layout_0/1 and @_executable_layout_0/1.
//   3. @_executable_pad_test_dispatch_0/1 <- executables created from the
//      embedded SPIR-V rodata when the device matched, otherwise a null ref.
vm.func @__init() { | |
%ref = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
// The pattern bytes [118, 117, 108, 107, 97, 110, 42] are ASCII "vulkan*".
%_utf8_vulkan_7197BF52A22CAFD7 = vm.const.ref.rodata @_utf8_vulkan_7197BF52A22CAFD7 : !vm.ref<!iree.byte_buffer> | |
%0 = vm.call @hal.device.match.id(%ref, %_utf8_vulkan_7197BF52A22CAFD7) : (!vm.ref<!hal.device>, !vm.ref<!iree.byte_buffer>) -> i32 | |
vm.global.store.i32 %0, @_device_match_id_0 : i32 | |
%ref_0 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%c6 = vm.const.i32 6 : i32 | |
%c1 = vm.const.i32 1 : i32 | |
%c7 = vm.const.i32 7 : i32 | |
%zero = vm.const.i32.zero : i32 | |
// Descriptor set layout 0: %usage_type = 1, bindings (0, 7, 1) and (1, 7, 6).
// NOTE(review): tuple fields are presumably (binding, descriptor type,
// access); the import only gives tuple<i32, i32, i32> -- confirm.
%ref_1 = vm.call.variadic @hal.descriptor_set_layout.create(%ref_0, %c1, [(%zero, %c7, %c1), (%c1, %c7, %c6)]) : (!vm.ref<!hal.device>, i32, tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> | |
vm.global.store.ref %ref_1, @_descriptor_set_layout_0 : !vm.ref<!hal.descriptor_set_layout> | |
// The layout is re-loaded from the global it was just stored to.
%_descriptor_set_layout_0 = vm.global.load.ref @_descriptor_set_layout_0 : !vm.ref<!hal.descriptor_set_layout> | |
%ref_2 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
// Executable layout 0: %push_constants = 0, single set layout.
%ref_3 = vm.call.variadic @hal.executable_layout.create(%ref_2, %zero, [%_descriptor_set_layout_0]) : (!vm.ref<!hal.device>, i32, !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> | |
vm.global.store.ref %ref_3, @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%ref_4 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%c3 = vm.const.i32 3 : i32 | |
// Descriptor set layout 1: same as layout 0 except the third field of
// binding 1 is 3 instead of 6.
%ref_5 = vm.call.variadic @hal.descriptor_set_layout.create(%ref_4, %c1, [(%zero, %c7, %c1), (%c1, %c7, %c3)]) : (!vm.ref<!hal.device>, i32, tuple<i32, i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> | |
vm.global.store.ref %ref_5, @_descriptor_set_layout_1 : !vm.ref<!hal.descriptor_set_layout> | |
%_descriptor_set_layout_1 = vm.global.load.ref @_descriptor_set_layout_1 : !vm.ref<!hal.descriptor_set_layout> | |
%ref_6 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
// Executable layout 1: %push_constants = 0, single set layout.
%ref_7 = vm.call.variadic @hal.executable_layout.create(%ref_6, %zero, [%_descriptor_set_layout_1]) : (!vm.ref<!hal.device>, i32, !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> | |
vm.global.store.ref %ref_7, @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
%ref_8 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
%_device_match_id_0 = vm.global.load.i32 @_device_match_id_0 : i32 | |
// %executable_format passed to both executable.create calls.
// 1397773893 == 0x53505645, whose bytes are ASCII "SPVE".
%c1397773893 = vm.const.i32 1397773893 : i32 | |
// Null executable ref stored into the globals when the device did not match.
%null = vm.const.ref.zero : !vm.ref<!hal.executable> | |
vm.cond_br %_device_match_id_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
// Device matched: create executable 0 from its SPIR-V rodata and layout 0.
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%_pad_test_dispatch_0_vulkan_spirv_binary_spirv = vm.const.ref.rodata @_pad_test_dispatch_0_vulkan_spirv_binary_spirv : !vm.ref<!iree.byte_buffer> | |
%ref_9 = vm.call.variadic @hal.executable.create(%ref_8, %c1397773893, %_pad_test_dispatch_0_vulkan_spirv_binary_spirv, [%_executable_layout_0]) : (!vm.ref<!hal.device>, i32, !vm.ref<!iree.byte_buffer>, !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> | |
vm.br ^bb3(%ref_9 : !vm.ref<!hal.executable>) | |
^bb2: // pred: ^bb0 | |
vm.br ^bb3(%null : !vm.ref<!hal.executable>) | |
^bb3(%1: !vm.ref<!hal.executable>): // 2 preds: ^bb1, ^bb2 | |
// %1 is either the created executable or the null ref.
vm.global.store.ref %1, @_executable_pad_test_dispatch_0 : !vm.ref<!hal.executable> | |
%ref_10 = vm.call @hal.ex.shared_device() : () -> !vm.ref<!hal.device> | |
// Same match flag is re-loaded and tested again for executable 1.
%_device_match_id_0_11 = vm.global.load.i32 @_device_match_id_0 : i32 | |
vm.cond_br %_device_match_id_0_11, ^bb4, ^bb5 | |
^bb4: // pred: ^bb3 | |
// Device matched: create executable 1 from its SPIR-V rodata and layout 1.
%_executable_layout_1 = vm.global.load.ref @_executable_layout_1 : !vm.ref<!hal.executable_layout> | |
%_pad_test_dispatch_1_vulkan_spirv_binary_spirv = vm.const.ref.rodata @_pad_test_dispatch_1_vulkan_spirv_binary_spirv : !vm.ref<!iree.byte_buffer> | |
%ref_12 = vm.call.variadic @hal.executable.create(%ref_10, %c1397773893, %_pad_test_dispatch_1_vulkan_spirv_binary_spirv, [%_executable_layout_1]) : (!vm.ref<!hal.device>, i32, !vm.ref<!iree.byte_buffer>, !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> | |
vm.br ^bb6(%ref_12 : !vm.ref<!hal.executable>) | |
^bb5: // pred: ^bb3 | |
vm.br ^bb6(%null : !vm.ref<!hal.executable>) | |
^bb6(%2: !vm.ref<!hal.executable>): // 2 preds: ^bb4, ^bb5 | |
vm.global.store.ref %2, @_executable_pad_test_dispatch_1 : !vm.ref<!hal.executable> | |
vm.return | |
} | |
vm.export @__init | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment