Created
June 15, 2022 19:41
-
-
Save benvanik/c9b4eabde8801ee66e39813e3392187a to your computer and use it in GitHub Desktop.
simple_mul.mlir
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"name": "(gdb) iree-compile", | |
"type": "cppdbg", | |
"request": "launch", | |
"preLaunchTask": "build-iree-compile", | |
"program": "${command:cmake.buildDirectory}/tools/iree-compile", | |
"args": [ | |
// "-iree-vm-bytecode-module-output-format=annotated-mlir-text", | |
"-iree-vm-bytecode-source-listing=${workspaceFolder}/../iree-tmp/vm.mlir", | |
"-iree-vm-emit-polyglot-zip=true", | |
// "-mlir-elide-elementsattrs-if-larger=8192", | |
"-mlir-disable-threading", | |
// "-mlir-print-ir-before-all", | |
// "-mlir-print-ir-after-all", | |
// "-iree-hal-dump-executable-sources-to=${workspaceFolder}/../iree-tmp/executables/", | |
"-iree-hal-target-backends=dylib-llvm-aot", | |
"-iree-llvm-target-triple=x86_64-pc-linux-elf", | |
"-iree-llvm-link-embedded", | |
// "-iree-llvm-keep-linker-artifacts", | |
// "-iree-input-type=tosa", | |
"-iree-input-type=mhlo", | |
"${workspaceFolder}/runtime/src/iree/runtime/testdata/simple_mul.mlir", | |
"-o=${workspaceFolder}/../iree-tmp/simple_mul.vmfb", | |
// "-iree-input-type=mhlo", | |
// "${workspaceFolder}/iree/test/e2e/models/mobilenetv3_fake_weights.mlir", | |
// "-o=${workspaceFolder}/../iree-tmp/mobilenetv3_fake_weights.vmfb", | |
// "${workspaceFolder}/iree/test/e2e/models/unidirectional_lstm.mlir", | |
// "-o=${workspaceFolder}/../iree-tmp/unidirectional_lstm.vmfb", | |
">", | |
"${workspaceFolder}/../iree-tmp/iree-compile-out.txt", | |
"2>&1" | |
], | |
"stopAtEntry": false, | |
"cwd": "${workspaceFolder}", | |
// "internalConsoleOptions": "openOnSessionStart", | |
"externalConsole": false, | |
"MIMode": "gdb", | |
"setupCommands": [ | |
{ | |
"description": "Enable pretty-printing for gdb", | |
"text": "-enable-pretty-printing", | |
"ignoreFailures": true | |
} | |
], | |
// "visualizerFile": "${workspaceFolder}/iree.natvis" | |
}, | |
{ | |
"name": "(lldb) iree-run-module", | |
"type": "cppdbg", | |
// "type": "lldb", | |
"request": "launch", | |
"preLaunchTask": "build-iree-run-module", | |
"program": "${command:cmake.buildDirectory}/tools/iree-run-module", | |
"args": [ | |
"--trace_execution", | |
"--device=local-sync", | |
"--module_file=${workspaceFolder}/../iree-tmp/simple_mul.vmfb", | |
"--entry_function=simple_mul", | |
"--function_input=4xf32=12", | |
"--function_input=4xf32=2", | |
">", | |
"${workspaceFolder}/../iree-tmp/iree-run-module-out.txt", | |
"2>&1" | |
], | |
"stopAtEntry": false, | |
"cwd": "${workspaceFolder}", | |
"environment": [ | |
{ | |
"name": "TRACY_NO_EXIT", | |
"value": "1", | |
} | |
], | |
// "internalConsoleOptions": "openOnSessionStart", | |
"externalConsole": false, | |
"MIMode": "gdb", | |
// "MIMode": "lldb", | |
// "miDebuggerPath": "/usr/bin/lldb", | |
"setupCommands": [ | |
{ | |
"description": "Enable pretty-printing for gdb", | |
"text": "-enable-pretty-printing", | |
"ignoreFailures": true | |
} | |
], | |
"visualizerFile": "${workspaceFolder}/iree.natvis" | |
}, |
This file has been truncated, but you can view the full file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// -----// IR Dump After TopLevelSCFToCFG //----- // | |
func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> { | |
%0 = mhlo.multiply %arg0, %arg1 {name = "mul.1"} : tensor<4xf32> | |
return %0 : tensor<4xf32> | |
} | |
// -----// IR Dump After MHLOToMHLOPreprocessing //----- // | |
func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> { | |
%0 = mhlo.multiply %arg0, %arg1 {name = "mul.1"} : tensor<4xf32> | |
return %0 : tensor<4xf32> | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> { | |
%0 = mhlo.multiply %arg0, %arg1 {name = "mul.1"} : tensor<4xf32> | |
return %0 : tensor<4xf32> | |
} | |
// -----// IR Dump After ShapeToShapeLowering //----- // | |
func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> { | |
%0 = mhlo.multiply %arg0, %arg1 {name = "mul.1"} : tensor<4xf32> | |
return %0 : tensor<4xf32> | |
} | |
// -----// IR Dump After ConvertShapeToStandard //----- // | |
module { | |
func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> { | |
%0 = mhlo.multiply %arg0, %arg1 {name = "mul.1"} : tensor<4xf32> | |
return %0 : tensor<4xf32> | |
} | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> { | |
%0 = mhlo.multiply %arg0, %arg1 {name = "mul.1"} : tensor<4xf32> | |
return %0 : tensor<4xf32> | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> { | |
%0 = mhlo.multiply %arg0, %arg1 {name = "mul.1"} : tensor<4xf32> | |
return %0 : tensor<4xf32> | |
} | |
// -----// IR Dump After Inliner //----- // | |
module { | |
func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> { | |
%0 = mhlo.multiply %arg0, %arg1 {name = "mul.1"} : tensor<4xf32> | |
return %0 : tensor<4xf32> | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::DemoteI64ToI32Pass //----- // | |
module { | |
func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> { | |
%0 = mhlo.multiply %arg0, %arg1 {name = "mul.1"} : tensor<4xf32> | |
return %0 : tensor<4xf32> | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::DemoteF64ToF32Pass //----- // | |
module { | |
func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> { | |
%0 = mhlo.multiply %arg0, %arg1 {name = "mul.1"} : tensor<4xf32> | |
return %0 : tensor<4xf32> | |
} | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> { | |
%0 = mhlo.multiply %arg0, %arg1 {name = "mul.1"} : tensor<4xf32> | |
return %0 : tensor<4xf32> | |
} | |
// -----// IR Dump After CSE //----- // | |
func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> { | |
%0 = mhlo.multiply %arg0, %arg1 {name = "mul.1"} : tensor<4xf32> | |
return %0 : tensor<4xf32> | |
} | |
// -----// IR Dump After ConvertMHLOToLinalgExt //----- // | |
func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> { | |
%0 = mhlo.multiply %arg0, %arg1 {name = "mul.1"} : tensor<4xf32> | |
return %0 : tensor<4xf32> | |
} | |
// -----// IR Dump After ConvertMHLOToLinalgOnTensors //----- // | |
func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> { | |
%0 = linalg.init_tensor [4] : tensor<4xf32> | |
%1 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%arg0, %arg1 : tensor<4xf32>, tensor<4xf32>) outs(%0 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%2 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %2 : f32 | |
} -> tensor<4xf32> | |
return %1 : tensor<4xf32> | |
} | |
// -----// IR Dump After ReconcileUnrealizedCasts //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> { | |
%0 = linalg.init_tensor [4] : tensor<4xf32> | |
%1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%arg0, %arg1 : tensor<4xf32>, tensor<4xf32>) outs(%0 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%2 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %2 : f32 | |
} -> tensor<4xf32> | |
return %1 : tensor<4xf32> | |
} | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> { | |
%0 = linalg.init_tensor [4] : tensor<4xf32> | |
%1 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%arg0, %arg1 : tensor<4xf32>, tensor<4xf32>) outs(%0 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%2 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %2 : f32 | |
} -> tensor<4xf32> | |
return %1 : tensor<4xf32> | |
} | |
// -----// IR Dump After VerifyCompilerMHLOInputLegality //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> { | |
%0 = linalg.init_tensor [4] : tensor<4xf32> | |
%1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%arg0, %arg1 : tensor<4xf32>, tensor<4xf32>) outs(%0 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%2 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %2 : f32 | |
} -> tensor<4xf32> | |
return %1 : tensor<4xf32> | |
} | |
} | |
// -----// IR Dump After IREEImportPublic //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> { | |
%0 = linalg.init_tensor [4] : tensor<4xf32> | |
%1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%arg0, %arg1 : tensor<4xf32>, tensor<4xf32>) outs(%0 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%2 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %2 : f32 | |
} -> tensor<4xf32> | |
return %1 : tensor<4xf32> | |
} | |
} | |
// -----// IR Dump After SanitizeModuleNames //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> { | |
%0 = linalg.init_tensor [4] : tensor<4xf32> | |
%1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%arg0, %arg1 : tensor<4xf32>, tensor<4xf32>) outs(%0 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%2 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %2 : f32 | |
} -> tensor<4xf32> | |
return %1 : tensor<4xf32> | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::ABI::WrapEntryPointsPass //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = call @_simple_mul(%0, %1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> | |
%3 = hal.tensor.export %2 : tensor<4xf32> -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
func.func private @_simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> { | |
%0 = linalg.init_tensor [4] : tensor<4xf32> | |
%1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%arg0, %arg1 : tensor<4xf32>, tensor<4xf32>) outs(%0 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%2 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %2 : f32 | |
} -> tensor<4xf32> | |
return %1 : tensor<4xf32> | |
} | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
func.func private @_simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> { | |
%0 = linalg.init_tensor [4] : tensor<4xf32> | |
%1 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%arg0, %arg1 : tensor<4xf32>, tensor<4xf32>) outs(%0 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%2 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %2 : f32 | |
} -> tensor<4xf32> | |
return %1 : tensor<4xf32> | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = call @_simple_mul(%0, %1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> | |
%3 = hal.tensor.export %2 : tensor<4xf32> -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%5 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %5 : f32 | |
} -> tensor<4xf32> | |
%4 = hal.tensor.export %3 : tensor<4xf32> -> !hal.buffer_view | |
return %4 : !hal.buffer_view | |
} | |
// -----// IR Dump After Inliner //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%5 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %5 : f32 | |
} -> tensor<4xf32> | |
%4 = hal.tensor.export %3 : tensor<4xf32> -> !hal.buffer_view | |
return %4 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%5 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %5 : f32 | |
} -> tensor<4xf32> | |
%4 = hal.tensor.export %3 : tensor<4xf32> -> !hal.buffer_view | |
return %4 : !hal.buffer_view | |
} | |
// -----// IR Dump After CSE //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%5 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %5 : f32 | |
} -> tensor<4xf32> | |
%4 = hal.tensor.export %3 : tensor<4xf32> -> !hal.buffer_view | |
return %4 : !hal.buffer_view | |
} | |
// -----// IR Dump After SymbolDCE //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%5 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %5 : f32 | |
} -> tensor<4xf32> | |
%4 = hal.tensor.export %3 : tensor<4xf32> -> !hal.buffer_view | |
return %4 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::DemoteF64ToF32Pass //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%5 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %5 : f32 | |
} -> tensor<4xf32> | |
%4 = hal.tensor.export %3 : tensor<4xf32> -> !hal.buffer_view | |
return %4 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After ConvertConv2D1x1ConvToMatmul //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%5 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %5 : f32 | |
} -> tensor<4xf32> | |
%4 = hal.tensor.export %3 : tensor<4xf32> -> !hal.buffer_view | |
return %4 : !hal.buffer_view | |
} | |
// -----// IR Dump After VerifyInputLegality //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%5 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %5 : f32 | |
} -> tensor<4xf32> | |
%4 = hal.tensor.export %3 : tensor<4xf32> -> !hal.buffer_view | |
return %4 : !hal.buffer_view | |
} | |
// -----// IR Dump After LinalgNamedOpConversion //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%5 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %5 : f32 | |
} -> tensor<4xf32> | |
%4 = hal.tensor.export %3 : tensor<4xf32> -> !hal.buffer_view | |
return %4 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After ExpandTensorShapes //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%5 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %5 : f32 | |
} -> tensor<4xf32> | |
%4 = hal.tensor.export %3 : tensor<4xf32> -> !hal.buffer_view | |
return %4 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::SimplifyGlobalAccessesPass //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%5 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %5 : f32 | |
} -> tensor<4xf32> | |
%4 = hal.tensor.export %3 : tensor<4xf32> -> !hal.buffer_view | |
return %4 : !hal.buffer_view | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::ApplyPatternsPass //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module attributes {iree.fixedpoint.iteration = 0 : index} { | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%5 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %5 : f32 | |
} -> tensor<4xf32> | |
%4 = hal.tensor.export %3 : tensor<4xf32> -> !hal.buffer_view | |
return %4 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::FoldGlobalsPass //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module attributes {iree.fixedpoint.iteration = 0 : index} { | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%5 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %5 : f32 | |
} -> tensor<4xf32> | |
%4 = hal.tensor.export %3 : tensor<4xf32> -> !hal.buffer_view | |
return %4 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%5 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %5 : f32 | |
} -> tensor<4xf32> | |
%4 = hal.tensor.export %3 : tensor<4xf32> -> !hal.buffer_view | |
return %4 : !hal.buffer_view | |
} | |
// -----// IR Dump After CSE //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%5 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %5 : f32 | |
} -> tensor<4xf32> | |
%4 = hal.tensor.export %3 : tensor<4xf32> -> !hal.buffer_view | |
return %4 : !hal.buffer_view | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::FixedPointIteratorPass //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%5 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %5 : f32 | |
} -> tensor<4xf32> | |
%4 = hal.tensor.export %3 : tensor<4xf32> -> !hal.buffer_view | |
return %4 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After PadTensorToSubTensorInsert //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%5 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %5 : f32 | |
} -> tensor<4xf32> | |
%4 = hal.tensor.export %3 : tensor<4xf32> -> !hal.buffer_view | |
return %4 : !hal.buffer_view | |
} | |
// -----// IR Dump After ConvertElementwiseToLinalg //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%5 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %5 : f32 | |
} -> tensor<4xf32> | |
%4 = hal.tensor.export %3 : tensor<4xf32> -> !hal.buffer_view | |
return %4 : !hal.buffer_view | |
} | |
// -----// IR Dump After LinalgFoldUnitExtentDims //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%5 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %5 : f32 | |
} -> tensor<4xf32> | |
%4 = hal.tensor.export %3 : tensor<4xf32> -> !hal.buffer_view | |
return %4 : !hal.buffer_view | |
} | |
// -----// IR Dump After InterchangeGenericOps //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%5 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %5 : f32 | |
} -> tensor<4xf32> | |
%4 = hal.tensor.export %3 : tensor<4xf32> -> !hal.buffer_view | |
return %4 : !hal.buffer_view | |
} | |
// -----// IR Dump After ResolveShapedTypeResultDims //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%5 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %5 : f32 | |
} -> tensor<4xf32> | |
%4 = hal.tensor.export %3 : tensor<4xf32> -> !hal.buffer_view | |
return %4 : !hal.buffer_view | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%5 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %5 : f32 | |
} -> tensor<4xf32> | |
%4 = hal.tensor.export %3 : tensor<4xf32> -> !hal.buffer_view | |
return %4 : !hal.buffer_view | |
} | |
// -----// IR Dump After CSE //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%5 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %5 : f32 | |
} -> tensor<4xf32> | |
%4 = hal.tensor.export %3 : tensor<4xf32> -> !hal.buffer_view | |
return %4 : !hal.buffer_view | |
} | |
// -----// IR Dump After FusionOfTensorOps //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%5 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %5 : f32 | |
} -> tensor<4xf32> | |
%4 = hal.tensor.export %3 : tensor<4xf32> -> !hal.buffer_view | |
return %4 : !hal.buffer_view | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%5 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %5 : f32 | |
} -> tensor<4xf32> | |
%4 = hal.tensor.export %3 : tensor<4xf32> -> !hal.buffer_view | |
return %4 : !hal.buffer_view | |
} | |
// -----// IR Dump After CSE //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%5 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %5 : f32 | |
} -> tensor<4xf32> | |
%4 = hal.tensor.export %3 : tensor<4xf32> -> !hal.buffer_view | |
return %4 : !hal.buffer_view | |
} | |
// -----// IR Dump After SplitReduction //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%5 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %5 : f32 | |
} -> tensor<4xf32> | |
%4 = hal.tensor.export %3 : tensor<4xf32> -> !hal.buffer_view | |
return %4 : !hal.buffer_view | |
} | |
// -----// IR Dump After InterchangeGenericOps //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg2: f32, %arg3: f32, %arg4: f32): | |
%5 = arith.mulf %arg2, %arg3 : f32 | |
linalg.yield %5 : f32 | |
} -> tensor<4xf32> | |
%4 = hal.tensor.export %3 : tensor<4xf32> -> !hal.buffer_view | |
return %4 : !hal.buffer_view | |
} | |
// -----// IR Dump After DispatchLinalgOnTensors //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = flow.dispatch.workgroups[%c4, %c1, %c1](%0, %1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> = | |
(%arg2: !flow.dispatch.tensor<readonly:4xf32>, %arg3: !flow.dispatch.tensor<readonly:4xf32>, %arg4: !flow.dispatch.tensor<writeonly:4xf32>) { | |
%4 = flow.dispatch.tensor.load %arg2, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = flow.dispatch.tensor.load %arg3, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%6 = linalg.init_tensor [4] : tensor<4xf32> | |
%7 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%4, %5 : tensor<4xf32>, tensor<4xf32>) outs(%6 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg5: f32, %arg6: f32, %arg7: f32): | |
%8 = arith.mulf %arg5, %arg6 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %7, %arg4, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
flow.return | |
} | |
%3 = hal.tensor.export %2 : tensor<4xf32> -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After CaptureDispatchDynamicDims //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = flow.dispatch.workgroups[%c4, %c1, %c1](%0, %1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> = | |
(%arg2: !flow.dispatch.tensor<readonly:4xf32>, %arg3: !flow.dispatch.tensor<readonly:4xf32>, %arg4: !flow.dispatch.tensor<writeonly:4xf32>) { | |
%4 = flow.dispatch.tensor.load %arg2, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = flow.dispatch.tensor.load %arg3, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%6 = linalg.init_tensor [4] : tensor<4xf32> | |
%7 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%4, %5 : tensor<4xf32>, tensor<4xf32>) outs(%6 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg5: f32, %arg6: f32, %arg7: f32): | |
%8 = arith.mulf %arg5, %arg6 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %7, %arg4, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
flow.return | |
} | |
%3 = hal.tensor.export %2 : tensor<4xf32> -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = flow.dispatch.workgroups[%c4, %c1, %c1](%0, %1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> = | |
(%arg2: !flow.dispatch.tensor<readonly:4xf32>, %arg3: !flow.dispatch.tensor<readonly:4xf32>, %arg4: !flow.dispatch.tensor<writeonly:4xf32>) { | |
%4 = flow.dispatch.tensor.load %arg2, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = flow.dispatch.tensor.load %arg3, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%6 = linalg.init_tensor [4] : tensor<4xf32> | |
%7 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%4, %5 : tensor<4xf32>, tensor<4xf32>) outs(%6 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg5: f32, %arg6: f32, %arg7: f32): | |
%8 = arith.mulf %arg5, %arg6 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %7, %arg4, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
flow.return | |
} | |
%3 = hal.tensor.export %2 : tensor<4xf32> -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After CSE //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = flow.dispatch.workgroups[%c4, %c1, %c1](%0, %1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> = | |
(%arg2: !flow.dispatch.tensor<readonly:4xf32>, %arg3: !flow.dispatch.tensor<readonly:4xf32>, %arg4: !flow.dispatch.tensor<writeonly:4xf32>) { | |
%4 = flow.dispatch.tensor.load %arg2, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = flow.dispatch.tensor.load %arg3, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%6 = linalg.init_tensor [4] : tensor<4xf32> | |
%7 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%4, %5 : tensor<4xf32>, tensor<4xf32>) outs(%6 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg5: f32, %arg6: f32, %arg7: f32): | |
%8 = arith.mulf %arg5, %arg6 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %7, %arg4, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
flow.return | |
} | |
%3 = hal.tensor.export %2 : tensor<4xf32> -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After InitializeEmptyTensors //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = flow.dispatch.workgroups[%c4, %c1, %c1](%0, %1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> = | |
(%arg2: !flow.dispatch.tensor<readonly:4xf32>, %arg3: !flow.dispatch.tensor<readonly:4xf32>, %arg4: !flow.dispatch.tensor<writeonly:4xf32>) { | |
%4 = flow.dispatch.tensor.load %arg2, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = flow.dispatch.tensor.load %arg3, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%6 = linalg.init_tensor [4] : tensor<4xf32> | |
%7 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%4, %5 : tensor<4xf32>, tensor<4xf32>) outs(%6 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg5: f32, %arg6: f32, %arg7: f32): | |
%8 = arith.mulf %arg5, %arg6 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %7, %arg4, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
flow.return | |
} | |
%3 = hal.tensor.export %2 : tensor<4xf32> -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After OutlineDispatchRegions //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
flow.executable private @simple_mul_dispatch_0 { | |
flow.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:4xf32>, %arg1: !flow.dispatch.tensor<readonly:4xf32>, %arg2: !flow.dispatch.tensor<writeonly:4xf32>) { | |
%0 = flow.dispatch.tensor.load %arg0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%1 = flow.dispatch.tensor.load %arg1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%4 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %4 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %3, %arg2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = flow.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%0, %1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> | |
%3 = hal.tensor.export %2 : tensor<4xf32> -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::StripDebugOpsPass //----- // | |
flow.executable private @simple_mul_dispatch_0 { | |
flow.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:4xf32>, %arg1: !flow.dispatch.tensor<readonly:4xf32>, %arg2: !flow.dispatch.tensor<writeonly:4xf32>) { | |
%0 = flow.dispatch.tensor.load %arg0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%1 = flow.dispatch.tensor.load %arg1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%4 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %4 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %3, %arg2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = flow.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%0, %1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> | |
%3 = hal.tensor.export %2 : tensor<4xf32> -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After DeduplicateExecutables //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
flow.executable private @simple_mul_dispatch_0 { | |
flow.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:4xf32>, %arg1: !flow.dispatch.tensor<readonly:4xf32>, %arg2: !flow.dispatch.tensor<writeonly:4xf32>) { | |
%0 = flow.dispatch.tensor.load %arg0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%1 = flow.dispatch.tensor.load %arg1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%4 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %4 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %3, %arg2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = flow.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%0, %1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> | |
%3 = hal.tensor.export %2 : tensor<4xf32> -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
flow.executable private @simple_mul_dispatch_0 { | |
flow.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:4xf32>, %arg1: !flow.dispatch.tensor<readonly:4xf32>, %arg2: !flow.dispatch.tensor<writeonly:4xf32>) { | |
%0 = flow.dispatch.tensor.load %arg0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%1 = flow.dispatch.tensor.load %arg1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%4 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %4 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %3, %arg2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
// -----// IR Dump After CSE //----- // | |
flow.executable private @simple_mul_dispatch_0 { | |
flow.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:4xf32>, %arg1: !flow.dispatch.tensor<readonly:4xf32>, %arg2: !flow.dispatch.tensor<writeonly:4xf32>) { | |
%0 = flow.dispatch.tensor.load %arg0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%1 = flow.dispatch.tensor.load %arg1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%4 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %4 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %3, %arg2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
// -----// IR Dump After CleanupTensorShapes //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = flow.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%0, %1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> | |
%3 = hal.tensor.export %2 : tensor<4xf32> -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = flow.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%0, %1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> | |
%3 = hal.tensor.export %2 : tensor<4xf32> -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After CSE //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = flow.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%0, %1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> | |
%3 = hal.tensor.export %2 : tensor<4xf32> -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After SymbolDCE //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
flow.executable private @simple_mul_dispatch_0 { | |
flow.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:4xf32>, %arg1: !flow.dispatch.tensor<readonly:4xf32>, %arg2: !flow.dispatch.tensor<writeonly:4xf32>) { | |
%0 = flow.dispatch.tensor.load %arg0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%1 = flow.dispatch.tensor.load %arg1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%4 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %4 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %3, %arg2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = flow.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%0, %1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> | |
%3 = hal.tensor.export %2 : tensor<4xf32> -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After VerifyInput //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
flow.executable private @simple_mul_dispatch_0 { | |
flow.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:4xf32>, %arg1: !flow.dispatch.tensor<readonly:4xf32>, %arg2: !flow.dispatch.tensor<writeonly:4xf32>) { | |
%0 = flow.dispatch.tensor.load %arg0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%1 = flow.dispatch.tensor.load %arg1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%4 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %4 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %3, %arg2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = flow.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%0, %1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> | |
%3 = hal.tensor.export %2 : tensor<4xf32> -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After OutlineConstants //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
flow.executable private @simple_mul_dispatch_0 { | |
flow.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:4xf32>, %arg1: !flow.dispatch.tensor<readonly:4xf32>, %arg2: !flow.dispatch.tensor<writeonly:4xf32>) { | |
%0 = flow.dispatch.tensor.load %arg0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%1 = flow.dispatch.tensor.load %arg1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%4 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %4 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %3, %arg2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = flow.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%0, %1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> | |
%3 = hal.tensor.export %2 : tensor<4xf32> -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
flow.executable private @simple_mul_dispatch_0 { | |
flow.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:4xf32>, %arg1: !flow.dispatch.tensor<readonly:4xf32>, %arg2: !flow.dispatch.tensor<writeonly:4xf32>) { | |
%0 = flow.dispatch.tensor.load %arg0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%1 = flow.dispatch.tensor.load %arg1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%4 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %4 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %3, %arg2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = flow.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%0, %1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> | |
%3 = hal.tensor.export %2 : tensor<4xf32> -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After CSE //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
flow.executable private @simple_mul_dispatch_0 { | |
flow.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:4xf32>, %arg1: !flow.dispatch.tensor<readonly:4xf32>, %arg2: !flow.dispatch.tensor<writeonly:4xf32>) { | |
%0 = flow.dispatch.tensor.load %arg0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%1 = flow.dispatch.tensor.load %arg1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%4 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %4 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %3, %arg2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = flow.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%0, %1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> | |
%3 = hal.tensor.export %2 : tensor<4xf32> -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::SimplifyGlobalAccessesPass //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = flow.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%0, %1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> | |
%3 = hal.tensor.export %2 : tensor<4xf32> -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::ApplyPatternsPass //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
flow.executable private @simple_mul_dispatch_0 { | |
flow.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:4xf32>, %arg1: !flow.dispatch.tensor<readonly:4xf32>, %arg2: !flow.dispatch.tensor<writeonly:4xf32>) { | |
%0 = flow.dispatch.tensor.load %arg0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%1 = flow.dispatch.tensor.load %arg1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%4 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %4 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %3, %arg2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = flow.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%0, %1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> | |
%3 = hal.tensor.export %2 : tensor<4xf32> -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::FoldGlobalsPass //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
flow.executable private @simple_mul_dispatch_0 { | |
flow.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:4xf32>, %arg1: !flow.dispatch.tensor<readonly:4xf32>, %arg2: !flow.dispatch.tensor<writeonly:4xf32>) { | |
%0 = flow.dispatch.tensor.load %arg0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%1 = flow.dispatch.tensor.load %arg1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%4 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %4 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %3, %arg2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = flow.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%0, %1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> | |
%3 = hal.tensor.export %2 : tensor<4xf32> -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::FuseGlobalsPass //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
flow.executable private @simple_mul_dispatch_0 { | |
flow.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !flow.dispatch.tensor<readonly:4xf32>, %arg1: !flow.dispatch.tensor<readonly:4xf32>, %arg2: !flow.dispatch.tensor<writeonly:4xf32>) { | |
%0 = flow.dispatch.tensor.load %arg0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%1 = flow.dispatch.tensor.load %arg1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%2 = linalg.init_tensor [4] : tensor<4xf32> | |
%3 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%0, %1 : tensor<4xf32>, tensor<4xf32>) outs(%2 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%4 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %4 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %3, %arg2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%0 = hal.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> | |
%1 = hal.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> | |
%2 = flow.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%0, %1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> | |
%3 = hal.tensor.export %2 : tensor<4xf32> -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After ConvertToStream //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
%c4_0 = arith.constant 4 : index | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4_0]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.sizeof tensor<4xf32> : index | |
%1 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%0} | |
%2 = stream.async.transfer %1 : !stream.resource<external>{%0} -> !stream.resource<*>{%0} | |
%c553648160_i32_1 = arith.constant 553648160 : i32 | |
%c1_i32_2 = arith.constant 1 : i32 | |
%c4_3 = arith.constant 4 : index | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4_3]) type(%c553648160_i32_1) encoding(%c1_i32_2) | |
%3 = stream.tensor.sizeof tensor<4xf32> : index | |
%4 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%3} | |
%5 = stream.async.transfer %4 : !stream.resource<external>{%3} -> !stream.resource<*>{%3} | |
%6 = stream.tensor.sizeof tensor<4xf32> : index | |
%7 = stream.async.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%2, %5) : (!stream.resource<*>{%0}, !stream.resource<*>{%3}) -> !stream.resource<*>{%6} | |
%8 = stream.async.transfer %7 : !stream.resource<*>{%6} -> !stream.resource<external>{%6} | |
%9 = stream.tensor.export %8 : tensor<4xf32> in !stream.resource<external>{%6} -> !hal.buffer_view | |
return %9 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After VerifyLoweringToTensors //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
%c4_0 = arith.constant 4 : index | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4_0]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.sizeof tensor<4xf32> : index | |
%1 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%0} | |
%2 = stream.async.transfer %1 : !stream.resource<external>{%0} -> !stream.resource<*>{%0} | |
%c553648160_i32_1 = arith.constant 553648160 : i32 | |
%c1_i32_2 = arith.constant 1 : i32 | |
%c4_3 = arith.constant 4 : index | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4_3]) type(%c553648160_i32_1) encoding(%c1_i32_2) | |
%3 = stream.tensor.sizeof tensor<4xf32> : index | |
%4 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%3} | |
%5 = stream.async.transfer %4 : !stream.resource<external>{%3} -> !stream.resource<*>{%3} | |
%6 = stream.tensor.sizeof tensor<4xf32> : index | |
%7 = stream.async.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%2, %5) : (!stream.resource<*>{%0}, !stream.resource<*>{%3}) -> !stream.resource<*>{%6} | |
%8 = stream.async.transfer %7 : !stream.resource<*>{%6} -> !stream.resource<external>{%6} | |
%9 = stream.tensor.export %8 : tensor<4xf32> in !stream.resource<external>{%6} -> !hal.buffer_view | |
return %9 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.sizeof tensor<4xf32> : index | |
%1 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%0} | |
%2 = stream.async.transfer %1 : !stream.resource<external>{%0} -> !stream.resource<*>{%0} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%3 = stream.tensor.sizeof tensor<4xf32> : index | |
%4 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%3} | |
%5 = stream.async.transfer %4 : !stream.resource<external>{%3} -> !stream.resource<*>{%3} | |
%6 = stream.tensor.sizeof tensor<4xf32> : index | |
%7 = stream.async.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%2, %5) : (!stream.resource<*>{%0}, !stream.resource<*>{%3}) -> !stream.resource<*>{%6} | |
%8 = stream.async.transfer %7 : !stream.resource<*>{%6} -> !stream.resource<external>{%6} | |
%9 = stream.tensor.export %8 : tensor<4xf32> in !stream.resource<external>{%6} -> !hal.buffer_view | |
return %9 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After CSE //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.sizeof tensor<4xf32> : index | |
%1 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%0} | |
%2 = stream.async.transfer %1 : !stream.resource<external>{%0} -> !stream.resource<*>{%0} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%3 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%0} | |
%4 = stream.async.transfer %3 : !stream.resource<external>{%0} -> !stream.resource<*>{%0} | |
%5 = stream.async.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%2, %4) : (!stream.resource<*>{%0}, !stream.resource<*>{%0}) -> !stream.resource<*>{%0} | |
%6 = stream.async.transfer %5 : !stream.resource<*>{%0} -> !stream.resource<external>{%0} | |
%7 = stream.tensor.export %6 : tensor<4xf32> in !stream.resource<external>{%0} -> !hal.buffer_view | |
return %7 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::SimplifyGlobalAccessesPass //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.sizeof tensor<4xf32> : index | |
%1 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%0} | |
%2 = stream.async.transfer %1 : !stream.resource<external>{%0} -> !stream.resource<*>{%0} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%3 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%0} | |
%4 = stream.async.transfer %3 : !stream.resource<external>{%0} -> !stream.resource<*>{%0} | |
%5 = stream.async.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%2, %4) : (!stream.resource<*>{%0}, !stream.resource<*>{%0}) -> !stream.resource<*>{%0} | |
%6 = stream.async.transfer %5 : !stream.resource<*>{%0} -> !stream.resource<external>{%0} | |
%7 = stream.tensor.export %6 : tensor<4xf32> in !stream.resource<external>{%0} -> !hal.buffer_view | |
return %7 : !hal.buffer_view | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::ApplyPatternsPass //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.sizeof tensor<4xf32> : index | |
%1 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%0} | |
%2 = stream.async.transfer %1 : !stream.resource<external>{%0} -> !stream.resource<*>{%0} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%3 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%0} | |
%4 = stream.async.transfer %3 : !stream.resource<external>{%0} -> !stream.resource<*>{%0} | |
%5 = stream.async.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%2, %4) : (!stream.resource<*>{%0}, !stream.resource<*>{%0}) -> !stream.resource<*>{%0} | |
%6 = stream.async.transfer %5 : !stream.resource<*>{%0} -> !stream.resource<external>{%0} | |
%7 = stream.tensor.export %6 : tensor<4xf32> in !stream.resource<external>{%0} -> !hal.buffer_view | |
return %7 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::FoldGlobalsPass //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.sizeof tensor<4xf32> : index | |
%1 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%0} | |
%2 = stream.async.transfer %1 : !stream.resource<external>{%0} -> !stream.resource<*>{%0} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%3 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%0} | |
%4 = stream.async.transfer %3 : !stream.resource<external>{%0} -> !stream.resource<*>{%0} | |
%5 = stream.async.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%2, %4) : (!stream.resource<*>{%0}, !stream.resource<*>{%0}) -> !stream.resource<*>{%0} | |
%6 = stream.async.transfer %5 : !stream.resource<*>{%0} -> !stream.resource<external>{%0} | |
%7 = stream.tensor.export %6 : tensor<4xf32> in !stream.resource<external>{%0} -> !hal.buffer_view | |
return %7 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::FuseGlobalsPass //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.sizeof tensor<4xf32> : index | |
%1 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%0} | |
%2 = stream.async.transfer %1 : !stream.resource<external>{%0} -> !stream.resource<*>{%0} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%3 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%0} | |
%4 = stream.async.transfer %3 : !stream.resource<external>{%0} -> !stream.resource<*>{%0} | |
%5 = stream.async.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%2, %4) : (!stream.resource<*>{%0}, !stream.resource<*>{%0}) -> !stream.resource<*>{%0} | |
%6 = stream.async.transfer %5 : !stream.resource<*>{%0} -> !stream.resource<external>{%0} | |
%7 = stream.tensor.export %6 : tensor<4xf32> in !stream.resource<external>{%0} -> !hal.buffer_view | |
return %7 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::CombineInitializersPass //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.sizeof tensor<4xf32> : index | |
%1 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%0} | |
%2 = stream.async.transfer %1 : !stream.resource<external>{%0} -> !stream.resource<*>{%0} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%3 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%0} | |
%4 = stream.async.transfer %3 : !stream.resource<external>{%0} -> !stream.resource<*>{%0} | |
%5 = stream.async.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%2, %4) : (!stream.resource<*>{%0}, !stream.resource<*>{%0}) -> !stream.resource<*>{%0} | |
%6 = stream.async.transfer %5 : !stream.resource<*>{%0} -> !stream.resource<external>{%0} | |
%7 = stream.tensor.export %6 : tensor<4xf32> in !stream.resource<external>{%0} -> !hal.buffer_view | |
return %7 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After EncodeDeviceTensors //----- // | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
// -----// IR Dump After EncodeHostTensors //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%1 = stream.async.transfer %0 : !stream.resource<external>{%c16} -> !stream.resource<*>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%2 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%3 = stream.async.transfer %2 : !stream.resource<external>{%c16} -> !stream.resource<*>{%c16} | |
%4 = stream.async.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%1, %3) : (!stream.resource<*>{%c16}, !stream.resource<*>{%c16}) -> !stream.resource<*>{%c16} | |
%5 = stream.async.transfer %4 : !stream.resource<*>{%c16} -> !stream.resource<external>{%c16} | |
%6 = stream.tensor.export %5 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %6 : !hal.buffer_view | |
} | |
// -----// IR Dump After MaterializeBuiltins //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%1 = stream.async.transfer %0 : !stream.resource<external>{%c16} -> !stream.resource<*>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%2 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%3 = stream.async.transfer %2 : !stream.resource<external>{%c16} -> !stream.resource<*>{%c16} | |
%4 = stream.async.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%1, %3) : (!stream.resource<*>{%c16}, !stream.resource<*>{%c16}) -> !stream.resource<*>{%c16} | |
%5 = stream.async.transfer %4 : !stream.resource<*>{%c16} -> !stream.resource<external>{%c16} | |
%6 = stream.tensor.export %5 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %6 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%1 = stream.async.transfer %0 : !stream.resource<external>{%c16} -> !stream.resource<*>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%2 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%3 = stream.async.transfer %2 : !stream.resource<external>{%c16} -> !stream.resource<*>{%c16} | |
%4 = stream.async.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%1, %3) : (!stream.resource<*>{%c16}, !stream.resource<*>{%c16}) -> !stream.resource<*>{%c16} | |
%5 = stream.async.transfer %4 : !stream.resource<*>{%c16} -> !stream.resource<external>{%c16} | |
%6 = stream.tensor.export %5 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %6 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After CSE //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%1 = stream.async.transfer %0 : !stream.resource<external>{%c16} -> !stream.resource<*>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%2 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%3 = stream.async.transfer %2 : !stream.resource<external>{%c16} -> !stream.resource<*>{%c16} | |
%4 = stream.async.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%1, %3) : (!stream.resource<*>{%c16}, !stream.resource<*>{%c16}) -> !stream.resource<*>{%c16} | |
%5 = stream.async.transfer %4 : !stream.resource<*>{%c16} -> !stream.resource<external>{%c16} | |
%6 = stream.tensor.export %5 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %6 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::SimplifyGlobalAccessesPass //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%1 = stream.async.transfer %0 : !stream.resource<external>{%c16} -> !stream.resource<*>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%2 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%3 = stream.async.transfer %2 : !stream.resource<external>{%c16} -> !stream.resource<*>{%c16} | |
%4 = stream.async.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%1, %3) : (!stream.resource<*>{%c16}, !stream.resource<*>{%c16}) -> !stream.resource<*>{%c16} | |
%5 = stream.async.transfer %4 : !stream.resource<*>{%c16} -> !stream.resource<external>{%c16} | |
%6 = stream.tensor.export %5 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %6 : !hal.buffer_view | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::ApplyPatternsPass //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%1 = stream.async.transfer %0 : !stream.resource<external>{%c16} -> !stream.resource<*>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%2 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%3 = stream.async.transfer %2 : !stream.resource<external>{%c16} -> !stream.resource<*>{%c16} | |
%4 = stream.async.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%1, %3) : (!stream.resource<*>{%c16}, !stream.resource<*>{%c16}) -> !stream.resource<*>{%c16} | |
%5 = stream.async.transfer %4 : !stream.resource<*>{%c16} -> !stream.resource<external>{%c16} | |
%6 = stream.tensor.export %5 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %6 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::FoldGlobalsPass //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%1 = stream.async.transfer %0 : !stream.resource<external>{%c16} -> !stream.resource<*>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%2 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%3 = stream.async.transfer %2 : !stream.resource<external>{%c16} -> !stream.resource<*>{%c16} | |
%4 = stream.async.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%1, %3) : (!stream.resource<*>{%c16}, !stream.resource<*>{%c16}) -> !stream.resource<*>{%c16} | |
%5 = stream.async.transfer %4 : !stream.resource<*>{%c16} -> !stream.resource<external>{%c16} | |
%6 = stream.tensor.export %5 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %6 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::FuseGlobalsPass //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%1 = stream.async.transfer %0 : !stream.resource<external>{%c16} -> !stream.resource<*>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%2 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%3 = stream.async.transfer %2 : !stream.resource<external>{%c16} -> !stream.resource<*>{%c16} | |
%4 = stream.async.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%1, %3) : (!stream.resource<*>{%c16}, !stream.resource<*>{%c16}) -> !stream.resource<*>{%c16} | |
%5 = stream.async.transfer %4 : !stream.resource<*>{%c16} -> !stream.resource<external>{%c16} | |
%6 = stream.tensor.export %5 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %6 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After MaterializeCopyOnWrite //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%1 = stream.async.transfer %0 : !stream.resource<external>{%c16} -> !stream.resource<*>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%2 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%3 = stream.async.transfer %2 : !stream.resource<external>{%c16} -> !stream.resource<*>{%c16} | |
%4 = stream.async.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%1, %3) : (!stream.resource<*>{%c16}, !stream.resource<*>{%c16}) -> !stream.resource<*>{%c16} | |
%5 = stream.async.transfer %4 : !stream.resource<*>{%c16} -> !stream.resource<external>{%c16} | |
%6 = stream.tensor.export %5 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %6 : !hal.buffer_view | |
} | |
// -----// IR Dump After ElideAsyncCopies //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%1 = stream.async.transfer %0 : !stream.resource<external>{%c16} -> !stream.resource<*>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%2 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%3 = stream.async.transfer %2 : !stream.resource<external>{%c16} -> !stream.resource<*>{%c16} | |
%4 = stream.async.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%1, %3) : (!stream.resource<*>{%c16}, !stream.resource<*>{%c16}) -> !stream.resource<*>{%c16} | |
%5 = stream.async.transfer %4 : !stream.resource<*>{%c16} -> !stream.resource<external>{%c16} | |
%6 = stream.tensor.export %5 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %6 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After RefineUsage //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.async.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%0, %1) : (!stream.resource<external>{%c16}, !stream.resource<external>{%c16}) -> !stream.resource<external>{%c16} | |
%3 = stream.tensor.export %2 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After ScheduleExecution //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%results, %result_timepoint = stream.async.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}) -> !stream.resource<external>{%c16} { | |
%4 = stream.async.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%arg2, %arg3) : (!stream.resource<external>{%c16}, !stream.resource<external>{%c16}) -> !stream.resource<external>{%c16} | |
stream.yield %4 : !stream.resource<external>{%c16} | |
} => !stream.timepoint | |
%2 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c16} | |
%3 = stream.tensor.export %2 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After ScheduleConcurrency //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%results, %result_timepoint = stream.async.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}) -> !stream.resource<external>{%c16} { | |
%4 = stream.async.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%arg2, %arg3) : (!stream.resource<external>{%c16}, !stream.resource<external>{%c16}) -> !stream.resource<external>{%c16} | |
stream.yield %4 : !stream.resource<external>{%c16} | |
} => !stream.timepoint | |
%2 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c16} | |
%3 = stream.tensor.export %2 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After PropagateTimepoints //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.timepoint.immediate => !stream.timepoint | |
%3 = stream.timepoint.immediate => !stream.timepoint | |
%4 = stream.timepoint.immediate => !stream.timepoint | |
%results, %result_timepoint = stream.async.execute await(%4) => with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}) -> !stream.resource<external>{%c16} { | |
%7 = stream.async.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%arg2, %arg3) : (!stream.resource<external>{%c16}, !stream.resource<external>{%c16}) -> !stream.resource<external>{%c16} | |
stream.yield %7 : !stream.resource<external>{%c16} | |
} => !stream.timepoint | |
%5 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c16} | |
%6 = stream.tensor.export %5 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %6 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%results, %result_timepoint = stream.async.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}) -> !stream.resource<external>{%c16} { | |
%4 = stream.async.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%arg2, %arg3) : (!stream.resource<external>{%c16}, !stream.resource<external>{%c16}) -> !stream.resource<external>{%c16} | |
stream.yield %4 : !stream.resource<external>{%c16} | |
} => !stream.timepoint | |
%2 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c16} | |
%3 = stream.tensor.export %2 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After CSE //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%results, %result_timepoint = stream.async.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}) -> !stream.resource<external>{%c16} { | |
%4 = stream.async.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%arg2, %arg3) : (!stream.resource<external>{%c16}, !stream.resource<external>{%c16}) -> !stream.resource<external>{%c16} | |
stream.yield %4 : !stream.resource<external>{%c16} | |
} => !stream.timepoint | |
%2 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c16} | |
%3 = stream.tensor.export %2 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::SimplifyGlobalAccessesPass //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%results, %result_timepoint = stream.async.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}) -> !stream.resource<external>{%c16} { | |
%4 = stream.async.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%arg2, %arg3) : (!stream.resource<external>{%c16}, !stream.resource<external>{%c16}) -> !stream.resource<external>{%c16} | |
stream.yield %4 : !stream.resource<external>{%c16} | |
} => !stream.timepoint | |
%2 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c16} | |
%3 = stream.tensor.export %2 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::ApplyPatternsPass //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%results, %result_timepoint = stream.async.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}) -> !stream.resource<external>{%c16} { | |
%4 = stream.async.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%arg2, %arg3) : (!stream.resource<external>{%c16}, !stream.resource<external>{%c16}) -> !stream.resource<external>{%c16} | |
stream.yield %4 : !stream.resource<external>{%c16} | |
} => !stream.timepoint | |
%2 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c16} | |
%3 = stream.tensor.export %2 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::FoldGlobalsPass //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%results, %result_timepoint = stream.async.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}) -> !stream.resource<external>{%c16} { | |
%4 = stream.async.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%arg2, %arg3) : (!stream.resource<external>{%c16}, !stream.resource<external>{%c16}) -> !stream.resource<external>{%c16} | |
stream.yield %4 : !stream.resource<external>{%c16} | |
} => !stream.timepoint | |
%2 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c16} | |
%3 = stream.tensor.export %2 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::FuseGlobalsPass //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%results, %result_timepoint = stream.async.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}) -> !stream.resource<external>{%c16} { | |
%4 = stream.async.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%arg2, %arg3) : (!stream.resource<external>{%c16}, !stream.resource<external>{%c16}) -> !stream.resource<external>{%c16} | |
stream.yield %4 : !stream.resource<external>{%c16} | |
} => !stream.timepoint | |
%2 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c16} | |
%3 = stream.tensor.export %2 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After VerifyLoweringToAsync //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%results, %result_timepoint = stream.async.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}) -> !stream.resource<external>{%c16} { | |
%4 = stream.async.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%arg2, %arg3) : (!stream.resource<external>{%c16}, !stream.resource<external>{%c16}) -> !stream.resource<external>{%c16} | |
stream.yield %4 : !stream.resource<external>{%c16} | |
} => !stream.timepoint | |
%2 = stream.timepoint.await %result_timepoint => %results : !stream.resource<external>{%c16} | |
%3 = stream.tensor.export %2 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %3 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After ScheduleAllocation //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%c0 = arith.constant 0 : index | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
// -----// IR Dump After PackConstants //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%c0 = arith.constant 0 : index | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
// -----// IR Dump After PackAllocations //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%c0 = arith.constant 0 : index | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
// -----// IR Dump After LayoutSlices //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%c0 = arith.constant 0 : index | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
// -----// IR Dump After PropagateSubviews //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%c0 = arith.constant 0 : index | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After CSE //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::SimplifyGlobalAccessesPass //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::ApplyPatternsPass //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::FoldGlobalsPass //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::FuseGlobalsPass //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After VerifyLoweringToCmd //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After CSE //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::SimplifyGlobalAccessesPass //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::ApplyPatternsPass //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::FoldGlobalsPass //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::FuseGlobalsPass //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After FuseDispatchBindings //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: index, %arg4: index, %arg5: index) { | |
%c0 = arith.constant 0 : index | |
%0 = arith.addi %c0, %arg3 : index | |
%1 = stream.binding.subspan %arg0[%0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = arith.addi %c0, %arg4 : index | |
%3 = stream.binding.subspan %arg1[%2] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%4 = arith.addi %c0, %arg5 : index | |
%5 = stream.binding.subspan %arg2[%4] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%6 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%7 = flow.dispatch.tensor.load %3, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%8 = linalg.init_tensor [4] : tensor<4xf32> | |
%9 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%6, %7 : tensor<4xf32>, tensor<4xf32>) outs(%8 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg6: f32, %arg7: f32, %arg8: f32): | |
%10 = arith.mulf %arg6, %arg7 : f32 | |
linalg.yield %10 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %9, %5, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%c0_0 = arith.constant 0 : index | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%c0, %c0, %c0 : index, index, index) { | |
ro %arg2[%c0_0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0_0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0_0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After PackDispatchOperands //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: i32, %arg4: i32, %arg5: i32) { | |
%0 = arith.index_cast %arg3 : i32 to index | |
%1 = arith.index_cast %arg4 : i32 to index | |
%2 = arith.index_cast %arg5 : i32 to index | |
%c0 = arith.constant 0 : index | |
%3 = arith.addi %c0, %0 : index | |
%4 = stream.binding.subspan %arg0[%3] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%5 = arith.addi %c0, %1 : index | |
%6 = stream.binding.subspan %arg1[%5] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%7 = arith.addi %c0, %2 : index | |
%8 = stream.binding.subspan %arg2[%7] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%9 = flow.dispatch.tensor.load %4, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%10 = flow.dispatch.tensor.load %6, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%11 = linalg.init_tensor [4] : tensor<4xf32> | |
%12 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%9, %10 : tensor<4xf32>, tensor<4xf32>) outs(%11 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg6: f32, %arg7: f32, %arg8: f32): | |
%13 = arith.mulf %arg6, %arg7 : f32 | |
linalg.yield %13 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %12, %8, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%c0_0 = arith.constant 0 : index | |
%c0_i32 = arith.constant 0 : i32 | |
%c0_i32_1 = arith.constant 0 : i32 | |
%c0_i32_2 = arith.constant 0 : i32 | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%c0_i32, %c0_i32_1, %c0_i32_2 : i32, i32, i32) { | |
ro %arg2[%c0_0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0_0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0_0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After CSE //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding, %arg3: i32, %arg4: i32, %arg5: i32) { | |
%0 = arith.index_cast %arg3 : i32 to index | |
%1 = arith.index_cast %arg4 : i32 to index | |
%2 = arith.index_cast %arg5 : i32 to index | |
%c0 = arith.constant 0 : index | |
%3 = arith.addi %c0, %0 : index | |
%4 = stream.binding.subspan %arg0[%3] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%5 = arith.addi %c0, %1 : index | |
%6 = stream.binding.subspan %arg1[%5] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%7 = arith.addi %c0, %2 : index | |
%8 = stream.binding.subspan %arg2[%7] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%9 = flow.dispatch.tensor.load %4, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%10 = flow.dispatch.tensor.load %6, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%11 = linalg.init_tensor [4] : tensor<4xf32> | |
%12 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%9, %10 : tensor<4xf32>, tensor<4xf32>) outs(%11 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg6: f32, %arg7: f32, %arg8: f32): | |
%13 = arith.mulf %arg6, %arg7 : f32 | |
linalg.yield %13 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %12, %8, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%c0 = arith.constant 0 : index | |
%c0_i32 = arith.constant 0 : i32 | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1](%c0_i32, %c0_i32, %c0_i32 : i32, i32, i32) { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After FoldUniformOperands //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) { | |
%c0_i32 = arith.constant 0 : i32 | |
%0 = arith.index_cast %c0_i32 : i32 to index | |
%1 = arith.index_cast %c0_i32 : i32 to index | |
%2 = arith.index_cast %c0_i32 : i32 to index | |
%c0 = arith.constant 0 : index | |
%3 = arith.addi %c0, %0 : index | |
%4 = stream.binding.subspan %arg0[%3] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%5 = arith.addi %c0, %1 : index | |
%6 = stream.binding.subspan %arg1[%5] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%7 = arith.addi %c0, %2 : index | |
%8 = stream.binding.subspan %arg2[%7] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%9 = flow.dispatch.tensor.load %4, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%10 = flow.dispatch.tensor.load %6, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%11 = linalg.init_tensor [4] : tensor<4xf32> | |
%12 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%9, %10 : tensor<4xf32>, tensor<4xf32>) outs(%11 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%13 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %13 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %12, %8, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%c0 = arith.constant 0 : index | |
%c0_i32 = arith.constant 0 : i32 | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After AnnotateDispatchArguments //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding {stream.alignment = 64 : index}, %arg1: !stream.binding {stream.alignment = 64 : index}, %arg2: !stream.binding {stream.alignment = 64 : index}) { | |
%c0_i32 = arith.constant 0 : i32 | |
%0 = arith.index_cast %c0_i32 : i32 to index | |
%1 = arith.index_cast %c0_i32 : i32 to index | |
%2 = arith.index_cast %c0_i32 : i32 to index | |
%c0 = arith.constant 0 : index | |
%3 = arith.addi %c0, %0 : index | |
%4 = stream.binding.subspan %arg0[%3] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%5 = arith.addi %c0, %1 : index | |
%6 = stream.binding.subspan %arg1[%5] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%7 = arith.addi %c0, %2 : index | |
%8 = stream.binding.subspan %arg2[%7] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%9 = flow.dispatch.tensor.load %4, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%10 = flow.dispatch.tensor.load %6, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%11 = linalg.init_tensor [4] : tensor<4xf32> | |
%12 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%9, %10 : tensor<4xf32>, tensor<4xf32>) outs(%11 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%13 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %13 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %12, %8, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%c0 = arith.constant 0 : index | |
%c0_i32 = arith.constant 0 : i32 | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding {stream.alignment = 64 : index}, %arg1: !stream.binding {stream.alignment = 64 : index}, %arg2: !stream.binding {stream.alignment = 64 : index}) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After CSE //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding {stream.alignment = 64 : index}, %arg1: !stream.binding {stream.alignment = 64 : index}, %arg2: !stream.binding {stream.alignment = 64 : index}) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::SimplifyGlobalAccessesPass //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::ApplyPatternsPass //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding {stream.alignment = 64 : index}, %arg1: !stream.binding {stream.alignment = 64 : index}, %arg2: !stream.binding {stream.alignment = 64 : index}) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::FoldGlobalsPass //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding {stream.alignment = 64 : index}, %arg1: !stream.binding {stream.alignment = 64 : index}, %arg2: !stream.binding {stream.alignment = 64 : index}) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::FuseGlobalsPass //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding {stream.alignment = 64 : index}, %arg1: !stream.binding {stream.alignment = 64 : index}, %arg2: !stream.binding {stream.alignment = 64 : index}) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After SymbolDCE //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding {stream.alignment = 64 : index}, %arg1: !stream.binding {stream.alignment = 64 : index}, %arg2: !stream.binding {stream.alignment = 64 : index}) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding {stream.alignment = 64 : index}, %arg1: !stream.binding {stream.alignment = 64 : index}, %arg2: !stream.binding {stream.alignment = 64 : index}) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After CSE //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding {stream.alignment = 64 : index}, %arg1: !stream.binding {stream.alignment = 64 : index}, %arg2: !stream.binding {stream.alignment = 64 : index}) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::SimplifyGlobalAccessesPass //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::ApplyPatternsPass //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding {stream.alignment = 64 : index}, %arg1: !stream.binding {stream.alignment = 64 : index}, %arg2: !stream.binding {stream.alignment = 64 : index}) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::FoldGlobalsPass //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding {stream.alignment = 64 : index}, %arg1: !stream.binding {stream.alignment = 64 : index}, %arg2: !stream.binding {stream.alignment = 64 : index}) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::FuseGlobalsPass //----- // | |
#map = affine_map<(d0) -> (d0)> | |
module { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding {stream.alignment = 64 : index}, %arg1: !stream.binding {stream.alignment = 64 : index}, %arg2: !stream.binding {stream.alignment = 64 : index}) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::HAL::AssignTargetDevicesPass //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
#map = affine_map<(d0) -> (d0)> | |
module attributes {hal.device.targets = [#device_target_cpu]} { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding {stream.alignment = 64 : index}, %arg1: !stream.binding {stream.alignment = 64 : index}, %arg2: !stream.binding {stream.alignment = 64 : index}) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::HAL::VerifyTargetEnvironmentPass //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
#map = affine_map<(d0) -> (d0)> | |
module attributes {hal.device.targets = [#device_target_cpu]} { | |
stream.executable private @simple_mul_dispatch_0 { | |
stream.executable.export public @simple_mul_dispatch_0 | |
builtin.module { | |
func.func @simple_mul_dispatch_0(%arg0: !stream.binding {stream.alignment = 64 : index}, %arg1: !stream.binding {stream.alignment = 64 : index}, %arg2: !stream.binding {stream.alignment = 64 : index}) { | |
%c0 = arith.constant 0 : index | |
%0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> !flow.dispatch.tensor<readonly:4xf32> | |
%2 = stream.binding.subspan %arg2[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%7 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::HAL::(anonymous namespace)::MaterializeInterfacesPass //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
#executable_layout = #hal.executable.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]> | |
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}> | |
#map = affine_map<(d0) -> (d0)> | |
module attributes {hal.device.targets = [#device_target_cpu]} { | |
hal.executable private @simple_mul_dispatch_0 { | |
hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ { | |
hal.executable.export public @simple_mul_dispatch_0 ordinal(0) layout(#executable_layout) | |
builtin.module { | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg0: f32, %arg1: f32, %arg2: f32): | |
%7 = arith.mulf %arg0, %arg1 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%0 = stream.tensor.import %arg0 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%1 = stream.tensor.import %arg1 : !hal.buffer_view -> tensor<4xf32> in !stream.resource<external>{%c16} | |
%2 = stream.resource.alloc uninitialized : !stream.resource<external>{%c16} | |
%3 = stream.cmd.execute with(%0 as %arg2: !stream.resource<external>{%c16}, %1 as %arg3: !stream.resource<external>{%c16}, %2 as %arg4: !stream.resource<external>{%c16}) { | |
stream.cmd.dispatch @simple_mul_dispatch_0::@simple_mul_dispatch_0[%c4, %c1, %c1] { | |
ro %arg2[%c0 for %c16] : !stream.resource<external>{%c16}, | |
ro %arg3[%c0 for %c16] : !stream.resource<external>{%c16}, | |
wo %arg4[%c0 for %c16] : !stream.resource<external>{%c16} | |
} attributes {hal.interface.bindings = [#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>, #hal.interface.binding<0, 2>]} | |
} => !stream.timepoint | |
%4 = stream.timepoint.await %3 => %2 : !stream.resource<external>{%c16} | |
%5 = stream.tensor.export %4 : tensor<4xf32> in !stream.resource<external>{%c16} -> !hal.buffer_view | |
return %5 : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After VerifyLinalgTransformLegality //----- // | |
module { | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg0: f32, %arg1: f32, %arg2: f32): | |
%7 = arith.mulf %arg0, %arg1 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
// -----// IR Dump After TypePropagation //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg0: f32, %arg1: f32, %arg2: f32): | |
%7 = arith.mulf %arg0, %arg1 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
// -----// IR Dump After BufferizeCopyOnlyDispatches //----- // | |
module { | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {name = "mul.1"} { | |
^bb0(%arg0: f32, %arg1: f32, %arg2: f32): | |
%7 = arith.mulf %arg0, %arg1 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
// -----// IR Dump After InsertDistributionInfo //----- // | |
hal.executable.variant public @embedded_elf_x86_64, target = <"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}> { | |
hal.executable.export public @simple_mul_dispatch_0 ordinal(0) layout(#hal.executable.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]>) {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert workload_per_wg = [4]>} { | |
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
%c1 = arith.constant 1 : index | |
%0 = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%arg1] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
builtin.module { | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%5 = linalg.init_tensor [4] : tensor<4xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%3, %4 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[4], [4], [0]]>, name = "mul.1"} { | |
^bb0(%arg0: f32, %arg1: f32, %arg2: f32): | |
%7 = arith.mulf %arg0, %arg1 : f32 | |
linalg.yield %7 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %6, %2, offsets = [0], sizes = [4], strides = [1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
return | |
} | |
} | |
} | |
// -----// IR Dump After TileAndDistributeToWorkgroups //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%3 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x] | |
%4 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_x] | |
scf.for %arg0 = %3 to %c4 step %4 { | |
%5 = flow.dispatch.tensor.load %0, offsets = [%arg0], sizes = [4], strides = [%c1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%6 = flow.dispatch.tensor.load %1, offsets = [%arg0], sizes = [4], strides = [%c1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%7 = linalg.init_tensor [4] : tensor<4xf32> | |
%8 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%5, %6 : tensor<4xf32>, tensor<4xf32>) outs(%7 : tensor<4xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[4], [4], [0]]>, name = "mul.1"} { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%9 = arith.mulf %arg1, %arg2 : f32 | |
linalg.yield %9 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %8, %2, offsets = [%arg0], sizes = [4], strides = [%c1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
} | |
return | |
} | |
// -----// IR Dump After ConvertToDestinationPassingStyle //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%3 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x] | |
%4 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_x] | |
scf.for %arg0 = %3 to %c4 step %4 { | |
%c1_0 = arith.constant 1 : index | |
%5 = flow.dispatch.tensor.load %2, offsets = [%arg0], sizes = [4], strides = [%c1_0] : !flow.dispatch.tensor<writeonly:4xf32> -> tensor<4xf32> | |
%6 = flow.dispatch.tensor.load %0, offsets = [%arg0], sizes = [4], strides = [%c1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%7 = flow.dispatch.tensor.load %1, offsets = [%arg0], sizes = [4], strides = [%c1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%8 = linalg.init_tensor [4] : tensor<4xf32> | |
%9 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%6, %7 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[4], [4], [0]]>, name = "mul.1"} { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%10 = arith.mulf %arg1, %arg2 : f32 | |
linalg.yield %10 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %9, %2, offsets = [%arg0], sizes = [4], strides = [%c1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
} | |
return | |
} | |
// -----// IR Dump After FoldAffineMinInDistributedLoops //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%3 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x] | |
%4 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_x] | |
scf.for %arg0 = %3 to %c4 step %4 { | |
%5 = flow.dispatch.tensor.load %2, offsets = [%arg0], sizes = [4], strides = [%c1] : !flow.dispatch.tensor<writeonly:4xf32> -> tensor<4xf32> | |
%6 = flow.dispatch.tensor.load %0, offsets = [%arg0], sizes = [4], strides = [%c1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%7 = flow.dispatch.tensor.load %1, offsets = [%arg0], sizes = [4], strides = [%c1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%8 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%6, %7 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[4], [4], [0]]>, name = "mul.1"} { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%9 = arith.mulf %arg1, %arg2 : f32 | |
linalg.yield %9 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %8, %2, offsets = [%arg0], sizes = [4], strides = [%c1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
} | |
return | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
module { | |
func.func @simple_mul_dispatch_0() { | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%3 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x] | |
%4 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_x] | |
scf.for %arg0 = %3 to %c4 step %4 { | |
%5 = flow.dispatch.tensor.load %2, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<writeonly:4xf32> -> tensor<4xf32> | |
%6 = flow.dispatch.tensor.load %0, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%7 = flow.dispatch.tensor.load %1, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%8 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%6, %7 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[4], [4], [0]]>, name = "mul.1"} { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%9 = arith.mulf %arg1, %arg2 : f32 | |
linalg.yield %9 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %8, %2, offsets = [%arg0], sizes = [4], strides = [%c1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
} | |
return | |
} | |
} | |
// -----// IR Dump After CSE //----- // | |
module { | |
func.func @simple_mul_dispatch_0() { | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%3 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x] | |
%4 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_x] | |
scf.for %arg0 = %3 to %c4 step %4 { | |
%5 = flow.dispatch.tensor.load %2, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<writeonly:4xf32> -> tensor<4xf32> | |
%6 = flow.dispatch.tensor.load %0, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%7 = flow.dispatch.tensor.load %1, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%8 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%6, %7 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[4], [4], [0]]>, name = "mul.1"} { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%9 = arith.mulf %arg1, %arg2 : f32 | |
linalg.yield %9 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %8, %2, offsets = [%arg0], sizes = [4], strides = [%c1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
} | |
return | |
} | |
} | |
// -----// IR Dump After LinalgStrategyTileAndFusePass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%3 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x] | |
%4 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_x] | |
scf.for %arg0 = %3 to %c4 step %4 { | |
%5 = flow.dispatch.tensor.load %2, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<writeonly:4xf32> -> tensor<4xf32> | |
%6 = flow.dispatch.tensor.load %0, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%7 = flow.dispatch.tensor.load %1, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%8 = scf.for %arg1 = %c0 to %c4 step %c4 iter_args(%arg2 = %5) -> (tensor<4xf32>) { | |
%9 = tensor.extract_slice %6[%arg1] [4] [1] : tensor<4xf32> to tensor<4xf32> | |
%10 = tensor.extract_slice %7[%arg1] [4] [1] : tensor<4xf32> to tensor<4xf32> | |
%11 = tensor.extract_slice %arg2[%arg1] [4] [1] : tensor<4xf32> to tensor<4xf32> | |
%12 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%9, %10 : tensor<4xf32>, tensor<4xf32>) outs(%11 : tensor<4xf32>) attrs = {__internal_linalg_transform__ = "1", lowering_config = #iree_codegen.lowering_config<tile_sizes = [[4], [4], [0]]>, name = "mul.1"} { | |
^bb0(%arg3: f32, %arg4: f32, %arg5: f32): | |
%14 = arith.mulf %arg3, %arg4 : f32 | |
linalg.yield %14 : f32 | |
} -> tensor<4xf32> | |
%13 = tensor.insert_slice %12 into %arg2[%arg1] [4] [1] : tensor<4xf32> into tensor<4xf32> | |
scf.yield %13 : tensor<4xf32> | |
} | |
flow.dispatch.tensor.store %8, %2, offsets = [%arg0], sizes = [4], strides = [%c1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
} | |
return | |
} | |
// -----// IR Dump After CSE //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%3 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x] | |
%4 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_x] | |
scf.for %arg0 = %3 to %c4 step %4 { | |
%5 = flow.dispatch.tensor.load %2, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<writeonly:4xf32> -> tensor<4xf32> | |
%6 = flow.dispatch.tensor.load %0, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%7 = flow.dispatch.tensor.load %1, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%8 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%6, %7 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {__internal_linalg_transform__ = "1", lowering_config = #iree_codegen.lowering_config<tile_sizes = [[4], [4], [0]]>, name = "mul.1"} { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%9 = arith.mulf %arg1, %arg2 : f32 | |
linalg.yield %9 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %8, %2, offsets = [%arg0], sizes = [4], strides = [%c1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
} | |
return | |
} | |
// -----// IR Dump After LinalgStrategyEnablePass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%3 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x] | |
%4 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_x] | |
scf.for %arg0 = %3 to %c4 step %4 { | |
%5 = flow.dispatch.tensor.load %2, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<writeonly:4xf32> -> tensor<4xf32> | |
%6 = flow.dispatch.tensor.load %0, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%7 = flow.dispatch.tensor.load %1, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%8 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%6, %7 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {__internal_linalg_transform__ = "1", lowering_config = #iree_codegen.lowering_config<tile_sizes = [[4], [4], [0]]>, name = "mul.1"} { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%9 = arith.mulf %arg1, %arg2 : f32 | |
linalg.yield %9 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %8, %2, offsets = [%arg0], sizes = [4], strides = [%c1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
} | |
return | |
} | |
// -----// IR Dump After LinalgStrategyRemoveMarkersPass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%3 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x] | |
%4 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_x] | |
scf.for %arg0 = %3 to %c4 step %4 { | |
%5 = flow.dispatch.tensor.load %2, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<writeonly:4xf32> -> tensor<4xf32> | |
%6 = flow.dispatch.tensor.load %0, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%7 = flow.dispatch.tensor.load %1, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%8 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%6, %7 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[4], [4], [0]]>, name = "mul.1"} { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%9 = arith.mulf %arg1, %arg2 : f32 | |
linalg.yield %9 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %8, %2, offsets = [%arg0], sizes = [4], strides = [%c1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
} | |
return | |
} | |
// -----// IR Dump After LinalgFuse //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%3 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x] | |
%4 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_x] | |
scf.for %arg0 = %3 to %c4 step %4 { | |
%5 = flow.dispatch.tensor.load %2, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<writeonly:4xf32> -> tensor<4xf32> | |
%6 = flow.dispatch.tensor.load %0, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%7 = flow.dispatch.tensor.load %1, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%8 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%6, %7 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[4], [4], [0]]>, name = "mul.1"} { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%9 = arith.mulf %arg1, %arg2 : f32 | |
linalg.yield %9 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %8, %2, offsets = [%arg0], sizes = [4], strides = [%c1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
} | |
return | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%3 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x] | |
%4 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_x] | |
scf.for %arg0 = %3 to %c4 step %4 { | |
%5 = flow.dispatch.tensor.load %2, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<writeonly:4xf32> -> tensor<4xf32> | |
%6 = flow.dispatch.tensor.load %0, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%7 = flow.dispatch.tensor.load %1, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%8 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%6, %7 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[4], [4], [0]]>, name = "mul.1"} { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%9 = arith.mulf %arg1, %arg2 : f32 | |
linalg.yield %9 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %8, %2, offsets = [%arg0], sizes = [4], strides = [%c1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
} | |
return | |
} | |
// -----// IR Dump After CSE //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%3 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x] | |
%4 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_x] | |
scf.for %arg0 = %3 to %c4 step %4 { | |
%5 = flow.dispatch.tensor.load %2, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<writeonly:4xf32> -> tensor<4xf32> | |
%6 = flow.dispatch.tensor.load %0, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%7 = flow.dispatch.tensor.load %1, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%8 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%6, %7 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[4], [4], [0]]>, name = "mul.1"} { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%9 = arith.mulf %arg1, %arg2 : f32 | |
linalg.yield %9 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %8, %2, offsets = [%arg0], sizes = [4], strides = [%c1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
} | |
return | |
} | |
// -----// IR Dump After LinalgStrategyTilePass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%3 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x] | |
%4 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_x] | |
scf.for %arg0 = %3 to %c4 step %4 { | |
%5 = flow.dispatch.tensor.load %2, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<writeonly:4xf32> -> tensor<4xf32> | |
%6 = flow.dispatch.tensor.load %0, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%7 = flow.dispatch.tensor.load %1, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%8 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%6, %7 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {__internal_linalg_transform__ = "1", lowering_config = #iree_codegen.lowering_config<tile_sizes = [[4], [4], [0]]>, name = "mul.1"} { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%9 = arith.mulf %arg1, %arg2 : f32 | |
linalg.yield %9 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %8, %2, offsets = [%arg0], sizes = [4], strides = [%c1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
} | |
return | |
} | |
// -----// IR Dump After CSE //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%3 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x] | |
%4 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_x] | |
scf.for %arg0 = %3 to %c4 step %4 { | |
%5 = flow.dispatch.tensor.load %2, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<writeonly:4xf32> -> tensor<4xf32> | |
%6 = flow.dispatch.tensor.load %0, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%7 = flow.dispatch.tensor.load %1, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%8 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%6, %7 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {__internal_linalg_transform__ = "1", lowering_config = #iree_codegen.lowering_config<tile_sizes = [[4], [4], [0]]>, name = "mul.1"} { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%9 = arith.mulf %arg1, %arg2 : f32 | |
linalg.yield %9 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %8, %2, offsets = [%arg0], sizes = [4], strides = [%c1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
} | |
return | |
} | |
// -----// IR Dump After LinalgStrategyEnablePass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%3 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x] | |
%4 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_x] | |
scf.for %arg0 = %3 to %c4 step %4 { | |
%5 = flow.dispatch.tensor.load %2, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<writeonly:4xf32> -> tensor<4xf32> | |
%6 = flow.dispatch.tensor.load %0, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%7 = flow.dispatch.tensor.load %1, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%8 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%6, %7 : tensor<4xf32>, tensor<4xf32>) outs(%5 : tensor<4xf32>) attrs = {__internal_linalg_transform__ = "1", lowering_config = #iree_codegen.lowering_config<tile_sizes = [[4], [4], [0]]>, name = "mul.1"} { | |
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): | |
%9 = arith.mulf %arg1, %arg2 : f32 | |
linalg.yield %9 : f32 | |
} -> tensor<4xf32> | |
flow.dispatch.tensor.store %8, %2, offsets = [%arg0], sizes = [4], strides = [%c1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
} | |
return | |
} | |
// -----// IR Dump After LinalgStrategyVectorizePass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%3 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x] | |
%4 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_x] | |
scf.for %arg0 = %3 to %c4 step %4 { | |
%5 = flow.dispatch.tensor.load %2, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<writeonly:4xf32> -> tensor<4xf32> | |
%6 = flow.dispatch.tensor.load %0, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%7 = flow.dispatch.tensor.load %1, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%8 = vector.transfer_read %6[%c0], %cst {in_bounds = [true]} : tensor<4xf32>, vector<4xf32> | |
%9 = vector.transfer_read %7[%c0], %cst {in_bounds = [true]} : tensor<4xf32>, vector<4xf32> | |
%10 = arith.mulf %8, %9 : vector<4xf32> | |
%11 = vector.transfer_write %10, %5[%c0] {in_bounds = [true]} : vector<4xf32>, tensor<4xf32> | |
flow.dispatch.tensor.store %11, %2, offsets = [%arg0], sizes = [4], strides = [%c1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
} | |
return | |
} | |
// -----// IR Dump After CSE //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%3 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x] | |
%4 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_x] | |
scf.for %arg0 = %3 to %c4 step %4 { | |
%5 = flow.dispatch.tensor.load %2, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<writeonly:4xf32> -> tensor<4xf32> | |
%6 = flow.dispatch.tensor.load %0, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%7 = flow.dispatch.tensor.load %1, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%8 = vector.transfer_read %6[%c0], %cst {in_bounds = [true]} : tensor<4xf32>, vector<4xf32> | |
%9 = vector.transfer_read %7[%c0], %cst {in_bounds = [true]} : tensor<4xf32>, vector<4xf32> | |
%10 = arith.mulf %8, %9 : vector<4xf32> | |
%11 = vector.transfer_write %10, %5[%c0] {in_bounds = [true]} : vector<4xf32>, tensor<4xf32> | |
flow.dispatch.tensor.store %11, %2, offsets = [%arg0], sizes = [4], strides = [%c1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
} | |
return | |
} | |
// -----// IR Dump After LinalgStrategyEnablePass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%3 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x] | |
%4 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_x] | |
scf.for %arg0 = %3 to %c4 step %4 { | |
%5 = flow.dispatch.tensor.load %2, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<writeonly:4xf32> -> tensor<4xf32> | |
%6 = flow.dispatch.tensor.load %0, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%7 = flow.dispatch.tensor.load %1, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%8 = vector.transfer_read %6[%c0], %cst {in_bounds = [true]} : tensor<4xf32>, vector<4xf32> | |
%9 = vector.transfer_read %7[%c0], %cst {in_bounds = [true]} : tensor<4xf32>, vector<4xf32> | |
%10 = arith.mulf %8, %9 : vector<4xf32> | |
%11 = vector.transfer_write %10, %5[%c0] {in_bounds = [true]} : vector<4xf32>, tensor<4xf32> | |
flow.dispatch.tensor.store %11, %2, offsets = [%arg0], sizes = [4], strides = [%c1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
} | |
return | |
} | |
// -----// IR Dump After LinalgStrategyRemoveMarkersPass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%3 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x] | |
%4 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_x] | |
scf.for %arg0 = %3 to %c4 step %4 { | |
%5 = flow.dispatch.tensor.load %2, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<writeonly:4xf32> -> tensor<4xf32> | |
%6 = flow.dispatch.tensor.load %0, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%7 = flow.dispatch.tensor.load %1, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%8 = vector.transfer_read %6[%c0], %cst {in_bounds = [true]} : tensor<4xf32>, vector<4xf32> | |
%9 = vector.transfer_read %7[%c0], %cst {in_bounds = [true]} : tensor<4xf32>, vector<4xf32> | |
%10 = arith.mulf %8, %9 : vector<4xf32> | |
%11 = vector.transfer_write %10, %5[%c0] {in_bounds = [true]} : vector<4xf32>, tensor<4xf32> | |
flow.dispatch.tensor.store %11, %2, offsets = [%arg0], sizes = [4], strides = [%c1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
} | |
return | |
} | |
// -----// IR Dump After LinalgSingleTilingExpert //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%3 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x] | |
%4 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_x] | |
scf.for %arg0 = %3 to %c4 step %4 { | |
%5 = flow.dispatch.tensor.load %2, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<writeonly:4xf32> -> tensor<4xf32> | |
%6 = flow.dispatch.tensor.load %0, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%7 = flow.dispatch.tensor.load %1, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%8 = vector.transfer_read %6[%c0], %cst {in_bounds = [true]} : tensor<4xf32>, vector<4xf32> | |
%9 = vector.transfer_read %7[%c0], %cst {in_bounds = [true]} : tensor<4xf32>, vector<4xf32> | |
%10 = arith.mulf %8, %9 : vector<4xf32> | |
%11 = vector.transfer_write %10, %5[%c0] {in_bounds = [true]} : vector<4xf32>, tensor<4xf32> | |
flow.dispatch.tensor.store %11, %2, offsets = [%arg0], sizes = [4], strides = [%c1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
} | |
return | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%3 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x] | |
%4 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_x] | |
scf.for %arg0 = %3 to %c4 step %4 { | |
%5 = flow.dispatch.tensor.load %2, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<writeonly:4xf32> -> tensor<4xf32> | |
%6 = flow.dispatch.tensor.load %0, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%7 = flow.dispatch.tensor.load %1, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%8 = vector.transfer_read %6[%c0], %cst {in_bounds = [true]} : tensor<4xf32>, vector<4xf32> | |
%9 = vector.transfer_read %7[%c0], %cst {in_bounds = [true]} : tensor<4xf32>, vector<4xf32> | |
%10 = arith.mulf %8, %9 : vector<4xf32> | |
%11 = vector.transfer_write %10, %5[%c0] {in_bounds = [true]} : vector<4xf32>, tensor<4xf32> | |
flow.dispatch.tensor.store %11, %2, offsets = [%arg0], sizes = [4], strides = [%c1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
} | |
return | |
} | |
// -----// IR Dump After CSE //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%3 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x] | |
%4 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_x] | |
scf.for %arg0 = %3 to %c4 step %4 { | |
%5 = flow.dispatch.tensor.load %2, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<writeonly:4xf32> -> tensor<4xf32> | |
%6 = flow.dispatch.tensor.load %0, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%7 = flow.dispatch.tensor.load %1, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%8 = vector.transfer_read %6[%c0], %cst {in_bounds = [true]} : tensor<4xf32>, vector<4xf32> | |
%9 = vector.transfer_read %7[%c0], %cst {in_bounds = [true]} : tensor<4xf32>, vector<4xf32> | |
%10 = arith.mulf %8, %9 : vector<4xf32> | |
%11 = vector.transfer_write %10, %5[%c0] {in_bounds = [true]} : vector<4xf32>, tensor<4xf32> | |
flow.dispatch.tensor.store %11, %2, offsets = [%arg0], sizes = [4], strides = [%c1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
} | |
return | |
} | |
// -----// IR Dump After LinalgInitTensorToAllocTensor //----- // | |
module { | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%3 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x] | |
%4 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_x] | |
scf.for %arg0 = %3 to %c4 step %4 { | |
%5 = flow.dispatch.tensor.load %2, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<writeonly:4xf32> -> tensor<4xf32> | |
%6 = flow.dispatch.tensor.load %0, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%7 = flow.dispatch.tensor.load %1, offsets = [%arg0], sizes = [4], strides = [1] : !flow.dispatch.tensor<readonly:4xf32> -> tensor<4xf32> | |
%8 = vector.transfer_read %6[%c0], %cst {in_bounds = [true]} : tensor<4xf32>, vector<4xf32> | |
%9 = vector.transfer_read %7[%c0], %cst {in_bounds = [true]} : tensor<4xf32>, vector<4xf32> | |
%10 = arith.mulf %8, %9 : vector<4xf32> | |
%11 = vector.transfer_write %10, %5[%c0] {in_bounds = [true]} : vector<4xf32>, tensor<4xf32> | |
flow.dispatch.tensor.store %11, %2, offsets = [%arg0], sizes = [4], strides = [%c1] : tensor<4xf32> -> !flow.dispatch.tensor<writeonly:4xf32> | |
} | |
return | |
} | |
} | |
// -----// IR Dump After IREEComprehensiveBufferize //----- // | |
module { | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%4 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %4, 64 : memref<4xf32> | |
%5 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%6 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x] | |
%7 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_x] | |
scf.for %arg0 = %6 to %c4 step %7 { | |
%8 = bufferization.to_tensor %4 : memref<4xf32> | |
%9 = bufferization.to_tensor %0 : memref<4xf32> | |
%10 = bufferization.to_tensor %2 : memref<4xf32> | |
%11 = vector.transfer_read %0[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%12 = vector.transfer_read %2[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%13 = arith.mulf %11, %12 : vector<4xf32> | |
vector.transfer_write %13, %4[%c0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32> | |
%14 = bufferization.to_tensor %4 : memref<4xf32> | |
linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%4 : memref<4xf32>) outs(%4 : memref<4xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
linalg.yield %arg1 : f32 | |
} | |
} | |
return | |
} | |
} | |
// -----// IR Dump After ResolveShapedTypeResultDims //----- // | |
module { | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%4 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %4, 64 : memref<4xf32> | |
%5 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%6 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x] | |
%7 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_x] | |
scf.for %arg0 = %6 to %c4 step %7 { | |
%8 = vector.transfer_read %0[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%9 = vector.transfer_read %2[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%10 = arith.mulf %8, %9 : vector<4xf32> | |
vector.transfer_write %10, %4[%c0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32> | |
linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%4 : memref<4xf32>) outs(%4 : memref<4xf32>) { | |
^bb0(%arg1: f32, %arg2: f32): | |
linalg.yield %arg1 : f32 | |
} | |
} | |
return | |
} | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%4 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %4, 64 : memref<4xf32> | |
%5 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%6 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x] | |
%7 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_x] | |
scf.for %arg0 = %6 to %c4 step %7 { | |
%8 = vector.transfer_read %0[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%9 = vector.transfer_read %2[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%10 = arith.mulf %8, %9 : vector<4xf32> | |
vector.transfer_write %10, %4[%c0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32> | |
} | |
return | |
} | |
// -----// IR Dump After CSE //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%4 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %4, 64 : memref<4xf32> | |
%5 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%6 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x] | |
%7 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_x] | |
scf.for %arg0 = %6 to %c4 step %7 { | |
%8 = vector.transfer_read %0[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%9 = vector.transfer_read %2[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%10 = arith.mulf %8, %9 : vector<4xf32> | |
vector.transfer_write %10, %4[%c0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32> | |
} | |
return | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:4xf32> | |
%4 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %4, 64 : memref<4xf32> | |
%5 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<writeonly:4xf32> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%6 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x] | |
%7 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_x] | |
scf.for %arg0 = %6 to %c4 step %7 { | |
%8 = vector.transfer_read %0[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%9 = vector.transfer_read %2[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%10 = arith.mulf %8, %9 : vector<4xf32> | |
vector.transfer_write %10, %4[%c0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32> | |
} | |
return | |
} | |
// -----// IR Dump After CleanupBufferAllocView //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%workgroup_id_x = hal.interface.workgroup.id[0] : index | |
%workgroup_count_x = hal.interface.workgroup.count[0] : index | |
%3 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_id_x] | |
%4 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%workgroup_count_x] | |
scf.for %arg0 = %3 to %c4 step %4 { | |
%5 = vector.transfer_read %0[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%6 = vector.transfer_read %1[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%7 = arith.mulf %5, %6 : vector<4xf32> | |
vector.transfer_write %7, %2[%c0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32> | |
} | |
return | |
} | |
// -----// IR Dump After RemoveSingleIterationLoop //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.transfer_read %0[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%4 = vector.transfer_read %1[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.transfer_write %5, %2[%c0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32> | |
return | |
} | |
// -----// IR Dump After LinalgStrategyLowerVectorsPass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.transfer_read %0[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%4 = vector.transfer_read %1[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.transfer_write %5, %2[%c0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32> | |
return | |
} | |
// -----// IR Dump After CSE //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.transfer_read %0[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%4 = vector.transfer_read %1[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.transfer_write %5, %2[%c0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32> | |
return | |
} | |
// -----// IR Dump After LinalgStrategyEnablePass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.transfer_read %0[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%4 = vector.transfer_read %1[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.transfer_write %5, %2[%c0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32> | |
return | |
} | |
// -----// IR Dump After LinalgStrategyRemoveMarkersPass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.transfer_read %0[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%4 = vector.transfer_read %1[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.transfer_write %5, %2[%c0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32> | |
return | |
} | |
// -----// IR Dump After LinalgVectorLowering //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.transfer_read %0[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%4 = vector.transfer_read %1[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.transfer_write %5, %2[%c0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32> | |
return | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.transfer_read %0[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%4 = vector.transfer_read %1[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.transfer_write %5, %2[%c0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32> | |
return | |
} | |
// -----// IR Dump After CSE //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.transfer_read %0[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%4 = vector.transfer_read %1[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.transfer_write %5, %2[%c0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32> | |
return | |
} | |
// -----// IR Dump After LinalgStrategyLowerVectorsPass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.transfer_read %0[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%4 = vector.transfer_read %1[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.transfer_write %5, %2[%c0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32> | |
return | |
} | |
// -----// IR Dump After CSE //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.transfer_read %0[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%4 = vector.transfer_read %1[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.transfer_write %5, %2[%c0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32> | |
return | |
} | |
// -----// IR Dump After LinalgStrategyEnablePass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.transfer_read %0[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%4 = vector.transfer_read %1[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.transfer_write %5, %2[%c0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32> | |
return | |
} | |
// -----// IR Dump After LinalgStrategyRemoveMarkersPass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.transfer_read %0[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%4 = vector.transfer_read %1[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.transfer_write %5, %2[%c0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32> | |
return | |
} | |
// -----// IR Dump After LinalgVectorLowering //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.transfer_read %0[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%4 = vector.transfer_read %1[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.transfer_write %5, %2[%c0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32> | |
return | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.transfer_read %0[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%4 = vector.transfer_read %1[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.transfer_write %5, %2[%c0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32> | |
return | |
} | |
// -----// IR Dump After CSE //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.transfer_read %0[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%4 = vector.transfer_read %1[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.transfer_write %5, %2[%c0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32> | |
return | |
} | |
// -----// IR Dump After LinalgStrategyLowerVectorsPass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.transfer_read %0[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%4 = vector.transfer_read %1[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.transfer_write %5, %2[%c0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32> | |
return | |
} | |
// -----// IR Dump After CSE //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.transfer_read %0[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%4 = vector.transfer_read %1[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.transfer_write %5, %2[%c0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32> | |
return | |
} | |
// -----// IR Dump After LinalgStrategyEnablePass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.transfer_read %0[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%4 = vector.transfer_read %1[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.transfer_write %5, %2[%c0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32> | |
return | |
} | |
// -----// IR Dump After LinalgStrategyRemoveMarkersPass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.transfer_read %0[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%4 = vector.transfer_read %1[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.transfer_write %5, %2[%c0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32> | |
return | |
} | |
// -----// IR Dump After LinalgVectorLowering //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.transfer_read %0[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%4 = vector.transfer_read %1[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.transfer_write %5, %2[%c0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32> | |
return | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.transfer_read %0[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%4 = vector.transfer_read %1[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.transfer_write %5, %2[%c0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32> | |
return | |
} | |
// -----// IR Dump After CSE //----- // | |
func.func @simple_mul_dispatch_0() { | |
%cst = arith.constant 0.000000e+00 : f32 | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.transfer_read %0[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%4 = vector.transfer_read %1[%c0], %cst {in_bounds = [true]} : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.transfer_write %5, %2[%c0] {in_bounds = [true]} : vector<4xf32>, memref<4xf32> | |
return | |
} | |
// -----// IR Dump After LinalgStrategyLowerVectorsPass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After CSE //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After LinalgStrategyEnablePass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After LinalgStrategyRemoveMarkersPass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After LinalgVectorLowering //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After CSE //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After LinalgStrategyLowerVectorsPass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After CSE //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After LinalgStrategyEnablePass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After LinalgStrategyRemoveMarkersPass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After LinalgVectorLowering //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After CSE //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After LinalgStrategyLowerVectorsPass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After CSE //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After LinalgStrategyEnablePass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After LinalgStrategyRemoveMarkersPass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After LinalgVectorLowering //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After CSE //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After LinalgStrategyLowerVectorsPass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After CSE //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After LinalgStrategyEnablePass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After LinalgStrategyRemoveMarkersPass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After LinalgVectorLowering //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After CSE //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After LLVMCPULowerExecutableTarget //----- // | |
hal.executable.variant public @embedded_elf_x86_64, target = <"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}> { | |
hal.executable.export public @simple_mul_dispatch_0 ordinal(0) layout(#hal.executable.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]>) {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert workload_per_wg = [4]>} { | |
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
%c1 = arith.constant 1 : index | |
%0 = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%arg1] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
builtin.module { | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
} | |
} | |
// -----// IR Dump After LinalgExtToLoops //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After MemrefCopyToLinalgPass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After LinalgLowerToLoops //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After CSE //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After ArithmeticBufferize //----- // | |
module { | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
} | |
// -----// IR Dump After FoldTensorExtractOp //----- // | |
module { | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
} | |
// -----// IR Dump After PolynomialApproximationPass //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After LLVMCPUCheckIRBeforeLLVMConversion //----- // | |
module { | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
} | |
// -----// IR Dump After SCFToControlFlow //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After CSE //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After ArithmeticExpandOps //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After ExpandOps //----- // | |
func.func @simple_mul_dispatch_0() { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %0, 64 : memref<4xf32> | |
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %1, 64 : memref<4xf32> | |
%2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : memref<4xf32> | |
memref.assume_alignment %2, 64 : memref<4xf32> | |
%3 = vector.load %0[%c0] : memref<4xf32>, vector<4xf32> | |
%4 = vector.load %1[%c0] : memref<4xf32>, vector<4xf32> | |
%5 = arith.mulf %3, %4 : vector<4xf32> | |
vector.store %5, %2[%c0] : memref<4xf32>, vector<4xf32> | |
return | |
} | |
// -----// IR Dump After ConvertToLLVM //----- // | |
module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} { | |
llvm.func internal @simple_mul_dispatch_0(%arg0: !llvm.ptr<struct<"iree_hal_executable_environment_v0_t", (ptr<i32>, ptr<func<i32 (ptr<func<i32 (ptr<i8>)>>, ptr<i8>)>>, ptr<ptr<func<i32 (ptr<i8>)>>>, struct<"iree_hal_processor_v0_t", (array<8 x i64>)>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr<struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr<ptr<i8>>, i32)>> {llvm.align = 16 : i64, llvm.noalias}) -> i32 { | |
%0 = llvm.mlir.constant(0 : index) : i64 | |
%1 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%2 = llvm.extractvalue %1[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%3 = llvm.mlir.constant(0 : i64) : i64 | |
%4 = llvm.load %2 : !llvm.ptr<ptr<i8>> | |
%5 = llvm.bitcast %4 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%6 = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%7 = llvm.insertvalue %5, %6[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%8 = llvm.insertvalue %5, %7[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%9 = llvm.mlir.constant(0 : index) : i64 | |
%10 = llvm.insertvalue %9, %8[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%11 = llvm.mlir.constant(4 : index) : i64 | |
%12 = llvm.insertvalue %11, %10[3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%13 = llvm.mlir.constant(1 : index) : i64 | |
%14 = llvm.insertvalue %13, %12[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%15 = llvm.extractvalue %14[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%16 = llvm.mlir.constant(0 : index) : i64 | |
%17 = llvm.mlir.constant(63 : index) : i64 | |
%18 = llvm.ptrtoint %15 : !llvm.ptr<f32> to i64 | |
%19 = llvm.and %18, %17 : i64 | |
%20 = llvm.icmp "eq" %19, %16 : i64 | |
"llvm.intr.assume"(%20) : (i1) -> () | |
%21 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%22 = llvm.extractvalue %21[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%23 = llvm.mlir.constant(1 : i64) : i64 | |
%24 = llvm.getelementptr %22[%23] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%25 = llvm.load %24 : !llvm.ptr<ptr<i8>> | |
%26 = llvm.bitcast %25 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%27 = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%28 = llvm.insertvalue %26, %27[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%29 = llvm.insertvalue %26, %28[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%30 = llvm.mlir.constant(0 : index) : i64 | |
%31 = llvm.insertvalue %30, %29[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%32 = llvm.mlir.constant(4 : index) : i64 | |
%33 = llvm.insertvalue %32, %31[3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%34 = llvm.mlir.constant(1 : index) : i64 | |
%35 = llvm.insertvalue %34, %33[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%36 = llvm.extractvalue %35[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%37 = llvm.mlir.constant(0 : index) : i64 | |
%38 = llvm.mlir.constant(63 : index) : i64 | |
%39 = llvm.ptrtoint %36 : !llvm.ptr<f32> to i64 | |
%40 = llvm.and %39, %38 : i64 | |
%41 = llvm.icmp "eq" %40, %37 : i64 | |
"llvm.intr.assume"(%41) : (i1) -> () | |
%42 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%43 = llvm.extractvalue %42[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%44 = llvm.mlir.constant(2 : i64) : i64 | |
%45 = llvm.getelementptr %43[%44] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%46 = llvm.load %45 : !llvm.ptr<ptr<i8>> | |
%47 = llvm.bitcast %46 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%48 = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%49 = llvm.insertvalue %47, %48[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%50 = llvm.insertvalue %47, %49[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%51 = llvm.mlir.constant(0 : index) : i64 | |
%52 = llvm.insertvalue %51, %50[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%53 = llvm.mlir.constant(4 : index) : i64 | |
%54 = llvm.insertvalue %53, %52[3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%55 = llvm.mlir.constant(1 : index) : i64 | |
%56 = llvm.insertvalue %55, %54[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%57 = llvm.extractvalue %56[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%58 = llvm.mlir.constant(0 : index) : i64 | |
%59 = llvm.mlir.constant(63 : index) : i64 | |
%60 = llvm.ptrtoint %57 : !llvm.ptr<f32> to i64 | |
%61 = llvm.and %60, %59 : i64 | |
%62 = llvm.icmp "eq" %61, %58 : i64 | |
"llvm.intr.assume"(%62) : (i1) -> () | |
%63 = llvm.extractvalue %14[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%64 = llvm.getelementptr %63[%0] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32> | |
%65 = llvm.bitcast %64 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%66 = llvm.load %65 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%67 = llvm.extractvalue %35[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%68 = llvm.getelementptr %67[%0] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32> | |
%69 = llvm.bitcast %68 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%70 = llvm.load %69 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%71 = llvm.fmul %66, %70 : vector<4xf32> | |
%72 = llvm.extractvalue %56[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%73 = llvm.getelementptr %72[%0] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32> | |
%74 = llvm.bitcast %73 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
llvm.store %71, %74 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%75 = llvm.mlir.constant(0 : i32) : i32 | |
llvm.return %75 : i32 | |
} | |
} | |
// -----// IR Dump After ReconcileUnrealizedCasts //----- // | |
module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} { | |
llvm.func internal @simple_mul_dispatch_0(%arg0: !llvm.ptr<struct<"iree_hal_executable_environment_v0_t", (ptr<i32>, ptr<func<i32 (ptr<func<i32 (ptr<i8>)>>, ptr<i8>)>>, ptr<ptr<func<i32 (ptr<i8>)>>>, struct<"iree_hal_processor_v0_t", (array<8 x i64>)>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr<struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr<ptr<i8>>, i32)>> {llvm.align = 16 : i64, llvm.noalias}) -> i32 { | |
%0 = llvm.mlir.constant(0 : index) : i64 | |
%1 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%2 = llvm.extractvalue %1[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%3 = llvm.mlir.constant(0 : i64) : i64 | |
%4 = llvm.load %2 : !llvm.ptr<ptr<i8>> | |
%5 = llvm.bitcast %4 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%6 = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%7 = llvm.insertvalue %5, %6[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%8 = llvm.insertvalue %5, %7[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%9 = llvm.mlir.constant(0 : index) : i64 | |
%10 = llvm.insertvalue %9, %8[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%11 = llvm.mlir.constant(4 : index) : i64 | |
%12 = llvm.insertvalue %11, %10[3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%13 = llvm.mlir.constant(1 : index) : i64 | |
%14 = llvm.insertvalue %13, %12[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%15 = llvm.mlir.constant(0 : index) : i64 | |
%16 = llvm.mlir.constant(63 : index) : i64 | |
%17 = llvm.ptrtoint %5 : !llvm.ptr<f32> to i64 | |
%18 = llvm.and %17, %16 : i64 | |
%19 = llvm.icmp "eq" %18, %15 : i64 | |
"llvm.intr.assume"(%19) : (i1) -> () | |
%20 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%21 = llvm.extractvalue %20[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%22 = llvm.mlir.constant(1 : i64) : i64 | |
%23 = llvm.getelementptr %21[%22] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%24 = llvm.load %23 : !llvm.ptr<ptr<i8>> | |
%25 = llvm.bitcast %24 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%26 = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%27 = llvm.insertvalue %25, %26[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%28 = llvm.insertvalue %25, %27[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%29 = llvm.mlir.constant(0 : index) : i64 | |
%30 = llvm.insertvalue %29, %28[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%31 = llvm.mlir.constant(4 : index) : i64 | |
%32 = llvm.insertvalue %31, %30[3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%33 = llvm.mlir.constant(1 : index) : i64 | |
%34 = llvm.insertvalue %33, %32[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%35 = llvm.mlir.constant(0 : index) : i64 | |
%36 = llvm.mlir.constant(63 : index) : i64 | |
%37 = llvm.ptrtoint %25 : !llvm.ptr<f32> to i64 | |
%38 = llvm.and %37, %36 : i64 | |
%39 = llvm.icmp "eq" %38, %35 : i64 | |
"llvm.intr.assume"(%39) : (i1) -> () | |
%40 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%41 = llvm.extractvalue %40[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%42 = llvm.mlir.constant(2 : i64) : i64 | |
%43 = llvm.getelementptr %41[%42] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%44 = llvm.load %43 : !llvm.ptr<ptr<i8>> | |
%45 = llvm.bitcast %44 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%46 = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%47 = llvm.insertvalue %45, %46[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%48 = llvm.insertvalue %45, %47[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%49 = llvm.mlir.constant(0 : index) : i64 | |
%50 = llvm.insertvalue %49, %48[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%51 = llvm.mlir.constant(4 : index) : i64 | |
%52 = llvm.insertvalue %51, %50[3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%53 = llvm.mlir.constant(1 : index) : i64 | |
%54 = llvm.insertvalue %53, %52[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%55 = llvm.mlir.constant(0 : index) : i64 | |
%56 = llvm.mlir.constant(63 : index) : i64 | |
%57 = llvm.ptrtoint %45 : !llvm.ptr<f32> to i64 | |
%58 = llvm.and %57, %56 : i64 | |
%59 = llvm.icmp "eq" %58, %55 : i64 | |
"llvm.intr.assume"(%59) : (i1) -> () | |
%60 = llvm.bitcast %5 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%61 = llvm.load %60 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%62 = llvm.bitcast %25 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%63 = llvm.load %62 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%64 = llvm.fmul %61, %63 : vector<4xf32> | |
%65 = llvm.bitcast %45 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
llvm.store %64, %65 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%66 = llvm.mlir.constant(0 : i32) : i32 | |
llvm.return %66 : i32 | |
} | |
} | |
// -----// IR Dump After LLVMCPUSynchronizeSymbolVisibility //----- // | |
module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} { | |
llvm.func internal @simple_mul_dispatch_0(%arg0: !llvm.ptr<struct<"iree_hal_executable_environment_v0_t", (ptr<i32>, ptr<func<i32 (ptr<func<i32 (ptr<i8>)>>, ptr<i8>)>>, ptr<ptr<func<i32 (ptr<i8>)>>>, struct<"iree_hal_processor_v0_t", (array<8 x i64>)>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr<struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr<ptr<i8>>, i32)>> {llvm.align = 16 : i64, llvm.noalias}) -> i32 attributes {sym_visibility = "private"} { | |
%0 = llvm.mlir.constant(0 : index) : i64 | |
%1 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%2 = llvm.extractvalue %1[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%3 = llvm.mlir.constant(0 : i64) : i64 | |
%4 = llvm.load %2 : !llvm.ptr<ptr<i8>> | |
%5 = llvm.bitcast %4 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%6 = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%7 = llvm.insertvalue %5, %6[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%8 = llvm.insertvalue %5, %7[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%9 = llvm.mlir.constant(0 : index) : i64 | |
%10 = llvm.insertvalue %9, %8[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%11 = llvm.mlir.constant(4 : index) : i64 | |
%12 = llvm.insertvalue %11, %10[3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%13 = llvm.mlir.constant(1 : index) : i64 | |
%14 = llvm.insertvalue %13, %12[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%15 = llvm.mlir.constant(0 : index) : i64 | |
%16 = llvm.mlir.constant(63 : index) : i64 | |
%17 = llvm.ptrtoint %5 : !llvm.ptr<f32> to i64 | |
%18 = llvm.and %17, %16 : i64 | |
%19 = llvm.icmp "eq" %18, %15 : i64 | |
"llvm.intr.assume"(%19) : (i1) -> () | |
%20 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%21 = llvm.extractvalue %20[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%22 = llvm.mlir.constant(1 : i64) : i64 | |
%23 = llvm.getelementptr %21[%22] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%24 = llvm.load %23 : !llvm.ptr<ptr<i8>> | |
%25 = llvm.bitcast %24 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%26 = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%27 = llvm.insertvalue %25, %26[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%28 = llvm.insertvalue %25, %27[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%29 = llvm.mlir.constant(0 : index) : i64 | |
%30 = llvm.insertvalue %29, %28[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%31 = llvm.mlir.constant(4 : index) : i64 | |
%32 = llvm.insertvalue %31, %30[3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%33 = llvm.mlir.constant(1 : index) : i64 | |
%34 = llvm.insertvalue %33, %32[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%35 = llvm.mlir.constant(0 : index) : i64 | |
%36 = llvm.mlir.constant(63 : index) : i64 | |
%37 = llvm.ptrtoint %25 : !llvm.ptr<f32> to i64 | |
%38 = llvm.and %37, %36 : i64 | |
%39 = llvm.icmp "eq" %38, %35 : i64 | |
"llvm.intr.assume"(%39) : (i1) -> () | |
%40 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%41 = llvm.extractvalue %40[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%42 = llvm.mlir.constant(2 : i64) : i64 | |
%43 = llvm.getelementptr %41[%42] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%44 = llvm.load %43 : !llvm.ptr<ptr<i8>> | |
%45 = llvm.bitcast %44 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%46 = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%47 = llvm.insertvalue %45, %46[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%48 = llvm.insertvalue %45, %47[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%49 = llvm.mlir.constant(0 : index) : i64 | |
%50 = llvm.insertvalue %49, %48[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%51 = llvm.mlir.constant(4 : index) : i64 | |
%52 = llvm.insertvalue %51, %50[3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%53 = llvm.mlir.constant(1 : index) : i64 | |
%54 = llvm.insertvalue %53, %52[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> | |
%55 = llvm.mlir.constant(0 : index) : i64 | |
%56 = llvm.mlir.constant(63 : index) : i64 | |
%57 = llvm.ptrtoint %45 : !llvm.ptr<f32> to i64 | |
%58 = llvm.and %57, %56 : i64 | |
%59 = llvm.icmp "eq" %58, %55 : i64 | |
"llvm.intr.assume"(%59) : (i1) -> () | |
%60 = llvm.bitcast %5 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%61 = llvm.load %60 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%62 = llvm.bitcast %25 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%63 = llvm.load %62 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%64 = llvm.fmul %61, %63 : vector<4xf32> | |
%65 = llvm.bitcast %45 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
llvm.store %64, %65 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%66 = llvm.mlir.constant(0 : i32) : i32 | |
llvm.return %66 : i32 | |
} | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} { | |
llvm.func internal @simple_mul_dispatch_0(%arg0: !llvm.ptr<struct<"iree_hal_executable_environment_v0_t", (ptr<i32>, ptr<func<i32 (ptr<func<i32 (ptr<i8>)>>, ptr<i8>)>>, ptr<ptr<func<i32 (ptr<i8>)>>>, struct<"iree_hal_processor_v0_t", (array<8 x i64>)>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr<struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr<ptr<i8>>, i32)>> {llvm.align = 16 : i64, llvm.noalias}) -> i32 attributes {sym_visibility = "private"} { | |
%0 = llvm.mlir.constant(0 : i32) : i32 | |
%1 = llvm.mlir.constant(2 : i64) : i64 | |
%2 = llvm.mlir.constant(1 : i64) : i64 | |
%3 = llvm.mlir.constant(63 : index) : i64 | |
%4 = llvm.mlir.constant(0 : index) : i64 | |
%5 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%6 = llvm.extractvalue %5[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%7 = llvm.load %6 : !llvm.ptr<ptr<i8>> | |
%8 = llvm.bitcast %7 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%9 = llvm.ptrtoint %8 : !llvm.ptr<f32> to i64 | |
%10 = llvm.and %9, %3 : i64 | |
%11 = llvm.icmp "eq" %10, %4 : i64 | |
"llvm.intr.assume"(%11) : (i1) -> () | |
%12 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%13 = llvm.extractvalue %12[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%14 = llvm.getelementptr %13[%2] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%15 = llvm.load %14 : !llvm.ptr<ptr<i8>> | |
%16 = llvm.bitcast %15 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%17 = llvm.ptrtoint %16 : !llvm.ptr<f32> to i64 | |
%18 = llvm.and %17, %3 : i64 | |
%19 = llvm.icmp "eq" %18, %4 : i64 | |
"llvm.intr.assume"(%19) : (i1) -> () | |
%20 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%21 = llvm.extractvalue %20[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%22 = llvm.getelementptr %21[%1] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%23 = llvm.load %22 : !llvm.ptr<ptr<i8>> | |
%24 = llvm.bitcast %23 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%25 = llvm.ptrtoint %24 : !llvm.ptr<f32> to i64 | |
%26 = llvm.and %25, %3 : i64 | |
%27 = llvm.icmp "eq" %26, %4 : i64 | |
"llvm.intr.assume"(%27) : (i1) -> () | |
%28 = llvm.bitcast %8 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%29 = llvm.load %28 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%30 = llvm.bitcast %16 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%31 = llvm.load %30 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%32 = llvm.fmul %29, %31 : vector<4xf32> | |
%33 = llvm.bitcast %24 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
llvm.store %32, %33 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
llvm.return %0 : i32 | |
} | |
} | |
// -----// IR Dump After CSE //----- // | |
module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} { | |
llvm.func internal @simple_mul_dispatch_0(%arg0: !llvm.ptr<struct<"iree_hal_executable_environment_v0_t", (ptr<i32>, ptr<func<i32 (ptr<func<i32 (ptr<i8>)>>, ptr<i8>)>>, ptr<ptr<func<i32 (ptr<i8>)>>>, struct<"iree_hal_processor_v0_t", (array<8 x i64>)>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr<struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr<ptr<i8>>, i32)>> {llvm.align = 16 : i64, llvm.noalias}) -> i32 attributes {sym_visibility = "private"} { | |
%0 = llvm.mlir.constant(0 : i32) : i32 | |
%1 = llvm.mlir.constant(2 : i64) : i64 | |
%2 = llvm.mlir.constant(1 : i64) : i64 | |
%3 = llvm.mlir.constant(63 : index) : i64 | |
%4 = llvm.mlir.constant(0 : index) : i64 | |
%5 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%6 = llvm.extractvalue %5[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%7 = llvm.load %6 : !llvm.ptr<ptr<i8>> | |
%8 = llvm.bitcast %7 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%9 = llvm.ptrtoint %8 : !llvm.ptr<f32> to i64 | |
%10 = llvm.and %9, %3 : i64 | |
%11 = llvm.icmp "eq" %10, %4 : i64 | |
"llvm.intr.assume"(%11) : (i1) -> () | |
%12 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%13 = llvm.extractvalue %12[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%14 = llvm.getelementptr %13[%2] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%15 = llvm.load %14 : !llvm.ptr<ptr<i8>> | |
%16 = llvm.bitcast %15 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%17 = llvm.ptrtoint %16 : !llvm.ptr<f32> to i64 | |
%18 = llvm.and %17, %3 : i64 | |
%19 = llvm.icmp "eq" %18, %4 : i64 | |
"llvm.intr.assume"(%19) : (i1) -> () | |
%20 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%21 = llvm.extractvalue %20[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%22 = llvm.getelementptr %21[%1] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%23 = llvm.load %22 : !llvm.ptr<ptr<i8>> | |
%24 = llvm.bitcast %23 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%25 = llvm.ptrtoint %24 : !llvm.ptr<f32> to i64 | |
%26 = llvm.and %25, %3 : i64 | |
%27 = llvm.icmp "eq" %26, %4 : i64 | |
"llvm.intr.assume"(%27) : (i1) -> () | |
%28 = llvm.bitcast %8 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%29 = llvm.load %28 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%30 = llvm.bitcast %16 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%31 = llvm.load %30 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%32 = llvm.fmul %29, %31 : vector<4xf32> | |
%33 = llvm.bitcast %24 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
llvm.store %32, %33 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
llvm.return %0 : i32 | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::HAL::TranslateTargetExecutableVariantsPass //----- // | |
hal.executable.variant public @embedded_elf_x86_64, target = <"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}> { | |
hal.executable.export public @simple_mul_dispatch_0 ordinal(0) layout(#hal.executable.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]>) {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert workload_per_wg = [4]>} { | |
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
%c1 = arith.constant 1 : index | |
%0 = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%arg1] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
builtin.module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} { | |
llvm.func internal @simple_mul_dispatch_0(%arg0: !llvm.ptr<struct<"iree_hal_executable_environment_v0_t", (ptr<i32>, ptr<func<i32 (ptr<func<i32 (ptr<i8>)>>, ptr<i8>)>>, ptr<ptr<func<i32 (ptr<i8>)>>>, struct<"iree_hal_processor_v0_t", (array<8 x i64>)>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr<struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr<ptr<i8>>, i32)>> {llvm.align = 16 : i64, llvm.noalias}) -> i32 attributes {sym_visibility = "private"} { | |
%0 = llvm.mlir.constant(0 : i32) : i32 | |
%1 = llvm.mlir.constant(2 : i64) : i64 | |
%2 = llvm.mlir.constant(1 : i64) : i64 | |
%3 = llvm.mlir.constant(63 : index) : i64 | |
%4 = llvm.mlir.constant(0 : index) : i64 | |
%5 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%6 = llvm.extractvalue %5[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%7 = llvm.load %6 : !llvm.ptr<ptr<i8>> | |
%8 = llvm.bitcast %7 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%9 = llvm.ptrtoint %8 : !llvm.ptr<f32> to i64 | |
%10 = llvm.and %9, %3 : i64 | |
%11 = llvm.icmp "eq" %10, %4 : i64 | |
"llvm.intr.assume"(%11) : (i1) -> () | |
%12 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%13 = llvm.extractvalue %12[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%14 = llvm.getelementptr %13[%2] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%15 = llvm.load %14 : !llvm.ptr<ptr<i8>> | |
%16 = llvm.bitcast %15 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%17 = llvm.ptrtoint %16 : !llvm.ptr<f32> to i64 | |
%18 = llvm.and %17, %3 : i64 | |
%19 = llvm.icmp "eq" %18, %4 : i64 | |
"llvm.intr.assume"(%19) : (i1) -> () | |
%20 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%21 = llvm.extractvalue %20[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%22 = llvm.getelementptr %21[%1] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%23 = llvm.load %22 : !llvm.ptr<ptr<i8>> | |
%24 = llvm.bitcast %23 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%25 = llvm.ptrtoint %24 : !llvm.ptr<f32> to i64 | |
%26 = llvm.and %25, %3 : i64 | |
%27 = llvm.icmp "eq" %26, %4 : i64 | |
"llvm.intr.assume"(%27) : (i1) -> () | |
%28 = llvm.bitcast %8 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%29 = llvm.load %28 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%30 = llvm.bitcast %16 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%31 = llvm.load %30 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%32 = llvm.fmul %29, %31 : vector<4xf32> | |
%33 = llvm.bitcast %24 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
llvm.store %32, %33 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
llvm.return %0 : i32 | |
} | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::HAL::TranslateExecutablesPass //----- // | |
hal.executable private @simple_mul_dispatch_0 { | |
hal.executable.variant public @embedded_elf_x86_64, target = <"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}> { | |
hal.executable.export public @simple_mul_dispatch_0 ordinal(0) layout(#hal.executable.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]>) {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert workload_per_wg = [4]>} { | |
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
%c1 = arith.constant 1 : index | |
%0 = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%arg1] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
builtin.module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} { | |
llvm.func internal @simple_mul_dispatch_0(%arg0: !llvm.ptr<struct<"iree_hal_executable_environment_v0_t", (ptr<i32>, ptr<func<i32 (ptr<func<i32 (ptr<i8>)>>, ptr<i8>)>>, ptr<ptr<func<i32 (ptr<i8>)>>>, struct<"iree_hal_processor_v0_t", (array<8 x i64>)>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr<struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr<ptr<i8>>, i32)>> {llvm.align = 16 : i64, llvm.noalias}) -> i32 attributes {sym_visibility = "private"} { | |
%0 = llvm.mlir.constant(0 : i32) : i32 | |
%1 = llvm.mlir.constant(2 : i64) : i64 | |
%2 = llvm.mlir.constant(1 : i64) : i64 | |
%3 = llvm.mlir.constant(63 : index) : i64 | |
%4 = llvm.mlir.constant(0 : index) : i64 | |
%5 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%6 = llvm.extractvalue %5[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%7 = llvm.load %6 : !llvm.ptr<ptr<i8>> | |
%8 = llvm.bitcast %7 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%9 = llvm.ptrtoint %8 : !llvm.ptr<f32> to i64 | |
%10 = llvm.and %9, %3 : i64 | |
%11 = llvm.icmp "eq" %10, %4 : i64 | |
"llvm.intr.assume"(%11) : (i1) -> () | |
%12 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%13 = llvm.extractvalue %12[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%14 = llvm.getelementptr %13[%2] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%15 = llvm.load %14 : !llvm.ptr<ptr<i8>> | |
%16 = llvm.bitcast %15 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%17 = llvm.ptrtoint %16 : !llvm.ptr<f32> to i64 | |
%18 = llvm.and %17, %3 : i64 | |
%19 = llvm.icmp "eq" %18, %4 : i64 | |
"llvm.intr.assume"(%19) : (i1) -> () | |
%20 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%21 = llvm.extractvalue %20[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%22 = llvm.getelementptr %21[%1] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%23 = llvm.load %22 : !llvm.ptr<ptr<i8>> | |
%24 = llvm.bitcast %23 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%25 = llvm.ptrtoint %24 : !llvm.ptr<f32> to i64 | |
%26 = llvm.and %25, %3 : i64 | |
%27 = llvm.icmp "eq" %26, %4 : i64 | |
"llvm.intr.assume"(%27) : (i1) -> () | |
%28 = llvm.bitcast %8 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%29 = llvm.load %28 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%30 = llvm.bitcast %16 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%31 = llvm.load %30 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%32 = llvm.fmul %29, %31 : vector<4xf32> | |
%33 = llvm.bitcast %24 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
llvm.store %32, %33 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
llvm.return %0 : i32 | |
} | |
} | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::HAL::(anonymous namespace)::ConvertToHALPass //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
#executable_layout = #hal.executable.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]> | |
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}> | |
#map = affine_map<()[s0] -> (s0 ceildiv 4)> | |
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert workload_per_wg = [4]> | |
module attributes {hal.device.targets = [#device_target_cpu]} { | |
hal.executable private @simple_mul_dispatch_0 { | |
hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ { | |
hal.executable.export public @simple_mul_dispatch_0 ordinal(0) layout(#executable_layout) {translation_info = #translation} { | |
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
%c1 = arith.constant 1 : index | |
%0 = affine.apply #map()[%arg1] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
builtin.module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} { | |
llvm.func internal @simple_mul_dispatch_0(%arg0: !llvm.ptr<struct<"iree_hal_executable_environment_v0_t", (ptr<i32>, ptr<func<i32 (ptr<func<i32 (ptr<i8>)>>, ptr<i8>)>>, ptr<ptr<func<i32 (ptr<i8>)>>>, struct<"iree_hal_processor_v0_t", (array<8 x i64>)>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr<struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr<ptr<i8>>, i32)>> {llvm.align = 16 : i64, llvm.noalias}) -> i32 attributes {sym_visibility = "private"} { | |
%0 = llvm.mlir.constant(0 : i32) : i32 | |
%1 = llvm.mlir.constant(2 : i64) : i64 | |
%2 = llvm.mlir.constant(1 : i64) : i64 | |
%3 = llvm.mlir.constant(63 : index) : i64 | |
%4 = llvm.mlir.constant(0 : index) : i64 | |
%5 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%6 = llvm.extractvalue %5[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%7 = llvm.load %6 : !llvm.ptr<ptr<i8>> | |
%8 = llvm.bitcast %7 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%9 = llvm.ptrtoint %8 : !llvm.ptr<f32> to i64 | |
%10 = llvm.and %9, %3 : i64 | |
%11 = llvm.icmp "eq" %10, %4 : i64 | |
"llvm.intr.assume"(%11) : (i1) -> () | |
%12 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%13 = llvm.extractvalue %12[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%14 = llvm.getelementptr %13[%2] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%15 = llvm.load %14 : !llvm.ptr<ptr<i8>> | |
%16 = llvm.bitcast %15 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%17 = llvm.ptrtoint %16 : !llvm.ptr<f32> to i64 | |
%18 = llvm.and %17, %3 : i64 | |
%19 = llvm.icmp "eq" %18, %4 : i64 | |
"llvm.intr.assume"(%19) : (i1) -> () | |
%20 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%21 = llvm.extractvalue %20[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%22 = llvm.getelementptr %21[%1] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%23 = llvm.load %22 : !llvm.ptr<ptr<i8>> | |
%24 = llvm.bitcast %23 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%25 = llvm.ptrtoint %24 : !llvm.ptr<f32> to i64 | |
%26 = llvm.and %25, %3 : i64 | |
%27 = llvm.icmp "eq" %26, %4 : i64 | |
"llvm.intr.assume"(%27) : (i1) -> () | |
%28 = llvm.bitcast %8 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%29 = llvm.load %28 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%30 = llvm.bitcast %16 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%31 = llvm.load %30 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%32 = llvm.fmul %29, %31 : vector<4xf32> | |
%33 = llvm.bitcast %24 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
llvm.store %32, %33 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
llvm.return %0 : i32 | |
} | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
%device_1 = hal.ex.shared_device : !hal.device | |
%allocator_2 = hal.device.allocator<%device_1 : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator_2 : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%device_3 = hal.ex.shared_device : !hal.device | |
%allocator_4 = hal.device.allocator<%device_3 : !hal.device> : !hal.allocator | |
%buffer_5 = hal.allocator.allocate<%allocator_4 : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%device_6 = hal.ex.shared_device : !hal.device | |
%cmd = hal.command_buffer.create device(%device_6 : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
%0 = hal.command_buffer.device<%cmd : !hal.command_buffer> : !hal.device | |
hal.device.switch<%0 : !hal.device> | |
#hal.device.match.executable.format<"embedded-elf-x86_64"> { | |
%executable_layout = hal.executable_layout.lookup device(%0 : !hal.device) layout(#executable_layout) : !hal.executable_layout | |
%c0_11 = arith.constant 0 : index | |
%c1_12 = arith.constant 1 : index | |
%c2 = arith.constant 2 : index | |
%c0_13 = arith.constant 0 : index | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%executable_layout : !hal.executable_layout)[%c0_13] bindings([ | |
%c0_11 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1_12 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_5 : !hal.buffer)[%c0, %c16] | |
]) | |
%c1_14 = arith.constant 1 : index | |
%1 = affine.apply #map()[%c4] | |
hal.command_buffer.dispatch.symbol<%cmd : !hal.command_buffer> target(@simple_mul_dispatch_0::@embedded_elf_x86_64::@simple_mul_dispatch_0) workgroups([%1, %c1_14, %c1_14]) | |
hal.return | |
} | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device_6, %cmd | |
%c0_7 = arith.constant 0 : index | |
%c4_8 = arith.constant 4 : index | |
%c553648160_i32_9 = arith.constant 553648160 : i32 | |
%c1_i32_10 = arith.constant 1 : i32 | |
%view = hal.buffer_view.create buffer(%buffer_5 : !hal.buffer) shape([%c4_8]) type(%c553648160_i32_9) encoding(%c1_i32_10) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
#executable_layout = #hal.executable.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]> | |
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}> | |
#map = affine_map<()[s0] -> (s0 ceildiv 4)> | |
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert workload_per_wg = [4]> | |
module attributes {hal.device.targets = [#device_target_cpu]} { | |
hal.executable private @simple_mul_dispatch_0 { | |
hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ { | |
hal.executable.export public @simple_mul_dispatch_0 ordinal(0) layout(#executable_layout) {translation_info = #translation} { | |
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
%c1 = arith.constant 1 : index | |
%0 = affine.apply #map()[%arg1] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
builtin.module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} { | |
llvm.func internal @simple_mul_dispatch_0(%arg0: !llvm.ptr<struct<"iree_hal_executable_environment_v0_t", (ptr<i32>, ptr<func<i32 (ptr<func<i32 (ptr<i8>)>>, ptr<i8>)>>, ptr<ptr<func<i32 (ptr<i8>)>>>, struct<"iree_hal_processor_v0_t", (array<8 x i64>)>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr<struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr<ptr<i8>>, i32)>> {llvm.align = 16 : i64, llvm.noalias}) -> i32 attributes {sym_visibility = "private"} { | |
%0 = llvm.mlir.constant(0 : i32) : i32 | |
%1 = llvm.mlir.constant(2 : i64) : i64 | |
%2 = llvm.mlir.constant(1 : i64) : i64 | |
%3 = llvm.mlir.constant(63 : index) : i64 | |
%4 = llvm.mlir.constant(0 : index) : i64 | |
%5 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%6 = llvm.extractvalue %5[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%7 = llvm.load %6 : !llvm.ptr<ptr<i8>> | |
%8 = llvm.bitcast %7 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%9 = llvm.ptrtoint %8 : !llvm.ptr<f32> to i64 | |
%10 = llvm.and %9, %3 : i64 | |
%11 = llvm.icmp "eq" %10, %4 : i64 | |
"llvm.intr.assume"(%11) : (i1) -> () | |
%12 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%13 = llvm.extractvalue %12[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%14 = llvm.getelementptr %13[%2] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%15 = llvm.load %14 : !llvm.ptr<ptr<i8>> | |
%16 = llvm.bitcast %15 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%17 = llvm.ptrtoint %16 : !llvm.ptr<f32> to i64 | |
%18 = llvm.and %17, %3 : i64 | |
%19 = llvm.icmp "eq" %18, %4 : i64 | |
"llvm.intr.assume"(%19) : (i1) -> () | |
%20 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%21 = llvm.extractvalue %20[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%22 = llvm.getelementptr %21[%1] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%23 = llvm.load %22 : !llvm.ptr<ptr<i8>> | |
%24 = llvm.bitcast %23 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%25 = llvm.ptrtoint %24 : !llvm.ptr<f32> to i64 | |
%26 = llvm.and %25, %3 : i64 | |
%27 = llvm.icmp "eq" %26, %4 : i64 | |
"llvm.intr.assume"(%27) : (i1) -> () | |
%28 = llvm.bitcast %8 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%29 = llvm.load %28 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%30 = llvm.bitcast %16 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%31 = llvm.load %30 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%32 = llvm.fmul %29, %31 : vector<4xf32> | |
%33 = llvm.bitcast %24 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
llvm.store %32, %33 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
llvm.return %0 : i32 | |
} | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c2 = arith.constant 2 : index | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
%device_1 = hal.ex.shared_device : !hal.device | |
%allocator_2 = hal.device.allocator<%device_1 : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator_2 : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%device_3 = hal.ex.shared_device : !hal.device | |
%allocator_4 = hal.device.allocator<%device_3 : !hal.device> : !hal.allocator | |
%buffer_5 = hal.allocator.allocate<%allocator_4 : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%device_6 = hal.ex.shared_device : !hal.device | |
%cmd = hal.command_buffer.create device(%device_6 : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
hal.device.switch<%device_6 : !hal.device> | |
#hal.device.match.executable.format<"embedded-elf-x86_64"> { | |
%executable_layout = hal.executable_layout.lookup device(%device_6 : !hal.device) layout(#executable_layout) : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%executable_layout : !hal.executable_layout)[%c0] bindings([ | |
%c0 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_5 : !hal.buffer)[%c0, %c16] | |
]) | |
hal.command_buffer.dispatch.symbol<%cmd : !hal.command_buffer> target(@simple_mul_dispatch_0::@embedded_elf_x86_64::@simple_mul_dispatch_0) workgroups([%c1, %c1, %c1]) | |
hal.return | |
} | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device_6, %cmd | |
%view = hal.buffer_view.create buffer(%buffer_5 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After CSE //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
#executable_layout = #hal.executable.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]> | |
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}> | |
#map = affine_map<()[s0] -> (s0 ceildiv 4)> | |
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert workload_per_wg = [4]> | |
module attributes {hal.device.targets = [#device_target_cpu]} { | |
hal.executable private @simple_mul_dispatch_0 { | |
hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ { | |
hal.executable.export public @simple_mul_dispatch_0 ordinal(0) layout(#executable_layout) {translation_info = #translation} { | |
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
%c1 = arith.constant 1 : index | |
%0 = affine.apply #map()[%arg1] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
builtin.module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} { | |
llvm.func internal @simple_mul_dispatch_0(%arg0: !llvm.ptr<struct<"iree_hal_executable_environment_v0_t", (ptr<i32>, ptr<func<i32 (ptr<func<i32 (ptr<i8>)>>, ptr<i8>)>>, ptr<ptr<func<i32 (ptr<i8>)>>>, struct<"iree_hal_processor_v0_t", (array<8 x i64>)>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr<struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr<ptr<i8>>, i32)>> {llvm.align = 16 : i64, llvm.noalias}) -> i32 attributes {sym_visibility = "private"} { | |
%0 = llvm.mlir.constant(0 : i32) : i32 | |
%1 = llvm.mlir.constant(2 : i64) : i64 | |
%2 = llvm.mlir.constant(1 : i64) : i64 | |
%3 = llvm.mlir.constant(63 : index) : i64 | |
%4 = llvm.mlir.constant(0 : index) : i64 | |
%5 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%6 = llvm.extractvalue %5[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%7 = llvm.load %6 : !llvm.ptr<ptr<i8>> | |
%8 = llvm.bitcast %7 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%9 = llvm.ptrtoint %8 : !llvm.ptr<f32> to i64 | |
%10 = llvm.and %9, %3 : i64 | |
%11 = llvm.icmp "eq" %10, %4 : i64 | |
"llvm.intr.assume"(%11) : (i1) -> () | |
%12 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%13 = llvm.extractvalue %12[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%14 = llvm.getelementptr %13[%2] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%15 = llvm.load %14 : !llvm.ptr<ptr<i8>> | |
%16 = llvm.bitcast %15 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%17 = llvm.ptrtoint %16 : !llvm.ptr<f32> to i64 | |
%18 = llvm.and %17, %3 : i64 | |
%19 = llvm.icmp "eq" %18, %4 : i64 | |
"llvm.intr.assume"(%19) : (i1) -> () | |
%20 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%21 = llvm.extractvalue %20[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%22 = llvm.getelementptr %21[%1] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%23 = llvm.load %22 : !llvm.ptr<ptr<i8>> | |
%24 = llvm.bitcast %23 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%25 = llvm.ptrtoint %24 : !llvm.ptr<f32> to i64 | |
%26 = llvm.and %25, %3 : i64 | |
%27 = llvm.icmp "eq" %26, %4 : i64 | |
"llvm.intr.assume"(%27) : (i1) -> () | |
%28 = llvm.bitcast %8 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%29 = llvm.load %28 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%30 = llvm.bitcast %16 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%31 = llvm.load %30 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%32 = llvm.fmul %29, %31 : vector<4xf32> | |
%33 = llvm.bitcast %24 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
llvm.store %32, %33 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
llvm.return %0 : i32 | |
} | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c2 = arith.constant 2 : index | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
hal.device.switch<%device : !hal.device> | |
#hal.device.match.executable.format<"embedded-elf-x86_64"> { | |
%executable_layout = hal.executable_layout.lookup device(%device : !hal.device) layout(#executable_layout) : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%executable_layout : !hal.executable_layout)[%c0] bindings([ | |
%c0 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_1 : !hal.buffer)[%c0, %c16] | |
]) | |
hal.command_buffer.dispatch.symbol<%cmd : !hal.command_buffer> target(@simple_mul_dispatch_0::@embedded_elf_x86_64::@simple_mul_dispatch_0) workgroups([%c1, %c1, %c1]) | |
hal.return | |
} | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device, %cmd | |
%view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::SimplifyGlobalAccessesPass //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c2 = arith.constant 2 : index | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
hal.device.switch<%device : !hal.device> | |
#hal.device.match.executable.format<"embedded-elf-x86_64"> { | |
%executable_layout = hal.executable_layout.lookup device(%device : !hal.device) layout(<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]>) : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%executable_layout : !hal.executable_layout)[%c0] bindings([ | |
%c0 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_1 : !hal.buffer)[%c0, %c16] | |
]) | |
hal.command_buffer.dispatch.symbol<%cmd : !hal.command_buffer> target(@simple_mul_dispatch_0::@embedded_elf_x86_64::@simple_mul_dispatch_0) workgroups([%c1, %c1, %c1]) | |
hal.return | |
} | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device, %cmd | |
%view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::ApplyPatternsPass //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
#executable_layout = #hal.executable.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]> | |
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}> | |
#map = affine_map<()[s0] -> (s0 ceildiv 4)> | |
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert workload_per_wg = [4]> | |
module attributes {hal.device.targets = [#device_target_cpu]} { | |
hal.executable private @simple_mul_dispatch_0 { | |
hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ { | |
hal.executable.export public @simple_mul_dispatch_0 ordinal(0) layout(#executable_layout) {translation_info = #translation} { | |
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
%c1 = arith.constant 1 : index | |
%0 = affine.apply #map()[%arg1] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
builtin.module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} { | |
llvm.func internal @simple_mul_dispatch_0(%arg0: !llvm.ptr<struct<"iree_hal_executable_environment_v0_t", (ptr<i32>, ptr<func<i32 (ptr<func<i32 (ptr<i8>)>>, ptr<i8>)>>, ptr<ptr<func<i32 (ptr<i8>)>>>, struct<"iree_hal_processor_v0_t", (array<8 x i64>)>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr<struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr<ptr<i8>>, i32)>> {llvm.align = 16 : i64, llvm.noalias}) -> i32 attributes {sym_visibility = "private"} { | |
%0 = llvm.mlir.constant(0 : i32) : i32 | |
%1 = llvm.mlir.constant(2 : i64) : i64 | |
%2 = llvm.mlir.constant(1 : i64) : i64 | |
%3 = llvm.mlir.constant(63 : index) : i64 | |
%4 = llvm.mlir.constant(0 : index) : i64 | |
%5 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%6 = llvm.extractvalue %5[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%7 = llvm.load %6 : !llvm.ptr<ptr<i8>> | |
%8 = llvm.bitcast %7 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%9 = llvm.ptrtoint %8 : !llvm.ptr<f32> to i64 | |
%10 = llvm.and %9, %3 : i64 | |
%11 = llvm.icmp "eq" %10, %4 : i64 | |
"llvm.intr.assume"(%11) : (i1) -> () | |
%12 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%13 = llvm.extractvalue %12[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%14 = llvm.getelementptr %13[%2] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%15 = llvm.load %14 : !llvm.ptr<ptr<i8>> | |
%16 = llvm.bitcast %15 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%17 = llvm.ptrtoint %16 : !llvm.ptr<f32> to i64 | |
%18 = llvm.and %17, %3 : i64 | |
%19 = llvm.icmp "eq" %18, %4 : i64 | |
"llvm.intr.assume"(%19) : (i1) -> () | |
%20 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%21 = llvm.extractvalue %20[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%22 = llvm.getelementptr %21[%1] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%23 = llvm.load %22 : !llvm.ptr<ptr<i8>> | |
%24 = llvm.bitcast %23 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%25 = llvm.ptrtoint %24 : !llvm.ptr<f32> to i64 | |
%26 = llvm.and %25, %3 : i64 | |
%27 = llvm.icmp "eq" %26, %4 : i64 | |
"llvm.intr.assume"(%27) : (i1) -> () | |
%28 = llvm.bitcast %8 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%29 = llvm.load %28 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%30 = llvm.bitcast %16 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%31 = llvm.load %30 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%32 = llvm.fmul %29, %31 : vector<4xf32> | |
%33 = llvm.bitcast %24 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
llvm.store %32, %33 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
llvm.return %0 : i32 | |
} | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c2 = arith.constant 2 : index | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
hal.device.switch<%device : !hal.device> | |
#hal.device.match.executable.format<"embedded-elf-x86_64"> { | |
%executable_layout = hal.executable_layout.lookup device(%device : !hal.device) layout(#executable_layout) : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%executable_layout : !hal.executable_layout)[%c0] bindings([ | |
%c0 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_1 : !hal.buffer)[%c0, %c16] | |
]) | |
hal.command_buffer.dispatch.symbol<%cmd : !hal.command_buffer> target(@simple_mul_dispatch_0::@embedded_elf_x86_64::@simple_mul_dispatch_0) workgroups([%c1, %c1, %c1]) | |
hal.return | |
} | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device, %cmd | |
%view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::FoldGlobalsPass //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
#executable_layout = #hal.executable.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]> | |
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}> | |
#map = affine_map<()[s0] -> (s0 ceildiv 4)> | |
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert workload_per_wg = [4]> | |
module attributes {hal.device.targets = [#device_target_cpu]} { | |
hal.executable private @simple_mul_dispatch_0 { | |
hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ { | |
hal.executable.export public @simple_mul_dispatch_0 ordinal(0) layout(#executable_layout) {translation_info = #translation} { | |
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
%c1 = arith.constant 1 : index | |
%0 = affine.apply #map()[%arg1] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
builtin.module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} { | |
llvm.func internal @simple_mul_dispatch_0(%arg0: !llvm.ptr<struct<"iree_hal_executable_environment_v0_t", (ptr<i32>, ptr<func<i32 (ptr<func<i32 (ptr<i8>)>>, ptr<i8>)>>, ptr<ptr<func<i32 (ptr<i8>)>>>, struct<"iree_hal_processor_v0_t", (array<8 x i64>)>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr<struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr<ptr<i8>>, i32)>> {llvm.align = 16 : i64, llvm.noalias}) -> i32 attributes {sym_visibility = "private"} { | |
%0 = llvm.mlir.constant(0 : i32) : i32 | |
%1 = llvm.mlir.constant(2 : i64) : i64 | |
%2 = llvm.mlir.constant(1 : i64) : i64 | |
%3 = llvm.mlir.constant(63 : index) : i64 | |
%4 = llvm.mlir.constant(0 : index) : i64 | |
%5 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%6 = llvm.extractvalue %5[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%7 = llvm.load %6 : !llvm.ptr<ptr<i8>> | |
%8 = llvm.bitcast %7 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%9 = llvm.ptrtoint %8 : !llvm.ptr<f32> to i64 | |
%10 = llvm.and %9, %3 : i64 | |
%11 = llvm.icmp "eq" %10, %4 : i64 | |
"llvm.intr.assume"(%11) : (i1) -> () | |
%12 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%13 = llvm.extractvalue %12[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%14 = llvm.getelementptr %13[%2] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%15 = llvm.load %14 : !llvm.ptr<ptr<i8>> | |
%16 = llvm.bitcast %15 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%17 = llvm.ptrtoint %16 : !llvm.ptr<f32> to i64 | |
%18 = llvm.and %17, %3 : i64 | |
%19 = llvm.icmp "eq" %18, %4 : i64 | |
"llvm.intr.assume"(%19) : (i1) -> () | |
%20 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%21 = llvm.extractvalue %20[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%22 = llvm.getelementptr %21[%1] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%23 = llvm.load %22 : !llvm.ptr<ptr<i8>> | |
%24 = llvm.bitcast %23 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%25 = llvm.ptrtoint %24 : !llvm.ptr<f32> to i64 | |
%26 = llvm.and %25, %3 : i64 | |
%27 = llvm.icmp "eq" %26, %4 : i64 | |
"llvm.intr.assume"(%27) : (i1) -> () | |
%28 = llvm.bitcast %8 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%29 = llvm.load %28 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%30 = llvm.bitcast %16 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%31 = llvm.load %30 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%32 = llvm.fmul %29, %31 : vector<4xf32> | |
%33 = llvm.bitcast %24 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
llvm.store %32, %33 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
llvm.return %0 : i32 | |
} | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c2 = arith.constant 2 : index | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
hal.device.switch<%device : !hal.device> | |
#hal.device.match.executable.format<"embedded-elf-x86_64"> { | |
%executable_layout = hal.executable_layout.lookup device(%device : !hal.device) layout(#executable_layout) : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%executable_layout : !hal.executable_layout)[%c0] bindings([ | |
%c0 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_1 : !hal.buffer)[%c0, %c16] | |
]) | |
hal.command_buffer.dispatch.symbol<%cmd : !hal.command_buffer> target(@simple_mul_dispatch_0::@embedded_elf_x86_64::@simple_mul_dispatch_0) workgroups([%c1, %c1, %c1]) | |
hal.return | |
} | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device, %cmd | |
%view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::FuseGlobalsPass //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
#executable_layout = #hal.executable.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]> | |
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}> | |
#map = affine_map<()[s0] -> (s0 ceildiv 4)> | |
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert workload_per_wg = [4]> | |
module attributes {hal.device.targets = [#device_target_cpu]} { | |
hal.executable private @simple_mul_dispatch_0 { | |
hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ { | |
hal.executable.export public @simple_mul_dispatch_0 ordinal(0) layout(#executable_layout) {translation_info = #translation} { | |
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
%c1 = arith.constant 1 : index | |
%0 = affine.apply #map()[%arg1] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
builtin.module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} { | |
llvm.func internal @simple_mul_dispatch_0(%arg0: !llvm.ptr<struct<"iree_hal_executable_environment_v0_t", (ptr<i32>, ptr<func<i32 (ptr<func<i32 (ptr<i8>)>>, ptr<i8>)>>, ptr<ptr<func<i32 (ptr<i8>)>>>, struct<"iree_hal_processor_v0_t", (array<8 x i64>)>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr<struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr<ptr<i8>>, i32)>> {llvm.align = 16 : i64, llvm.noalias}) -> i32 attributes {sym_visibility = "private"} { | |
%0 = llvm.mlir.constant(0 : i32) : i32 | |
%1 = llvm.mlir.constant(2 : i64) : i64 | |
%2 = llvm.mlir.constant(1 : i64) : i64 | |
%3 = llvm.mlir.constant(63 : index) : i64 | |
%4 = llvm.mlir.constant(0 : index) : i64 | |
%5 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%6 = llvm.extractvalue %5[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%7 = llvm.load %6 : !llvm.ptr<ptr<i8>> | |
%8 = llvm.bitcast %7 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%9 = llvm.ptrtoint %8 : !llvm.ptr<f32> to i64 | |
%10 = llvm.and %9, %3 : i64 | |
%11 = llvm.icmp "eq" %10, %4 : i64 | |
"llvm.intr.assume"(%11) : (i1) -> () | |
%12 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%13 = llvm.extractvalue %12[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%14 = llvm.getelementptr %13[%2] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%15 = llvm.load %14 : !llvm.ptr<ptr<i8>> | |
%16 = llvm.bitcast %15 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%17 = llvm.ptrtoint %16 : !llvm.ptr<f32> to i64 | |
%18 = llvm.and %17, %3 : i64 | |
%19 = llvm.icmp "eq" %18, %4 : i64 | |
"llvm.intr.assume"(%19) : (i1) -> () | |
%20 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%21 = llvm.extractvalue %20[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%22 = llvm.getelementptr %21[%1] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%23 = llvm.load %22 : !llvm.ptr<ptr<i8>> | |
%24 = llvm.bitcast %23 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%25 = llvm.ptrtoint %24 : !llvm.ptr<f32> to i64 | |
%26 = llvm.and %25, %3 : i64 | |
%27 = llvm.icmp "eq" %26, %4 : i64 | |
"llvm.intr.assume"(%27) : (i1) -> () | |
%28 = llvm.bitcast %8 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%29 = llvm.load %28 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%30 = llvm.bitcast %16 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%31 = llvm.load %30 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%32 = llvm.fmul %29, %31 : vector<4xf32> | |
%33 = llvm.bitcast %24 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
llvm.store %32, %33 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
llvm.return %0 : i32 | |
} | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c2 = arith.constant 2 : index | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
hal.device.switch<%device : !hal.device> | |
#hal.device.match.executable.format<"embedded-elf-x86_64"> { | |
%executable_layout = hal.executable_layout.lookup device(%device : !hal.device) layout(#executable_layout) : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%executable_layout : !hal.executable_layout)[%c0] bindings([ | |
%c0 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_1 : !hal.buffer)[%c0, %c16] | |
]) | |
hal.command_buffer.dispatch.symbol<%cmd : !hal.command_buffer> target(@simple_mul_dispatch_0::@embedded_elf_x86_64::@simple_mul_dispatch_0) workgroups([%c1, %c1, %c1]) | |
hal.return | |
} | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device, %cmd | |
%view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::HAL::LinkTargetExecutablesPass //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
#executable_layout = #hal.executable.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]> | |
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}> | |
#map = affine_map<()[s0] -> (s0 ceildiv 4)> | |
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert workload_per_wg = [4]> | |
module attributes {hal.device.targets = [#device_target_cpu]} { | |
hal.executable private @simple_mul_dispatch_0 { | |
hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ { | |
hal.executable.export public @simple_mul_dispatch_0 ordinal(0) layout(#executable_layout) {translation_info = #translation} { | |
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
%c1 = arith.constant 1 : index | |
%0 = affine.apply #map()[%arg1] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
builtin.module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} { | |
llvm.func internal @simple_mul_dispatch_0(%arg0: !llvm.ptr<struct<"iree_hal_executable_environment_v0_t", (ptr<i32>, ptr<func<i32 (ptr<func<i32 (ptr<i8>)>>, ptr<i8>)>>, ptr<ptr<func<i32 (ptr<i8>)>>>, struct<"iree_hal_processor_v0_t", (array<8 x i64>)>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr<struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr<ptr<i8>>, i32)>> {llvm.align = 16 : i64, llvm.noalias}) -> i32 attributes {sym_visibility = "private"} { | |
%0 = llvm.mlir.constant(0 : i32) : i32 | |
%1 = llvm.mlir.constant(2 : i64) : i64 | |
%2 = llvm.mlir.constant(1 : i64) : i64 | |
%3 = llvm.mlir.constant(63 : index) : i64 | |
%4 = llvm.mlir.constant(0 : index) : i64 | |
%5 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%6 = llvm.extractvalue %5[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%7 = llvm.load %6 : !llvm.ptr<ptr<i8>> | |
%8 = llvm.bitcast %7 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%9 = llvm.ptrtoint %8 : !llvm.ptr<f32> to i64 | |
%10 = llvm.and %9, %3 : i64 | |
%11 = llvm.icmp "eq" %10, %4 : i64 | |
"llvm.intr.assume"(%11) : (i1) -> () | |
%12 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%13 = llvm.extractvalue %12[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%14 = llvm.getelementptr %13[%2] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%15 = llvm.load %14 : !llvm.ptr<ptr<i8>> | |
%16 = llvm.bitcast %15 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%17 = llvm.ptrtoint %16 : !llvm.ptr<f32> to i64 | |
%18 = llvm.and %17, %3 : i64 | |
%19 = llvm.icmp "eq" %18, %4 : i64 | |
"llvm.intr.assume"(%19) : (i1) -> () | |
%20 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%21 = llvm.extractvalue %20[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%22 = llvm.getelementptr %21[%1] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%23 = llvm.load %22 : !llvm.ptr<ptr<i8>> | |
%24 = llvm.bitcast %23 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%25 = llvm.ptrtoint %24 : !llvm.ptr<f32> to i64 | |
%26 = llvm.and %25, %3 : i64 | |
%27 = llvm.icmp "eq" %26, %4 : i64 | |
"llvm.intr.assume"(%27) : (i1) -> () | |
%28 = llvm.bitcast %8 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%29 = llvm.load %28 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%30 = llvm.bitcast %16 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%31 = llvm.load %30 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%32 = llvm.fmul %29, %31 : vector<4xf32> | |
%33 = llvm.bitcast %24 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
llvm.store %32, %33 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
llvm.return %0 : i32 | |
} | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c2 = arith.constant 2 : index | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
hal.device.switch<%device : !hal.device> | |
#hal.device.match.executable.format<"embedded-elf-x86_64"> { | |
%executable_layout = hal.executable_layout.lookup device(%device : !hal.device) layout(#executable_layout) : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%executable_layout : !hal.executable_layout)[%c0] bindings([ | |
%c0 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_1 : !hal.buffer)[%c0, %c16] | |
]) | |
hal.command_buffer.dispatch.symbol<%cmd : !hal.command_buffer> target(@simple_mul_dispatch_0::@embedded_elf_x86_64::@simple_mul_dispatch_0) workgroups([%c1, %c1, %c1]) | |
hal.return | |
} | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device, %cmd | |
%view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::HAL::LinkExecutablesPass //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
#executable_layout = #hal.executable.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]> | |
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}> | |
#map = affine_map<()[s0] -> (s0 ceildiv 4)> | |
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert workload_per_wg = [4]> | |
module attributes {hal.device.targets = [#device_target_cpu]} { | |
hal.executable private @simple_mul_dispatch_0 { | |
hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ { | |
hal.executable.export public @simple_mul_dispatch_0 ordinal(0) layout(#executable_layout) {translation_info = #translation} { | |
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
%c1 = arith.constant 1 : index | |
%0 = affine.apply #map()[%arg1] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
builtin.module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} { | |
llvm.func internal @simple_mul_dispatch_0(%arg0: !llvm.ptr<struct<"iree_hal_executable_environment_v0_t", (ptr<i32>, ptr<func<i32 (ptr<func<i32 (ptr<i8>)>>, ptr<i8>)>>, ptr<ptr<func<i32 (ptr<i8>)>>>, struct<"iree_hal_processor_v0_t", (array<8 x i64>)>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr<struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr<ptr<i8>>, i32)>> {llvm.align = 16 : i64, llvm.noalias}) -> i32 attributes {sym_visibility = "private"} { | |
%0 = llvm.mlir.constant(0 : i32) : i32 | |
%1 = llvm.mlir.constant(2 : i64) : i64 | |
%2 = llvm.mlir.constant(1 : i64) : i64 | |
%3 = llvm.mlir.constant(63 : index) : i64 | |
%4 = llvm.mlir.constant(0 : index) : i64 | |
%5 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%6 = llvm.extractvalue %5[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%7 = llvm.load %6 : !llvm.ptr<ptr<i8>> | |
%8 = llvm.bitcast %7 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%9 = llvm.ptrtoint %8 : !llvm.ptr<f32> to i64 | |
%10 = llvm.and %9, %3 : i64 | |
%11 = llvm.icmp "eq" %10, %4 : i64 | |
"llvm.intr.assume"(%11) : (i1) -> () | |
%12 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%13 = llvm.extractvalue %12[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%14 = llvm.getelementptr %13[%2] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%15 = llvm.load %14 : !llvm.ptr<ptr<i8>> | |
%16 = llvm.bitcast %15 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%17 = llvm.ptrtoint %16 : !llvm.ptr<f32> to i64 | |
%18 = llvm.and %17, %3 : i64 | |
%19 = llvm.icmp "eq" %18, %4 : i64 | |
"llvm.intr.assume"(%19) : (i1) -> () | |
%20 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%21 = llvm.extractvalue %20[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%22 = llvm.getelementptr %21[%1] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%23 = llvm.load %22 : !llvm.ptr<ptr<i8>> | |
%24 = llvm.bitcast %23 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%25 = llvm.ptrtoint %24 : !llvm.ptr<f32> to i64 | |
%26 = llvm.and %25, %3 : i64 | |
%27 = llvm.icmp "eq" %26, %4 : i64 | |
"llvm.intr.assume"(%27) : (i1) -> () | |
%28 = llvm.bitcast %8 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%29 = llvm.load %28 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%30 = llvm.bitcast %16 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%31 = llvm.load %30 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%32 = llvm.fmul %29, %31 : vector<4xf32> | |
%33 = llvm.bitcast %24 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
llvm.store %32, %33 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
llvm.return %0 : i32 | |
} | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c2 = arith.constant 2 : index | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
hal.device.switch<%device : !hal.device> | |
#hal.device.match.executable.format<"embedded-elf-x86_64"> { | |
%executable_layout = hal.executable_layout.lookup device(%device : !hal.device) layout(#executable_layout) : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%executable_layout : !hal.executable_layout)[%c0] bindings([ | |
%c0 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_1 : !hal.buffer)[%c0, %c16] | |
]) | |
hal.command_buffer.dispatch.symbol<%cmd : !hal.command_buffer> target(@simple_mul_dispatch_0::@embedded_elf_x86_64::@simple_mul_dispatch_0) workgroups([%c1, %c1, %c1]) | |
hal.return | |
} | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device, %cmd | |
%view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::HAL::ResolveExportOrdinalsPass //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
#executable_layout = #hal.executable.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]> | |
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}> | |
#map = affine_map<()[s0] -> (s0 ceildiv 4)> | |
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert workload_per_wg = [4]> | |
module attributes {hal.device.targets = [#device_target_cpu]} { | |
hal.executable private @simple_mul_dispatch_0 { | |
hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ { | |
hal.executable.export public @simple_mul_dispatch_0 ordinal(0) layout(#executable_layout) {translation_info = #translation} { | |
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
%c1 = arith.constant 1 : index | |
%0 = affine.apply #map()[%arg1] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
builtin.module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} { | |
llvm.func internal @simple_mul_dispatch_0(%arg0: !llvm.ptr<struct<"iree_hal_executable_environment_v0_t", (ptr<i32>, ptr<func<i32 (ptr<func<i32 (ptr<i8>)>>, ptr<i8>)>>, ptr<ptr<func<i32 (ptr<i8>)>>>, struct<"iree_hal_processor_v0_t", (array<8 x i64>)>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr<struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr<ptr<i8>>, i32)>> {llvm.align = 16 : i64, llvm.noalias}) -> i32 attributes {sym_visibility = "private"} { | |
%0 = llvm.mlir.constant(0 : i32) : i32 | |
%1 = llvm.mlir.constant(2 : i64) : i64 | |
%2 = llvm.mlir.constant(1 : i64) : i64 | |
%3 = llvm.mlir.constant(63 : index) : i64 | |
%4 = llvm.mlir.constant(0 : index) : i64 | |
%5 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%6 = llvm.extractvalue %5[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%7 = llvm.load %6 : !llvm.ptr<ptr<i8>> | |
%8 = llvm.bitcast %7 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%9 = llvm.ptrtoint %8 : !llvm.ptr<f32> to i64 | |
%10 = llvm.and %9, %3 : i64 | |
%11 = llvm.icmp "eq" %10, %4 : i64 | |
"llvm.intr.assume"(%11) : (i1) -> () | |
%12 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%13 = llvm.extractvalue %12[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%14 = llvm.getelementptr %13[%2] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%15 = llvm.load %14 : !llvm.ptr<ptr<i8>> | |
%16 = llvm.bitcast %15 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%17 = llvm.ptrtoint %16 : !llvm.ptr<f32> to i64 | |
%18 = llvm.and %17, %3 : i64 | |
%19 = llvm.icmp "eq" %18, %4 : i64 | |
"llvm.intr.assume"(%19) : (i1) -> () | |
%20 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%21 = llvm.extractvalue %20[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%22 = llvm.getelementptr %21[%1] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%23 = llvm.load %22 : !llvm.ptr<ptr<i8>> | |
%24 = llvm.bitcast %23 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%25 = llvm.ptrtoint %24 : !llvm.ptr<f32> to i64 | |
%26 = llvm.and %25, %3 : i64 | |
%27 = llvm.icmp "eq" %26, %4 : i64 | |
"llvm.intr.assume"(%27) : (i1) -> () | |
%28 = llvm.bitcast %8 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%29 = llvm.load %28 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%30 = llvm.bitcast %16 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%31 = llvm.load %30 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%32 = llvm.fmul %29, %31 : vector<4xf32> | |
%33 = llvm.bitcast %24 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
llvm.store %32, %33 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
llvm.return %0 : i32 | |
} | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c2 = arith.constant 2 : index | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
hal.device.switch<%device : !hal.device> | |
#hal.device.match.executable.format<"embedded-elf-x86_64"> { | |
%executable_layout = hal.executable_layout.lookup device(%device : !hal.device) layout(#executable_layout) : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%executable_layout : !hal.executable_layout)[%c0] bindings([ | |
%c0 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_1 : !hal.buffer)[%c0, %c16] | |
]) | |
%0 = hal.command_buffer.device<%cmd : !hal.command_buffer> : !hal.device | |
%exe = hal.executable.lookup device(%0 : !hal.device) executable(@simple_mul_dispatch_0) : !hal.executable | |
hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%exe : !hal.executable)[0] workgroups([%c1, %c1, %c1]) | |
hal.return | |
} | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device, %cmd | |
%view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::HAL::MaterializeResourceCachesPass //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
#executable_layout = #hal.executable.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]> | |
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}> | |
#map = affine_map<()[s0] -> (s0 ceildiv 4)> | |
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert workload_per_wg = [4]> | |
module attributes {hal.device.targets = [#device_target_cpu]} { | |
util.global private @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
util.initializer { | |
%device = hal.ex.shared_device : !hal.device | |
%descriptor_set_layout = hal.descriptor_set_layout.create device(%device : !hal.device) usage(push_only) bindings([#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]) : !hal.descriptor_set_layout | |
util.global.store %descriptor_set_layout, @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
util.initializer.return | |
} | |
util.global private @_executable_layout_0 : !hal.executable_layout | |
util.initializer { | |
%_descriptor_set_layout_0 = util.global.load @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
%device = hal.ex.shared_device : !hal.device | |
%executable_layout = hal.executable_layout.create device(%device : !hal.device) push_constants(0) layouts([%_descriptor_set_layout_0]) : !hal.executable_layout | |
util.global.store %executable_layout, @_executable_layout_0 : !hal.executable_layout | |
util.initializer.return | |
} | |
util.global private @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer { | |
%device = hal.ex.shared_device : !hal.device | |
%0 = hal.device.switch<%device : !hal.device> -> !hal.executable | |
#hal.device.match.executable.format<"embedded-elf-x86_64"> { | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%exe = hal.executable.create device(%device : !hal.device) target(@simple_mul_dispatch_0::@embedded_elf_x86_64) layouts([%_executable_layout_0]) : !hal.executable | |
hal.return %exe : !hal.executable | |
}, | |
#hal.match.always { | |
%1 = util.null : !hal.executable | |
hal.return %1 : !hal.executable | |
} | |
util.global.store %0, @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer.return | |
} | |
hal.executable private @simple_mul_dispatch_0 { | |
hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ { | |
hal.executable.export public @simple_mul_dispatch_0 ordinal(0) layout(#executable_layout) {translation_info = #translation} { | |
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
%c1 = arith.constant 1 : index | |
%0 = affine.apply #map()[%arg1] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
builtin.module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} { | |
llvm.func internal @simple_mul_dispatch_0(%arg0: !llvm.ptr<struct<"iree_hal_executable_environment_v0_t", (ptr<i32>, ptr<func<i32 (ptr<func<i32 (ptr<i8>)>>, ptr<i8>)>>, ptr<ptr<func<i32 (ptr<i8>)>>>, struct<"iree_hal_processor_v0_t", (array<8 x i64>)>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr<struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr<ptr<i8>>, i32)>> {llvm.align = 16 : i64, llvm.noalias}) -> i32 attributes {sym_visibility = "private"} { | |
%0 = llvm.mlir.constant(0 : i32) : i32 | |
%1 = llvm.mlir.constant(2 : i64) : i64 | |
%2 = llvm.mlir.constant(1 : i64) : i64 | |
%3 = llvm.mlir.constant(63 : index) : i64 | |
%4 = llvm.mlir.constant(0 : index) : i64 | |
%5 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%6 = llvm.extractvalue %5[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%7 = llvm.load %6 : !llvm.ptr<ptr<i8>> | |
%8 = llvm.bitcast %7 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%9 = llvm.ptrtoint %8 : !llvm.ptr<f32> to i64 | |
%10 = llvm.and %9, %3 : i64 | |
%11 = llvm.icmp "eq" %10, %4 : i64 | |
"llvm.intr.assume"(%11) : (i1) -> () | |
%12 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%13 = llvm.extractvalue %12[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%14 = llvm.getelementptr %13[%2] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%15 = llvm.load %14 : !llvm.ptr<ptr<i8>> | |
%16 = llvm.bitcast %15 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%17 = llvm.ptrtoint %16 : !llvm.ptr<f32> to i64 | |
%18 = llvm.and %17, %3 : i64 | |
%19 = llvm.icmp "eq" %18, %4 : i64 | |
"llvm.intr.assume"(%19) : (i1) -> () | |
%20 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%21 = llvm.extractvalue %20[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%22 = llvm.getelementptr %21[%1] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%23 = llvm.load %22 : !llvm.ptr<ptr<i8>> | |
%24 = llvm.bitcast %23 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%25 = llvm.ptrtoint %24 : !llvm.ptr<f32> to i64 | |
%26 = llvm.and %25, %3 : i64 | |
%27 = llvm.icmp "eq" %26, %4 : i64 | |
"llvm.intr.assume"(%27) : (i1) -> () | |
%28 = llvm.bitcast %8 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%29 = llvm.load %28 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%30 = llvm.bitcast %16 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%31 = llvm.load %30 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%32 = llvm.fmul %29, %31 : vector<4xf32> | |
%33 = llvm.bitcast %24 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
llvm.store %32, %33 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
llvm.return %0 : i32 | |
} | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c2 = arith.constant 2 : index | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
hal.device.switch<%device : !hal.device> | |
#hal.device.match.executable.format<"embedded-elf-x86_64"> { | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%_executable_layout_0 : !hal.executable_layout)[%c0] bindings([ | |
%c0 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_1 : !hal.buffer)[%c0, %c16] | |
]) | |
%0 = hal.command_buffer.device<%cmd : !hal.command_buffer> : !hal.device | |
%_executable_simple_mul_dispatch_0 = util.global.load @_executable_simple_mul_dispatch_0 : !hal.executable | |
hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%_executable_simple_mul_dispatch_0 : !hal.executable)[0] workgroups([%c1, %c1, %c1]) | |
hal.return | |
} | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device, %cmd | |
%view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::HAL::InlineDeviceSwitchesPass //----- // | |
util.initializer { | |
%device = hal.ex.shared_device : !hal.device | |
%descriptor_set_layout = hal.descriptor_set_layout.create device(%device : !hal.device) usage(push_only) bindings([#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]) : !hal.descriptor_set_layout | |
util.global.store %descriptor_set_layout, @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
util.initializer.return | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::HAL::InlineDeviceSwitchesPass //----- // | |
util.initializer { | |
%_descriptor_set_layout_0 = util.global.load @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
%device = hal.ex.shared_device : !hal.device | |
%executable_layout = hal.executable_layout.create device(%device : !hal.device) push_constants(0) layouts([%_descriptor_set_layout_0]) : !hal.executable_layout | |
util.global.store %executable_layout, @_executable_layout_0 : !hal.executable_layout | |
util.initializer.return | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::HAL::InlineDeviceSwitchesPass //----- // | |
util.initializer { | |
%device = hal.ex.shared_device : !hal.device | |
%ok, %value = hal.device.query<%device : !hal.device> key("hal.executable.format" :: "embedded-elf-x86_64") : i1, i1 = false | |
cf.cond_br %value, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%exe = hal.executable.create device(%device : !hal.device) target(@simple_mul_dispatch_0::@embedded_elf_x86_64) layouts([%_executable_layout_0]) : !hal.executable | |
cf.br ^bb5(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%true = arith.constant true | |
cf.cond_br %true, ^bb3, ^bb4 | |
^bb3: // pred: ^bb2 | |
%0 = util.null : !hal.executable | |
cf.br ^bb5(%0 : !hal.executable) | |
^bb4: // pred: ^bb2 | |
util.unreachable "device not supported in the compiled configuration" | |
^bb5(%1: !hal.executable): // 2 preds: ^bb1, ^bb3 | |
util.global.store %1, @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer.return | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::HAL::InlineDeviceSwitchesPass //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c2 = arith.constant 2 : index | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
%ok, %value = hal.device.query<%device : !hal.device> key("hal.executable.format" :: "embedded-elf-x86_64") : i1, i1 = false | |
cf.cond_br %value, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%_executable_layout_0 : !hal.executable_layout)[%c0] bindings([ | |
%c0 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_1 : !hal.buffer)[%c0, %c16] | |
]) | |
%0 = hal.command_buffer.device<%cmd : !hal.command_buffer> : !hal.device | |
%_executable_simple_mul_dispatch_0 = util.global.load @_executable_simple_mul_dispatch_0 : !hal.executable | |
hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%_executable_simple_mul_dispatch_0 : !hal.executable)[0] workgroups([%c1, %c1, %c1]) | |
cf.br ^bb3 | |
^bb2: // pred: ^bb0 | |
util.unreachable "device not supported in the compiled configuration" | |
^bb3: // pred: ^bb1 | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device, %cmd | |
%view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::HAL::MemoizeDeviceQueriesPass //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
#executable_layout = #hal.executable.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]> | |
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}> | |
#map = affine_map<()[s0] -> (s0 ceildiv 4)> | |
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert workload_per_wg = [4]> | |
module attributes {hal.device.targets = [#device_target_cpu]} { | |
util.global private @_device_query_0 : i1 | |
util.global private @_device_query_0_ok : i1 | |
util.initializer { | |
%device = hal.ex.shared_device : !hal.device | |
%ok, %value = hal.device.query<%device : !hal.device> key("hal.executable.format" :: "embedded-elf-x86_64") : i1, i1 = false | |
util.global.store %ok, @_device_query_0_ok : i1 | |
util.global.store %value, @_device_query_0 : i1 | |
util.initializer.return | |
} | |
util.global private @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
util.initializer { | |
%device = hal.ex.shared_device : !hal.device | |
%descriptor_set_layout = hal.descriptor_set_layout.create device(%device : !hal.device) usage(push_only) bindings([#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]) : !hal.descriptor_set_layout | |
util.global.store %descriptor_set_layout, @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
util.initializer.return | |
} | |
util.global private @_executable_layout_0 : !hal.executable_layout | |
util.initializer { | |
%_descriptor_set_layout_0 = util.global.load @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
%device = hal.ex.shared_device : !hal.device | |
%executable_layout = hal.executable_layout.create device(%device : !hal.device) push_constants(0) layouts([%_descriptor_set_layout_0]) : !hal.executable_layout | |
util.global.store %executable_layout, @_executable_layout_0 : !hal.executable_layout | |
util.initializer.return | |
} | |
util.global private @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer { | |
%device = hal.ex.shared_device : !hal.device | |
%_device_query_0_ok = util.global.load @_device_query_0_ok : i1 | |
%_device_query_0 = util.global.load @_device_query_0 : i1 | |
cf.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%exe = hal.executable.create device(%device : !hal.device) target(@simple_mul_dispatch_0::@embedded_elf_x86_64) layouts([%_executable_layout_0]) : !hal.executable | |
cf.br ^bb5(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%true = arith.constant true | |
cf.cond_br %true, ^bb3, ^bb4 | |
^bb3: // pred: ^bb2 | |
%0 = util.null : !hal.executable | |
cf.br ^bb5(%0 : !hal.executable) | |
^bb4: // pred: ^bb2 | |
util.unreachable "device not supported in the compiled configuration" | |
^bb5(%1: !hal.executable): // 2 preds: ^bb1, ^bb3 | |
util.global.store %1, @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer.return | |
} | |
hal.executable private @simple_mul_dispatch_0 { | |
hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ { | |
hal.executable.export public @simple_mul_dispatch_0 ordinal(0) layout(#executable_layout) {translation_info = #translation} { | |
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
%c1 = arith.constant 1 : index | |
%0 = affine.apply #map()[%arg1] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
builtin.module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} { | |
llvm.func internal @simple_mul_dispatch_0(%arg0: !llvm.ptr<struct<"iree_hal_executable_environment_v0_t", (ptr<i32>, ptr<func<i32 (ptr<func<i32 (ptr<i8>)>>, ptr<i8>)>>, ptr<ptr<func<i32 (ptr<i8>)>>>, struct<"iree_hal_processor_v0_t", (array<8 x i64>)>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr<struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr<ptr<i8>>, i32)>> {llvm.align = 16 : i64, llvm.noalias}) -> i32 attributes {sym_visibility = "private"} { | |
%0 = llvm.mlir.constant(0 : i32) : i32 | |
%1 = llvm.mlir.constant(2 : i64) : i64 | |
%2 = llvm.mlir.constant(1 : i64) : i64 | |
%3 = llvm.mlir.constant(63 : index) : i64 | |
%4 = llvm.mlir.constant(0 : index) : i64 | |
%5 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%6 = llvm.extractvalue %5[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%7 = llvm.load %6 : !llvm.ptr<ptr<i8>> | |
%8 = llvm.bitcast %7 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%9 = llvm.ptrtoint %8 : !llvm.ptr<f32> to i64 | |
%10 = llvm.and %9, %3 : i64 | |
%11 = llvm.icmp "eq" %10, %4 : i64 | |
"llvm.intr.assume"(%11) : (i1) -> () | |
%12 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%13 = llvm.extractvalue %12[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%14 = llvm.getelementptr %13[%2] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%15 = llvm.load %14 : !llvm.ptr<ptr<i8>> | |
%16 = llvm.bitcast %15 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%17 = llvm.ptrtoint %16 : !llvm.ptr<f32> to i64 | |
%18 = llvm.and %17, %3 : i64 | |
%19 = llvm.icmp "eq" %18, %4 : i64 | |
"llvm.intr.assume"(%19) : (i1) -> () | |
%20 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%21 = llvm.extractvalue %20[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%22 = llvm.getelementptr %21[%1] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%23 = llvm.load %22 : !llvm.ptr<ptr<i8>> | |
%24 = llvm.bitcast %23 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%25 = llvm.ptrtoint %24 : !llvm.ptr<f32> to i64 | |
%26 = llvm.and %25, %3 : i64 | |
%27 = llvm.icmp "eq" %26, %4 : i64 | |
"llvm.intr.assume"(%27) : (i1) -> () | |
%28 = llvm.bitcast %8 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%29 = llvm.load %28 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%30 = llvm.bitcast %16 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%31 = llvm.load %30 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%32 = llvm.fmul %29, %31 : vector<4xf32> | |
%33 = llvm.bitcast %24 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
llvm.store %32, %33 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
llvm.return %0 : i32 | |
} | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c2 = arith.constant 2 : index | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
%_device_query_0_ok = util.global.load @_device_query_0_ok : i1 | |
%_device_query_0 = util.global.load @_device_query_0 : i1 | |
cf.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%_executable_layout_0 : !hal.executable_layout)[%c0] bindings([ | |
%c0 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_1 : !hal.buffer)[%c0, %c16] | |
]) | |
%0 = hal.command_buffer.device<%cmd : !hal.command_buffer> : !hal.device | |
%_executable_simple_mul_dispatch_0 = util.global.load @_executable_simple_mul_dispatch_0 : !hal.executable | |
hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%_executable_simple_mul_dispatch_0 : !hal.executable)[0] workgroups([%c1, %c1, %c1]) | |
cf.br ^bb3 | |
^bb2: // pred: ^bb0 | |
util.unreachable "device not supported in the compiled configuration" | |
^bb3: // pred: ^bb1 | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device, %cmd | |
%view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
} | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
#executable_layout = #hal.executable.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]> | |
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}> | |
#map = affine_map<()[s0] -> (s0 ceildiv 4)> | |
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert workload_per_wg = [4]> | |
module attributes {hal.device.targets = [#device_target_cpu]} { | |
util.global private @_device_query_0 : i1 | |
util.global private @_device_query_0_ok : i1 | |
util.initializer { | |
%device = hal.ex.shared_device : !hal.device | |
%ok, %value = hal.device.query<%device : !hal.device> key("hal.executable.format" :: "embedded-elf-x86_64") : i1, i1 = false | |
util.global.store %ok, @_device_query_0_ok : i1 | |
util.global.store %value, @_device_query_0 : i1 | |
util.initializer.return | |
} | |
util.global private @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
util.initializer { | |
%device = hal.ex.shared_device : !hal.device | |
%descriptor_set_layout = hal.descriptor_set_layout.create device(%device : !hal.device) usage(push_only) bindings([#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]) : !hal.descriptor_set_layout | |
util.global.store %descriptor_set_layout, @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
util.initializer.return | |
} | |
util.global private @_executable_layout_0 : !hal.executable_layout | |
util.initializer { | |
%_descriptor_set_layout_0 = util.global.load @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
%device = hal.ex.shared_device : !hal.device | |
%executable_layout = hal.executable_layout.create device(%device : !hal.device) push_constants(0) layouts([%_descriptor_set_layout_0]) : !hal.executable_layout | |
util.global.store %executable_layout, @_executable_layout_0 : !hal.executable_layout | |
util.initializer.return | |
} | |
util.global private @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer { | |
%device = hal.ex.shared_device : !hal.device | |
%_device_query_0 = util.global.load @_device_query_0 : i1 | |
cf.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%exe = hal.executable.create device(%device : !hal.device) target(@simple_mul_dispatch_0::@embedded_elf_x86_64) layouts([%_executable_layout_0]) : !hal.executable | |
cf.br ^bb3(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%0 = util.null : !hal.executable | |
cf.br ^bb3(%0 : !hal.executable) | |
^bb3(%1: !hal.executable): // 2 preds: ^bb1, ^bb2 | |
util.global.store %1, @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer.return | |
} | |
hal.executable private @simple_mul_dispatch_0 { | |
hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ { | |
hal.executable.export public @simple_mul_dispatch_0 ordinal(0) layout(#executable_layout) {translation_info = #translation} { | |
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
%c1 = arith.constant 1 : index | |
%0 = affine.apply #map()[%arg1] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
builtin.module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} { | |
llvm.func internal @simple_mul_dispatch_0(%arg0: !llvm.ptr<struct<"iree_hal_executable_environment_v0_t", (ptr<i32>, ptr<func<i32 (ptr<func<i32 (ptr<i8>)>>, ptr<i8>)>>, ptr<ptr<func<i32 (ptr<i8>)>>>, struct<"iree_hal_processor_v0_t", (array<8 x i64>)>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr<struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr<ptr<i8>>, i32)>> {llvm.align = 16 : i64, llvm.noalias}) -> i32 attributes {sym_visibility = "private"} { | |
%0 = llvm.mlir.constant(0 : i32) : i32 | |
%1 = llvm.mlir.constant(2 : i64) : i64 | |
%2 = llvm.mlir.constant(1 : i64) : i64 | |
%3 = llvm.mlir.constant(63 : index) : i64 | |
%4 = llvm.mlir.constant(0 : index) : i64 | |
%5 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%6 = llvm.extractvalue %5[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%7 = llvm.load %6 : !llvm.ptr<ptr<i8>> | |
%8 = llvm.bitcast %7 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%9 = llvm.ptrtoint %8 : !llvm.ptr<f32> to i64 | |
%10 = llvm.and %9, %3 : i64 | |
%11 = llvm.icmp "eq" %10, %4 : i64 | |
"llvm.intr.assume"(%11) : (i1) -> () | |
%12 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%13 = llvm.extractvalue %12[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%14 = llvm.getelementptr %13[%2] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%15 = llvm.load %14 : !llvm.ptr<ptr<i8>> | |
%16 = llvm.bitcast %15 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%17 = llvm.ptrtoint %16 : !llvm.ptr<f32> to i64 | |
%18 = llvm.and %17, %3 : i64 | |
%19 = llvm.icmp "eq" %18, %4 : i64 | |
"llvm.intr.assume"(%19) : (i1) -> () | |
%20 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%21 = llvm.extractvalue %20[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%22 = llvm.getelementptr %21[%1] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%23 = llvm.load %22 : !llvm.ptr<ptr<i8>> | |
%24 = llvm.bitcast %23 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%25 = llvm.ptrtoint %24 : !llvm.ptr<f32> to i64 | |
%26 = llvm.and %25, %3 : i64 | |
%27 = llvm.icmp "eq" %26, %4 : i64 | |
"llvm.intr.assume"(%27) : (i1) -> () | |
%28 = llvm.bitcast %8 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%29 = llvm.load %28 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%30 = llvm.bitcast %16 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%31 = llvm.load %30 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%32 = llvm.fmul %29, %31 : vector<4xf32> | |
%33 = llvm.bitcast %24 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
llvm.store %32, %33 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
llvm.return %0 : i32 | |
} | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c2 = arith.constant 2 : index | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
%_device_query_0 = util.global.load @_device_query_0 : i1 | |
cf.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%_executable_layout_0 : !hal.executable_layout)[%c0] bindings([ | |
%c0 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_1 : !hal.buffer)[%c0, %c16] | |
]) | |
%_executable_simple_mul_dispatch_0 = util.global.load @_executable_simple_mul_dispatch_0 : !hal.executable | |
hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%_executable_simple_mul_dispatch_0 : !hal.executable)[0] workgroups([%c1, %c1, %c1]) | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device, %cmd | |
%view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
^bb2: // pred: ^bb0 | |
util.unreachable "device not supported in the compiled configuration" | |
} | |
} | |
// -----// IR Dump After CSE //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
#executable_layout = #hal.executable.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]> | |
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}> | |
#map = affine_map<()[s0] -> (s0 ceildiv 4)> | |
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert workload_per_wg = [4]> | |
module attributes {hal.device.targets = [#device_target_cpu]} { | |
util.global private @_device_query_0 : i1 | |
util.global private @_device_query_0_ok : i1 | |
util.initializer { | |
%device = hal.ex.shared_device : !hal.device | |
%ok, %value = hal.device.query<%device : !hal.device> key("hal.executable.format" :: "embedded-elf-x86_64") : i1, i1 = false | |
util.global.store %ok, @_device_query_0_ok : i1 | |
util.global.store %value, @_device_query_0 : i1 | |
util.initializer.return | |
} | |
util.global private @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
util.initializer { | |
%device = hal.ex.shared_device : !hal.device | |
%descriptor_set_layout = hal.descriptor_set_layout.create device(%device : !hal.device) usage(push_only) bindings([#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]) : !hal.descriptor_set_layout | |
util.global.store %descriptor_set_layout, @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
util.initializer.return | |
} | |
util.global private @_executable_layout_0 : !hal.executable_layout | |
util.initializer { | |
%_descriptor_set_layout_0 = util.global.load @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
%device = hal.ex.shared_device : !hal.device | |
%executable_layout = hal.executable_layout.create device(%device : !hal.device) push_constants(0) layouts([%_descriptor_set_layout_0]) : !hal.executable_layout | |
util.global.store %executable_layout, @_executable_layout_0 : !hal.executable_layout | |
util.initializer.return | |
} | |
util.global private @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer { | |
%device = hal.ex.shared_device : !hal.device | |
%_device_query_0 = util.global.load @_device_query_0 : i1 | |
cf.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%exe = hal.executable.create device(%device : !hal.device) target(@simple_mul_dispatch_0::@embedded_elf_x86_64) layouts([%_executable_layout_0]) : !hal.executable | |
cf.br ^bb3(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%0 = util.null : !hal.executable | |
cf.br ^bb3(%0 : !hal.executable) | |
^bb3(%1: !hal.executable): // 2 preds: ^bb1, ^bb2 | |
util.global.store %1, @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer.return | |
} | |
hal.executable private @simple_mul_dispatch_0 { | |
hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ { | |
hal.executable.export public @simple_mul_dispatch_0 ordinal(0) layout(#executable_layout) {translation_info = #translation} { | |
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
%c1 = arith.constant 1 : index | |
%0 = affine.apply #map()[%arg1] | |
hal.return %0, %c1, %c1 : index, index, index | |
} | |
builtin.module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} { | |
llvm.func internal @simple_mul_dispatch_0(%arg0: !llvm.ptr<struct<"iree_hal_executable_environment_v0_t", (ptr<i32>, ptr<func<i32 (ptr<func<i32 (ptr<i8>)>>, ptr<i8>)>>, ptr<ptr<func<i32 (ptr<i8>)>>>, struct<"iree_hal_processor_v0_t", (array<8 x i64>)>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr<struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr<ptr<i8>>, i32)>> {llvm.align = 16 : i64, llvm.noalias}) -> i32 attributes {sym_visibility = "private"} { | |
%0 = llvm.mlir.constant(0 : i32) : i32 | |
%1 = llvm.mlir.constant(2 : i64) : i64 | |
%2 = llvm.mlir.constant(1 : i64) : i64 | |
%3 = llvm.mlir.constant(63 : index) : i64 | |
%4 = llvm.mlir.constant(0 : index) : i64 | |
%5 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%6 = llvm.extractvalue %5[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%7 = llvm.load %6 : !llvm.ptr<ptr<i8>> | |
%8 = llvm.bitcast %7 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%9 = llvm.ptrtoint %8 : !llvm.ptr<f32> to i64 | |
%10 = llvm.and %9, %3 : i64 | |
%11 = llvm.icmp "eq" %10, %4 : i64 | |
"llvm.intr.assume"(%11) : (i1) -> () | |
%12 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%13 = llvm.extractvalue %12[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%14 = llvm.getelementptr %13[%2] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%15 = llvm.load %14 : !llvm.ptr<ptr<i8>> | |
%16 = llvm.bitcast %15 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%17 = llvm.ptrtoint %16 : !llvm.ptr<f32> to i64 | |
%18 = llvm.and %17, %3 : i64 | |
%19 = llvm.icmp "eq" %18, %4 : i64 | |
"llvm.intr.assume"(%19) : (i1) -> () | |
%20 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%21 = llvm.extractvalue %20[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%22 = llvm.getelementptr %21[%1] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%23 = llvm.load %22 : !llvm.ptr<ptr<i8>> | |
%24 = llvm.bitcast %23 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%25 = llvm.ptrtoint %24 : !llvm.ptr<f32> to i64 | |
%26 = llvm.and %25, %3 : i64 | |
%27 = llvm.icmp "eq" %26, %4 : i64 | |
"llvm.intr.assume"(%27) : (i1) -> () | |
%28 = llvm.bitcast %8 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%29 = llvm.load %28 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%30 = llvm.bitcast %16 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%31 = llvm.load %30 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%32 = llvm.fmul %29, %31 : vector<4xf32> | |
%33 = llvm.bitcast %24 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
llvm.store %32, %33 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
llvm.return %0 : i32 | |
} | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c2 = arith.constant 2 : index | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
%_device_query_0 = util.global.load @_device_query_0 : i1 | |
cf.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%_executable_layout_0 : !hal.executable_layout)[%c0] bindings([ | |
%c0 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_1 : !hal.buffer)[%c0, %c16] | |
]) | |
%_executable_simple_mul_dispatch_0 = util.global.load @_executable_simple_mul_dispatch_0 : !hal.executable | |
hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%_executable_simple_mul_dispatch_0 : !hal.executable)[0] workgroups([%c1, %c1, %c1]) | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device, %cmd | |
%view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
^bb2: // pred: ^bb0 | |
util.unreachable "device not supported in the compiled configuration" | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::SimplifyGlobalAccessesPass //----- // | |
util.initializer { | |
%device = hal.ex.shared_device : !hal.device | |
%ok, %value = hal.device.query<%device : !hal.device> key("hal.executable.format" :: "embedded-elf-x86_64") : i1, i1 = false | |
util.global.store %value, @_device_query_0 : i1 | |
util.global.store %ok, @_device_query_0_ok : i1 | |
util.initializer.return | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::SimplifyGlobalAccessesPass //----- // | |
util.initializer { | |
%device = hal.ex.shared_device : !hal.device | |
%descriptor_set_layout = hal.descriptor_set_layout.create device(%device : !hal.device) usage(push_only) bindings([#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]) : !hal.descriptor_set_layout | |
util.global.store %descriptor_set_layout, @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
util.initializer.return | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::SimplifyGlobalAccessesPass //----- // | |
util.initializer { | |
%_descriptor_set_layout_0 = util.global.load @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
%device = hal.ex.shared_device : !hal.device | |
%executable_layout = hal.executable_layout.create device(%device : !hal.device) push_constants(0) layouts([%_descriptor_set_layout_0]) : !hal.executable_layout | |
util.global.store %executable_layout, @_executable_layout_0 : !hal.executable_layout | |
util.initializer.return | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::SimplifyGlobalAccessesPass //----- // | |
util.initializer { | |
%_device_query_0 = util.global.load @_device_query_0 : i1 | |
%device = hal.ex.shared_device : !hal.device | |
cf.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%exe = hal.executable.create device(%device : !hal.device) target(@simple_mul_dispatch_0::@embedded_elf_x86_64) layouts([%_executable_layout_0]) : !hal.executable | |
cf.br ^bb3(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%0 = util.null : !hal.executable | |
cf.br ^bb3(%0 : !hal.executable) | |
^bb3(%1: !hal.executable): // 2 preds: ^bb1, ^bb2 | |
util.global.store %1, @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer.return | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::SimplifyGlobalAccessesPass //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%_device_query_0 = util.global.load @_device_query_0 : i1 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%_executable_simple_mul_dispatch_0 = util.global.load @_executable_simple_mul_dispatch_0 : !hal.executable | |
%c2 = arith.constant 2 : index | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
cf.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%_executable_layout_0 : !hal.executable_layout)[%c0] bindings([ | |
%c0 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_1 : !hal.buffer)[%c0, %c16] | |
]) | |
hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%_executable_simple_mul_dispatch_0 : !hal.executable)[0] workgroups([%c1, %c1, %c1]) | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device, %cmd | |
%view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
^bb2: // pred: ^bb0 | |
util.unreachable "device not supported in the compiled configuration" | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::ApplyPatternsPass //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
#executable_layout = #hal.executable.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]> | |
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}> | |
#map = affine_map<()[s0] -> (s0 ceildiv 4)> | |
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert workload_per_wg = [4]> | |
// NOTE(review): auto-generated compiler IR dump (iree-compile); the comments below
// are review annotations only — the IR itself is unchanged. Per the dump marker
// preceding this module, this is the program state after Util::ApplyPatternsPass.
module attributes {hal.device.targets = [#device_target_cpu]} {
  // Cached device capability: does the shared device accept "embedded-elf-x86_64"
  // executables? %value is the query answer, %ok whether the query itself succeeded.
  util.global private @_device_query_0 : i1
  util.global private @_device_query_0_ok : i1
  util.initializer {
    %device = hal.ex.shared_device : !hal.device
    %ok, %value = hal.device.query<%device : !hal.device> key("hal.executable.format" :: "embedded-elf-x86_64") : i1, i1 = false
    util.global.store %value, @_device_query_0 : i1
    util.global.store %ok, @_device_query_0_ok : i1
    util.initializer.return
  }
  // Descriptor set layout: three storage buffers (bindings 0..2), push-only usage.
  util.global private @_descriptor_set_layout_0 : !hal.descriptor_set_layout
  util.initializer {
    %device = hal.ex.shared_device : !hal.device
    %descriptor_set_layout = hal.descriptor_set_layout.create device(%device : !hal.device) usage(push_only) bindings([#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]) : !hal.descriptor_set_layout
    util.global.store %descriptor_set_layout, @_descriptor_set_layout_0 : !hal.descriptor_set_layout
    util.initializer.return
  }
  // Executable layout: the single descriptor set above, zero push constants.
  util.global private @_executable_layout_0 : !hal.executable_layout
  util.initializer {
    %_descriptor_set_layout_0 = util.global.load @_descriptor_set_layout_0 : !hal.descriptor_set_layout
    %device = hal.ex.shared_device : !hal.device
    %executable_layout = hal.executable_layout.create device(%device : !hal.device) push_constants(0) layouts([%_descriptor_set_layout_0]) : !hal.executable_layout
    util.global.store %executable_layout, @_executable_layout_0 : !hal.executable_layout
    util.initializer.return
  }
  // Executable global: created only when the format query above returned true;
  // otherwise a util.null executable is stored (the unsupported case is trapped
  // again at call time in @simple_mul below).
  util.global private @_executable_simple_mul_dispatch_0 : !hal.executable
  util.initializer {
    %_device_query_0 = util.global.load @_device_query_0 : i1
    %device = hal.ex.shared_device : !hal.device
    cf.cond_br %_device_query_0, ^bb1, ^bb2
  ^bb1: // pred: ^bb0
    %_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout
    %exe = hal.executable.create device(%device : !hal.device) target(@simple_mul_dispatch_0::@embedded_elf_x86_64) layouts([%_executable_layout_0]) : !hal.executable
    cf.br ^bb3(%exe : !hal.executable)
  ^bb2: // pred: ^bb0
    %0 = util.null : !hal.executable
    cf.br ^bb3(%0 : !hal.executable)
  ^bb3(%1: !hal.executable): // 2 preds: ^bb1, ^bb2
    util.global.store %1, @_executable_simple_mul_dispatch_0 : !hal.executable
    util.initializer.return
  }
  hal.executable private @simple_mul_dispatch_0 {
    hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
      // Workgroup-count region: ceildiv(workload, 4) x 1 x 1 (see #map above).
      hal.executable.export public @simple_mul_dispatch_0 ordinal(0) layout(#executable_layout) {translation_info = #translation} {
      ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
        %c1 = arith.constant 1 : index
        %0 = affine.apply #map()[%arg1]
        hal.return %0, %c1, %c1 : index, index, index
      }
      builtin.module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} {
        // Dispatch kernel in the IREE executable ABI:
        //   i32 fn(environment*, dispatch_state*, workgroup_state*); always returns 0.
        // For each of the three bindings it loads the base pointer out of
        // dispatch_state field [10] (the ptr<ptr<i8>> binding table), casts it to
        // f32*, and asserts 64-byte alignment via (ptr & 63) == 0 + llvm.intr.assume.
        llvm.func internal @simple_mul_dispatch_0(%arg0: !llvm.ptr<struct<"iree_hal_executable_environment_v0_t", (ptr<i32>, ptr<func<i32 (ptr<func<i32 (ptr<i8>)>>, ptr<i8>)>>, ptr<ptr<func<i32 (ptr<i8>)>>>, struct<"iree_hal_processor_v0_t", (array<8 x i64>)>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr<struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr<ptr<i8>>, i32)>> {llvm.align = 16 : i64, llvm.noalias}) -> i32 attributes {sym_visibility = "private"} {
          %0 = llvm.mlir.constant(0 : i32) : i32
          %1 = llvm.mlir.constant(2 : i64) : i64
          %2 = llvm.mlir.constant(1 : i64) : i64
          %3 = llvm.mlir.constant(63 : index) : i64
          %4 = llvm.mlir.constant(0 : index) : i64
          // Binding 0 -> %8 (f32*), with alignment assumption.
          %5 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>>
          %6 = llvm.extractvalue %5[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>
          %7 = llvm.load %6 : !llvm.ptr<ptr<i8>>
          %8 = llvm.bitcast %7 : !llvm.ptr<i8> to !llvm.ptr<f32>
          %9 = llvm.ptrtoint %8 : !llvm.ptr<f32> to i64
          %10 = llvm.and %9, %3 : i64
          %11 = llvm.icmp "eq" %10, %4 : i64
          "llvm.intr.assume"(%11) : (i1) -> ()
          // Binding 1 -> %16 (f32*), with alignment assumption.
          %12 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>>
          %13 = llvm.extractvalue %12[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>
          %14 = llvm.getelementptr %13[%2] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>>
          %15 = llvm.load %14 : !llvm.ptr<ptr<i8>>
          %16 = llvm.bitcast %15 : !llvm.ptr<i8> to !llvm.ptr<f32>
          %17 = llvm.ptrtoint %16 : !llvm.ptr<f32> to i64
          %18 = llvm.and %17, %3 : i64
          %19 = llvm.icmp "eq" %18, %4 : i64
          "llvm.intr.assume"(%19) : (i1) -> ()
          // Binding 2 -> %24 (f32*), with alignment assumption.
          %20 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>>
          %21 = llvm.extractvalue %20[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>
          %22 = llvm.getelementptr %21[%1] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>>
          %23 = llvm.load %22 : !llvm.ptr<ptr<i8>>
          %24 = llvm.bitcast %23 : !llvm.ptr<i8> to !llvm.ptr<f32>
          %25 = llvm.ptrtoint %24 : !llvm.ptr<f32> to i64
          %26 = llvm.and %25, %3 : i64
          %27 = llvm.icmp "eq" %26, %4 : i64
          "llvm.intr.assume"(%27) : (i1) -> ()
          // Vectorized body: out[0:4] = in0[0:4] * in1[0:4] as one 4xf32 op
          // (memory ops carry alignment 4 despite the 64-byte assumption above).
          %28 = llvm.bitcast %8 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>>
          %29 = llvm.load %28 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>>
          %30 = llvm.bitcast %16 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>>
          %31 = llvm.load %30 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>>
          %32 = llvm.fmul %29, %31 : vector<4xf32>
          %33 = llvm.bitcast %24 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>>
          llvm.store %32, %33 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>>
          llvm.return %0 : i32
        }
      }
    }
  }
  // Host-side ABI entry point: validates the two 4-element input buffer views
  // (element type %c553648160_i32 — presumably f32; TODO confirm HAL encoding),
  // allocates a 16-byte result buffer, records a one-shot command buffer that
  // pushes the three bindings and dispatches a single workgroup, submits and
  // waits, then wraps the result in a new buffer view. Falls into
  // util.unreachable when the cached device query says the compiled executable
  // format is unsupported.
  func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
    %c1_i32 = arith.constant 1 : i32
    %c553648160_i32 = arith.constant 553648160 : i32
    %c1 = arith.constant 1 : index
    %c4 = arith.constant 4 : index
    %c16 = arith.constant 16 : index
    %c0 = arith.constant 0 : index
    %c2 = arith.constant 2 : index
    %_device_query_0 = util.global.load @_device_query_0 : i1
    %_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout
    %_executable_simple_mul_dispatch_0 = util.global.load @_executable_simple_mul_dispatch_0 : !hal.executable
    hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32)
    %buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer
    %device = hal.ex.shared_device : !hal.device
    %allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator
    hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage")
    hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32)
    %buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer
    hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage")
    %buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16}
    %cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer
    hal.command_buffer.begin<%cmd : !hal.command_buffer>
    cf.cond_br %_device_query_0, ^bb1, ^bb2
  ^bb1: // pred: ^bb0
    hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%_executable_layout_0 : !hal.executable_layout)[%c0] bindings([
      %c0 = (%buffer : !hal.buffer)[%c0, %c16],
      %c1 = (%buffer_0 : !hal.buffer)[%c0, %c16],
      %c2 = (%buffer_1 : !hal.buffer)[%c0, %c16]
    ])
    hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%_executable_simple_mul_dispatch_0 : !hal.executable)[0] workgroups([%c1, %c1, %c1])
    hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None")
    hal.command_buffer.end<%cmd : !hal.command_buffer>
    hal.ex.submit_and_wait %device, %cmd
    %view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view
    return %view : !hal.buffer_view
  ^bb2: // pred: ^bb0
    util.unreachable "device not supported in the compiled configuration"
  }
}
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::FoldGlobalsPass //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
#executable_layout = #hal.executable.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]> | |
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}> | |
#map = affine_map<()[s0] -> (s0 ceildiv 4)> | |
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert workload_per_wg = [4]> | |
// NOTE(review): auto-generated compiler IR dump (iree-compile); the comments below
// are review annotations only — the IR itself is unchanged. Per the dump marker
// preceding this module, this is the state after Util::FoldGlobalsPass (the unused
// @_device_query_0_ok global from the previous dump has been folded away).
module attributes {hal.device.targets = [#device_target_cpu]} {
  // Cached device capability: does the shared device accept "embedded-elf-x86_64"
  // executables? Only the query value is kept; the %ok status is discarded.
  util.global private @_device_query_0 : i1
  util.initializer {
    %device = hal.ex.shared_device : !hal.device
    %ok, %value = hal.device.query<%device : !hal.device> key("hal.executable.format" :: "embedded-elf-x86_64") : i1, i1 = false
    util.global.store %value, @_device_query_0 : i1
    util.initializer.return
  }
  // Descriptor set layout: three storage buffers (bindings 0..2), push-only usage.
  util.global private @_descriptor_set_layout_0 : !hal.descriptor_set_layout
  util.initializer {
    %device = hal.ex.shared_device : !hal.device
    %descriptor_set_layout = hal.descriptor_set_layout.create device(%device : !hal.device) usage(push_only) bindings([#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]) : !hal.descriptor_set_layout
    util.global.store %descriptor_set_layout, @_descriptor_set_layout_0 : !hal.descriptor_set_layout
    util.initializer.return
  }
  // Executable layout: the single descriptor set above, zero push constants.
  util.global private @_executable_layout_0 : !hal.executable_layout
  util.initializer {
    %_descriptor_set_layout_0 = util.global.load @_descriptor_set_layout_0 : !hal.descriptor_set_layout
    %device = hal.ex.shared_device : !hal.device
    %executable_layout = hal.executable_layout.create device(%device : !hal.device) push_constants(0) layouts([%_descriptor_set_layout_0]) : !hal.executable_layout
    util.global.store %executable_layout, @_executable_layout_0 : !hal.executable_layout
    util.initializer.return
  }
  // Executable global: created only when the format query above returned true;
  // otherwise a util.null executable is stored (the unsupported case is trapped
  // again at call time in @simple_mul below).
  util.global private @_executable_simple_mul_dispatch_0 : !hal.executable
  util.initializer {
    %_device_query_0 = util.global.load @_device_query_0 : i1
    %device = hal.ex.shared_device : !hal.device
    cf.cond_br %_device_query_0, ^bb1, ^bb2
  ^bb1: // pred: ^bb0
    %_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout
    %exe = hal.executable.create device(%device : !hal.device) target(@simple_mul_dispatch_0::@embedded_elf_x86_64) layouts([%_executable_layout_0]) : !hal.executable
    cf.br ^bb3(%exe : !hal.executable)
  ^bb2: // pred: ^bb0
    %0 = util.null : !hal.executable
    cf.br ^bb3(%0 : !hal.executable)
  ^bb3(%1: !hal.executable): // 2 preds: ^bb1, ^bb2
    util.global.store %1, @_executable_simple_mul_dispatch_0 : !hal.executable
    util.initializer.return
  }
  hal.executable private @simple_mul_dispatch_0 {
    hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
      // Workgroup-count region: ceildiv(workload, 4) x 1 x 1 (see #map above).
      hal.executable.export public @simple_mul_dispatch_0 ordinal(0) layout(#executable_layout) {translation_info = #translation} {
      ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
        %c1 = arith.constant 1 : index
        %0 = affine.apply #map()[%arg1]
        hal.return %0, %c1, %c1 : index, index, index
      }
      builtin.module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} {
        // Dispatch kernel in the IREE executable ABI:
        //   i32 fn(environment*, dispatch_state*, workgroup_state*); always returns 0.
        // For each of the three bindings it loads the base pointer out of
        // dispatch_state field [10] (the ptr<ptr<i8>> binding table), casts it to
        // f32*, and asserts 64-byte alignment via (ptr & 63) == 0 + llvm.intr.assume.
        llvm.func internal @simple_mul_dispatch_0(%arg0: !llvm.ptr<struct<"iree_hal_executable_environment_v0_t", (ptr<i32>, ptr<func<i32 (ptr<func<i32 (ptr<i8>)>>, ptr<i8>)>>, ptr<ptr<func<i32 (ptr<i8>)>>>, struct<"iree_hal_processor_v0_t", (array<8 x i64>)>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr<struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr<ptr<i8>>, i32)>> {llvm.align = 16 : i64, llvm.noalias}) -> i32 attributes {sym_visibility = "private"} {
          %0 = llvm.mlir.constant(0 : i32) : i32
          %1 = llvm.mlir.constant(2 : i64) : i64
          %2 = llvm.mlir.constant(1 : i64) : i64
          %3 = llvm.mlir.constant(63 : index) : i64
          %4 = llvm.mlir.constant(0 : index) : i64
          // Binding 0 -> %8 (f32*), with alignment assumption.
          %5 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>>
          %6 = llvm.extractvalue %5[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>
          %7 = llvm.load %6 : !llvm.ptr<ptr<i8>>
          %8 = llvm.bitcast %7 : !llvm.ptr<i8> to !llvm.ptr<f32>
          %9 = llvm.ptrtoint %8 : !llvm.ptr<f32> to i64
          %10 = llvm.and %9, %3 : i64
          %11 = llvm.icmp "eq" %10, %4 : i64
          "llvm.intr.assume"(%11) : (i1) -> ()
          // Binding 1 -> %16 (f32*), with alignment assumption.
          %12 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>>
          %13 = llvm.extractvalue %12[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>
          %14 = llvm.getelementptr %13[%2] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>>
          %15 = llvm.load %14 : !llvm.ptr<ptr<i8>>
          %16 = llvm.bitcast %15 : !llvm.ptr<i8> to !llvm.ptr<f32>
          %17 = llvm.ptrtoint %16 : !llvm.ptr<f32> to i64
          %18 = llvm.and %17, %3 : i64
          %19 = llvm.icmp "eq" %18, %4 : i64
          "llvm.intr.assume"(%19) : (i1) -> ()
          // Binding 2 -> %24 (f32*), with alignment assumption.
          %20 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>>
          %21 = llvm.extractvalue %20[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>
          %22 = llvm.getelementptr %21[%1] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>>
          %23 = llvm.load %22 : !llvm.ptr<ptr<i8>>
          %24 = llvm.bitcast %23 : !llvm.ptr<i8> to !llvm.ptr<f32>
          %25 = llvm.ptrtoint %24 : !llvm.ptr<f32> to i64
          %26 = llvm.and %25, %3 : i64
          %27 = llvm.icmp "eq" %26, %4 : i64
          "llvm.intr.assume"(%27) : (i1) -> ()
          // Vectorized body: out[0:4] = in0[0:4] * in1[0:4] as one 4xf32 op
          // (memory ops carry alignment 4 despite the 64-byte assumption above).
          %28 = llvm.bitcast %8 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>>
          %29 = llvm.load %28 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>>
          %30 = llvm.bitcast %16 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>>
          %31 = llvm.load %30 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>>
          %32 = llvm.fmul %29, %31 : vector<4xf32>
          %33 = llvm.bitcast %24 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>>
          llvm.store %32, %33 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>>
          llvm.return %0 : i32
        }
      }
    }
  }
  // Host-side ABI entry point: validates the two 4-element input buffer views
  // (element type %c553648160_i32 — presumably f32; TODO confirm HAL encoding),
  // allocates a 16-byte result buffer, records a one-shot command buffer that
  // pushes the three bindings and dispatches a single workgroup, submits and
  // waits, then wraps the result in a new buffer view. Falls into
  // util.unreachable when the cached device query says the compiled executable
  // format is unsupported.
  func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
    %c1_i32 = arith.constant 1 : i32
    %c553648160_i32 = arith.constant 553648160 : i32
    %c1 = arith.constant 1 : index
    %c4 = arith.constant 4 : index
    %c16 = arith.constant 16 : index
    %c0 = arith.constant 0 : index
    %c2 = arith.constant 2 : index
    %_device_query_0 = util.global.load @_device_query_0 : i1
    %_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout
    %_executable_simple_mul_dispatch_0 = util.global.load @_executable_simple_mul_dispatch_0 : !hal.executable
    hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32)
    %buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer
    %device = hal.ex.shared_device : !hal.device
    %allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator
    hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage")
    hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32)
    %buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer
    hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage")
    %buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16}
    %cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer
    hal.command_buffer.begin<%cmd : !hal.command_buffer>
    cf.cond_br %_device_query_0, ^bb1, ^bb2
  ^bb1: // pred: ^bb0
    hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%_executable_layout_0 : !hal.executable_layout)[%c0] bindings([
      %c0 = (%buffer : !hal.buffer)[%c0, %c16],
      %c1 = (%buffer_0 : !hal.buffer)[%c0, %c16],
      %c2 = (%buffer_1 : !hal.buffer)[%c0, %c16]
    ])
    hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%_executable_simple_mul_dispatch_0 : !hal.executable)[0] workgroups([%c1, %c1, %c1])
    hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None")
    hal.command_buffer.end<%cmd : !hal.command_buffer>
    hal.ex.submit_and_wait %device, %cmd
    %view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view
    return %view : !hal.buffer_view
  ^bb2: // pred: ^bb0
    util.unreachable "device not supported in the compiled configuration"
  }
}
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::FuseGlobalsPass //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
#executable_layout = #hal.executable.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]> | |
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}> | |
#map = affine_map<()[s0] -> (s0 ceildiv 4)> | |
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert workload_per_wg = [4]> | |
// NOTE(review): auto-generated compiler IR dump (iree-compile); the comments below
// are review annotations only — the IR itself is unchanged. Per the dump marker
// preceding this module, this is the state after Util::FuseGlobalsPass; it is
// textually identical to the FoldGlobals dump above (the pass made no changes here).
module attributes {hal.device.targets = [#device_target_cpu]} {
  // Cached device capability: does the shared device accept "embedded-elf-x86_64"
  // executables? Only the query value is kept; the %ok status is discarded.
  util.global private @_device_query_0 : i1
  util.initializer {
    %device = hal.ex.shared_device : !hal.device
    %ok, %value = hal.device.query<%device : !hal.device> key("hal.executable.format" :: "embedded-elf-x86_64") : i1, i1 = false
    util.global.store %value, @_device_query_0 : i1
    util.initializer.return
  }
  // Descriptor set layout: three storage buffers (bindings 0..2), push-only usage.
  util.global private @_descriptor_set_layout_0 : !hal.descriptor_set_layout
  util.initializer {
    %device = hal.ex.shared_device : !hal.device
    %descriptor_set_layout = hal.descriptor_set_layout.create device(%device : !hal.device) usage(push_only) bindings([#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]) : !hal.descriptor_set_layout
    util.global.store %descriptor_set_layout, @_descriptor_set_layout_0 : !hal.descriptor_set_layout
    util.initializer.return
  }
  // Executable layout: the single descriptor set above, zero push constants.
  util.global private @_executable_layout_0 : !hal.executable_layout
  util.initializer {
    %_descriptor_set_layout_0 = util.global.load @_descriptor_set_layout_0 : !hal.descriptor_set_layout
    %device = hal.ex.shared_device : !hal.device
    %executable_layout = hal.executable_layout.create device(%device : !hal.device) push_constants(0) layouts([%_descriptor_set_layout_0]) : !hal.executable_layout
    util.global.store %executable_layout, @_executable_layout_0 : !hal.executable_layout
    util.initializer.return
  }
  // Executable global: created only when the format query above returned true;
  // otherwise a util.null executable is stored (the unsupported case is trapped
  // again at call time in @simple_mul below).
  util.global private @_executable_simple_mul_dispatch_0 : !hal.executable
  util.initializer {
    %_device_query_0 = util.global.load @_device_query_0 : i1
    %device = hal.ex.shared_device : !hal.device
    cf.cond_br %_device_query_0, ^bb1, ^bb2
  ^bb1: // pred: ^bb0
    %_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout
    %exe = hal.executable.create device(%device : !hal.device) target(@simple_mul_dispatch_0::@embedded_elf_x86_64) layouts([%_executable_layout_0]) : !hal.executable
    cf.br ^bb3(%exe : !hal.executable)
  ^bb2: // pred: ^bb0
    %0 = util.null : !hal.executable
    cf.br ^bb3(%0 : !hal.executable)
  ^bb3(%1: !hal.executable): // 2 preds: ^bb1, ^bb2
    util.global.store %1, @_executable_simple_mul_dispatch_0 : !hal.executable
    util.initializer.return
  }
  hal.executable private @simple_mul_dispatch_0 {
    hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
      // Workgroup-count region: ceildiv(workload, 4) x 1 x 1 (see #map above).
      hal.executable.export public @simple_mul_dispatch_0 ordinal(0) layout(#executable_layout) {translation_info = #translation} {
      ^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
        %c1 = arith.constant 1 : index
        %0 = affine.apply #map()[%arg1]
        hal.return %0, %c1, %c1 : index, index, index
      }
      builtin.module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} {
        // Dispatch kernel in the IREE executable ABI:
        //   i32 fn(environment*, dispatch_state*, workgroup_state*); always returns 0.
        // For each of the three bindings it loads the base pointer out of
        // dispatch_state field [10] (the ptr<ptr<i8>> binding table), casts it to
        // f32*, and asserts 64-byte alignment via (ptr & 63) == 0 + llvm.intr.assume.
        llvm.func internal @simple_mul_dispatch_0(%arg0: !llvm.ptr<struct<"iree_hal_executable_environment_v0_t", (ptr<i32>, ptr<func<i32 (ptr<func<i32 (ptr<i8>)>>, ptr<i8>)>>, ptr<ptr<func<i32 (ptr<i8>)>>>, struct<"iree_hal_processor_v0_t", (array<8 x i64>)>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr<struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr<ptr<i8>>, i32)>> {llvm.align = 16 : i64, llvm.noalias}) -> i32 attributes {sym_visibility = "private"} {
          %0 = llvm.mlir.constant(0 : i32) : i32
          %1 = llvm.mlir.constant(2 : i64) : i64
          %2 = llvm.mlir.constant(1 : i64) : i64
          %3 = llvm.mlir.constant(63 : index) : i64
          %4 = llvm.mlir.constant(0 : index) : i64
          // Binding 0 -> %8 (f32*), with alignment assumption.
          %5 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>>
          %6 = llvm.extractvalue %5[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>
          %7 = llvm.load %6 : !llvm.ptr<ptr<i8>>
          %8 = llvm.bitcast %7 : !llvm.ptr<i8> to !llvm.ptr<f32>
          %9 = llvm.ptrtoint %8 : !llvm.ptr<f32> to i64
          %10 = llvm.and %9, %3 : i64
          %11 = llvm.icmp "eq" %10, %4 : i64
          "llvm.intr.assume"(%11) : (i1) -> ()
          // Binding 1 -> %16 (f32*), with alignment assumption.
          %12 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>>
          %13 = llvm.extractvalue %12[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>
          %14 = llvm.getelementptr %13[%2] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>>
          %15 = llvm.load %14 : !llvm.ptr<ptr<i8>>
          %16 = llvm.bitcast %15 : !llvm.ptr<i8> to !llvm.ptr<f32>
          %17 = llvm.ptrtoint %16 : !llvm.ptr<f32> to i64
          %18 = llvm.and %17, %3 : i64
          %19 = llvm.icmp "eq" %18, %4 : i64
          "llvm.intr.assume"(%19) : (i1) -> ()
          // Binding 2 -> %24 (f32*), with alignment assumption.
          %20 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>>
          %21 = llvm.extractvalue %20[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>
          %22 = llvm.getelementptr %21[%1] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>>
          %23 = llvm.load %22 : !llvm.ptr<ptr<i8>>
          %24 = llvm.bitcast %23 : !llvm.ptr<i8> to !llvm.ptr<f32>
          %25 = llvm.ptrtoint %24 : !llvm.ptr<f32> to i64
          %26 = llvm.and %25, %3 : i64
          %27 = llvm.icmp "eq" %26, %4 : i64
          "llvm.intr.assume"(%27) : (i1) -> ()
          // Vectorized body: out[0:4] = in0[0:4] * in1[0:4] as one 4xf32 op
          // (memory ops carry alignment 4 despite the 64-byte assumption above).
          %28 = llvm.bitcast %8 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>>
          %29 = llvm.load %28 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>>
          %30 = llvm.bitcast %16 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>>
          %31 = llvm.load %30 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>>
          %32 = llvm.fmul %29, %31 : vector<4xf32>
          %33 = llvm.bitcast %24 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>>
          llvm.store %32, %33 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>>
          llvm.return %0 : i32
        }
      }
    }
  }
  // Host-side ABI entry point: validates the two 4-element input buffer views
  // (element type %c553648160_i32 — presumably f32; TODO confirm HAL encoding),
  // allocates a 16-byte result buffer, records a one-shot command buffer that
  // pushes the three bindings and dispatches a single workgroup, submits and
  // waits, then wraps the result in a new buffer view. Falls into
  // util.unreachable when the cached device query says the compiled executable
  // format is unsupported.
  func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
    %c1_i32 = arith.constant 1 : i32
    %c553648160_i32 = arith.constant 553648160 : i32
    %c1 = arith.constant 1 : index
    %c4 = arith.constant 4 : index
    %c16 = arith.constant 16 : index
    %c0 = arith.constant 0 : index
    %c2 = arith.constant 2 : index
    %_device_query_0 = util.global.load @_device_query_0 : i1
    %_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout
    %_executable_simple_mul_dispatch_0 = util.global.load @_executable_simple_mul_dispatch_0 : !hal.executable
    hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32)
    %buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer
    %device = hal.ex.shared_device : !hal.device
    %allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator
    hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage")
    hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32)
    %buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer
    hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage")
    %buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16}
    %cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer
    hal.command_buffer.begin<%cmd : !hal.command_buffer>
    cf.cond_br %_device_query_0, ^bb1, ^bb2
  ^bb1: // pred: ^bb0
    hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%_executable_layout_0 : !hal.executable_layout)[%c0] bindings([
      %c0 = (%buffer : !hal.buffer)[%c0, %c16],
      %c1 = (%buffer_0 : !hal.buffer)[%c0, %c16],
      %c2 = (%buffer_1 : !hal.buffer)[%c0, %c16]
    ])
    hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%_executable_simple_mul_dispatch_0 : !hal.executable)[0] workgroups([%c1, %c1, %c1])
    hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None")
    hal.command_buffer.end<%cmd : !hal.command_buffer>
    hal.ex.submit_and_wait %device, %cmd
    %view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view
    return %view : !hal.buffer_view
  ^bb2: // pred: ^bb0
    util.unreachable "device not supported in the compiled configuration"
  }
}
// -----// IR Dump After mlir::iree_compiler::IREE::HAL::ElideRedundantCommandsPass //----- // | |
// Per-initializer dump after ElideRedundantCommands: queries once at module
// load whether the device supports "embedded-elf-x86_64" executables and
// caches the answer in @_device_query_0.
util.initializer {
  %device = hal.ex.shared_device : !hal.device
  %ok, %value = hal.device.query<%device : !hal.device> key("hal.executable.format" :: "embedded-elf-x86_64") : i1, i1 = false
  util.global.store %value, @_device_query_0 : i1
  util.initializer.return
}
// -----// IR Dump After mlir::iree_compiler::IREE::HAL::ElideRedundantCommandsPass //----- // | |
util.initializer { | |
%device = hal.ex.shared_device : !hal.device | |
%descriptor_set_layout = hal.descriptor_set_layout.create device(%device : !hal.device) usage(push_only) bindings([#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]) : !hal.descriptor_set_layout | |
util.global.store %descriptor_set_layout, @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
util.initializer.return | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::HAL::ElideRedundantCommandsPass //----- // | |
util.initializer { | |
%_descriptor_set_layout_0 = util.global.load @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
%device = hal.ex.shared_device : !hal.device | |
%executable_layout = hal.executable_layout.create device(%device : !hal.device) push_constants(0) layouts([%_descriptor_set_layout_0]) : !hal.executable_layout | |
util.global.store %executable_layout, @_executable_layout_0 : !hal.executable_layout | |
util.initializer.return | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::HAL::ElideRedundantCommandsPass //----- // | |
util.initializer { | |
%_device_query_0 = util.global.load @_device_query_0 : i1 | |
%device = hal.ex.shared_device : !hal.device | |
cf.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%exe = hal.executable.create device(%device : !hal.device) target(@simple_mul_dispatch_0::@embedded_elf_x86_64) layouts([%_executable_layout_0]) : !hal.executable | |
cf.br ^bb3(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%0 = util.null : !hal.executable | |
cf.br ^bb3(%0 : !hal.executable) | |
^bb3(%1: !hal.executable): // 2 preds: ^bb1, ^bb2 | |
util.global.store %1, @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer.return | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::HAL::ElideRedundantCommandsPass //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c1_i32 = arith.constant 1 : i32 | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c16 = arith.constant 16 : index | |
%c0 = arith.constant 0 : index | |
%c2 = arith.constant 2 : index | |
%_device_query_0 = util.global.load @_device_query_0 : i1 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%_executable_simple_mul_dispatch_0 = util.global.load @_executable_simple_mul_dispatch_0 : !hal.executable | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
cf.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%_executable_layout_0 : !hal.executable_layout)[%c0] bindings([ | |
%c0 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_1 : !hal.buffer)[%c0, %c16] | |
]) | |
hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%_executable_simple_mul_dispatch_0 : !hal.executable)[0] workgroups([%c1, %c1, %c1]) | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device, %cmd | |
%view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
^bb2: // pred: ^bb0 | |
util.unreachable "device not supported in the compiled configuration" | |
} | |
// -----// IR Dump After ConvertAffineToStandard //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
#executable_layout = #hal.executable.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]> | |
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}> | |
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert workload_per_wg = [4]> | |
module attributes {hal.device.targets = [#device_target_cpu]} { | |
util.global private @_device_query_0 : i1 | |
util.initializer { | |
%device = hal.ex.shared_device : !hal.device | |
%ok, %value = hal.device.query<%device : !hal.device> key("hal.executable.format" :: "embedded-elf-x86_64") : i1, i1 = false | |
util.global.store %value, @_device_query_0 : i1 | |
util.initializer.return | |
} | |
util.global private @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
util.initializer { | |
%device = hal.ex.shared_device : !hal.device | |
%descriptor_set_layout = hal.descriptor_set_layout.create device(%device : !hal.device) usage(push_only) bindings([#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]) : !hal.descriptor_set_layout | |
util.global.store %descriptor_set_layout, @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
util.initializer.return | |
} | |
util.global private @_executable_layout_0 : !hal.executable_layout | |
util.initializer { | |
%_descriptor_set_layout_0 = util.global.load @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
%device = hal.ex.shared_device : !hal.device | |
%executable_layout = hal.executable_layout.create device(%device : !hal.device) push_constants(0) layouts([%_descriptor_set_layout_0]) : !hal.executable_layout | |
util.global.store %executable_layout, @_executable_layout_0 : !hal.executable_layout | |
util.initializer.return | |
} | |
util.global private @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer { | |
%_device_query_0 = util.global.load @_device_query_0 : i1 | |
%device = hal.ex.shared_device : !hal.device | |
cf.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%exe = hal.executable.create device(%device : !hal.device) target(@simple_mul_dispatch_0::@embedded_elf_x86_64) layouts([%_executable_layout_0]) : !hal.executable | |
cf.br ^bb3(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%0 = util.null : !hal.executable | |
cf.br ^bb3(%0 : !hal.executable) | |
^bb3(%1: !hal.executable): // 2 preds: ^bb1, ^bb2 | |
util.global.store %1, @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer.return | |
} | |
hal.executable private @simple_mul_dispatch_0 { | |
hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ { | |
hal.executable.export public @simple_mul_dispatch_0 ordinal(0) layout(#executable_layout) {translation_info = #translation} { | |
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%c1_0 = arith.constant 1 : index | |
%0 = arith.cmpi sle, %arg1, %c0 : index | |
%1 = arith.subi %c0, %arg1 : index | |
%2 = arith.subi %arg1, %c1_0 : index | |
%3 = arith.select %0, %1, %2 : index | |
%4 = arith.divsi %3, %c4 : index | |
%5 = arith.subi %c0, %4 : index | |
%6 = arith.addi %4, %c1_0 : index | |
%7 = arith.select %0, %5, %6 : index | |
hal.return %7, %c1, %c1 : index, index, index | |
} | |
builtin.module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} { | |
llvm.func internal @simple_mul_dispatch_0(%arg0: !llvm.ptr<struct<"iree_hal_executable_environment_v0_t", (ptr<i32>, ptr<func<i32 (ptr<func<i32 (ptr<i8>)>>, ptr<i8>)>>, ptr<ptr<func<i32 (ptr<i8>)>>>, struct<"iree_hal_processor_v0_t", (array<8 x i64>)>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr<struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr<ptr<i8>>, i32)>> {llvm.align = 16 : i64, llvm.noalias}) -> i32 attributes {sym_visibility = "private"} { | |
%0 = llvm.mlir.constant(0 : i32) : i32 | |
%1 = llvm.mlir.constant(2 : i64) : i64 | |
%2 = llvm.mlir.constant(1 : i64) : i64 | |
%3 = llvm.mlir.constant(63 : index) : i64 | |
%4 = llvm.mlir.constant(0 : index) : i64 | |
%5 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%6 = llvm.extractvalue %5[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%7 = llvm.load %6 : !llvm.ptr<ptr<i8>> | |
%8 = llvm.bitcast %7 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%9 = llvm.ptrtoint %8 : !llvm.ptr<f32> to i64 | |
%10 = llvm.and %9, %3 : i64 | |
%11 = llvm.icmp "eq" %10, %4 : i64 | |
"llvm.intr.assume"(%11) : (i1) -> () | |
%12 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%13 = llvm.extractvalue %12[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%14 = llvm.getelementptr %13[%2] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%15 = llvm.load %14 : !llvm.ptr<ptr<i8>> | |
%16 = llvm.bitcast %15 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%17 = llvm.ptrtoint %16 : !llvm.ptr<f32> to i64 | |
%18 = llvm.and %17, %3 : i64 | |
%19 = llvm.icmp "eq" %18, %4 : i64 | |
"llvm.intr.assume"(%19) : (i1) -> () | |
%20 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%21 = llvm.extractvalue %20[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%22 = llvm.getelementptr %21[%1] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%23 = llvm.load %22 : !llvm.ptr<ptr<i8>> | |
%24 = llvm.bitcast %23 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%25 = llvm.ptrtoint %24 : !llvm.ptr<f32> to i64 | |
%26 = llvm.and %25, %3 : i64 | |
%27 = llvm.icmp "eq" %26, %4 : i64 | |
"llvm.intr.assume"(%27) : (i1) -> () | |
%28 = llvm.bitcast %8 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%29 = llvm.load %28 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%30 = llvm.bitcast %16 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%31 = llvm.load %30 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%32 = llvm.fmul %29, %31 : vector<4xf32> | |
%33 = llvm.bitcast %24 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
llvm.store %32, %33 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
llvm.return %0 : i32 | |
} | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c1_i32 = arith.constant 1 : i32 | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c16 = arith.constant 16 : index | |
%c0 = arith.constant 0 : index | |
%c2 = arith.constant 2 : index | |
%_device_query_0 = util.global.load @_device_query_0 : i1 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%_executable_simple_mul_dispatch_0 = util.global.load @_executable_simple_mul_dispatch_0 : !hal.executable | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
cf.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%_executable_layout_0 : !hal.executable_layout)[%c0] bindings([ | |
%c0 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_1 : !hal.buffer)[%c0, %c16] | |
]) | |
hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%_executable_simple_mul_dispatch_0 : !hal.executable)[0] workgroups([%c1, %c1, %c1]) | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device, %cmd | |
%view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
^bb2: // pred: ^bb0 | |
util.unreachable "device not supported in the compiled configuration" | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::CombineInitializersPass //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
#executable_layout = #hal.executable.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]> | |
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}> | |
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert workload_per_wg = [4]> | |
module attributes {hal.device.targets = [#device_target_cpu]} { | |
util.global private @_device_query_0 : i1 | |
util.global private @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
util.global private @_executable_layout_0 : !hal.executable_layout | |
util.global private @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer { | |
%device = hal.ex.shared_device : !hal.device | |
%ok, %value = hal.device.query<%device : !hal.device> key("hal.executable.format" :: "embedded-elf-x86_64") : i1, i1 = false | |
util.global.store %value, @_device_query_0 : i1 | |
%device_0 = hal.ex.shared_device : !hal.device | |
%descriptor_set_layout = hal.descriptor_set_layout.create device(%device_0 : !hal.device) usage(push_only) bindings([#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]) : !hal.descriptor_set_layout | |
util.global.store %descriptor_set_layout, @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
%_descriptor_set_layout_0 = util.global.load @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
%device_1 = hal.ex.shared_device : !hal.device | |
%executable_layout = hal.executable_layout.create device(%device_1 : !hal.device) push_constants(0) layouts([%_descriptor_set_layout_0]) : !hal.executable_layout | |
util.global.store %executable_layout, @_executable_layout_0 : !hal.executable_layout | |
%_device_query_0 = util.global.load @_device_query_0 : i1 | |
%device_2 = hal.ex.shared_device : !hal.device | |
cf.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%exe = hal.executable.create device(%device_2 : !hal.device) target(@simple_mul_dispatch_0::@embedded_elf_x86_64) layouts([%_executable_layout_0]) : !hal.executable | |
cf.br ^bb3(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%0 = util.null : !hal.executable | |
cf.br ^bb3(%0 : !hal.executable) | |
^bb3(%1: !hal.executable): // 2 preds: ^bb1, ^bb2 | |
util.global.store %1, @_executable_simple_mul_dispatch_0 : !hal.executable | |
cf.br ^bb4 | |
^bb4: // pred: ^bb3 | |
util.initializer.return | |
} | |
hal.executable private @simple_mul_dispatch_0 { | |
hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ { | |
hal.executable.export public @simple_mul_dispatch_0 ordinal(0) layout(#executable_layout) {translation_info = #translation} { | |
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%c1_0 = arith.constant 1 : index | |
%0 = arith.cmpi sle, %arg1, %c0 : index | |
%1 = arith.subi %c0, %arg1 : index | |
%2 = arith.subi %arg1, %c1_0 : index | |
%3 = arith.select %0, %1, %2 : index | |
%4 = arith.divsi %3, %c4 : index | |
%5 = arith.subi %c0, %4 : index | |
%6 = arith.addi %4, %c1_0 : index | |
%7 = arith.select %0, %5, %6 : index | |
hal.return %7, %c1, %c1 : index, index, index | |
} | |
builtin.module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} { | |
llvm.func internal @simple_mul_dispatch_0(%arg0: !llvm.ptr<struct<"iree_hal_executable_environment_v0_t", (ptr<i32>, ptr<func<i32 (ptr<func<i32 (ptr<i8>)>>, ptr<i8>)>>, ptr<ptr<func<i32 (ptr<i8>)>>>, struct<"iree_hal_processor_v0_t", (array<8 x i64>)>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr<struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr<ptr<i8>>, i32)>> {llvm.align = 16 : i64, llvm.noalias}) -> i32 attributes {sym_visibility = "private"} { | |
%0 = llvm.mlir.constant(0 : i32) : i32 | |
%1 = llvm.mlir.constant(2 : i64) : i64 | |
%2 = llvm.mlir.constant(1 : i64) : i64 | |
%3 = llvm.mlir.constant(63 : index) : i64 | |
%4 = llvm.mlir.constant(0 : index) : i64 | |
%5 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%6 = llvm.extractvalue %5[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%7 = llvm.load %6 : !llvm.ptr<ptr<i8>> | |
%8 = llvm.bitcast %7 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%9 = llvm.ptrtoint %8 : !llvm.ptr<f32> to i64 | |
%10 = llvm.and %9, %3 : i64 | |
%11 = llvm.icmp "eq" %10, %4 : i64 | |
"llvm.intr.assume"(%11) : (i1) -> () | |
%12 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%13 = llvm.extractvalue %12[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%14 = llvm.getelementptr %13[%2] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%15 = llvm.load %14 : !llvm.ptr<ptr<i8>> | |
%16 = llvm.bitcast %15 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%17 = llvm.ptrtoint %16 : !llvm.ptr<f32> to i64 | |
%18 = llvm.and %17, %3 : i64 | |
%19 = llvm.icmp "eq" %18, %4 : i64 | |
"llvm.intr.assume"(%19) : (i1) -> () | |
%20 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%21 = llvm.extractvalue %20[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%22 = llvm.getelementptr %21[%1] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%23 = llvm.load %22 : !llvm.ptr<ptr<i8>> | |
%24 = llvm.bitcast %23 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%25 = llvm.ptrtoint %24 : !llvm.ptr<f32> to i64 | |
%26 = llvm.and %25, %3 : i64 | |
%27 = llvm.icmp "eq" %26, %4 : i64 | |
"llvm.intr.assume"(%27) : (i1) -> () | |
%28 = llvm.bitcast %8 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%29 = llvm.load %28 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%30 = llvm.bitcast %16 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%31 = llvm.load %30 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%32 = llvm.fmul %29, %31 : vector<4xf32> | |
%33 = llvm.bitcast %24 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
llvm.store %32, %33 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
llvm.return %0 : i32 | |
} | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c1_i32 = arith.constant 1 : i32 | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c16 = arith.constant 16 : index | |
%c0 = arith.constant 0 : index | |
%c2 = arith.constant 2 : index | |
%_device_query_0 = util.global.load @_device_query_0 : i1 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%_executable_simple_mul_dispatch_0 = util.global.load @_executable_simple_mul_dispatch_0 : !hal.executable | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
cf.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%_executable_layout_0 : !hal.executable_layout)[%c0] bindings([ | |
%c0 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_1 : !hal.buffer)[%c0, %c16] | |
]) | |
hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%_executable_simple_mul_dispatch_0 : !hal.executable)[0] workgroups([%c1, %c1, %c1]) | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device, %cmd | |
%view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
^bb2: // pred: ^bb0 | |
util.unreachable "device not supported in the compiled configuration" | |
} | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
#executable_layout = #hal.executable.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]> | |
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}> | |
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert workload_per_wg = [4]> | |
module attributes {hal.device.targets = [#device_target_cpu]} { | |
util.global private @_device_query_0 : i1 | |
util.global private @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
util.global private @_executable_layout_0 : !hal.executable_layout | |
util.global private @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer { | |
%device = hal.ex.shared_device : !hal.device | |
%ok, %value = hal.device.query<%device : !hal.device> key("hal.executable.format" :: "embedded-elf-x86_64") : i1, i1 = false | |
util.global.store %value, @_device_query_0 : i1 | |
%device_0 = hal.ex.shared_device : !hal.device | |
%descriptor_set_layout = hal.descriptor_set_layout.create device(%device_0 : !hal.device) usage(push_only) bindings([#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]) : !hal.descriptor_set_layout | |
util.global.store %descriptor_set_layout, @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
%_descriptor_set_layout_0 = util.global.load @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
%device_1 = hal.ex.shared_device : !hal.device | |
%executable_layout = hal.executable_layout.create device(%device_1 : !hal.device) push_constants(0) layouts([%_descriptor_set_layout_0]) : !hal.executable_layout | |
util.global.store %executable_layout, @_executable_layout_0 : !hal.executable_layout | |
%_device_query_0 = util.global.load @_device_query_0 : i1 | |
%device_2 = hal.ex.shared_device : !hal.device | |
cf.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%exe = hal.executable.create device(%device_2 : !hal.device) target(@simple_mul_dispatch_0::@embedded_elf_x86_64) layouts([%_executable_layout_0]) : !hal.executable | |
cf.br ^bb3(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%0 = util.null : !hal.executable | |
cf.br ^bb3(%0 : !hal.executable) | |
^bb3(%1: !hal.executable): // 2 preds: ^bb1, ^bb2 | |
util.global.store %1, @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer.return | |
} | |
hal.executable private @simple_mul_dispatch_0 { | |
hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ { | |
hal.executable.export public @simple_mul_dispatch_0 ordinal(0) layout(#executable_layout) {translation_info = #translation} { | |
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = arith.cmpi sle, %arg1, %c0 : index | |
%1 = arith.subi %c0, %arg1 : index | |
%2 = arith.subi %arg1, %c1 : index | |
%3 = arith.select %0, %1, %2 : index | |
%4 = arith.divsi %3, %c4 : index | |
%5 = arith.subi %c0, %4 : index | |
%6 = arith.addi %4, %c1 : index | |
%7 = arith.select %0, %5, %6 : index | |
hal.return %7, %c1, %c1 : index, index, index | |
} | |
builtin.module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} { | |
llvm.func internal @simple_mul_dispatch_0(%arg0: !llvm.ptr<struct<"iree_hal_executable_environment_v0_t", (ptr<i32>, ptr<func<i32 (ptr<func<i32 (ptr<i8>)>>, ptr<i8>)>>, ptr<ptr<func<i32 (ptr<i8>)>>>, struct<"iree_hal_processor_v0_t", (array<8 x i64>)>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr<struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr<ptr<i8>>, i32)>> {llvm.align = 16 : i64, llvm.noalias}) -> i32 attributes {sym_visibility = "private"} { | |
%0 = llvm.mlir.constant(0 : i32) : i32 | |
%1 = llvm.mlir.constant(2 : i64) : i64 | |
%2 = llvm.mlir.constant(1 : i64) : i64 | |
%3 = llvm.mlir.constant(63 : index) : i64 | |
%4 = llvm.mlir.constant(0 : index) : i64 | |
%5 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%6 = llvm.extractvalue %5[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%7 = llvm.load %6 : !llvm.ptr<ptr<i8>> | |
%8 = llvm.bitcast %7 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%9 = llvm.ptrtoint %8 : !llvm.ptr<f32> to i64 | |
%10 = llvm.and %9, %3 : i64 | |
%11 = llvm.icmp "eq" %10, %4 : i64 | |
"llvm.intr.assume"(%11) : (i1) -> () | |
%12 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%13 = llvm.extractvalue %12[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%14 = llvm.getelementptr %13[%2] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%15 = llvm.load %14 : !llvm.ptr<ptr<i8>> | |
%16 = llvm.bitcast %15 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%17 = llvm.ptrtoint %16 : !llvm.ptr<f32> to i64 | |
%18 = llvm.and %17, %3 : i64 | |
%19 = llvm.icmp "eq" %18, %4 : i64 | |
"llvm.intr.assume"(%19) : (i1) -> () | |
%20 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%21 = llvm.extractvalue %20[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%22 = llvm.getelementptr %21[%1] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%23 = llvm.load %22 : !llvm.ptr<ptr<i8>> | |
%24 = llvm.bitcast %23 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%25 = llvm.ptrtoint %24 : !llvm.ptr<f32> to i64 | |
%26 = llvm.and %25, %3 : i64 | |
%27 = llvm.icmp "eq" %26, %4 : i64 | |
"llvm.intr.assume"(%27) : (i1) -> () | |
%28 = llvm.bitcast %8 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%29 = llvm.load %28 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%30 = llvm.bitcast %16 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%31 = llvm.load %30 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%32 = llvm.fmul %29, %31 : vector<4xf32> | |
%33 = llvm.bitcast %24 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
llvm.store %32, %33 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
llvm.return %0 : i32 | |
} | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c1_i32 = arith.constant 1 : i32 | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c16 = arith.constant 16 : index | |
%c0 = arith.constant 0 : index | |
%c2 = arith.constant 2 : index | |
%_device_query_0 = util.global.load @_device_query_0 : i1 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%_executable_simple_mul_dispatch_0 = util.global.load @_executable_simple_mul_dispatch_0 : !hal.executable | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
cf.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%_executable_layout_0 : !hal.executable_layout)[%c0] bindings([ | |
%c0 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_1 : !hal.buffer)[%c0, %c16] | |
]) | |
hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%_executable_simple_mul_dispatch_0 : !hal.executable)[0] workgroups([%c1, %c1, %c1]) | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device, %cmd | |
%view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
^bb2: // pred: ^bb0 | |
util.unreachable "device not supported in the compiled configuration" | |
} | |
} | |
// -----// IR Dump After CSE //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
#executable_layout = #hal.executable.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]> | |
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}> | |
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert workload_per_wg = [4]> | |
module attributes {hal.device.targets = [#device_target_cpu]} { | |
util.global private @_device_query_0 : i1 | |
util.global private @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
util.global private @_executable_layout_0 : !hal.executable_layout | |
util.global private @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer { | |
%device = hal.ex.shared_device : !hal.device | |
%ok, %value = hal.device.query<%device : !hal.device> key("hal.executable.format" :: "embedded-elf-x86_64") : i1, i1 = false | |
util.global.store %value, @_device_query_0 : i1 | |
%descriptor_set_layout = hal.descriptor_set_layout.create device(%device : !hal.device) usage(push_only) bindings([#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]) : !hal.descriptor_set_layout | |
util.global.store %descriptor_set_layout, @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
%_descriptor_set_layout_0 = util.global.load @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
%executable_layout = hal.executable_layout.create device(%device : !hal.device) push_constants(0) layouts([%_descriptor_set_layout_0]) : !hal.executable_layout | |
util.global.store %executable_layout, @_executable_layout_0 : !hal.executable_layout | |
%_device_query_0 = util.global.load @_device_query_0 : i1 | |
cf.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%exe = hal.executable.create device(%device : !hal.device) target(@simple_mul_dispatch_0::@embedded_elf_x86_64) layouts([%_executable_layout_0]) : !hal.executable | |
cf.br ^bb3(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%0 = util.null : !hal.executable | |
cf.br ^bb3(%0 : !hal.executable) | |
^bb3(%1: !hal.executable): // 2 preds: ^bb1, ^bb2 | |
util.global.store %1, @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer.return | |
} | |
hal.executable private @simple_mul_dispatch_0 { | |
hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ { | |
hal.executable.export public @simple_mul_dispatch_0 ordinal(0) layout(#executable_layout) {translation_info = #translation} { | |
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = arith.cmpi sle, %arg1, %c0 : index | |
%1 = arith.subi %c0, %arg1 : index | |
%2 = arith.subi %arg1, %c1 : index | |
%3 = arith.select %0, %1, %2 : index | |
%4 = arith.divsi %3, %c4 : index | |
%5 = arith.subi %c0, %4 : index | |
%6 = arith.addi %4, %c1 : index | |
%7 = arith.select %0, %5, %6 : index | |
hal.return %7, %c1, %c1 : index, index, index | |
} | |
builtin.module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} { | |
llvm.func internal @simple_mul_dispatch_0(%arg0: !llvm.ptr<struct<"iree_hal_executable_environment_v0_t", (ptr<i32>, ptr<func<i32 (ptr<func<i32 (ptr<i8>)>>, ptr<i8>)>>, ptr<ptr<func<i32 (ptr<i8>)>>>, struct<"iree_hal_processor_v0_t", (array<8 x i64>)>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr<struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr<ptr<i8>>, i32)>> {llvm.align = 16 : i64, llvm.noalias}) -> i32 attributes {sym_visibility = "private"} { | |
%0 = llvm.mlir.constant(0 : i32) : i32 | |
%1 = llvm.mlir.constant(2 : i64) : i64 | |
%2 = llvm.mlir.constant(1 : i64) : i64 | |
%3 = llvm.mlir.constant(63 : index) : i64 | |
%4 = llvm.mlir.constant(0 : index) : i64 | |
%5 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%6 = llvm.extractvalue %5[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%7 = llvm.load %6 : !llvm.ptr<ptr<i8>> | |
%8 = llvm.bitcast %7 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%9 = llvm.ptrtoint %8 : !llvm.ptr<f32> to i64 | |
%10 = llvm.and %9, %3 : i64 | |
%11 = llvm.icmp "eq" %10, %4 : i64 | |
"llvm.intr.assume"(%11) : (i1) -> () | |
%12 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%13 = llvm.extractvalue %12[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%14 = llvm.getelementptr %13[%2] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%15 = llvm.load %14 : !llvm.ptr<ptr<i8>> | |
%16 = llvm.bitcast %15 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%17 = llvm.ptrtoint %16 : !llvm.ptr<f32> to i64 | |
%18 = llvm.and %17, %3 : i64 | |
%19 = llvm.icmp "eq" %18, %4 : i64 | |
"llvm.intr.assume"(%19) : (i1) -> () | |
%20 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%21 = llvm.extractvalue %20[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%22 = llvm.getelementptr %21[%1] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%23 = llvm.load %22 : !llvm.ptr<ptr<i8>> | |
%24 = llvm.bitcast %23 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%25 = llvm.ptrtoint %24 : !llvm.ptr<f32> to i64 | |
%26 = llvm.and %25, %3 : i64 | |
%27 = llvm.icmp "eq" %26, %4 : i64 | |
"llvm.intr.assume"(%27) : (i1) -> () | |
%28 = llvm.bitcast %8 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%29 = llvm.load %28 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%30 = llvm.bitcast %16 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%31 = llvm.load %30 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%32 = llvm.fmul %29, %31 : vector<4xf32> | |
%33 = llvm.bitcast %24 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
llvm.store %32, %33 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
llvm.return %0 : i32 | |
} | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c1_i32 = arith.constant 1 : i32 | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c16 = arith.constant 16 : index | |
%c0 = arith.constant 0 : index | |
%c2 = arith.constant 2 : index | |
%_device_query_0 = util.global.load @_device_query_0 : i1 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%_executable_simple_mul_dispatch_0 = util.global.load @_executable_simple_mul_dispatch_0 : !hal.executable | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
cf.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%_executable_layout_0 : !hal.executable_layout)[%c0] bindings([ | |
%c0 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_1 : !hal.buffer)[%c0, %c16] | |
]) | |
hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%_executable_simple_mul_dispatch_0 : !hal.executable)[0] workgroups([%c1, %c1, %c1]) | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device, %cmd | |
%view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
^bb2: // pred: ^bb0 | |
util.unreachable "device not supported in the compiled configuration" | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::SimplifyGlobalAccessesPass //----- // | |
util.initializer { | |
%device = hal.ex.shared_device : !hal.device | |
%ok, %value = hal.device.query<%device : !hal.device> key("hal.executable.format" :: "embedded-elf-x86_64") : i1, i1 = false | |
%descriptor_set_layout = hal.descriptor_set_layout.create device(%device : !hal.device) usage(push_only) bindings([#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]) : !hal.descriptor_set_layout | |
%executable_layout = hal.executable_layout.create device(%device : !hal.device) push_constants(0) layouts([%descriptor_set_layout]) : !hal.executable_layout | |
util.global.store %descriptor_set_layout, @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
util.global.store %value, @_device_query_0 : i1 | |
util.global.store %executable_layout, @_executable_layout_0 : !hal.executable_layout | |
cf.cond_br %value, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%exe = hal.executable.create device(%device : !hal.device) target(@simple_mul_dispatch_0::@embedded_elf_x86_64) layouts([%_executable_layout_0]) : !hal.executable | |
cf.br ^bb3(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%0 = util.null : !hal.executable | |
cf.br ^bb3(%0 : !hal.executable) | |
^bb3(%1: !hal.executable): // 2 preds: ^bb1, ^bb2 | |
util.global.store %1, @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer.return | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::SimplifyGlobalAccessesPass //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%_device_query_0 = util.global.load @_device_query_0 : i1 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%_executable_simple_mul_dispatch_0 = util.global.load @_executable_simple_mul_dispatch_0 : !hal.executable | |
%c1_i32 = arith.constant 1 : i32 | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c16 = arith.constant 16 : index | |
%c0 = arith.constant 0 : index | |
%c2 = arith.constant 2 : index | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
cf.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%_executable_layout_0 : !hal.executable_layout)[%c0] bindings([ | |
%c0 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_1 : !hal.buffer)[%c0, %c16] | |
]) | |
hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%_executable_simple_mul_dispatch_0 : !hal.executable)[0] workgroups([%c1, %c1, %c1]) | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device, %cmd | |
%view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
^bb2: // pred: ^bb0 | |
util.unreachable "device not supported in the compiled configuration" | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::ApplyPatternsPass //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
#executable_layout = #hal.executable.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]> | |
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}> | |
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert workload_per_wg = [4]> | |
module attributes {hal.device.targets = [#device_target_cpu]} { | |
util.global private @_device_query_0 : i1 | |
util.global private @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
util.global private @_executable_layout_0 : !hal.executable_layout | |
util.global private @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer { | |
%device = hal.ex.shared_device : !hal.device | |
%ok, %value = hal.device.query<%device : !hal.device> key("hal.executable.format" :: "embedded-elf-x86_64") : i1, i1 = false | |
%descriptor_set_layout = hal.descriptor_set_layout.create device(%device : !hal.device) usage(push_only) bindings([#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]) : !hal.descriptor_set_layout | |
%executable_layout = hal.executable_layout.create device(%device : !hal.device) push_constants(0) layouts([%descriptor_set_layout]) : !hal.executable_layout | |
util.global.store %descriptor_set_layout, @_descriptor_set_layout_0 : !hal.descriptor_set_layout | |
util.global.store %value, @_device_query_0 : i1 | |
util.global.store %executable_layout, @_executable_layout_0 : !hal.executable_layout | |
cf.cond_br %value, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%exe = hal.executable.create device(%device : !hal.device) target(@simple_mul_dispatch_0::@embedded_elf_x86_64) layouts([%_executable_layout_0]) : !hal.executable | |
cf.br ^bb3(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%0 = util.null : !hal.executable | |
cf.br ^bb3(%0 : !hal.executable) | |
^bb3(%1: !hal.executable): // 2 preds: ^bb1, ^bb2 | |
util.global.store %1, @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer.return | |
} | |
hal.executable private @simple_mul_dispatch_0 { | |
hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ { | |
hal.executable.export public @simple_mul_dispatch_0 ordinal(0) layout(#executable_layout) {translation_info = #translation} { | |
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = arith.cmpi sle, %arg1, %c0 : index | |
%1 = arith.subi %c0, %arg1 : index | |
%2 = arith.subi %arg1, %c1 : index | |
%3 = arith.select %0, %1, %2 : index | |
%4 = arith.divsi %3, %c4 : index | |
%5 = arith.subi %c0, %4 : index | |
%6 = arith.addi %4, %c1 : index | |
%7 = arith.select %0, %5, %6 : index | |
hal.return %7, %c1, %c1 : index, index, index | |
} | |
builtin.module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} { | |
llvm.func internal @simple_mul_dispatch_0(%arg0: !llvm.ptr<struct<"iree_hal_executable_environment_v0_t", (ptr<i32>, ptr<func<i32 (ptr<func<i32 (ptr<i8>)>>, ptr<i8>)>>, ptr<ptr<func<i32 (ptr<i8>)>>>, struct<"iree_hal_processor_v0_t", (array<8 x i64>)>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr<struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr<ptr<i8>>, i32)>> {llvm.align = 16 : i64, llvm.noalias}) -> i32 attributes {sym_visibility = "private"} { | |
%0 = llvm.mlir.constant(0 : i32) : i32 | |
%1 = llvm.mlir.constant(2 : i64) : i64 | |
%2 = llvm.mlir.constant(1 : i64) : i64 | |
%3 = llvm.mlir.constant(63 : index) : i64 | |
%4 = llvm.mlir.constant(0 : index) : i64 | |
%5 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%6 = llvm.extractvalue %5[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%7 = llvm.load %6 : !llvm.ptr<ptr<i8>> | |
%8 = llvm.bitcast %7 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%9 = llvm.ptrtoint %8 : !llvm.ptr<f32> to i64 | |
%10 = llvm.and %9, %3 : i64 | |
%11 = llvm.icmp "eq" %10, %4 : i64 | |
"llvm.intr.assume"(%11) : (i1) -> () | |
%12 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%13 = llvm.extractvalue %12[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%14 = llvm.getelementptr %13[%2] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%15 = llvm.load %14 : !llvm.ptr<ptr<i8>> | |
%16 = llvm.bitcast %15 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%17 = llvm.ptrtoint %16 : !llvm.ptr<f32> to i64 | |
%18 = llvm.and %17, %3 : i64 | |
%19 = llvm.icmp "eq" %18, %4 : i64 | |
"llvm.intr.assume"(%19) : (i1) -> () | |
%20 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%21 = llvm.extractvalue %20[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%22 = llvm.getelementptr %21[%1] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%23 = llvm.load %22 : !llvm.ptr<ptr<i8>> | |
%24 = llvm.bitcast %23 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%25 = llvm.ptrtoint %24 : !llvm.ptr<f32> to i64 | |
%26 = llvm.and %25, %3 : i64 | |
%27 = llvm.icmp "eq" %26, %4 : i64 | |
"llvm.intr.assume"(%27) : (i1) -> () | |
%28 = llvm.bitcast %8 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%29 = llvm.load %28 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%30 = llvm.bitcast %16 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%31 = llvm.load %30 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%32 = llvm.fmul %29, %31 : vector<4xf32> | |
%33 = llvm.bitcast %24 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
llvm.store %32, %33 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
llvm.return %0 : i32 | |
} | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c2 = arith.constant 2 : index | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
%_device_query_0 = util.global.load @_device_query_0 : i1 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%_executable_simple_mul_dispatch_0 = util.global.load @_executable_simple_mul_dispatch_0 : !hal.executable | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
cf.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%_executable_layout_0 : !hal.executable_layout)[%c0] bindings([ | |
%c0 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_1 : !hal.buffer)[%c0, %c16] | |
]) | |
hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%_executable_simple_mul_dispatch_0 : !hal.executable)[0] workgroups([%c1, %c1, %c1]) | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device, %cmd | |
%view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
^bb2: // pred: ^bb0 | |
util.unreachable "device not supported in the compiled configuration" | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::FoldGlobalsPass //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
#executable_layout = #hal.executable.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]> | |
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}> | |
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert workload_per_wg = [4]> | |
module attributes {hal.device.targets = [#device_target_cpu]} { | |
util.global private @_device_query_0 : i1 | |
util.global private @_executable_layout_0 : !hal.executable_layout | |
util.global private @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer { | |
%device = hal.ex.shared_device : !hal.device | |
%ok, %value = hal.device.query<%device : !hal.device> key("hal.executable.format" :: "embedded-elf-x86_64") : i1, i1 = false | |
%descriptor_set_layout = hal.descriptor_set_layout.create device(%device : !hal.device) usage(push_only) bindings([#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]) : !hal.descriptor_set_layout | |
%executable_layout = hal.executable_layout.create device(%device : !hal.device) push_constants(0) layouts([%descriptor_set_layout]) : !hal.executable_layout | |
util.global.store %value, @_device_query_0 : i1 | |
util.global.store %executable_layout, @_executable_layout_0 : !hal.executable_layout | |
cf.cond_br %value, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%exe = hal.executable.create device(%device : !hal.device) target(@simple_mul_dispatch_0::@embedded_elf_x86_64) layouts([%_executable_layout_0]) : !hal.executable | |
cf.br ^bb3(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%0 = util.null : !hal.executable | |
cf.br ^bb3(%0 : !hal.executable) | |
^bb3(%1: !hal.executable): // 2 preds: ^bb1, ^bb2 | |
util.global.store %1, @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer.return | |
} | |
hal.executable private @simple_mul_dispatch_0 { | |
hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ { | |
hal.executable.export public @simple_mul_dispatch_0 ordinal(0) layout(#executable_layout) {translation_info = #translation} { | |
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = arith.cmpi sle, %arg1, %c0 : index | |
%1 = arith.subi %c0, %arg1 : index | |
%2 = arith.subi %arg1, %c1 : index | |
%3 = arith.select %0, %1, %2 : index | |
%4 = arith.divsi %3, %c4 : index | |
%5 = arith.subi %c0, %4 : index | |
%6 = arith.addi %4, %c1 : index | |
%7 = arith.select %0, %5, %6 : index | |
hal.return %7, %c1, %c1 : index, index, index | |
} | |
builtin.module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} { | |
llvm.func internal @simple_mul_dispatch_0(%arg0: !llvm.ptr<struct<"iree_hal_executable_environment_v0_t", (ptr<i32>, ptr<func<i32 (ptr<func<i32 (ptr<i8>)>>, ptr<i8>)>>, ptr<ptr<func<i32 (ptr<i8>)>>>, struct<"iree_hal_processor_v0_t", (array<8 x i64>)>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr<struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr<ptr<i8>>, i32)>> {llvm.align = 16 : i64, llvm.noalias}) -> i32 attributes {sym_visibility = "private"} { | |
%0 = llvm.mlir.constant(0 : i32) : i32 | |
%1 = llvm.mlir.constant(2 : i64) : i64 | |
%2 = llvm.mlir.constant(1 : i64) : i64 | |
%3 = llvm.mlir.constant(63 : index) : i64 | |
%4 = llvm.mlir.constant(0 : index) : i64 | |
%5 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%6 = llvm.extractvalue %5[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%7 = llvm.load %6 : !llvm.ptr<ptr<i8>> | |
%8 = llvm.bitcast %7 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%9 = llvm.ptrtoint %8 : !llvm.ptr<f32> to i64 | |
%10 = llvm.and %9, %3 : i64 | |
%11 = llvm.icmp "eq" %10, %4 : i64 | |
"llvm.intr.assume"(%11) : (i1) -> () | |
%12 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%13 = llvm.extractvalue %12[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%14 = llvm.getelementptr %13[%2] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%15 = llvm.load %14 : !llvm.ptr<ptr<i8>> | |
%16 = llvm.bitcast %15 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%17 = llvm.ptrtoint %16 : !llvm.ptr<f32> to i64 | |
%18 = llvm.and %17, %3 : i64 | |
%19 = llvm.icmp "eq" %18, %4 : i64 | |
"llvm.intr.assume"(%19) : (i1) -> () | |
%20 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%21 = llvm.extractvalue %20[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%22 = llvm.getelementptr %21[%1] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%23 = llvm.load %22 : !llvm.ptr<ptr<i8>> | |
%24 = llvm.bitcast %23 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%25 = llvm.ptrtoint %24 : !llvm.ptr<f32> to i64 | |
%26 = llvm.and %25, %3 : i64 | |
%27 = llvm.icmp "eq" %26, %4 : i64 | |
"llvm.intr.assume"(%27) : (i1) -> () | |
%28 = llvm.bitcast %8 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%29 = llvm.load %28 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%30 = llvm.bitcast %16 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%31 = llvm.load %30 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%32 = llvm.fmul %29, %31 : vector<4xf32> | |
%33 = llvm.bitcast %24 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
llvm.store %32, %33 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
llvm.return %0 : i32 | |
} | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c2 = arith.constant 2 : index | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
%_device_query_0 = util.global.load @_device_query_0 : i1 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%_executable_simple_mul_dispatch_0 = util.global.load @_executable_simple_mul_dispatch_0 : !hal.executable | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
cf.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%_executable_layout_0 : !hal.executable_layout)[%c0] bindings([ | |
%c0 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_1 : !hal.buffer)[%c0, %c16] | |
]) | |
hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%_executable_simple_mul_dispatch_0 : !hal.executable)[0] workgroups([%c1, %c1, %c1]) | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device, %cmd | |
%view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
^bb2: // pred: ^bb0 | |
util.unreachable "device not supported in the compiled configuration" | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::Util::(anonymous namespace)::FuseGlobalsPass //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
#executable_layout = #hal.executable.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]> | |
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}> | |
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert workload_per_wg = [4]> | |
module attributes {hal.device.targets = [#device_target_cpu]} { | |
util.global private @_device_query_0 : i1 | |
util.global private @_executable_layout_0 : !hal.executable_layout | |
util.global private @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer { | |
%device = hal.ex.shared_device : !hal.device | |
%ok, %value = hal.device.query<%device : !hal.device> key("hal.executable.format" :: "embedded-elf-x86_64") : i1, i1 = false | |
%descriptor_set_layout = hal.descriptor_set_layout.create device(%device : !hal.device) usage(push_only) bindings([#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]) : !hal.descriptor_set_layout | |
%executable_layout = hal.executable_layout.create device(%device : !hal.device) push_constants(0) layouts([%descriptor_set_layout]) : !hal.executable_layout | |
util.global.store %value, @_device_query_0 : i1 | |
util.global.store %executable_layout, @_executable_layout_0 : !hal.executable_layout | |
cf.cond_br %value, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%exe = hal.executable.create device(%device : !hal.device) target(@simple_mul_dispatch_0::@embedded_elf_x86_64) layouts([%_executable_layout_0]) : !hal.executable | |
cf.br ^bb3(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%0 = util.null : !hal.executable | |
cf.br ^bb3(%0 : !hal.executable) | |
^bb3(%1: !hal.executable): // 2 preds: ^bb1, ^bb2 | |
util.global.store %1, @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer.return | |
} | |
hal.executable private @simple_mul_dispatch_0 { | |
hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ { | |
hal.executable.export public @simple_mul_dispatch_0 ordinal(0) layout(#executable_layout) {translation_info = #translation} { | |
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index): | |
%c1 = arith.constant 1 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%0 = arith.cmpi sle, %arg1, %c0 : index | |
%1 = arith.subi %c0, %arg1 : index | |
%2 = arith.subi %arg1, %c1 : index | |
%3 = arith.select %0, %1, %2 : index | |
%4 = arith.divsi %3, %c4 : index | |
%5 = arith.subi %c0, %4 : index | |
%6 = arith.addi %4, %c1 : index | |
%7 = arith.select %0, %5, %6 : index | |
hal.return %7, %c1, %c1 : index, index, index | |
} | |
builtin.module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} { | |
llvm.func internal @simple_mul_dispatch_0(%arg0: !llvm.ptr<struct<"iree_hal_executable_environment_v0_t", (ptr<i32>, ptr<func<i32 (ptr<func<i32 (ptr<i8>)>>, ptr<i8>)>>, ptr<ptr<func<i32 (ptr<i8>)>>>, struct<"iree_hal_processor_v0_t", (array<8 x i64>)>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg1: !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> {llvm.align = 16 : i64, llvm.noalias}, %arg2: !llvm.ptr<struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr<ptr<i8>>, i32)>> {llvm.align = 16 : i64, llvm.noalias}) -> i32 attributes {sym_visibility = "private"} { | |
%0 = llvm.mlir.constant(0 : i32) : i32 | |
%1 = llvm.mlir.constant(2 : i64) : i64 | |
%2 = llvm.mlir.constant(1 : i64) : i64 | |
%3 = llvm.mlir.constant(63 : index) : i64 | |
%4 = llvm.mlir.constant(0 : index) : i64 | |
%5 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%6 = llvm.extractvalue %5[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%7 = llvm.load %6 : !llvm.ptr<ptr<i8>> | |
%8 = llvm.bitcast %7 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%9 = llvm.ptrtoint %8 : !llvm.ptr<f32> to i64 | |
%10 = llvm.and %9, %3 : i64 | |
%11 = llvm.icmp "eq" %10, %4 : i64 | |
"llvm.intr.assume"(%11) : (i1) -> () | |
%12 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%13 = llvm.extractvalue %12[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%14 = llvm.getelementptr %13[%2] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%15 = llvm.load %14 : !llvm.ptr<ptr<i8>> | |
%16 = llvm.bitcast %15 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%17 = llvm.ptrtoint %16 : !llvm.ptr<f32> to i64 | |
%18 = llvm.and %17, %3 : i64 | |
%19 = llvm.icmp "eq" %18, %4 : i64 | |
"llvm.intr.assume"(%19) : (i1) -> () | |
%20 = llvm.load %arg1 : !llvm.ptr<struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)>> | |
%21 = llvm.extractvalue %20[10] : !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr<i32>, ptr<ptr<i8>>, ptr<i64>)> | |
%22 = llvm.getelementptr %21[%1] : (!llvm.ptr<ptr<i8>>, i64) -> !llvm.ptr<ptr<i8>> | |
%23 = llvm.load %22 : !llvm.ptr<ptr<i8>> | |
%24 = llvm.bitcast %23 : !llvm.ptr<i8> to !llvm.ptr<f32> | |
%25 = llvm.ptrtoint %24 : !llvm.ptr<f32> to i64 | |
%26 = llvm.and %25, %3 : i64 | |
%27 = llvm.icmp "eq" %26, %4 : i64 | |
"llvm.intr.assume"(%27) : (i1) -> () | |
%28 = llvm.bitcast %8 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%29 = llvm.load %28 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%30 = llvm.bitcast %16 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
%31 = llvm.load %30 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
%32 = llvm.fmul %29, %31 : vector<4xf32> | |
%33 = llvm.bitcast %24 : !llvm.ptr<f32> to !llvm.ptr<vector<4xf32>> | |
llvm.store %32, %33 {alignment = 4 : i64} : !llvm.ptr<vector<4xf32>> | |
llvm.return %0 : i32 | |
} | |
} | |
} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c2 = arith.constant 2 : index | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
%_device_query_0 = util.global.load @_device_query_0 : i1 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%_executable_simple_mul_dispatch_0 = util.global.load @_executable_simple_mul_dispatch_0 : !hal.executable | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
cf.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%_executable_layout_0 : !hal.executable_layout)[%c0] bindings([ | |
%c0 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_1 : !hal.buffer)[%c0, %c16] | |
]) | |
hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%_executable_simple_mul_dispatch_0 : !hal.executable)[0] workgroups([%c1, %c1, %c1]) | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device, %cmd | |
%view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
^bb2: // pred: ^bb0 | |
util.unreachable "device not supported in the compiled configuration" | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::HAL::SerializeTargetExecutablesPass //----- // | |
hal.executable private @simple_mul_dispatch_0 { | |
hal.executable.binary public @embedded_elf_x86_64 attributes {data = dense<"0x7F454C4602010100000000000000000003003E000100000030130000000000004000000000000000E00900000000000000000000400038000700400014001200060000000400000040000000000000004000000000000000400000000000000088010000000000008801000000000000080000000000000001000000040000000000000000000000000000000000000000000000000000002D030000000000002D030000000000000010000000000000010000000500000030030000000000003013000000000000301300000000000031000000000000003100000000000000001000000000000001000000060000007003000000000000702300000000000070230000000000003801000000000000380100000000000000100000000000000200000006000000E803000000000000E823000000000000E823000000000000C000000000000000C000000000000000080000000000000052E57464040000007003000000000000702300000000000070230000000000003801000000000000900C000000000000010000000000000051E574640600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000120006005013000000000000110000000000000002000000020000000000000001000000000000000000000000697265655F68616C5F65786563757461626C655F6C6962726172795F7175657279000000000000782300000000000008000000000000001003000000000000882300000000000008000000000000003013000000000000902300000000000008000000000000001003000000000000982300000000000008000000000000002C03000000000000A02300000000000008000000000000007023000000000000C02300000000000008000000000000008823000000000000C82300000000000008000000000000002803000000000000D02300000000000008000000000000009023000000000000D8230000000000000800000000000000982300000000000073696D706C655F6D756C5F64697370617463685F300000000000000000000000554889E5488B4620488B08488B5008488B40100F28010F59020F290031C05DC331C083FF02488D0D44100000480F44C1C3000000000000000000000000000000020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000001E000000000000000800000000000000FBFFFF6F000000000100000000000000070000000000000038020000000000000800000000000000D80000000000000009000000000000001800000000000000F9FFFF6F0000000009000000000000000600000000000000C8010000000000000B000000000000001800000000000000050000000000000010020000000000000A0000000000000023000000000000000400000000000000F80100000000000000000000000000000000000000000000011101250E1305030E10171B0EB44219110112060000022E006E0E030E3F19200B0000032E0111011206401831130000041D00311311011206580B590B570B000000590000000400000000000801000000000200070000000000000005000000301300000000000020000000020700000007000000010330130000000000002000000001562A000000042A0000003F130000000000001100000002010100006D6C6972002F0073696D706C655F6D756C5F64697370617463685F3000280000000200000000005D0000002A00000073696D706C655F6D756C5F64697370617463685F3000000000000E0000000200000000005D0000000000000000000000000014000000FFFFFFFF040008000178100C0708900100000000240000000000000030130000000000002000000000000000410E108602430D065B0C070800000000140000000000000050130000000000001100000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000D300000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000D300000000000000140000000000000000000000000000000900000000000000140000000000000000000000000000002800000000000000140000000000000000000000000000001E00000000000000140000000000000000000000000000002800000000000000140000000000000000000000000000006F00000000000000140000000000000000000000000000000301000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000006F
0000000000000014000000000000000000000000000000B8000000000000001400000000000000000000000000000052010000000000001400000000000000000000000000000042030000000000009E00000004007F000000010101FB0E0D0001010101000000010000012F7573722F6C6F63616C2F676F6F676C652F686F6D652F62656E76616E696B2F7372632F6972656500003C756E6B6E6F776E3E0001000072756E74696D652F7372632F697265652F72756E74696D652F74657374646174612F73696D706C655F6D756C2E6D6C69720001000000000902301300000000000011040205010A4B0508AD080001014952454500000000000000000000000000000000000000000000000000002300000000020800E8230000000000000000000000000000010000001200060050130000000000001100000000000000002E64796E73796D002E68617368002E64796E737472002E72656C612E64796E002E726F64617461002E74657874002E646174612E72656C2E726F002E64796E616D6963002E64656275675F616262726576002E64656275675F696E666F002E64656275675F737472002E64656275675F7075626E616D6573002E64656275675F7075627479706573002E64656275675F6672616D65002E64656275675F6C696E65002E636F6D6D656E74002E73796D746162002E7368737472746162002E7374727461620000697265655F68616C5F65786563757461626C655F6C6962726172795F7175657279005F44594E414D49430000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000B0000000200000000000000C801000000000000C801000000000000300000000000000003000000010000000800000000000000180000000000000009000000050000000200000000000000F801000000000000F80100000000000018000000000000000100000000000000040000000000000004000000000000000F0000000300000002000000000000001002000000000000100200000000000023000000000000000000000000000000010000000000000000000000000000001700000004000000020000000000000038020000000000003802000000000000D80000000000000001000000000000000800000000000000180000000000000021000000010000000200000000000000100300000000000010030000000000001D0000000000000000000000000000000800000000000000000000000000000029000000010000000600000000000000301300000000000030030000000000003100000000000000000000000000000010
0000000000000000000000000000002F0000000100000003000000000000007023000000000000700300000000000078000000000000000000000000000000100000000000000000000000000000003C000000060000000300000000000000E823000000000000E803000000000000C000000000000000030000000000000008000000000000001000000000000000450000000100000000000000000000000000000000000000A8040000000000004200000000000000000000000000000001000000000000000000000000000000530000000100000000000000000000000000000000000000EA040000000000005D000000000000000000000000000000010000000000000000000000000000005F000000010000003000000000000000000000000000000047050000000000001D000000000000000000000000000000010000000000000001000000000000006A000000010000000000000000000000000000000000000064050000000000002C000000000000000000000000000000010000000000000000000000000000007A0000000100000000000000000000000000000000000000900500000000000012000000000000000000000000000000010000000000000000000000000000008A0000000100000000000000000000000000000000000000A8050000000000005002000000000000000000000000000008000000000000000000000000000000970000000100000000000000000000000000000000000000F807000000000000A200000000000000000000000000000001000000000000000000000000000000A300000001000000300000000000000000000000000000009A080000000000000500000000000000000000000000000001000000000000000100000000000000AC0000000200000000000000000000000000000000000000A0080000000000004800000000000000130000000200000008000000000000001800000000000000B40000000300000000000000000000000000000000000000E808000000000000C600000000000000000000000000000001000000000000000000000000000000BE0000000300000000000000000000000000000000000000AE090000000000002C00000000000000000000000000000001000000000000000000000000000000"> : vector<3808xi8>, format = "embedded-elf-x86_64", mime_type = "application/x-elf"} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::HAL::SerializeExecutablesPass //----- // | |
hal.executable private @simple_mul_dispatch_0 { | |
hal.executable.binary public @embedded_elf_x86_64 attributes {data = dense<"0x7F454C4602010100000000000000000003003E000100000030130000000000004000000000000000E00900000000000000000000400038000700400014001200060000000400000040000000000000004000000000000000400000000000000088010000000000008801000000000000080000000000000001000000040000000000000000000000000000000000000000000000000000002D030000000000002D030000000000000010000000000000010000000500000030030000000000003013000000000000301300000000000031000000000000003100000000000000001000000000000001000000060000007003000000000000702300000000000070230000000000003801000000000000380100000000000000100000000000000200000006000000E803000000000000E823000000000000E823000000000000C000000000000000C000000000000000080000000000000052E57464040000007003000000000000702300000000000070230000000000003801000000000000900C000000000000010000000000000051E574640600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000120006005013000000000000110000000000000002000000020000000000000001000000000000000000000000697265655F68616C5F65786563757461626C655F6C6962726172795F7175657279000000000000782300000000000008000000000000001003000000000000882300000000000008000000000000003013000000000000902300000000000008000000000000001003000000000000982300000000000008000000000000002C03000000000000A02300000000000008000000000000007023000000000000C02300000000000008000000000000008823000000000000C82300000000000008000000000000002803000000000000D02300000000000008000000000000009023000000000000D8230000000000000800000000000000982300000000000073696D706C655F6D756C5F64697370617463685F300000000000000000000000554889E5488B4620488B08488B5008488B40100F28010F59020F290031C05DC331C083FF02488D0D44100000480F44C1C3000000000000000000000000000000020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000001E000000000000000800000000000000FBFFFF6F000000000100000000000000070000000000000038020000000000000800000000000000D80000000000000009000000000000001800000000000000F9FFFF6F0000000009000000000000000600000000000000C8010000000000000B000000000000001800000000000000050000000000000010020000000000000A0000000000000023000000000000000400000000000000F80100000000000000000000000000000000000000000000011101250E1305030E10171B0EB44219110112060000022E006E0E030E3F19200B0000032E0111011206401831130000041D00311311011206580B590B570B000000590000000400000000000801000000000200070000000000000005000000301300000000000020000000020700000007000000010330130000000000002000000001562A000000042A0000003F130000000000001100000002010100006D6C6972002F0073696D706C655F6D756C5F64697370617463685F3000280000000200000000005D0000002A00000073696D706C655F6D756C5F64697370617463685F3000000000000E0000000200000000005D0000000000000000000000000014000000FFFFFFFF040008000178100C0708900100000000240000000000000030130000000000002000000000000000410E108602430D065B0C070800000000140000000000000050130000000000001100000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000D300000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000D300000000000000140000000000000000000000000000000900000000000000140000000000000000000000000000002800000000000000140000000000000000000000000000001E00000000000000140000000000000000000000000000002800000000000000140000000000000000000000000000006F00000000000000140000000000000000000000000000000301000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000006F
0000000000000014000000000000000000000000000000B8000000000000001400000000000000000000000000000052010000000000001400000000000000000000000000000042030000000000009E00000004007F000000010101FB0E0D0001010101000000010000012F7573722F6C6F63616C2F676F6F676C652F686F6D652F62656E76616E696B2F7372632F6972656500003C756E6B6E6F776E3E0001000072756E74696D652F7372632F697265652F72756E74696D652F74657374646174612F73696D706C655F6D756C2E6D6C69720001000000000902301300000000000011040205010A4B0508AD080001014952454500000000000000000000000000000000000000000000000000002300000000020800E8230000000000000000000000000000010000001200060050130000000000001100000000000000002E64796E73796D002E68617368002E64796E737472002E72656C612E64796E002E726F64617461002E74657874002E646174612E72656C2E726F002E64796E616D6963002E64656275675F616262726576002E64656275675F696E666F002E64656275675F737472002E64656275675F7075626E616D6573002E64656275675F7075627479706573002E64656275675F6672616D65002E64656275675F6C696E65002E636F6D6D656E74002E73796D746162002E7368737472746162002E7374727461620000697265655F68616C5F65786563757461626C655F6C6962726172795F7175657279005F44594E414D49430000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000B0000000200000000000000C801000000000000C801000000000000300000000000000003000000010000000800000000000000180000000000000009000000050000000200000000000000F801000000000000F80100000000000018000000000000000100000000000000040000000000000004000000000000000F0000000300000002000000000000001002000000000000100200000000000023000000000000000000000000000000010000000000000000000000000000001700000004000000020000000000000038020000000000003802000000000000D80000000000000001000000000000000800000000000000180000000000000021000000010000000200000000000000100300000000000010030000000000001D0000000000000000000000000000000800000000000000000000000000000029000000010000000600000000000000301300000000000030030000000000003100000000000000000000000000000010
0000000000000000000000000000002F0000000100000003000000000000007023000000000000700300000000000078000000000000000000000000000000100000000000000000000000000000003C000000060000000300000000000000E823000000000000E803000000000000C000000000000000030000000000000008000000000000001000000000000000450000000100000000000000000000000000000000000000A8040000000000004200000000000000000000000000000001000000000000000000000000000000530000000100000000000000000000000000000000000000EA040000000000005D000000000000000000000000000000010000000000000000000000000000005F000000010000003000000000000000000000000000000047050000000000001D000000000000000000000000000000010000000000000001000000000000006A000000010000000000000000000000000000000000000064050000000000002C000000000000000000000000000000010000000000000000000000000000007A0000000100000000000000000000000000000000000000900500000000000012000000000000000000000000000000010000000000000000000000000000008A0000000100000000000000000000000000000000000000A8050000000000005002000000000000000000000000000008000000000000000000000000000000970000000100000000000000000000000000000000000000F807000000000000A200000000000000000000000000000001000000000000000000000000000000A300000001000000300000000000000000000000000000009A080000000000000500000000000000000000000000000001000000000000000100000000000000AC0000000200000000000000000000000000000000000000A0080000000000004800000000000000130000000200000008000000000000001800000000000000B40000000300000000000000000000000000000000000000E808000000000000C600000000000000000000000000000001000000000000000000000000000000BE0000000300000000000000000000000000000000000000AE090000000000002C00000000000000000000000000000001000000000000000000000000000000"> : vector<3808xi8>, format = "embedded-elf-x86_64", mime_type = "application/x-elf"} | |
} | |
// -----// IR Dump After SymbolDCE //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
module attributes {hal.device.targets = [#device_target_cpu]} { | |
util.global private @_device_query_0 : i1 | |
util.global private @_executable_layout_0 : !hal.executable_layout | |
util.global private @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer { | |
%device = hal.ex.shared_device : !hal.device | |
%ok, %value = hal.device.query<%device : !hal.device> key("hal.executable.format" :: "embedded-elf-x86_64") : i1, i1 = false | |
%descriptor_set_layout = hal.descriptor_set_layout.create device(%device : !hal.device) usage(push_only) bindings([#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]) : !hal.descriptor_set_layout | |
%executable_layout = hal.executable_layout.create device(%device : !hal.device) push_constants(0) layouts([%descriptor_set_layout]) : !hal.executable_layout | |
util.global.store %value, @_device_query_0 : i1 | |
util.global.store %executable_layout, @_executable_layout_0 : !hal.executable_layout | |
cf.cond_br %value, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%exe = hal.executable.create device(%device : !hal.device) target(@simple_mul_dispatch_0::@embedded_elf_x86_64) layouts([%_executable_layout_0]) : !hal.executable | |
cf.br ^bb3(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%0 = util.null : !hal.executable | |
cf.br ^bb3(%0 : !hal.executable) | |
^bb3(%1: !hal.executable): // 2 preds: ^bb1, ^bb2 | |
util.global.store %1, @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer.return | |
} | |
hal.executable private @simple_mul_dispatch_0 { | |
hal.executable.binary public @embedded_elf_x86_64 attributes {data = dense<"0x7F454C4602010100000000000000000003003E000100000030130000000000004000000000000000E00900000000000000000000400038000700400014001200060000000400000040000000000000004000000000000000400000000000000088010000000000008801000000000000080000000000000001000000040000000000000000000000000000000000000000000000000000002D030000000000002D030000000000000010000000000000010000000500000030030000000000003013000000000000301300000000000031000000000000003100000000000000001000000000000001000000060000007003000000000000702300000000000070230000000000003801000000000000380100000000000000100000000000000200000006000000E803000000000000E823000000000000E823000000000000C000000000000000C000000000000000080000000000000052E57464040000007003000000000000702300000000000070230000000000003801000000000000900C000000000000010000000000000051E574640600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000120006005013000000000000110000000000000002000000020000000000000001000000000000000000000000697265655F68616C5F65786563757461626C655F6C6962726172795F7175657279000000000000782300000000000008000000000000001003000000000000882300000000000008000000000000003013000000000000902300000000000008000000000000001003000000000000982300000000000008000000000000002C03000000000000A02300000000000008000000000000007023000000000000C02300000000000008000000000000008823000000000000C82300000000000008000000000000002803000000000000D02300000000000008000000000000009023000000000000D8230000000000000800000000000000982300000000000073696D706C655F6D756C5F64697370617463685F300000000000000000000000554889E5488B4620488B08488B5008488B40100F28010F59020F290031C05DC331C083FF02488D0D44100000480F44C1C3000000000000000000000000000000020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000001E000000000000000800000000000000FBFFFF6F000000000100000000000000070000000000000038020000000000000800000000000000D80000000000000009000000000000001800000000000000F9FFFF6F0000000009000000000000000600000000000000C8010000000000000B000000000000001800000000000000050000000000000010020000000000000A0000000000000023000000000000000400000000000000F80100000000000000000000000000000000000000000000011101250E1305030E10171B0EB44219110112060000022E006E0E030E3F19200B0000032E0111011206401831130000041D00311311011206580B590B570B000000590000000400000000000801000000000200070000000000000005000000301300000000000020000000020700000007000000010330130000000000002000000001562A000000042A0000003F130000000000001100000002010100006D6C6972002F0073696D706C655F6D756C5F64697370617463685F3000280000000200000000005D0000002A00000073696D706C655F6D756C5F64697370617463685F3000000000000E0000000200000000005D0000000000000000000000000014000000FFFFFFFF040008000178100C0708900100000000240000000000000030130000000000002000000000000000410E108602430D065B0C070800000000140000000000000050130000000000001100000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000D300000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000D300000000000000140000000000000000000000000000000900000000000000140000000000000000000000000000002800000000000000140000000000000000000000000000001E00000000000000140000000000000000000000000000002800000000000000140000000000000000000000000000006F00000000000000140000000000000000000000000000000301000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000006F
0000000000000014000000000000000000000000000000B8000000000000001400000000000000000000000000000052010000000000001400000000000000000000000000000042030000000000009E00000004007F000000010101FB0E0D0001010101000000010000012F7573722F6C6F63616C2F676F6F676C652F686F6D652F62656E76616E696B2F7372632F6972656500003C756E6B6E6F776E3E0001000072756E74696D652F7372632F697265652F72756E74696D652F74657374646174612F73696D706C655F6D756C2E6D6C69720001000000000902301300000000000011040205010A4B0508AD080001014952454500000000000000000000000000000000000000000000000000002300000000020800E8230000000000000000000000000000010000001200060050130000000000001100000000000000002E64796E73796D002E68617368002E64796E737472002E72656C612E64796E002E726F64617461002E74657874002E646174612E72656C2E726F002E64796E616D6963002E64656275675F616262726576002E64656275675F696E666F002E64656275675F737472002E64656275675F7075626E616D6573002E64656275675F7075627479706573002E64656275675F6672616D65002E64656275675F6C696E65002E636F6D6D656E74002E73796D746162002E7368737472746162002E7374727461620000697265655F68616C5F65786563757461626C655F6C6962726172795F7175657279005F44594E414D49430000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000B0000000200000000000000C801000000000000C801000000000000300000000000000003000000010000000800000000000000180000000000000009000000050000000200000000000000F801000000000000F80100000000000018000000000000000100000000000000040000000000000004000000000000000F0000000300000002000000000000001002000000000000100200000000000023000000000000000000000000000000010000000000000000000000000000001700000004000000020000000000000038020000000000003802000000000000D80000000000000001000000000000000800000000000000180000000000000021000000010000000200000000000000100300000000000010030000000000001D0000000000000000000000000000000800000000000000000000000000000029000000010000000600000000000000301300000000000030030000000000003100000000000000000000000000000010
0000000000000000000000000000002F0000000100000003000000000000007023000000000000700300000000000078000000000000000000000000000000100000000000000000000000000000003C000000060000000300000000000000E823000000000000E803000000000000C000000000000000030000000000000008000000000000001000000000000000450000000100000000000000000000000000000000000000A8040000000000004200000000000000000000000000000001000000000000000000000000000000530000000100000000000000000000000000000000000000EA040000000000005D000000000000000000000000000000010000000000000000000000000000005F000000010000003000000000000000000000000000000047050000000000001D000000000000000000000000000000010000000000000001000000000000006A000000010000000000000000000000000000000000000064050000000000002C000000000000000000000000000000010000000000000000000000000000007A0000000100000000000000000000000000000000000000900500000000000012000000000000000000000000000000010000000000000000000000000000008A0000000100000000000000000000000000000000000000A8050000000000005002000000000000000000000000000008000000000000000000000000000000970000000100000000000000000000000000000000000000F807000000000000A200000000000000000000000000000001000000000000000000000000000000A300000001000000300000000000000000000000000000009A080000000000000500000000000000000000000000000001000000000000000100000000000000AC0000000200000000000000000000000000000000000000A0080000000000004800000000000000130000000200000008000000000000001800000000000000B40000000300000000000000000000000000000000000000E808000000000000C600000000000000000000000000000001000000000000000000000000000000BE0000000300000000000000000000000000000000000000AE090000000000002C00000000000000000000000000000001000000000000000000000000000000"> : vector<3808xi8>, format = "embedded-elf-x86_64", mime_type = "application/x-elf"} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c2 = arith.constant 2 : index | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
%_device_query_0 = util.global.load @_device_query_0 : i1 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%_executable_simple_mul_dispatch_0 = util.global.load @_executable_simple_mul_dispatch_0 : !hal.executable | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
cf.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%_executable_layout_0 : !hal.executable_layout)[%c0] bindings([ | |
%c0 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_1 : !hal.buffer)[%c0, %c16] | |
]) | |
hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%_executable_simple_mul_dispatch_0 : !hal.executable)[0] workgroups([%c1, %c1, %c1]) | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device, %cmd | |
%view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
^bb2: // pred: ^bb0 | |
util.unreachable "device not supported in the compiled configuration" | |
} | |
} | |
// -----// IR Dump After LoopInvariantCodeMotion //----- // | |
util.initializer { | |
%device = hal.ex.shared_device : !hal.device | |
%ok, %value = hal.device.query<%device : !hal.device> key("hal.executable.format" :: "embedded-elf-x86_64") : i1, i1 = false | |
%descriptor_set_layout = hal.descriptor_set_layout.create device(%device : !hal.device) usage(push_only) bindings([#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]) : !hal.descriptor_set_layout | |
%executable_layout = hal.executable_layout.create device(%device : !hal.device) push_constants(0) layouts([%descriptor_set_layout]) : !hal.executable_layout | |
util.global.store %value, @_device_query_0 : i1 | |
util.global.store %executable_layout, @_executable_layout_0 : !hal.executable_layout | |
cf.cond_br %value, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%exe = hal.executable.create device(%device : !hal.device) target(@simple_mul_dispatch_0::@embedded_elf_x86_64) layouts([%_executable_layout_0]) : !hal.executable | |
cf.br ^bb3(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%0 = util.null : !hal.executable | |
cf.br ^bb3(%0 : !hal.executable) | |
^bb3(%1: !hal.executable): // 2 preds: ^bb1, ^bb2 | |
util.global.store %1, @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer.return | |
} | |
// -----// IR Dump After SCFToControlFlow //----- // | |
util.initializer { | |
%device = hal.ex.shared_device : !hal.device | |
%ok, %value = hal.device.query<%device : !hal.device> key("hal.executable.format" :: "embedded-elf-x86_64") : i1, i1 = false | |
%descriptor_set_layout = hal.descriptor_set_layout.create device(%device : !hal.device) usage(push_only) bindings([#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]) : !hal.descriptor_set_layout | |
%executable_layout = hal.executable_layout.create device(%device : !hal.device) push_constants(0) layouts([%descriptor_set_layout]) : !hal.executable_layout | |
util.global.store %value, @_device_query_0 : i1 | |
util.global.store %executable_layout, @_executable_layout_0 : !hal.executable_layout | |
cf.cond_br %value, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%exe = hal.executable.create device(%device : !hal.device) target(@simple_mul_dispatch_0::@embedded_elf_x86_64) layouts([%_executable_layout_0]) : !hal.executable | |
cf.br ^bb3(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%0 = util.null : !hal.executable | |
cf.br ^bb3(%0 : !hal.executable) | |
^bb3(%1: !hal.executable): // 2 preds: ^bb1, ^bb2 | |
util.global.store %1, @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer.return | |
} | |
// -----// IR Dump After LoopCoalescing //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c2 = arith.constant 2 : index | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
%_device_query_0 = util.global.load @_device_query_0 : i1 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%_executable_simple_mul_dispatch_0 = util.global.load @_executable_simple_mul_dispatch_0 : !hal.executable | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
cf.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%_executable_layout_0 : !hal.executable_layout)[%c0] bindings([ | |
%c0 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_1 : !hal.buffer)[%c0, %c16] | |
]) | |
hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%_executable_simple_mul_dispatch_0 : !hal.executable)[0] workgroups([%c1, %c1, %c1]) | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device, %cmd | |
%view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
^bb2: // pred: ^bb0 | |
util.unreachable "device not supported in the compiled configuration" | |
} | |
// -----// IR Dump After LoopInvariantCodeMotion //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c2 = arith.constant 2 : index | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
%_device_query_0 = util.global.load @_device_query_0 : i1 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%_executable_simple_mul_dispatch_0 = util.global.load @_executable_simple_mul_dispatch_0 : !hal.executable | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
cf.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%_executable_layout_0 : !hal.executable_layout)[%c0] bindings([ | |
%c0 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_1 : !hal.buffer)[%c0, %c16] | |
]) | |
hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%_executable_simple_mul_dispatch_0 : !hal.executable)[0] workgroups([%c1, %c1, %c1]) | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device, %cmd | |
%view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
^bb2: // pred: ^bb0 | |
util.unreachable "device not supported in the compiled configuration" | |
} | |
// -----// IR Dump After SCFToControlFlow //----- // | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c2 = arith.constant 2 : index | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
%_device_query_0 = util.global.load @_device_query_0 : i1 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%_executable_simple_mul_dispatch_0 = util.global.load @_executable_simple_mul_dispatch_0 : !hal.executable | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
cf.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%_executable_layout_0 : !hal.executable_layout)[%c0] bindings([ | |
%c0 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_1 : !hal.buffer)[%c0, %c16] | |
]) | |
hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%_executable_simple_mul_dispatch_0 : !hal.executable)[0] workgroups([%c1, %c1, %c1]) | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device, %cmd | |
%view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
^bb2: // pred: ^bb0 | |
util.unreachable "device not supported in the compiled configuration" | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
module attributes {hal.device.targets = [#device_target_cpu]} { | |
util.global private @_device_query_0 : i1 | |
util.global private @_executable_layout_0 : !hal.executable_layout | |
util.global private @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer { | |
%device = hal.ex.shared_device : !hal.device | |
%ok, %value = hal.device.query<%device : !hal.device> key("hal.executable.format" :: "embedded-elf-x86_64") : i1, i1 = false | |
%descriptor_set_layout = hal.descriptor_set_layout.create device(%device : !hal.device) usage(push_only) bindings([#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]) : !hal.descriptor_set_layout | |
%executable_layout = hal.executable_layout.create device(%device : !hal.device) push_constants(0) layouts([%descriptor_set_layout]) : !hal.executable_layout | |
util.global.store %value, @_device_query_0 : i1 | |
util.global.store %executable_layout, @_executable_layout_0 : !hal.executable_layout | |
cf.cond_br %value, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%exe = hal.executable.create device(%device : !hal.device) target(@simple_mul_dispatch_0::@embedded_elf_x86_64) layouts([%_executable_layout_0]) : !hal.executable | |
cf.br ^bb3(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%0 = util.null : !hal.executable | |
cf.br ^bb3(%0 : !hal.executable) | |
^bb3(%1: !hal.executable): // 2 preds: ^bb1, ^bb2 | |
util.global.store %1, @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer.return | |
} | |
hal.executable private @simple_mul_dispatch_0 { | |
hal.executable.binary public @embedded_elf_x86_64 attributes {data = dense<"0x7F454C4602010100000000000000000003003E000100000030130000000000004000000000000000E00900000000000000000000400038000700400014001200060000000400000040000000000000004000000000000000400000000000000088010000000000008801000000000000080000000000000001000000040000000000000000000000000000000000000000000000000000002D030000000000002D030000000000000010000000000000010000000500000030030000000000003013000000000000301300000000000031000000000000003100000000000000001000000000000001000000060000007003000000000000702300000000000070230000000000003801000000000000380100000000000000100000000000000200000006000000E803000000000000E823000000000000E823000000000000C000000000000000C000000000000000080000000000000052E57464040000007003000000000000702300000000000070230000000000003801000000000000900C000000000000010000000000000051E574640600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000120006005013000000000000110000000000000002000000020000000000000001000000000000000000000000697265655F68616C5F65786563757461626C655F6C6962726172795F7175657279000000000000782300000000000008000000000000001003000000000000882300000000000008000000000000003013000000000000902300000000000008000000000000001003000000000000982300000000000008000000000000002C03000000000000A02300000000000008000000000000007023000000000000C02300000000000008000000000000008823000000000000C82300000000000008000000000000002803000000000000D02300000000000008000000000000009023000000000000D8230000000000000800000000000000982300000000000073696D706C655F6D756C5F64697370617463685F300000000000000000000000554889E5488B4620488B08488B5008488B40100F28010F59020F290031C05DC331C083FF02488D0D44100000480F44C1C3000000000000000000000000000000020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000001E000000000000000800000000000000FBFFFF6F000000000100000000000000070000000000000038020000000000000800000000000000D80000000000000009000000000000001800000000000000F9FFFF6F0000000009000000000000000600000000000000C8010000000000000B000000000000001800000000000000050000000000000010020000000000000A0000000000000023000000000000000400000000000000F80100000000000000000000000000000000000000000000011101250E1305030E10171B0EB44219110112060000022E006E0E030E3F19200B0000032E0111011206401831130000041D00311311011206580B590B570B000000590000000400000000000801000000000200070000000000000005000000301300000000000020000000020700000007000000010330130000000000002000000001562A000000042A0000003F130000000000001100000002010100006D6C6972002F0073696D706C655F6D756C5F64697370617463685F3000280000000200000000005D0000002A00000073696D706C655F6D756C5F64697370617463685F3000000000000E0000000200000000005D0000000000000000000000000014000000FFFFFFFF040008000178100C0708900100000000240000000000000030130000000000002000000000000000410E108602430D065B0C070800000000140000000000000050130000000000001100000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000D300000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000D300000000000000140000000000000000000000000000000900000000000000140000000000000000000000000000002800000000000000140000000000000000000000000000001E00000000000000140000000000000000000000000000002800000000000000140000000000000000000000000000006F00000000000000140000000000000000000000000000000301000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000006F
0000000000000014000000000000000000000000000000B8000000000000001400000000000000000000000000000052010000000000001400000000000000000000000000000042030000000000009E00000004007F000000010101FB0E0D0001010101000000010000012F7573722F6C6F63616C2F676F6F676C652F686F6D652F62656E76616E696B2F7372632F6972656500003C756E6B6E6F776E3E0001000072756E74696D652F7372632F697265652F72756E74696D652F74657374646174612F73696D706C655F6D756C2E6D6C69720001000000000902301300000000000011040205010A4B0508AD080001014952454500000000000000000000000000000000000000000000000000002300000000020800E8230000000000000000000000000000010000001200060050130000000000001100000000000000002E64796E73796D002E68617368002E64796E737472002E72656C612E64796E002E726F64617461002E74657874002E646174612E72656C2E726F002E64796E616D6963002E64656275675F616262726576002E64656275675F696E666F002E64656275675F737472002E64656275675F7075626E616D6573002E64656275675F7075627479706573002E64656275675F6672616D65002E64656275675F6C696E65002E636F6D6D656E74002E73796D746162002E7368737472746162002E7374727461620000697265655F68616C5F65786563757461626C655F6C6962726172795F7175657279005F44594E414D49430000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000B0000000200000000000000C801000000000000C801000000000000300000000000000003000000010000000800000000000000180000000000000009000000050000000200000000000000F801000000000000F80100000000000018000000000000000100000000000000040000000000000004000000000000000F0000000300000002000000000000001002000000000000100200000000000023000000000000000000000000000000010000000000000000000000000000001700000004000000020000000000000038020000000000003802000000000000D80000000000000001000000000000000800000000000000180000000000000021000000010000000200000000000000100300000000000010030000000000001D0000000000000000000000000000000800000000000000000000000000000029000000010000000600000000000000301300000000000030030000000000003100000000000000000000000000000010
0000000000000000000000000000002F0000000100000003000000000000007023000000000000700300000000000078000000000000000000000000000000100000000000000000000000000000003C000000060000000300000000000000E823000000000000E803000000000000C000000000000000030000000000000008000000000000001000000000000000450000000100000000000000000000000000000000000000A8040000000000004200000000000000000000000000000001000000000000000000000000000000530000000100000000000000000000000000000000000000EA040000000000005D000000000000000000000000000000010000000000000000000000000000005F000000010000003000000000000000000000000000000047050000000000001D000000000000000000000000000000010000000000000001000000000000006A000000010000000000000000000000000000000000000064050000000000002C000000000000000000000000000000010000000000000000000000000000007A0000000100000000000000000000000000000000000000900500000000000012000000000000000000000000000000010000000000000000000000000000008A0000000100000000000000000000000000000000000000A8050000000000005002000000000000000000000000000008000000000000000000000000000000970000000100000000000000000000000000000000000000F807000000000000A200000000000000000000000000000001000000000000000000000000000000A300000001000000300000000000000000000000000000009A080000000000000500000000000000000000000000000001000000000000000100000000000000AC0000000200000000000000000000000000000000000000A0080000000000004800000000000000130000000200000008000000000000001800000000000000B40000000300000000000000000000000000000000000000E808000000000000C600000000000000000000000000000001000000000000000000000000000000BE0000000300000000000000000000000000000000000000AE090000000000002C00000000000000000000000000000001000000000000000000000000000000"> : vector<3808xi8>, format = "embedded-elf-x86_64", mime_type = "application/x-elf"} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c2 = arith.constant 2 : index | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
%_device_query_0 = util.global.load @_device_query_0 : i1 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%_executable_simple_mul_dispatch_0 = util.global.load @_executable_simple_mul_dispatch_0 : !hal.executable | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
cf.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%_executable_layout_0 : !hal.executable_layout)[%c0] bindings([ | |
%c0 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_1 : !hal.buffer)[%c0, %c16] | |
]) | |
hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%_executable_simple_mul_dispatch_0 : !hal.executable)[0] workgroups([%c1, %c1, %c1]) | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device, %cmd | |
%view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
^bb2: // pred: ^bb0 | |
util.unreachable "device not supported in the compiled configuration" | |
} | |
} | |
// -----// IR Dump After CSE //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
module attributes {hal.device.targets = [#device_target_cpu]} { | |
util.global private @_device_query_0 : i1 | |
util.global private @_executable_layout_0 : !hal.executable_layout | |
util.global private @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer { | |
%device = hal.ex.shared_device : !hal.device | |
%ok, %value = hal.device.query<%device : !hal.device> key("hal.executable.format" :: "embedded-elf-x86_64") : i1, i1 = false | |
%descriptor_set_layout = hal.descriptor_set_layout.create device(%device : !hal.device) usage(push_only) bindings([#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]) : !hal.descriptor_set_layout | |
%executable_layout = hal.executable_layout.create device(%device : !hal.device) push_constants(0) layouts([%descriptor_set_layout]) : !hal.executable_layout | |
util.global.store %value, @_device_query_0 : i1 | |
util.global.store %executable_layout, @_executable_layout_0 : !hal.executable_layout | |
cf.cond_br %value, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%exe = hal.executable.create device(%device : !hal.device) target(@simple_mul_dispatch_0::@embedded_elf_x86_64) layouts([%_executable_layout_0]) : !hal.executable | |
cf.br ^bb3(%exe : !hal.executable) | |
^bb2: // pred: ^bb0 | |
%0 = util.null : !hal.executable | |
cf.br ^bb3(%0 : !hal.executable) | |
^bb3(%1: !hal.executable): // 2 preds: ^bb1, ^bb2 | |
util.global.store %1, @_executable_simple_mul_dispatch_0 : !hal.executable | |
util.initializer.return | |
} | |
hal.executable private @simple_mul_dispatch_0 { | |
hal.executable.binary public @embedded_elf_x86_64 attributes {data = dense<"0x7F454C4602010100000000000000000003003E000100000030130000000000004000000000000000E00900000000000000000000400038000700400014001200060000000400000040000000000000004000000000000000400000000000000088010000000000008801000000000000080000000000000001000000040000000000000000000000000000000000000000000000000000002D030000000000002D030000000000000010000000000000010000000500000030030000000000003013000000000000301300000000000031000000000000003100000000000000001000000000000001000000060000007003000000000000702300000000000070230000000000003801000000000000380100000000000000100000000000000200000006000000E803000000000000E823000000000000E823000000000000C000000000000000C000000000000000080000000000000052E57464040000007003000000000000702300000000000070230000000000003801000000000000900C000000000000010000000000000051E574640600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000120006005013000000000000110000000000000002000000020000000000000001000000000000000000000000697265655F68616C5F65786563757461626C655F6C6962726172795F7175657279000000000000782300000000000008000000000000001003000000000000882300000000000008000000000000003013000000000000902300000000000008000000000000001003000000000000982300000000000008000000000000002C03000000000000A02300000000000008000000000000007023000000000000C02300000000000008000000000000008823000000000000C82300000000000008000000000000002803000000000000D02300000000000008000000000000009023000000000000D8230000000000000800000000000000982300000000000073696D706C655F6D756C5F64697370617463685F300000000000000000000000554889E5488B4620488B08488B5008488B40100F28010F59020F290031C05DC331C083FF02488D0D44100000480F44C1C3000000000000000000000000000000020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000001E000000000000000800000000000000FBFFFF6F000000000100000000000000070000000000000038020000000000000800000000000000D80000000000000009000000000000001800000000000000F9FFFF6F0000000009000000000000000600000000000000C8010000000000000B000000000000001800000000000000050000000000000010020000000000000A0000000000000023000000000000000400000000000000F80100000000000000000000000000000000000000000000011101250E1305030E10171B0EB44219110112060000022E006E0E030E3F19200B0000032E0111011206401831130000041D00311311011206580B590B570B000000590000000400000000000801000000000200070000000000000005000000301300000000000020000000020700000007000000010330130000000000002000000001562A000000042A0000003F130000000000001100000002010100006D6C6972002F0073696D706C655F6D756C5F64697370617463685F3000280000000200000000005D0000002A00000073696D706C655F6D756C5F64697370617463685F3000000000000E0000000200000000005D0000000000000000000000000014000000FFFFFFFF040008000178100C0708900100000000240000000000000030130000000000002000000000000000410E108602430D065B0C070800000000140000000000000050130000000000001100000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000D300000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000D300000000000000140000000000000000000000000000000900000000000000140000000000000000000000000000002800000000000000140000000000000000000000000000001E00000000000000140000000000000000000000000000002800000000000000140000000000000000000000000000006F00000000000000140000000000000000000000000000000301000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000000300000000000000140000000000000000000000000000006F
0000000000000014000000000000000000000000000000B8000000000000001400000000000000000000000000000052010000000000001400000000000000000000000000000042030000000000009E00000004007F000000010101FB0E0D0001010101000000010000012F7573722F6C6F63616C2F676F6F676C652F686F6D652F62656E76616E696B2F7372632F6972656500003C756E6B6E6F776E3E0001000072756E74696D652F7372632F697265652F72756E74696D652F74657374646174612F73696D706C655F6D756C2E6D6C69720001000000000902301300000000000011040205010A4B0508AD080001014952454500000000000000000000000000000000000000000000000000002300000000020800E8230000000000000000000000000000010000001200060050130000000000001100000000000000002E64796E73796D002E68617368002E64796E737472002E72656C612E64796E002E726F64617461002E74657874002E646174612E72656C2E726F002E64796E616D6963002E64656275675F616262726576002E64656275675F696E666F002E64656275675F737472002E64656275675F7075626E616D6573002E64656275675F7075627479706573002E64656275675F6672616D65002E64656275675F6C696E65002E636F6D6D656E74002E73796D746162002E7368737472746162002E7374727461620000697265655F68616C5F65786563757461626C655F6C6962726172795F7175657279005F44594E414D49430000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000B0000000200000000000000C801000000000000C801000000000000300000000000000003000000010000000800000000000000180000000000000009000000050000000200000000000000F801000000000000F80100000000000018000000000000000100000000000000040000000000000004000000000000000F0000000300000002000000000000001002000000000000100200000000000023000000000000000000000000000000010000000000000000000000000000001700000004000000020000000000000038020000000000003802000000000000D80000000000000001000000000000000800000000000000180000000000000021000000010000000200000000000000100300000000000010030000000000001D0000000000000000000000000000000800000000000000000000000000000029000000010000000600000000000000301300000000000030030000000000003100000000000000000000000000000010
0000000000000000000000000000002F0000000100000003000000000000007023000000000000700300000000000078000000000000000000000000000000100000000000000000000000000000003C000000060000000300000000000000E823000000000000E803000000000000C000000000000000030000000000000008000000000000001000000000000000450000000100000000000000000000000000000000000000A8040000000000004200000000000000000000000000000001000000000000000000000000000000530000000100000000000000000000000000000000000000EA040000000000005D000000000000000000000000000000010000000000000000000000000000005F000000010000003000000000000000000000000000000047050000000000001D000000000000000000000000000000010000000000000001000000000000006A000000010000000000000000000000000000000000000064050000000000002C000000000000000000000000000000010000000000000000000000000000007A0000000100000000000000000000000000000000000000900500000000000012000000000000000000000000000000010000000000000000000000000000008A0000000100000000000000000000000000000000000000A8050000000000005002000000000000000000000000000008000000000000000000000000000000970000000100000000000000000000000000000000000000F807000000000000A200000000000000000000000000000001000000000000000000000000000000A300000001000000300000000000000000000000000000009A080000000000000500000000000000000000000000000001000000000000000100000000000000AC0000000200000000000000000000000000000000000000A0080000000000004800000000000000130000000200000008000000000000001800000000000000B40000000300000000000000000000000000000000000000E808000000000000C600000000000000000000000000000001000000000000000000000000000000BE0000000300000000000000000000000000000000000000AE090000000000002C00000000000000000000000000000001000000000000000000000000000000"> : vector<3808xi8>, format = "embedded-elf-x86_64", mime_type = "application/x-elf"} | |
} | |
func.func @simple_mul(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} { | |
%c2 = arith.constant 2 : index | |
%c0 = arith.constant 0 : index | |
%c16 = arith.constant 16 : index | |
%c4 = arith.constant 4 : index | |
%c1 = arith.constant 1 : index | |
%c553648160_i32 = arith.constant 553648160 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
%_device_query_0 = util.global.load @_device_query_0 : i1 | |
%_executable_layout_0 = util.global.load @_executable_layout_0 : !hal.executable_layout | |
%_executable_simple_mul_dispatch_0 = util.global.load @_executable_simple_mul_dispatch_0 : !hal.executable | |
hal.buffer_view.assert<%arg0 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer = hal.buffer_view.buffer<%arg0 : !hal.buffer_view> : !hal.buffer | |
%device = hal.ex.shared_device : !hal.device | |
%allocator = hal.device.allocator<%device : !hal.device> : !hal.allocator | |
hal.buffer.assert<%buffer : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
hal.buffer_view.assert<%arg1 : !hal.buffer_view> message("tensor") shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) | |
%buffer_0 = hal.buffer_view.buffer<%arg1 : !hal.buffer_view> : !hal.buffer | |
hal.buffer.assert<%buffer_0 : !hal.buffer> message("tensor") allocator(%allocator : !hal.allocator) minimum_length(%c16) type(DeviceVisible) usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage") | |
%buffer_1 = hal.allocator.allocate<%allocator : !hal.allocator> type("HostVisible|DeviceVisible|DeviceLocal") usage("TransferSource|TransferTarget|Transfer|DispatchStorageRead|DispatchStorageWrite|DispatchStorage|MappingScoped|MappingAccessRandom|Mapping") : !hal.buffer{%c16} | |
%cmd = hal.command_buffer.create device(%device : !hal.device) mode("OneShot|AllowInlineExecution") categories("Transfer|Dispatch") : !hal.command_buffer | |
hal.command_buffer.begin<%cmd : !hal.command_buffer> | |
cf.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
hal.command_buffer.push_descriptor_set<%cmd : !hal.command_buffer> layout(%_executable_layout_0 : !hal.executable_layout)[%c0] bindings([ | |
%c0 = (%buffer : !hal.buffer)[%c0, %c16], | |
%c1 = (%buffer_0 : !hal.buffer)[%c0, %c16], | |
%c2 = (%buffer_1 : !hal.buffer)[%c0, %c16] | |
]) | |
hal.command_buffer.dispatch<%cmd : !hal.command_buffer> target(%_executable_simple_mul_dispatch_0 : !hal.executable)[0] workgroups([%c1, %c1, %c1]) | |
hal.command_buffer.execution_barrier<%cmd : !hal.command_buffer> source("Dispatch|Transfer|CommandRetire") target("CommandIssue|Dispatch|Transfer") flags("None") | |
hal.command_buffer.end<%cmd : !hal.command_buffer> | |
hal.ex.submit_and_wait %device, %cmd | |
%view = hal.buffer_view.create buffer(%buffer_1 : !hal.buffer) shape([%c4]) type(%c553648160_i32) encoding(%c1_i32) : !hal.buffer_view | |
return %view : !hal.buffer_view | |
^bb2: // pred: ^bb0 | |
util.unreachable "device not supported in the compiled configuration" | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::VM::ConversionPass //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
module attributes {hal.device.targets = [#device_target_cpu], vm.toplevel} { | |
vm.module public @module { | |
vm.global.i32 private @_device_query_0 : i32 | |
vm.global.ref private @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
vm.global.ref private @_executable_simple_mul_dispatch_0 : !vm.ref<!hal.executable> | |
vm.rodata private @simple_mul_dispatch_0_embedded_elf_x86_64 {alignment = 16 : i64, mime_type = "application/x-elf"} dense<"0x7F454C4602010100000000000000000003003E000100000030130000000000004000000000000000E00900000000000000000000400038000700400014001200060000000400000040000000000000004000000000000000400000000000000088010000000000008801000000000000080000000000000001000000040000000000000000000000000000000000000000000000000000002D030000000000002D030000000000000010000000000000010000000500000030030000000000003013000000000000301300000000000031000000000000003100000000000000001000000000000001000000060000007003000000000000702300000000000070230000000000003801000000000000380100000000000000100000000000000200000006000000E803000000000000E823000000000000E823000000000000C000000000000000C000000000000000080000000000000052E57464040000007003000000000000702300000000000070230000000000003801000000000000900C000000000000010000000000000051E574640600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000120006005013000000000000110000000000000002000000020000000000000001000000000000000000000000697265655F68616C5F65786563757461626C655F6C6962726172795F7175657279000000000000782300000000000008000000000000001003000000000000882300000000000008000000000000003013000000000000902300000000000008000000000000001003000000000000982300000000000008000000000000002C03000000000000A02300000000000008000000000000007023000000000000C02300000000000008000000000000008823000000000000C82300000000000008000000000000002803000000000000D02300000000000008000000000000009023000000000000D8230000000000000800000000000000982300000000000073696D706C655F6D756C5F64697370617463685F300000000000000000000000554889E5488B4620488B08488B5008488B40100F28010F59020F290031C05DC331C083FF02488D0D44100000480F44C1C3000000000000000000000000000000020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001E000000000000000800000000000000FBFFFF6F000000000100000000000000070000000000000038020000000000000800000000000000D80000000000000009000000000000001800000000000000F9FFFF6F0000000009000000000000000600000000000000C8010000000000000B000000000000001800000000000000050000000000000010020000000000000A0000000000000023000000000000000400000000000000F80100000000000000000000000000000000000000000000011101250E1305030E10171B0EB44219110112060000022E006E0E030E3F19200B0000032E0111011206401831130000041D00311311011206580B590B570B000000590000000400000000000801000000000200070000000000000005000000301300000000000020000000020700000007000000010330130000000000002000000001562A000000042A0000003F130000000000001100000002010100006D6C6972002F0073696D706C655F6D756C5F64697370617463685F3000280000000200000000005D0000002A00000073696D706C655F6D756C5F64697370617463685F3000000000000E0000000200000000005D0000000000000000000000000014000000FFFFFFFF040008000178100C0708900100000000240000000000000030130000000000002000000000000000410E108602430D065B0C070800000000140000000000000050130000000000001100000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000D300000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000D300000000000000140000000000000000000000000000000900000000000000140000000000000000000000000000002800000000000000140000000000000000000000000000001E00000000000000140000000000000000000000000000002800000000000000140000000000000000000000000000006F000000000000001400000000000000000000000000000003010000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003
00000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000B8000000000000001400000000000000000000000000000052010000000000001400000000000000000000000000000042030000000000009E00000004007F000000010101FB0E0D0001010101000000010000012F7573722F6C6F63616C2F676F6F676C652F686F6D652F62656E76616E696B2F7372632F6972656500003C756E6B6E6F776E3E0001000072756E74696D652F7372632F697265652F72756E74696D652F74657374646174612F73696D706C655F6D756C2E6D6C69720001000000000902301300000000000011040205010A4B0508AD080001014952454500000000000000000000000000000000000000000000000000002300000000020800E8230000000000000000000000000000010000001200060050130000000000001100000000000000002E64796E73796D002E68617368002E64796E737472002E72656C612E64796E002E726F64617461002E74657874002E646174612E72656C2E726F002E64796E616D6963002E64656275675F616262726576002E64656275675F696E666F002E64656275675F737472002E64656275675F7075626E616D6573002E64656275675F7075627479706573002E64656275675F6672616D65002E64656275675F6C696E65002E636F6D6D656E74002E73796D746162002E7368737472746162002E7374727461620000697265655F68616C5F65786563757461626C655F6C6962726172795F7175657279005F44594E414D49430000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000B0000000200000000000000C801000000000000C801000000000000300000000000000003000000010000000800000000000000180000000000000009000000050000000200000000000000F801000000000000F80100000000000018000000000000000100000000000000040000000000000004000000000000000F0000000300000002000000000000001002000000000000100200000000000023000000000000000000000000000000010000000000000000000000000000001700000004000000020000000000000038020000000000003802000000000000D80000000000000001000000000000000800000000000000180000000000000021000000010000000200000000000000100300000000000010030000000000001D0000000000000000000000000000000800000000000000000000000000000029000000010000000600000000000000301300000000000030
0300000000000031000000000000000000000000000000100000000000000000000000000000002F0000000100000003000000000000007023000000000000700300000000000078000000000000000000000000000000100000000000000000000000000000003C000000060000000300000000000000E823000000000000E803000000000000C000000000000000030000000000000008000000000000001000000000000000450000000100000000000000000000000000000000000000A8040000000000004200000000000000000000000000000001000000000000000000000000000000530000000100000000000000000000000000000000000000EA040000000000005D000000000000000000000000000000010000000000000000000000000000005F000000010000003000000000000000000000000000000047050000000000001D000000000000000000000000000000010000000000000001000000000000006A000000010000000000000000000000000000000000000064050000000000002C000000000000000000000000000000010000000000000000000000000000007A0000000100000000000000000000000000000000000000900500000000000012000000000000000000000000000000010000000000000000000000000000008A0000000100000000000000000000000000000000000000A8050000000000005002000000000000000000000000000008000000000000000000000000000000970000000100000000000000000000000000000000000000F807000000000000A200000000000000000000000000000001000000000000000000000000000000A300000001000000300000000000000000000000000000009A080000000000000500000000000000000000000000000001000000000000000100000000000000AC0000000200000000000000000000000000000000000000A0080000000000004800000000000000130000000200000008000000000000001800000000000000B40000000300000000000000000000000000000000000000E808000000000000C600000000000000000000000000000001000000000000000000000000000000BE0000000300000000000000000000000000000000000000AE090000000000002C00000000000000000000000000000001000000000000000000000000000000"> : vector<3808xi8> | |
vm.initializer { | |
%ref = vm.call @hal.ex.shared_device() {nosideeffects} : () -> !vm.ref<!hal.device> | |
%buffer = vm.rodata.inline "_utf8_hal_executable_format_EAB228F999C2D3A1" {alignment = 1 : i64} : !vm.buffer = dense<[104, 97, 108, 46, 101, 120, 101, 99, 117, 116, 97, 98, 108, 101, 46, 102, 111, 114, 109, 97, 116]> : vector<21xi8> | |
%buffer_0 = vm.rodata.inline "_utf8_embedded_elf_x86_64_9FD8733DA4A6F228" {alignment = 1 : i64} : !vm.buffer = dense<[101, 109, 98, 101, 100, 100, 101, 100, 45, 101, 108, 102, 45, 120, 56, 54, 95, 54, 52]> : vector<19xi8> | |
%0:2 = vm.call @hal.device.query.i32(%ref, %buffer, %buffer_0) {nosideeffects} : (!vm.ref<!hal.device>, !vm.buffer, !vm.buffer) -> (i32, i32) | |
%c1 = vm.const.i32 1 | |
%1 = vm.and.i32 %0#1, %c1 : i32 | |
%zero = vm.const.i32.zero | |
%2 = vm.select.i32 %0#0, %1, %zero : i32 | |
%c1_1 = vm.const.i32 1 | |
%c1_2 = vm.const.i32 1 | |
%zero_3 = vm.const.i32.zero | |
%c7 = vm.const.i32 7 | |
%c1_4 = vm.const.i32 1 | |
%c7_5 = vm.const.i32 7 | |
%c2 = vm.const.i32 2 | |
%c7_6 = vm.const.i32 7 | |
%ref_7 = vm.call.variadic @hal.descriptor_set_layout.create(%ref, %c1_2, [(%zero_3, %c7), (%c1_4, %c7_5), (%c2, %c7_6)]) {nosideeffects} : (!vm.ref<!hal.device>, i32, tuple<i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> | |
%zero_8 = vm.const.i32.zero | |
%ref_9 = vm.call.variadic @hal.executable_layout.create(%ref, %zero_8, [%ref_7]) {nosideeffects} : (!vm.ref<!hal.device>, i32, !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> | |
vm.global.store.i32 %2, @_device_query_0 : i32 | |
vm.global.store.ref %ref_9, @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
vm.cond_br %2, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%buffer_10 = vm.rodata.inline "_utf8_embedded_elf_x86_64_9FD8733DA4A6F228" {alignment = 1 : i64} : !vm.buffer = dense<[101, 109, 98, 101, 100, 100, 101, 100, 45, 101, 108, 102, 45, 120, 56, 54, 95, 54, 52]> : vector<19xi8> | |
%simple_mul_dispatch_0_embedded_elf_x86_64 = vm.const.ref.rodata @simple_mul_dispatch_0_embedded_elf_x86_64 : !vm.buffer | |
%null = vm.const.ref.zero : !vm.buffer | |
%ref_11 = vm.call.variadic @hal.executable.create(%ref, %buffer_10, %simple_mul_dispatch_0_embedded_elf_x86_64, %null, [%_executable_layout_0]) {nosideeffects} : (!vm.ref<!hal.device>, !vm.buffer, !vm.buffer, !vm.buffer, !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> | |
vm.br ^bb3(%ref_11 : !vm.ref<!hal.executable>) | |
^bb2: // pred: ^bb0 | |
%null_12 = vm.const.ref.zero : !vm.ref<!hal.executable> | |
vm.br ^bb3(%null_12 : !vm.ref<!hal.executable>) | |
^bb3(%3: !vm.ref<!hal.executable>): // 2 preds: ^bb1, ^bb2 | |
vm.global.store.ref %3, @_executable_simple_mul_dispatch_0 : !vm.ref<!hal.executable> | |
vm.return | |
} | |
vm.import @hal.ex.shared_device() -> !vm.ref<!hal.device> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.ex.submit_and_wait(%device : !vm.ref<!hal.device>, %command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.allocate(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %allocation_size : i64) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.map.byte_buffer(%allocator : !vm.ref<!hal.allocator>, %try : i32, %memory_types : i32, %buffer_usage : i32, %source : !vm.buffer, %offset : i64, %length : i64) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.wrap.byte_buffer(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %source : !vm.buffer, %offset : i64, %length : i64) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.assert(%buffer : !vm.ref<!hal.buffer>, %message : !vm.buffer, %allocator : !vm.ref<!hal.allocator>, %minimum_length : i64, %memory_types : i32, %buffer_usage : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.subspan(%source_buffer : !vm.ref<!hal.buffer>, %source_offset : i64, %length : i64) -> !vm.ref<!hal.buffer> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer.length(%buffer : !vm.ref<!hal.buffer>) -> i64 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer.load(%source_buffer : !vm.ref<!hal.buffer>, %source_offset : i64, %length : i32) -> i32 attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.store(%value : i32, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i64, %length : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer_view.create(%buffer : !vm.ref<!hal.buffer>, %element_type : i32, %encoding_type : i32, %shape : i64 ...) -> !vm.ref<!hal.buffer_view> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.assert(%buffer_view : !vm.ref<!hal.buffer_view>, %message : !vm.buffer, %element_type : i32, %encoding_type : i32, %shape : i64 ...) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer_view.buffer(%buffer_view : !vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.byte_length(%buffer_view : !vm.ref<!hal.buffer_view>) -> i64 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.element_type(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.encoding_type(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.rank(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.dim(%buffer_view : !vm.ref<!hal.buffer_view>, %index : i32) -> i64 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.trace(%key : !vm.buffer, %operands : !vm.ref<!hal.buffer_view> ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.create(%device : !vm.ref<!hal.device>, %modes : i32, %command_categories : i32) -> !vm.ref<!hal.command_buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.begin(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.end(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.begin_debug_group(%command_buffer : !vm.ref<!hal.command_buffer>, %label : !vm.buffer) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.end_debug_group(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.execution_barrier(%command_buffer : !vm.ref<!hal.command_buffer>, %source_stage_mask : i32, %target_stage_mask : i32, %flags : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.fill_buffer(%command_buffer : !vm.ref<!hal.command_buffer>, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i64, %length : i64, %pattern : i32, %pattern_length : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.copy_buffer(%command_buffer : !vm.ref<!hal.command_buffer>, %source_buffer : !vm.ref<!hal.buffer>, %source_offset : i64, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i64, %length : i64) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.push_constants(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %offset : i32, %values : i32 ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.push_descriptor_set(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %set : i32, %bindings : tuple<i32, !vm.ref<!hal.buffer>, i64, i64> ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.bind_descriptor_set(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %set : i32, %descriptor_set : !vm.ref<!hal.descriptor_set>, %dynamic_offsets : i64 ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.dispatch(%command_buffer : !vm.ref<!hal.command_buffer>, %executable : !vm.ref<!hal.executable>, %entry_point : i32, %workgroup_x : i32, %workgroup_y : i32, %workgroup_z : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.dispatch.indirect(%command_buffer : !vm.ref<!hal.command_buffer>, %executable : !vm.ref<!hal.executable>, %entry_point : i32, %workgroups_buffer : !vm.ref<!hal.buffer>, %workgroups_offset : i64) attributes {sym_visibility = "private"} | |
vm.import @hal.descriptor_set.create(%device : !vm.ref<!hal.device>, %set_layout : !vm.ref<!hal.descriptor_set_layout>, %bindings : tuple<i32, !vm.ref<!hal.buffer>, i64, i64> ...) -> !vm.ref<!hal.descriptor_set> attributes {sym_visibility = "private"} | |
vm.import @hal.descriptor_set_layout.create(%device : !vm.ref<!hal.device>, %usage_type : i32, %bindings : tuple<i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.device.allocator(%device : !vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.device.query.i32(%device : !vm.ref<!hal.device>, %category : !vm.buffer, %key : !vm.buffer) -> (i32, i32) attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.executable.create(%device : !vm.ref<!hal.device>, %executable_format : !vm.buffer, %executable_data : !vm.buffer, %constants : !vm.buffer, %executable_layouts : !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.executable_layout.create(%device : !vm.ref<!hal.device>, %push_constants : i32, %set_layouts : !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.semaphore.create(%device : !vm.ref<!hal.device>, %initial_value : i64) -> !vm.ref<!hal.semaphore> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.semaphore.query(%semaphore : !vm.ref<!hal.semaphore>) -> (i32, i64) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.signal(%semaphore : !vm.ref<!hal.semaphore>, %new_value : i64) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.fail(%semaphore : !vm.ref<!hal.semaphore>, %status : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.await(%semaphore : !vm.ref<!hal.semaphore>, %min_value : i64) -> i32 attributes {sym_visibility = "private"} | |
vm.func private @simple_mul(%arg0: !vm.ref<!hal.buffer_view>, %arg1: !vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer_view> { | |
%c2 = vm.const.i32 2 | |
%zero = vm.const.i32.zero | |
%c16 = vm.const.i32 16 | |
%c4 = vm.const.i32 4 | |
%c1 = vm.const.i32 1 | |
%c553648160 = vm.const.i32 553648160 | |
%c1_0 = vm.const.i32 1 | |
%_device_query_0 = vm.global.load.i32 @_device_query_0 : i32 | |
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%_executable_simple_mul_dispatch_0 = vm.global.load.ref @_executable_simple_mul_dispatch_0 : !vm.ref<!hal.executable> | |
%buffer = vm.rodata.inline "_utf8_tensor_3C6209B4FD120BDC" {alignment = 1 : i64} : !vm.buffer = dense<[116, 101, 110, 115, 111, 114]> : vector<6xi8> | |
%c4_1 = vm.const.i64 4 | |
vm.call.variadic @hal.buffer_view.assert(%arg0, %buffer, %c553648160, %c1_0, [%c4_1]) : (!vm.ref<!hal.buffer_view>, !vm.buffer, i32, i32, i64 ...) | |
%ref = vm.call @hal.buffer_view.buffer(%arg0) {nosideeffects} : (!vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer> | |
%ref_2 = vm.call @hal.ex.shared_device() {nosideeffects} : () -> !vm.ref<!hal.device> | |
%ref_3 = vm.call @hal.device.allocator(%ref_2) {nosideeffects} : (!vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> | |
%buffer_4 = vm.rodata.inline "_utf8_tensor_3C6209B4FD120BDC" {alignment = 1 : i64} : !vm.buffer = dense<[116, 101, 110, 115, 111, 114]> : vector<6xi8> | |
%c16_5 = vm.const.i64 16 | |
%c16_6 = vm.const.i32 16 | |
%c3075 = vm.const.i32 3075 | |
vm.call @hal.buffer.assert(%ref, %buffer_4, %ref_3, %c16_5, %c16_6, %c3075) : (!vm.ref<!hal.buffer>, !vm.buffer, !vm.ref<!hal.allocator>, i64, i32, i32) -> () | |
%buffer_7 = vm.rodata.inline "_utf8_tensor_3C6209B4FD120BDC" {alignment = 1 : i64} : !vm.buffer = dense<[116, 101, 110, 115, 111, 114]> : vector<6xi8> | |
%c4_8 = vm.const.i64 4 | |
vm.call.variadic @hal.buffer_view.assert(%arg1, %buffer_7, %c553648160, %c1_0, [%c4_8]) : (!vm.ref<!hal.buffer_view>, !vm.buffer, i32, i32, i64 ...) | |
%ref_9 = vm.call @hal.buffer_view.buffer(%arg1) {nosideeffects} : (!vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer> | |
%buffer_10 = vm.rodata.inline "_utf8_tensor_3C6209B4FD120BDC" {alignment = 1 : i64} : !vm.buffer = dense<[116, 101, 110, 115, 111, 114]> : vector<6xi8> | |
%c16_11 = vm.const.i64 16 | |
%c16_12 = vm.const.i32 16 | |
%c3075_13 = vm.const.i32 3075 | |
vm.call @hal.buffer.assert(%ref_9, %buffer_10, %ref_3, %c16_11, %c16_12, %c3075_13) : (!vm.ref<!hal.buffer>, !vm.buffer, !vm.ref<!hal.allocator>, i64, i32, i32) -> () | |
%c50 = vm.const.i32 50 | |
%c150998019 = vm.const.i32 150998019 | |
%c16_14 = vm.const.i64 16 | |
%ref_15 = vm.call @hal.allocator.allocate(%ref_3, %c50, %c150998019, %c16_14) : (!vm.ref<!hal.allocator>, i32, i32, i64) -> !vm.ref<!hal.buffer> | |
%c17 = vm.const.i32 17 | |
%c3 = vm.const.i32 3 | |
%ref_16 = vm.call @hal.command_buffer.create(%ref_2, %c17, %c3) : (!vm.ref<!hal.device>, i32, i32) -> !vm.ref<!hal.command_buffer> | |
vm.call @hal.command_buffer.begin(%ref_16) : (!vm.ref<!hal.command_buffer>) -> () | |
vm.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%zero_17 = vm.const.i64.zero | |
%c16_18 = vm.const.i64 16 | |
%zero_19 = vm.const.i64.zero | |
%c16_20 = vm.const.i64 16 | |
%zero_21 = vm.const.i64.zero | |
%c16_22 = vm.const.i64 16 | |
vm.call.variadic @hal.command_buffer.push_descriptor_set(%ref_16, %_executable_layout_0, %zero, [(%zero, %ref, %zero_17, %c16_18), (%c1, %ref_9, %zero_19, %c16_20), (%c2, %ref_15, %zero_21, %c16_22)]) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable_layout>, i32, tuple<i32, !vm.ref<!hal.buffer>, i64, i64> ...) | |
%zero_23 = vm.const.i32.zero | |
vm.call @hal.command_buffer.dispatch(%ref_16, %_executable_simple_mul_dispatch_0, %zero_23, %c1, %c1, %c1) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32) -> () | |
%c28 = vm.const.i32 28 | |
%c13 = vm.const.i32 13 | |
%zero_24 = vm.const.i32.zero | |
vm.call @hal.command_buffer.execution_barrier(%ref_16, %c28, %c13, %zero_24) : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.end(%ref_16) : (!vm.ref<!hal.command_buffer>) -> () | |
vm.call @hal.ex.submit_and_wait(%ref_2, %ref_16) : (!vm.ref<!hal.device>, !vm.ref<!hal.command_buffer>) -> () | |
%c4_25 = vm.const.i64 4 | |
%ref_26 = vm.call.variadic @hal.buffer_view.create(%ref_15, %c553648160, %c1_0, [%c4_25]) {nosideeffects} : (!vm.ref<!hal.buffer>, i32, i32, i64 ...) -> !vm.ref<!hal.buffer_view> | |
vm.return %ref_26 : !vm.ref<!hal.buffer_view> | |
^bb2: // pred: ^bb0 | |
%c2_27 = vm.const.i32 2 | |
vm.fail %c2_27, "device not supported in the compiled configuration" | |
} | |
vm.export @simple_mul | |
} | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::VM::HoistInlinedRodataPass //----- // | |
vm.module public @module { | |
vm.global.i32 private @_device_query_0 : i32 | |
vm.global.ref private @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
vm.global.ref private @_executable_simple_mul_dispatch_0 : !vm.ref<!hal.executable> | |
vm.rodata private @simple_mul_dispatch_0_embedded_elf_x86_64 {alignment = 16 : i64, mime_type = "application/x-elf"} dense<"0x7F454C4602010100000000000000000003003E000100000030130000000000004000000000000000E00900000000000000000000400038000700400014001200060000000400000040000000000000004000000000000000400000000000000088010000000000008801000000000000080000000000000001000000040000000000000000000000000000000000000000000000000000002D030000000000002D030000000000000010000000000000010000000500000030030000000000003013000000000000301300000000000031000000000000003100000000000000001000000000000001000000060000007003000000000000702300000000000070230000000000003801000000000000380100000000000000100000000000000200000006000000E803000000000000E823000000000000E823000000000000C000000000000000C000000000000000080000000000000052E57464040000007003000000000000702300000000000070230000000000003801000000000000900C000000000000010000000000000051E574640600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000120006005013000000000000110000000000000002000000020000000000000001000000000000000000000000697265655F68616C5F65786563757461626C655F6C6962726172795F7175657279000000000000782300000000000008000000000000001003000000000000882300000000000008000000000000003013000000000000902300000000000008000000000000001003000000000000982300000000000008000000000000002C03000000000000A02300000000000008000000000000007023000000000000C02300000000000008000000000000008823000000000000C82300000000000008000000000000002803000000000000D02300000000000008000000000000009023000000000000D8230000000000000800000000000000982300000000000073696D706C655F6D756C5F64697370617463685F300000000000000000000000554889E5488B4620488B08488B5008488B40100F28010F59020F290031C05DC331C083FF02488D0D44100000480F44C1C3000000000000000000000000000000020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001E000000000000000800000000000000FBFFFF6F000000000100000000000000070000000000000038020000000000000800000000000000D80000000000000009000000000000001800000000000000F9FFFF6F0000000009000000000000000600000000000000C8010000000000000B000000000000001800000000000000050000000000000010020000000000000A0000000000000023000000000000000400000000000000F80100000000000000000000000000000000000000000000011101250E1305030E10171B0EB44219110112060000022E006E0E030E3F19200B0000032E0111011206401831130000041D00311311011206580B590B570B000000590000000400000000000801000000000200070000000000000005000000301300000000000020000000020700000007000000010330130000000000002000000001562A000000042A0000003F130000000000001100000002010100006D6C6972002F0073696D706C655F6D756C5F64697370617463685F3000280000000200000000005D0000002A00000073696D706C655F6D756C5F64697370617463685F3000000000000E0000000200000000005D0000000000000000000000000014000000FFFFFFFF040008000178100C0708900100000000240000000000000030130000000000002000000000000000410E108602430D065B0C070800000000140000000000000050130000000000001100000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000D300000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000D300000000000000140000000000000000000000000000000900000000000000140000000000000000000000000000002800000000000000140000000000000000000000000000001E00000000000000140000000000000000000000000000002800000000000000140000000000000000000000000000006F000000000000001400000000000000000000000000000003010000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003
00000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000B8000000000000001400000000000000000000000000000052010000000000001400000000000000000000000000000042030000000000009E00000004007F000000010101FB0E0D0001010101000000010000012F7573722F6C6F63616C2F676F6F676C652F686F6D652F62656E76616E696B2F7372632F6972656500003C756E6B6E6F776E3E0001000072756E74696D652F7372632F697265652F72756E74696D652F74657374646174612F73696D706C655F6D756C2E6D6C69720001000000000902301300000000000011040205010A4B0508AD080001014952454500000000000000000000000000000000000000000000000000002300000000020800E8230000000000000000000000000000010000001200060050130000000000001100000000000000002E64796E73796D002E68617368002E64796E737472002E72656C612E64796E002E726F64617461002E74657874002E646174612E72656C2E726F002E64796E616D6963002E64656275675F616262726576002E64656275675F696E666F002E64656275675F737472002E64656275675F7075626E616D6573002E64656275675F7075627479706573002E64656275675F6672616D65002E64656275675F6C696E65002E636F6D6D656E74002E73796D746162002E7368737472746162002E7374727461620000697265655F68616C5F65786563757461626C655F6C6962726172795F7175657279005F44594E414D49430000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000B0000000200000000000000C801000000000000C801000000000000300000000000000003000000010000000800000000000000180000000000000009000000050000000200000000000000F801000000000000F80100000000000018000000000000000100000000000000040000000000000004000000000000000F0000000300000002000000000000001002000000000000100200000000000023000000000000000000000000000000010000000000000000000000000000001700000004000000020000000000000038020000000000003802000000000000D80000000000000001000000000000000800000000000000180000000000000021000000010000000200000000000000100300000000000010030000000000001D0000000000000000000000000000000800000000000000000000000000000029000000010000000600000000000000301300000000000030
0300000000000031000000000000000000000000000000100000000000000000000000000000002F0000000100000003000000000000007023000000000000700300000000000078000000000000000000000000000000100000000000000000000000000000003C000000060000000300000000000000E823000000000000E803000000000000C000000000000000030000000000000008000000000000001000000000000000450000000100000000000000000000000000000000000000A8040000000000004200000000000000000000000000000001000000000000000000000000000000530000000100000000000000000000000000000000000000EA040000000000005D000000000000000000000000000000010000000000000000000000000000005F000000010000003000000000000000000000000000000047050000000000001D000000000000000000000000000000010000000000000001000000000000006A000000010000000000000000000000000000000000000064050000000000002C000000000000000000000000000000010000000000000000000000000000007A0000000100000000000000000000000000000000000000900500000000000012000000000000000000000000000000010000000000000000000000000000008A0000000100000000000000000000000000000000000000A8050000000000005002000000000000000000000000000008000000000000000000000000000000970000000100000000000000000000000000000000000000F807000000000000A200000000000000000000000000000001000000000000000000000000000000A300000001000000300000000000000000000000000000009A080000000000000500000000000000000000000000000001000000000000000100000000000000AC0000000200000000000000000000000000000000000000A0080000000000004800000000000000130000000200000008000000000000001800000000000000B40000000300000000000000000000000000000000000000E808000000000000C600000000000000000000000000000001000000000000000000000000000000BE0000000300000000000000000000000000000000000000AE090000000000002C00000000000000000000000000000001000000000000000000000000000000"> : vector<3808xi8> | |
vm.rodata private @_utf8_hal_executable_format_EAB228F999C2D3A1 {alignment = 1 : i64} dense<[104, 97, 108, 46, 101, 120, 101, 99, 117, 116, 97, 98, 108, 101, 46, 102, 111, 114, 109, 97, 116]> : vector<21xi8> | |
vm.rodata private @_utf8_embedded_elf_x86_64_9FD8733DA4A6F228 {alignment = 1 : i64} dense<[101, 109, 98, 101, 100, 100, 101, 100, 45, 101, 108, 102, 45, 120, 56, 54, 95, 54, 52]> : vector<19xi8> | |
vm.rodata private @_utf8_embedded_elf_x86_64_9FD8733DA4A6F228_0 {alignment = 1 : i64} dense<[101, 109, 98, 101, 100, 100, 101, 100, 45, 101, 108, 102, 45, 120, 56, 54, 95, 54, 52]> : vector<19xi8> | |
vm.initializer { | |
%ref = vm.call @hal.ex.shared_device() {nosideeffects} : () -> !vm.ref<!hal.device> | |
%_utf8_hal_executable_format_EAB228F999C2D3A1 = vm.const.ref.rodata @_utf8_hal_executable_format_EAB228F999C2D3A1 : !vm.buffer | |
%_utf8_embedded_elf_x86_64_9FD8733DA4A6F228 = vm.const.ref.rodata @_utf8_embedded_elf_x86_64_9FD8733DA4A6F228 : !vm.buffer | |
%0:2 = vm.call @hal.device.query.i32(%ref, %_utf8_hal_executable_format_EAB228F999C2D3A1, %_utf8_embedded_elf_x86_64_9FD8733DA4A6F228) {nosideeffects} : (!vm.ref<!hal.device>, !vm.buffer, !vm.buffer) -> (i32, i32) | |
%c1 = vm.const.i32 1 | |
%1 = vm.and.i32 %0#1, %c1 : i32 | |
%zero = vm.const.i32.zero | |
%2 = vm.select.i32 %0#0, %1, %zero : i32 | |
%c1_0 = vm.const.i32 1 | |
%c1_1 = vm.const.i32 1 | |
%zero_2 = vm.const.i32.zero | |
%c7 = vm.const.i32 7 | |
%c1_3 = vm.const.i32 1 | |
%c7_4 = vm.const.i32 7 | |
%c2 = vm.const.i32 2 | |
%c7_5 = vm.const.i32 7 | |
%ref_6 = vm.call.variadic @hal.descriptor_set_layout.create(%ref, %c1_1, [(%zero_2, %c7), (%c1_3, %c7_4), (%c2, %c7_5)]) {nosideeffects} : (!vm.ref<!hal.device>, i32, tuple<i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> | |
%zero_7 = vm.const.i32.zero | |
%ref_8 = vm.call.variadic @hal.executable_layout.create(%ref, %zero_7, [%ref_6]) {nosideeffects} : (!vm.ref<!hal.device>, i32, !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> | |
vm.global.store.i32 %2, @_device_query_0 : i32 | |
vm.global.store.ref %ref_8, @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
vm.cond_br %2, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%_utf8_embedded_elf_x86_64_9FD8733DA4A6F228_0 = vm.const.ref.rodata @_utf8_embedded_elf_x86_64_9FD8733DA4A6F228_0 : !vm.buffer | |
%simple_mul_dispatch_0_embedded_elf_x86_64 = vm.const.ref.rodata @simple_mul_dispatch_0_embedded_elf_x86_64 : !vm.buffer | |
%null = vm.const.ref.zero : !vm.buffer | |
%ref_9 = vm.call.variadic @hal.executable.create(%ref, %_utf8_embedded_elf_x86_64_9FD8733DA4A6F228_0, %simple_mul_dispatch_0_embedded_elf_x86_64, %null, [%_executable_layout_0]) {nosideeffects} : (!vm.ref<!hal.device>, !vm.buffer, !vm.buffer, !vm.buffer, !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> | |
vm.br ^bb3(%ref_9 : !vm.ref<!hal.executable>) | |
^bb2: // pred: ^bb0 | |
%null_10 = vm.const.ref.zero : !vm.ref<!hal.executable> | |
vm.br ^bb3(%null_10 : !vm.ref<!hal.executable>) | |
^bb3(%3: !vm.ref<!hal.executable>): // 2 preds: ^bb1, ^bb2 | |
vm.global.store.ref %3, @_executable_simple_mul_dispatch_0 : !vm.ref<!hal.executable> | |
vm.return | |
} | |
vm.import @hal.ex.shared_device() -> !vm.ref<!hal.device> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.ex.submit_and_wait(%device : !vm.ref<!hal.device>, %command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.allocate(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %allocation_size : i64) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.map.byte_buffer(%allocator : !vm.ref<!hal.allocator>, %try : i32, %memory_types : i32, %buffer_usage : i32, %source : !vm.buffer, %offset : i64, %length : i64) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.wrap.byte_buffer(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %source : !vm.buffer, %offset : i64, %length : i64) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.assert(%buffer : !vm.ref<!hal.buffer>, %message : !vm.buffer, %allocator : !vm.ref<!hal.allocator>, %minimum_length : i64, %memory_types : i32, %buffer_usage : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.subspan(%source_buffer : !vm.ref<!hal.buffer>, %source_offset : i64, %length : i64) -> !vm.ref<!hal.buffer> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer.length(%buffer : !vm.ref<!hal.buffer>) -> i64 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer.load(%source_buffer : !vm.ref<!hal.buffer>, %source_offset : i64, %length : i32) -> i32 attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.store(%value : i32, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i64, %length : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer_view.create(%buffer : !vm.ref<!hal.buffer>, %element_type : i32, %encoding_type : i32, %shape : i64 ...) -> !vm.ref<!hal.buffer_view> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.assert(%buffer_view : !vm.ref<!hal.buffer_view>, %message : !vm.buffer, %element_type : i32, %encoding_type : i32, %shape : i64 ...) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer_view.buffer(%buffer_view : !vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.byte_length(%buffer_view : !vm.ref<!hal.buffer_view>) -> i64 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.element_type(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.encoding_type(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.rank(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.dim(%buffer_view : !vm.ref<!hal.buffer_view>, %index : i32) -> i64 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.trace(%key : !vm.buffer, %operands : !vm.ref<!hal.buffer_view> ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.create(%device : !vm.ref<!hal.device>, %modes : i32, %command_categories : i32) -> !vm.ref<!hal.command_buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.begin(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.end(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.begin_debug_group(%command_buffer : !vm.ref<!hal.command_buffer>, %label : !vm.buffer) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.end_debug_group(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.execution_barrier(%command_buffer : !vm.ref<!hal.command_buffer>, %source_stage_mask : i32, %target_stage_mask : i32, %flags : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.fill_buffer(%command_buffer : !vm.ref<!hal.command_buffer>, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i64, %length : i64, %pattern : i32, %pattern_length : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.copy_buffer(%command_buffer : !vm.ref<!hal.command_buffer>, %source_buffer : !vm.ref<!hal.buffer>, %source_offset : i64, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i64, %length : i64) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.push_constants(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %offset : i32, %values : i32 ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.push_descriptor_set(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %set : i32, %bindings : tuple<i32, !vm.ref<!hal.buffer>, i64, i64> ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.bind_descriptor_set(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %set : i32, %descriptor_set : !vm.ref<!hal.descriptor_set>, %dynamic_offsets : i64 ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.dispatch(%command_buffer : !vm.ref<!hal.command_buffer>, %executable : !vm.ref<!hal.executable>, %entry_point : i32, %workgroup_x : i32, %workgroup_y : i32, %workgroup_z : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.dispatch.indirect(%command_buffer : !vm.ref<!hal.command_buffer>, %executable : !vm.ref<!hal.executable>, %entry_point : i32, %workgroups_buffer : !vm.ref<!hal.buffer>, %workgroups_offset : i64) attributes {sym_visibility = "private"} | |
vm.import @hal.descriptor_set.create(%device : !vm.ref<!hal.device>, %set_layout : !vm.ref<!hal.descriptor_set_layout>, %bindings : tuple<i32, !vm.ref<!hal.buffer>, i64, i64> ...) -> !vm.ref<!hal.descriptor_set> attributes {sym_visibility = "private"} | |
vm.import @hal.descriptor_set_layout.create(%device : !vm.ref<!hal.device>, %usage_type : i32, %bindings : tuple<i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.device.allocator(%device : !vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.device.query.i32(%device : !vm.ref<!hal.device>, %category : !vm.buffer, %key : !vm.buffer) -> (i32, i32) attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.executable.create(%device : !vm.ref<!hal.device>, %executable_format : !vm.buffer, %executable_data : !vm.buffer, %constants : !vm.buffer, %executable_layouts : !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.executable_layout.create(%device : !vm.ref<!hal.device>, %push_constants : i32, %set_layouts : !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.semaphore.create(%device : !vm.ref<!hal.device>, %initial_value : i64) -> !vm.ref<!hal.semaphore> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.semaphore.query(%semaphore : !vm.ref<!hal.semaphore>) -> (i32, i64) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.signal(%semaphore : !vm.ref<!hal.semaphore>, %new_value : i64) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.fail(%semaphore : !vm.ref<!hal.semaphore>, %status : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.await(%semaphore : !vm.ref<!hal.semaphore>, %min_value : i64) -> i32 attributes {sym_visibility = "private"} | |
vm.rodata private @_utf8_tensor_3C6209B4FD120BDC {alignment = 1 : i64} dense<[116, 101, 110, 115, 111, 114]> : vector<6xi8> | |
vm.rodata private @_utf8_tensor_3C6209B4FD120BDC_1 {alignment = 1 : i64} dense<[116, 101, 110, 115, 111, 114]> : vector<6xi8> | |
vm.rodata private @_utf8_tensor_3C6209B4FD120BDC_2 {alignment = 1 : i64} dense<[116, 101, 110, 115, 111, 114]> : vector<6xi8> | |
vm.rodata private @_utf8_tensor_3C6209B4FD120BDC_3 {alignment = 1 : i64} dense<[116, 101, 110, 115, 111, 114]> : vector<6xi8> | |
vm.func private @simple_mul(%arg0: !vm.ref<!hal.buffer_view>, %arg1: !vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer_view> { | |
%c2 = vm.const.i32 2 | |
%zero = vm.const.i32.zero | |
%c16 = vm.const.i32 16 | |
%c4 = vm.const.i32 4 | |
%c1 = vm.const.i32 1 | |
%c553648160 = vm.const.i32 553648160 | |
%c1_0 = vm.const.i32 1 | |
%_device_query_0 = vm.global.load.i32 @_device_query_0 : i32 | |
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%_executable_simple_mul_dispatch_0 = vm.global.load.ref @_executable_simple_mul_dispatch_0 : !vm.ref<!hal.executable> | |
%_utf8_tensor_3C6209B4FD120BDC = vm.const.ref.rodata @_utf8_tensor_3C6209B4FD120BDC : !vm.buffer | |
%c4_1 = vm.const.i64 4 | |
vm.call.variadic @hal.buffer_view.assert(%arg0, %_utf8_tensor_3C6209B4FD120BDC, %c553648160, %c1_0, [%c4_1]) : (!vm.ref<!hal.buffer_view>, !vm.buffer, i32, i32, i64 ...) | |
%ref = vm.call @hal.buffer_view.buffer(%arg0) {nosideeffects} : (!vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer> | |
%ref_2 = vm.call @hal.ex.shared_device() {nosideeffects} : () -> !vm.ref<!hal.device> | |
%ref_3 = vm.call @hal.device.allocator(%ref_2) {nosideeffects} : (!vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> | |
%_utf8_tensor_3C6209B4FD120BDC_1 = vm.const.ref.rodata @_utf8_tensor_3C6209B4FD120BDC_1 : !vm.buffer | |
%c16_4 = vm.const.i64 16 | |
%c16_5 = vm.const.i32 16 | |
%c3075 = vm.const.i32 3075 | |
vm.call @hal.buffer.assert(%ref, %_utf8_tensor_3C6209B4FD120BDC_1, %ref_3, %c16_4, %c16_5, %c3075) : (!vm.ref<!hal.buffer>, !vm.buffer, !vm.ref<!hal.allocator>, i64, i32, i32) -> () | |
%_utf8_tensor_3C6209B4FD120BDC_2 = vm.const.ref.rodata @_utf8_tensor_3C6209B4FD120BDC_2 : !vm.buffer | |
%c4_6 = vm.const.i64 4 | |
vm.call.variadic @hal.buffer_view.assert(%arg1, %_utf8_tensor_3C6209B4FD120BDC_2, %c553648160, %c1_0, [%c4_6]) : (!vm.ref<!hal.buffer_view>, !vm.buffer, i32, i32, i64 ...) | |
%ref_7 = vm.call @hal.buffer_view.buffer(%arg1) {nosideeffects} : (!vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer> | |
%_utf8_tensor_3C6209B4FD120BDC_3 = vm.const.ref.rodata @_utf8_tensor_3C6209B4FD120BDC_3 : !vm.buffer | |
%c16_8 = vm.const.i64 16 | |
%c16_9 = vm.const.i32 16 | |
%c3075_10 = vm.const.i32 3075 | |
vm.call @hal.buffer.assert(%ref_7, %_utf8_tensor_3C6209B4FD120BDC_3, %ref_3, %c16_8, %c16_9, %c3075_10) : (!vm.ref<!hal.buffer>, !vm.buffer, !vm.ref<!hal.allocator>, i64, i32, i32) -> () | |
%c50 = vm.const.i32 50 | |
%c150998019 = vm.const.i32 150998019 | |
%c16_11 = vm.const.i64 16 | |
%ref_12 = vm.call @hal.allocator.allocate(%ref_3, %c50, %c150998019, %c16_11) : (!vm.ref<!hal.allocator>, i32, i32, i64) -> !vm.ref<!hal.buffer> | |
%c17 = vm.const.i32 17 | |
%c3 = vm.const.i32 3 | |
%ref_13 = vm.call @hal.command_buffer.create(%ref_2, %c17, %c3) : (!vm.ref<!hal.device>, i32, i32) -> !vm.ref<!hal.command_buffer> | |
vm.call @hal.command_buffer.begin(%ref_13) : (!vm.ref<!hal.command_buffer>) -> () | |
vm.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%zero_14 = vm.const.i64.zero | |
%c16_15 = vm.const.i64 16 | |
%zero_16 = vm.const.i64.zero | |
%c16_17 = vm.const.i64 16 | |
%zero_18 = vm.const.i64.zero | |
%c16_19 = vm.const.i64 16 | |
vm.call.variadic @hal.command_buffer.push_descriptor_set(%ref_13, %_executable_layout_0, %zero, [(%zero, %ref, %zero_14, %c16_15), (%c1, %ref_7, %zero_16, %c16_17), (%c2, %ref_12, %zero_18, %c16_19)]) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable_layout>, i32, tuple<i32, !vm.ref<!hal.buffer>, i64, i64> ...) | |
%zero_20 = vm.const.i32.zero | |
vm.call @hal.command_buffer.dispatch(%ref_13, %_executable_simple_mul_dispatch_0, %zero_20, %c1, %c1, %c1) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32) -> () | |
%c28 = vm.const.i32 28 | |
%c13 = vm.const.i32 13 | |
%zero_21 = vm.const.i32.zero | |
vm.call @hal.command_buffer.execution_barrier(%ref_13, %c28, %c13, %zero_21) : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.end(%ref_13) : (!vm.ref<!hal.command_buffer>) -> () | |
vm.call @hal.ex.submit_and_wait(%ref_2, %ref_13) : (!vm.ref<!hal.device>, !vm.ref<!hal.command_buffer>) -> () | |
%c4_22 = vm.const.i64 4 | |
%ref_23 = vm.call.variadic @hal.buffer_view.create(%ref_12, %c553648160, %c1_0, [%c4_22]) {nosideeffects} : (!vm.ref<!hal.buffer>, i32, i32, i64 ...) -> !vm.ref<!hal.buffer_view> | |
vm.return %ref_23 : !vm.ref<!hal.buffer_view> | |
^bb2: // pred: ^bb0 | |
%c2_24 = vm.const.i32 2 | |
vm.fail %c2_24, "device not supported in the compiled configuration" | |
} | |
vm.export @simple_mul | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::VM::DeduplicateRodataPass //----- // | |
vm.module public @module { | |
vm.global.i32 private @_device_query_0 : i32 | |
vm.global.ref private @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
vm.global.ref private @_executable_simple_mul_dispatch_0 : !vm.ref<!hal.executable> | |
vm.rodata private @simple_mul_dispatch_0_embedded_elf_x86_64 {alignment = 16 : i64, mime_type = "application/x-elf"} dense<"0x7F454C4602010100000000000000000003003E000100000030130000000000004000000000000000E00900000000000000000000400038000700400014001200060000000400000040000000000000004000000000000000400000000000000088010000000000008801000000000000080000000000000001000000040000000000000000000000000000000000000000000000000000002D030000000000002D030000000000000010000000000000010000000500000030030000000000003013000000000000301300000000000031000000000000003100000000000000001000000000000001000000060000007003000000000000702300000000000070230000000000003801000000000000380100000000000000100000000000000200000006000000E803000000000000E823000000000000E823000000000000C000000000000000C000000000000000080000000000000052E57464040000007003000000000000702300000000000070230000000000003801000000000000900C000000000000010000000000000051E574640600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000120006005013000000000000110000000000000002000000020000000000000001000000000000000000000000697265655F68616C5F65786563757461626C655F6C6962726172795F7175657279000000000000782300000000000008000000000000001003000000000000882300000000000008000000000000003013000000000000902300000000000008000000000000001003000000000000982300000000000008000000000000002C03000000000000A02300000000000008000000000000007023000000000000C02300000000000008000000000000008823000000000000C82300000000000008000000000000002803000000000000D02300000000000008000000000000009023000000000000D8230000000000000800000000000000982300000000000073696D706C655F6D756C5F64697370617463685F300000000000000000000000554889E5488B4620488B08488B5008488B40100F28010F59020F290031C05DC331C083FF02488D0D44100000480F44C1C3000000000000000000000000000000020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001E000000000000000800000000000000FBFFFF6F000000000100000000000000070000000000000038020000000000000800000000000000D80000000000000009000000000000001800000000000000F9FFFF6F0000000009000000000000000600000000000000C8010000000000000B000000000000001800000000000000050000000000000010020000000000000A0000000000000023000000000000000400000000000000F80100000000000000000000000000000000000000000000011101250E1305030E10171B0EB44219110112060000022E006E0E030E3F19200B0000032E0111011206401831130000041D00311311011206580B590B570B000000590000000400000000000801000000000200070000000000000005000000301300000000000020000000020700000007000000010330130000000000002000000001562A000000042A0000003F130000000000001100000002010100006D6C6972002F0073696D706C655F6D756C5F64697370617463685F3000280000000200000000005D0000002A00000073696D706C655F6D756C5F64697370617463685F3000000000000E0000000200000000005D0000000000000000000000000014000000FFFFFFFF040008000178100C0708900100000000240000000000000030130000000000002000000000000000410E108602430D065B0C070800000000140000000000000050130000000000001100000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000D300000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000D300000000000000140000000000000000000000000000000900000000000000140000000000000000000000000000002800000000000000140000000000000000000000000000001E00000000000000140000000000000000000000000000002800000000000000140000000000000000000000000000006F000000000000001400000000000000000000000000000003010000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003
00000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000B8000000000000001400000000000000000000000000000052010000000000001400000000000000000000000000000042030000000000009E00000004007F000000010101FB0E0D0001010101000000010000012F7573722F6C6F63616C2F676F6F676C652F686F6D652F62656E76616E696B2F7372632F6972656500003C756E6B6E6F776E3E0001000072756E74696D652F7372632F697265652F72756E74696D652F74657374646174612F73696D706C655F6D756C2E6D6C69720001000000000902301300000000000011040205010A4B0508AD080001014952454500000000000000000000000000000000000000000000000000002300000000020800E8230000000000000000000000000000010000001200060050130000000000001100000000000000002E64796E73796D002E68617368002E64796E737472002E72656C612E64796E002E726F64617461002E74657874002E646174612E72656C2E726F002E64796E616D6963002E64656275675F616262726576002E64656275675F696E666F002E64656275675F737472002E64656275675F7075626E616D6573002E64656275675F7075627479706573002E64656275675F6672616D65002E64656275675F6C696E65002E636F6D6D656E74002E73796D746162002E7368737472746162002E7374727461620000697265655F68616C5F65786563757461626C655F6C6962726172795F7175657279005F44594E414D49430000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000B0000000200000000000000C801000000000000C801000000000000300000000000000003000000010000000800000000000000180000000000000009000000050000000200000000000000F801000000000000F80100000000000018000000000000000100000000000000040000000000000004000000000000000F0000000300000002000000000000001002000000000000100200000000000023000000000000000000000000000000010000000000000000000000000000001700000004000000020000000000000038020000000000003802000000000000D80000000000000001000000000000000800000000000000180000000000000021000000010000000200000000000000100300000000000010030000000000001D0000000000000000000000000000000800000000000000000000000000000029000000010000000600000000000000301300000000000030
0300000000000031000000000000000000000000000000100000000000000000000000000000002F0000000100000003000000000000007023000000000000700300000000000078000000000000000000000000000000100000000000000000000000000000003C000000060000000300000000000000E823000000000000E803000000000000C000000000000000030000000000000008000000000000001000000000000000450000000100000000000000000000000000000000000000A8040000000000004200000000000000000000000000000001000000000000000000000000000000530000000100000000000000000000000000000000000000EA040000000000005D000000000000000000000000000000010000000000000000000000000000005F000000010000003000000000000000000000000000000047050000000000001D000000000000000000000000000000010000000000000001000000000000006A000000010000000000000000000000000000000000000064050000000000002C000000000000000000000000000000010000000000000000000000000000007A0000000100000000000000000000000000000000000000900500000000000012000000000000000000000000000000010000000000000000000000000000008A0000000100000000000000000000000000000000000000A8050000000000005002000000000000000000000000000008000000000000000000000000000000970000000100000000000000000000000000000000000000F807000000000000A200000000000000000000000000000001000000000000000000000000000000A300000001000000300000000000000000000000000000009A080000000000000500000000000000000000000000000001000000000000000100000000000000AC0000000200000000000000000000000000000000000000A0080000000000004800000000000000130000000200000008000000000000001800000000000000B40000000300000000000000000000000000000000000000E808000000000000C600000000000000000000000000000001000000000000000000000000000000BE0000000300000000000000000000000000000000000000AE090000000000002C00000000000000000000000000000001000000000000000000000000000000"> : vector<3808xi8> | |
vm.rodata private @_utf8_hal_executable_format_EAB228F999C2D3A1 {alignment = 1 : i64} dense<[104, 97, 108, 46, 101, 120, 101, 99, 117, 116, 97, 98, 108, 101, 46, 102, 111, 114, 109, 97, 116]> : vector<21xi8> | |
vm.rodata private @_utf8_embedded_elf_x86_64_9FD8733DA4A6F228 {alignment = 1 : i64} dense<[101, 109, 98, 101, 100, 100, 101, 100, 45, 101, 108, 102, 45, 120, 56, 54, 95, 54, 52]> : vector<19xi8> | |
vm.initializer { | |
%ref = vm.call @hal.ex.shared_device() {nosideeffects} : () -> !vm.ref<!hal.device> | |
%_utf8_hal_executable_format_EAB228F999C2D3A1 = vm.const.ref.rodata @_utf8_hal_executable_format_EAB228F999C2D3A1 : !vm.buffer | |
%_utf8_embedded_elf_x86_64_9FD8733DA4A6F228 = vm.const.ref.rodata @_utf8_embedded_elf_x86_64_9FD8733DA4A6F228 : !vm.buffer | |
%0:2 = vm.call @hal.device.query.i32(%ref, %_utf8_hal_executable_format_EAB228F999C2D3A1, %_utf8_embedded_elf_x86_64_9FD8733DA4A6F228) {nosideeffects} : (!vm.ref<!hal.device>, !vm.buffer, !vm.buffer) -> (i32, i32) | |
%c1 = vm.const.i32 1 | |
%1 = vm.and.i32 %0#1, %c1 : i32 | |
%zero = vm.const.i32.zero | |
%2 = vm.select.i32 %0#0, %1, %zero : i32 | |
%c1_0 = vm.const.i32 1 | |
%c1_1 = vm.const.i32 1 | |
%zero_2 = vm.const.i32.zero | |
%c7 = vm.const.i32 7 | |
%c1_3 = vm.const.i32 1 | |
%c7_4 = vm.const.i32 7 | |
%c2 = vm.const.i32 2 | |
%c7_5 = vm.const.i32 7 | |
%ref_6 = vm.call.variadic @hal.descriptor_set_layout.create(%ref, %c1_1, [(%zero_2, %c7), (%c1_3, %c7_4), (%c2, %c7_5)]) {nosideeffects} : (!vm.ref<!hal.device>, i32, tuple<i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> | |
%zero_7 = vm.const.i32.zero | |
%ref_8 = vm.call.variadic @hal.executable_layout.create(%ref, %zero_7, [%ref_6]) {nosideeffects} : (!vm.ref<!hal.device>, i32, !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> | |
vm.global.store.i32 %2, @_device_query_0 : i32 | |
vm.global.store.ref %ref_8, @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
vm.cond_br %2, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%_utf8_embedded_elf_x86_64_9FD8733DA4A6F228_9 = vm.const.ref.rodata @_utf8_embedded_elf_x86_64_9FD8733DA4A6F228 : !vm.buffer | |
%simple_mul_dispatch_0_embedded_elf_x86_64 = vm.const.ref.rodata @simple_mul_dispatch_0_embedded_elf_x86_64 : !vm.buffer | |
%null = vm.const.ref.zero : !vm.buffer | |
%ref_10 = vm.call.variadic @hal.executable.create(%ref, %_utf8_embedded_elf_x86_64_9FD8733DA4A6F228_9, %simple_mul_dispatch_0_embedded_elf_x86_64, %null, [%_executable_layout_0]) {nosideeffects} : (!vm.ref<!hal.device>, !vm.buffer, !vm.buffer, !vm.buffer, !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> | |
vm.br ^bb3(%ref_10 : !vm.ref<!hal.executable>) | |
^bb2: // pred: ^bb0 | |
%null_11 = vm.const.ref.zero : !vm.ref<!hal.executable> | |
vm.br ^bb3(%null_11 : !vm.ref<!hal.executable>) | |
^bb3(%3: !vm.ref<!hal.executable>): // 2 preds: ^bb1, ^bb2 | |
vm.global.store.ref %3, @_executable_simple_mul_dispatch_0 : !vm.ref<!hal.executable> | |
vm.return | |
} | |
vm.import @hal.ex.shared_device() -> !vm.ref<!hal.device> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.ex.submit_and_wait(%device : !vm.ref<!hal.device>, %command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.allocate(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %allocation_size : i64) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.map.byte_buffer(%allocator : !vm.ref<!hal.allocator>, %try : i32, %memory_types : i32, %buffer_usage : i32, %source : !vm.buffer, %offset : i64, %length : i64) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.wrap.byte_buffer(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %source : !vm.buffer, %offset : i64, %length : i64) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.assert(%buffer : !vm.ref<!hal.buffer>, %message : !vm.buffer, %allocator : !vm.ref<!hal.allocator>, %minimum_length : i64, %memory_types : i32, %buffer_usage : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.subspan(%source_buffer : !vm.ref<!hal.buffer>, %source_offset : i64, %length : i64) -> !vm.ref<!hal.buffer> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer.length(%buffer : !vm.ref<!hal.buffer>) -> i64 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer.load(%source_buffer : !vm.ref<!hal.buffer>, %source_offset : i64, %length : i32) -> i32 attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.store(%value : i32, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i64, %length : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer_view.create(%buffer : !vm.ref<!hal.buffer>, %element_type : i32, %encoding_type : i32, %shape : i64 ...) -> !vm.ref<!hal.buffer_view> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.assert(%buffer_view : !vm.ref<!hal.buffer_view>, %message : !vm.buffer, %element_type : i32, %encoding_type : i32, %shape : i64 ...) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer_view.buffer(%buffer_view : !vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.byte_length(%buffer_view : !vm.ref<!hal.buffer_view>) -> i64 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.element_type(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.encoding_type(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.rank(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.dim(%buffer_view : !vm.ref<!hal.buffer_view>, %index : i32) -> i64 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.trace(%key : !vm.buffer, %operands : !vm.ref<!hal.buffer_view> ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.create(%device : !vm.ref<!hal.device>, %modes : i32, %command_categories : i32) -> !vm.ref<!hal.command_buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.begin(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.end(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.begin_debug_group(%command_buffer : !vm.ref<!hal.command_buffer>, %label : !vm.buffer) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.end_debug_group(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.execution_barrier(%command_buffer : !vm.ref<!hal.command_buffer>, %source_stage_mask : i32, %target_stage_mask : i32, %flags : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.fill_buffer(%command_buffer : !vm.ref<!hal.command_buffer>, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i64, %length : i64, %pattern : i32, %pattern_length : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.copy_buffer(%command_buffer : !vm.ref<!hal.command_buffer>, %source_buffer : !vm.ref<!hal.buffer>, %source_offset : i64, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i64, %length : i64) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.push_constants(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %offset : i32, %values : i32 ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.push_descriptor_set(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %set : i32, %bindings : tuple<i32, !vm.ref<!hal.buffer>, i64, i64> ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.bind_descriptor_set(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %set : i32, %descriptor_set : !vm.ref<!hal.descriptor_set>, %dynamic_offsets : i64 ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.dispatch(%command_buffer : !vm.ref<!hal.command_buffer>, %executable : !vm.ref<!hal.executable>, %entry_point : i32, %workgroup_x : i32, %workgroup_y : i32, %workgroup_z : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.dispatch.indirect(%command_buffer : !vm.ref<!hal.command_buffer>, %executable : !vm.ref<!hal.executable>, %entry_point : i32, %workgroups_buffer : !vm.ref<!hal.buffer>, %workgroups_offset : i64) attributes {sym_visibility = "private"} | |
vm.import @hal.descriptor_set.create(%device : !vm.ref<!hal.device>, %set_layout : !vm.ref<!hal.descriptor_set_layout>, %bindings : tuple<i32, !vm.ref<!hal.buffer>, i64, i64> ...) -> !vm.ref<!hal.descriptor_set> attributes {sym_visibility = "private"} | |
vm.import @hal.descriptor_set_layout.create(%device : !vm.ref<!hal.device>, %usage_type : i32, %bindings : tuple<i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.device.allocator(%device : !vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.device.query.i32(%device : !vm.ref<!hal.device>, %category : !vm.buffer, %key : !vm.buffer) -> (i32, i32) attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.executable.create(%device : !vm.ref<!hal.device>, %executable_format : !vm.buffer, %executable_data : !vm.buffer, %constants : !vm.buffer, %executable_layouts : !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.executable_layout.create(%device : !vm.ref<!hal.device>, %push_constants : i32, %set_layouts : !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.semaphore.create(%device : !vm.ref<!hal.device>, %initial_value : i64) -> !vm.ref<!hal.semaphore> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.semaphore.query(%semaphore : !vm.ref<!hal.semaphore>) -> (i32, i64) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.signal(%semaphore : !vm.ref<!hal.semaphore>, %new_value : i64) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.fail(%semaphore : !vm.ref<!hal.semaphore>, %status : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.await(%semaphore : !vm.ref<!hal.semaphore>, %min_value : i64) -> i32 attributes {sym_visibility = "private"} | |
vm.rodata private @_utf8_tensor_3C6209B4FD120BDC {alignment = 1 : i64} dense<[116, 101, 110, 115, 111, 114]> : vector<6xi8> | |
vm.func private @simple_mul(%arg0: !vm.ref<!hal.buffer_view>, %arg1: !vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer_view> { | |
%c2 = vm.const.i32 2 | |
%zero = vm.const.i32.zero | |
%c16 = vm.const.i32 16 | |
%c4 = vm.const.i32 4 | |
%c1 = vm.const.i32 1 | |
%c553648160 = vm.const.i32 553648160 | |
%c1_0 = vm.const.i32 1 | |
%_device_query_0 = vm.global.load.i32 @_device_query_0 : i32 | |
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%_executable_simple_mul_dispatch_0 = vm.global.load.ref @_executable_simple_mul_dispatch_0 : !vm.ref<!hal.executable> | |
%_utf8_tensor_3C6209B4FD120BDC = vm.const.ref.rodata @_utf8_tensor_3C6209B4FD120BDC : !vm.buffer | |
%c4_1 = vm.const.i64 4 | |
vm.call.variadic @hal.buffer_view.assert(%arg0, %_utf8_tensor_3C6209B4FD120BDC, %c553648160, %c1_0, [%c4_1]) : (!vm.ref<!hal.buffer_view>, !vm.buffer, i32, i32, i64 ...) | |
%ref = vm.call @hal.buffer_view.buffer(%arg0) {nosideeffects} : (!vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer> | |
%ref_2 = vm.call @hal.ex.shared_device() {nosideeffects} : () -> !vm.ref<!hal.device> | |
%ref_3 = vm.call @hal.device.allocator(%ref_2) {nosideeffects} : (!vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> | |
%_utf8_tensor_3C6209B4FD120BDC_4 = vm.const.ref.rodata @_utf8_tensor_3C6209B4FD120BDC : !vm.buffer | |
%c16_5 = vm.const.i64 16 | |
%c16_6 = vm.const.i32 16 | |
%c3075 = vm.const.i32 3075 | |
vm.call @hal.buffer.assert(%ref, %_utf8_tensor_3C6209B4FD120BDC_4, %ref_3, %c16_5, %c16_6, %c3075) : (!vm.ref<!hal.buffer>, !vm.buffer, !vm.ref<!hal.allocator>, i64, i32, i32) -> () | |
%_utf8_tensor_3C6209B4FD120BDC_7 = vm.const.ref.rodata @_utf8_tensor_3C6209B4FD120BDC : !vm.buffer | |
%c4_8 = vm.const.i64 4 | |
vm.call.variadic @hal.buffer_view.assert(%arg1, %_utf8_tensor_3C6209B4FD120BDC_7, %c553648160, %c1_0, [%c4_8]) : (!vm.ref<!hal.buffer_view>, !vm.buffer, i32, i32, i64 ...) | |
%ref_9 = vm.call @hal.buffer_view.buffer(%arg1) {nosideeffects} : (!vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer> | |
%_utf8_tensor_3C6209B4FD120BDC_10 = vm.const.ref.rodata @_utf8_tensor_3C6209B4FD120BDC : !vm.buffer | |
%c16_11 = vm.const.i64 16 | |
%c16_12 = vm.const.i32 16 | |
%c3075_13 = vm.const.i32 3075 | |
vm.call @hal.buffer.assert(%ref_9, %_utf8_tensor_3C6209B4FD120BDC_10, %ref_3, %c16_11, %c16_12, %c3075_13) : (!vm.ref<!hal.buffer>, !vm.buffer, !vm.ref<!hal.allocator>, i64, i32, i32) -> () | |
%c50 = vm.const.i32 50 | |
%c150998019 = vm.const.i32 150998019 | |
%c16_14 = vm.const.i64 16 | |
%ref_15 = vm.call @hal.allocator.allocate(%ref_3, %c50, %c150998019, %c16_14) : (!vm.ref<!hal.allocator>, i32, i32, i64) -> !vm.ref<!hal.buffer> | |
%c17 = vm.const.i32 17 | |
%c3 = vm.const.i32 3 | |
%ref_16 = vm.call @hal.command_buffer.create(%ref_2, %c17, %c3) : (!vm.ref<!hal.device>, i32, i32) -> !vm.ref<!hal.command_buffer> | |
vm.call @hal.command_buffer.begin(%ref_16) : (!vm.ref<!hal.command_buffer>) -> () | |
vm.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%zero_17 = vm.const.i64.zero | |
%c16_18 = vm.const.i64 16 | |
%zero_19 = vm.const.i64.zero | |
%c16_20 = vm.const.i64 16 | |
%zero_21 = vm.const.i64.zero | |
%c16_22 = vm.const.i64 16 | |
vm.call.variadic @hal.command_buffer.push_descriptor_set(%ref_16, %_executable_layout_0, %zero, [(%zero, %ref, %zero_17, %c16_18), (%c1, %ref_9, %zero_19, %c16_20), (%c2, %ref_15, %zero_21, %c16_22)]) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable_layout>, i32, tuple<i32, !vm.ref<!hal.buffer>, i64, i64> ...) | |
%zero_23 = vm.const.i32.zero | |
vm.call @hal.command_buffer.dispatch(%ref_16, %_executable_simple_mul_dispatch_0, %zero_23, %c1, %c1, %c1) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32) -> () | |
%c28 = vm.const.i32 28 | |
%c13 = vm.const.i32 13 | |
%zero_24 = vm.const.i32.zero | |
vm.call @hal.command_buffer.execution_barrier(%ref_16, %c28, %c13, %zero_24) : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.end(%ref_16) : (!vm.ref<!hal.command_buffer>) -> () | |
vm.call @hal.ex.submit_and_wait(%ref_2, %ref_16) : (!vm.ref<!hal.device>, !vm.ref<!hal.command_buffer>) -> () | |
%c4_25 = vm.const.i64 4 | |
%ref_26 = vm.call.variadic @hal.buffer_view.create(%ref_15, %c553648160, %c1_0, [%c4_25]) {nosideeffects} : (!vm.ref<!hal.buffer>, i32, i32, i64 ...) -> !vm.ref<!hal.buffer_view> | |
vm.return %ref_26 : !vm.ref<!hal.buffer_view> | |
^bb2: // pred: ^bb0 | |
%c2_27 = vm.const.i32 2 | |
vm.fail %c2_27, "device not supported in the compiled configuration" | |
} | |
vm.export @simple_mul | |
} | |
// -----// IR Dump After mlir::iree_compiler::IREE::VM::GlobalInitializationPass //----- // | |
vm.module public @module { | |
vm.global.i32 private mutable @_device_query_0 : i32 | |
vm.global.ref private mutable @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
vm.global.ref private mutable @_executable_simple_mul_dispatch_0 : !vm.ref<!hal.executable> | |
vm.rodata private @simple_mul_dispatch_0_embedded_elf_x86_64 {alignment = 16 : i64, mime_type = "application/x-elf"} dense<"0x7F454C4602010100000000000000000003003E000100000030130000000000004000000000000000E00900000000000000000000400038000700400014001200060000000400000040000000000000004000000000000000400000000000000088010000000000008801000000000000080000000000000001000000040000000000000000000000000000000000000000000000000000002D030000000000002D030000000000000010000000000000010000000500000030030000000000003013000000000000301300000000000031000000000000003100000000000000001000000000000001000000060000007003000000000000702300000000000070230000000000003801000000000000380100000000000000100000000000000200000006000000E803000000000000E823000000000000E823000000000000C000000000000000C000000000000000080000000000000052E57464040000007003000000000000702300000000000070230000000000003801000000000000900C000000000000010000000000000051E574640600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000120006005013000000000000110000000000000002000000020000000000000001000000000000000000000000697265655F68616C5F65786563757461626C655F6C6962726172795F7175657279000000000000782300000000000008000000000000001003000000000000882300000000000008000000000000003013000000000000902300000000000008000000000000001003000000000000982300000000000008000000000000002C03000000000000A02300000000000008000000000000007023000000000000C02300000000000008000000000000008823000000000000C82300000000000008000000000000002803000000000000D02300000000000008000000000000009023000000000000D8230000000000000800000000000000982300000000000073696D706C655F6D756C5F64697370617463685F300000000000000000000000554889E5488B4620488B08488B5008488B40100F28010F59020F290031C05DC331C083FF02488D0D44100000480F44C1C3000000000000000000000000000000020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001E000000000000000800000000000000FBFFFF6F000000000100000000000000070000000000000038020000000000000800000000000000D80000000000000009000000000000001800000000000000F9FFFF6F0000000009000000000000000600000000000000C8010000000000000B000000000000001800000000000000050000000000000010020000000000000A0000000000000023000000000000000400000000000000F80100000000000000000000000000000000000000000000011101250E1305030E10171B0EB44219110112060000022E006E0E030E3F19200B0000032E0111011206401831130000041D00311311011206580B590B570B000000590000000400000000000801000000000200070000000000000005000000301300000000000020000000020700000007000000010330130000000000002000000001562A000000042A0000003F130000000000001100000002010100006D6C6972002F0073696D706C655F6D756C5F64697370617463685F3000280000000200000000005D0000002A00000073696D706C655F6D756C5F64697370617463685F3000000000000E0000000200000000005D0000000000000000000000000014000000FFFFFFFF040008000178100C0708900100000000240000000000000030130000000000002000000000000000410E108602430D065B0C070800000000140000000000000050130000000000001100000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000D300000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000D300000000000000140000000000000000000000000000000900000000000000140000000000000000000000000000002800000000000000140000000000000000000000000000001E00000000000000140000000000000000000000000000002800000000000000140000000000000000000000000000006F000000000000001400000000000000000000000000000003010000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003
00000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000B8000000000000001400000000000000000000000000000052010000000000001400000000000000000000000000000042030000000000009E00000004007F000000010101FB0E0D0001010101000000010000012F7573722F6C6F63616C2F676F6F676C652F686F6D652F62656E76616E696B2F7372632F6972656500003C756E6B6E6F776E3E0001000072756E74696D652F7372632F697265652F72756E74696D652F74657374646174612F73696D706C655F6D756C2E6D6C69720001000000000902301300000000000011040205010A4B0508AD080001014952454500000000000000000000000000000000000000000000000000002300000000020800E8230000000000000000000000000000010000001200060050130000000000001100000000000000002E64796E73796D002E68617368002E64796E737472002E72656C612E64796E002E726F64617461002E74657874002E646174612E72656C2E726F002E64796E616D6963002E64656275675F616262726576002E64656275675F696E666F002E64656275675F737472002E64656275675F7075626E616D6573002E64656275675F7075627479706573002E64656275675F6672616D65002E64656275675F6C696E65002E636F6D6D656E74002E73796D746162002E7368737472746162002E7374727461620000697265655F68616C5F65786563757461626C655F6C6962726172795F7175657279005F44594E414D49430000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000B0000000200000000000000C801000000000000C801000000000000300000000000000003000000010000000800000000000000180000000000000009000000050000000200000000000000F801000000000000F80100000000000018000000000000000100000000000000040000000000000004000000000000000F0000000300000002000000000000001002000000000000100200000000000023000000000000000000000000000000010000000000000000000000000000001700000004000000020000000000000038020000000000003802000000000000D80000000000000001000000000000000800000000000000180000000000000021000000010000000200000000000000100300000000000010030000000000001D0000000000000000000000000000000800000000000000000000000000000029000000010000000600000000000000301300000000000030
0300000000000031000000000000000000000000000000100000000000000000000000000000002F0000000100000003000000000000007023000000000000700300000000000078000000000000000000000000000000100000000000000000000000000000003C000000060000000300000000000000E823000000000000E803000000000000C000000000000000030000000000000008000000000000001000000000000000450000000100000000000000000000000000000000000000A8040000000000004200000000000000000000000000000001000000000000000000000000000000530000000100000000000000000000000000000000000000EA040000000000005D000000000000000000000000000000010000000000000000000000000000005F000000010000003000000000000000000000000000000047050000000000001D000000000000000000000000000000010000000000000001000000000000006A000000010000000000000000000000000000000000000064050000000000002C000000000000000000000000000000010000000000000000000000000000007A0000000100000000000000000000000000000000000000900500000000000012000000000000000000000000000000010000000000000000000000000000008A0000000100000000000000000000000000000000000000A8050000000000005002000000000000000000000000000008000000000000000000000000000000970000000100000000000000000000000000000000000000F807000000000000A200000000000000000000000000000001000000000000000000000000000000A300000001000000300000000000000000000000000000009A080000000000000500000000000000000000000000000001000000000000000100000000000000AC0000000200000000000000000000000000000000000000A0080000000000004800000000000000130000000200000008000000000000001800000000000000B40000000300000000000000000000000000000000000000E808000000000000C600000000000000000000000000000001000000000000000000000000000000BE0000000300000000000000000000000000000000000000AE090000000000002C00000000000000000000000000000001000000000000000000000000000000"> : vector<3808xi8> | |
vm.rodata private @_utf8_hal_executable_format_EAB228F999C2D3A1 {alignment = 1 : i64} dense<[104, 97, 108, 46, 101, 120, 101, 99, 117, 116, 97, 98, 108, 101, 46, 102, 111, 114, 109, 97, 116]> : vector<21xi8> | |
vm.rodata private @_utf8_embedded_elf_x86_64_9FD8733DA4A6F228 {alignment = 1 : i64} dense<[101, 109, 98, 101, 100, 100, 101, 100, 45, 101, 108, 102, 45, 120, 56, 54, 95, 54, 52]> : vector<19xi8> | |
vm.import @hal.ex.shared_device() -> !vm.ref<!hal.device> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.ex.submit_and_wait(%device : !vm.ref<!hal.device>, %command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.allocate(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %allocation_size : i64) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.map.byte_buffer(%allocator : !vm.ref<!hal.allocator>, %try : i32, %memory_types : i32, %buffer_usage : i32, %source : !vm.buffer, %offset : i64, %length : i64) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.wrap.byte_buffer(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %source : !vm.buffer, %offset : i64, %length : i64) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.assert(%buffer : !vm.ref<!hal.buffer>, %message : !vm.buffer, %allocator : !vm.ref<!hal.allocator>, %minimum_length : i64, %memory_types : i32, %buffer_usage : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.subspan(%source_buffer : !vm.ref<!hal.buffer>, %source_offset : i64, %length : i64) -> !vm.ref<!hal.buffer> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer.length(%buffer : !vm.ref<!hal.buffer>) -> i64 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer.load(%source_buffer : !vm.ref<!hal.buffer>, %source_offset : i64, %length : i32) -> i32 attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.store(%value : i32, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i64, %length : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer_view.create(%buffer : !vm.ref<!hal.buffer>, %element_type : i32, %encoding_type : i32, %shape : i64 ...) -> !vm.ref<!hal.buffer_view> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.assert(%buffer_view : !vm.ref<!hal.buffer_view>, %message : !vm.buffer, %element_type : i32, %encoding_type : i32, %shape : i64 ...) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer_view.buffer(%buffer_view : !vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.byte_length(%buffer_view : !vm.ref<!hal.buffer_view>) -> i64 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.element_type(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.encoding_type(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.rank(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.dim(%buffer_view : !vm.ref<!hal.buffer_view>, %index : i32) -> i64 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.trace(%key : !vm.buffer, %operands : !vm.ref<!hal.buffer_view> ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.create(%device : !vm.ref<!hal.device>, %modes : i32, %command_categories : i32) -> !vm.ref<!hal.command_buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.begin(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.end(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.begin_debug_group(%command_buffer : !vm.ref<!hal.command_buffer>, %label : !vm.buffer) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.end_debug_group(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.execution_barrier(%command_buffer : !vm.ref<!hal.command_buffer>, %source_stage_mask : i32, %target_stage_mask : i32, %flags : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.fill_buffer(%command_buffer : !vm.ref<!hal.command_buffer>, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i64, %length : i64, %pattern : i32, %pattern_length : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.copy_buffer(%command_buffer : !vm.ref<!hal.command_buffer>, %source_buffer : !vm.ref<!hal.buffer>, %source_offset : i64, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i64, %length : i64) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.push_constants(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %offset : i32, %values : i32 ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.push_descriptor_set(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %set : i32, %bindings : tuple<i32, !vm.ref<!hal.buffer>, i64, i64> ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.bind_descriptor_set(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %set : i32, %descriptor_set : !vm.ref<!hal.descriptor_set>, %dynamic_offsets : i64 ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.dispatch(%command_buffer : !vm.ref<!hal.command_buffer>, %executable : !vm.ref<!hal.executable>, %entry_point : i32, %workgroup_x : i32, %workgroup_y : i32, %workgroup_z : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.dispatch.indirect(%command_buffer : !vm.ref<!hal.command_buffer>, %executable : !vm.ref<!hal.executable>, %entry_point : i32, %workgroups_buffer : !vm.ref<!hal.buffer>, %workgroups_offset : i64) attributes {sym_visibility = "private"} | |
vm.import @hal.descriptor_set.create(%device : !vm.ref<!hal.device>, %set_layout : !vm.ref<!hal.descriptor_set_layout>, %bindings : tuple<i32, !vm.ref<!hal.buffer>, i64, i64> ...) -> !vm.ref<!hal.descriptor_set> attributes {sym_visibility = "private"} | |
vm.import @hal.descriptor_set_layout.create(%device : !vm.ref<!hal.device>, %usage_type : i32, %bindings : tuple<i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.device.allocator(%device : !vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.device.query.i32(%device : !vm.ref<!hal.device>, %category : !vm.buffer, %key : !vm.buffer) -> (i32, i32) attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.executable.create(%device : !vm.ref<!hal.device>, %executable_format : !vm.buffer, %executable_data : !vm.buffer, %constants : !vm.buffer, %executable_layouts : !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.executable_layout.create(%device : !vm.ref<!hal.device>, %push_constants : i32, %set_layouts : !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.semaphore.create(%device : !vm.ref<!hal.device>, %initial_value : i64) -> !vm.ref<!hal.semaphore> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.semaphore.query(%semaphore : !vm.ref<!hal.semaphore>) -> (i32, i64) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.signal(%semaphore : !vm.ref<!hal.semaphore>, %new_value : i64) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.fail(%semaphore : !vm.ref<!hal.semaphore>, %status : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.await(%semaphore : !vm.ref<!hal.semaphore>, %min_value : i64) -> i32 attributes {sym_visibility = "private"} | |
vm.rodata private @_utf8_tensor_3C6209B4FD120BDC {alignment = 1 : i64} dense<[116, 101, 110, 115, 111, 114]> : vector<6xi8> | |
vm.func private @simple_mul(%arg0: !vm.ref<!hal.buffer_view>, %arg1: !vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer_view> { | |
%c2 = vm.const.i32 2 | |
%zero = vm.const.i32.zero | |
%c16 = vm.const.i32 16 | |
%c4 = vm.const.i32 4 | |
%c1 = vm.const.i32 1 | |
%c553648160 = vm.const.i32 553648160 | |
%c1_0 = vm.const.i32 1 | |
%_device_query_0 = vm.global.load.i32 @_device_query_0 : i32 | |
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%_executable_simple_mul_dispatch_0 = vm.global.load.ref @_executable_simple_mul_dispatch_0 : !vm.ref<!hal.executable> | |
%_utf8_tensor_3C6209B4FD120BDC = vm.const.ref.rodata @_utf8_tensor_3C6209B4FD120BDC : !vm.buffer | |
%c4_1 = vm.const.i64 4 | |
vm.call.variadic @hal.buffer_view.assert(%arg0, %_utf8_tensor_3C6209B4FD120BDC, %c553648160, %c1_0, [%c4_1]) : (!vm.ref<!hal.buffer_view>, !vm.buffer, i32, i32, i64 ...) | |
%ref = vm.call @hal.buffer_view.buffer(%arg0) {nosideeffects} : (!vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer> | |
%ref_2 = vm.call @hal.ex.shared_device() {nosideeffects} : () -> !vm.ref<!hal.device> | |
%ref_3 = vm.call @hal.device.allocator(%ref_2) {nosideeffects} : (!vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> | |
%_utf8_tensor_3C6209B4FD120BDC_4 = vm.const.ref.rodata @_utf8_tensor_3C6209B4FD120BDC : !vm.buffer | |
%c16_5 = vm.const.i64 16 | |
%c16_6 = vm.const.i32 16 | |
%c3075 = vm.const.i32 3075 | |
vm.call @hal.buffer.assert(%ref, %_utf8_tensor_3C6209B4FD120BDC_4, %ref_3, %c16_5, %c16_6, %c3075) : (!vm.ref<!hal.buffer>, !vm.buffer, !vm.ref<!hal.allocator>, i64, i32, i32) -> () | |
%_utf8_tensor_3C6209B4FD120BDC_7 = vm.const.ref.rodata @_utf8_tensor_3C6209B4FD120BDC : !vm.buffer | |
%c4_8 = vm.const.i64 4 | |
vm.call.variadic @hal.buffer_view.assert(%arg1, %_utf8_tensor_3C6209B4FD120BDC_7, %c553648160, %c1_0, [%c4_8]) : (!vm.ref<!hal.buffer_view>, !vm.buffer, i32, i32, i64 ...) | |
%ref_9 = vm.call @hal.buffer_view.buffer(%arg1) {nosideeffects} : (!vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer> | |
%_utf8_tensor_3C6209B4FD120BDC_10 = vm.const.ref.rodata @_utf8_tensor_3C6209B4FD120BDC : !vm.buffer | |
%c16_11 = vm.const.i64 16 | |
%c16_12 = vm.const.i32 16 | |
%c3075_13 = vm.const.i32 3075 | |
vm.call @hal.buffer.assert(%ref_9, %_utf8_tensor_3C6209B4FD120BDC_10, %ref_3, %c16_11, %c16_12, %c3075_13) : (!vm.ref<!hal.buffer>, !vm.buffer, !vm.ref<!hal.allocator>, i64, i32, i32) -> () | |
%c50 = vm.const.i32 50 | |
%c150998019 = vm.const.i32 150998019 | |
%c16_14 = vm.const.i64 16 | |
%ref_15 = vm.call @hal.allocator.allocate(%ref_3, %c50, %c150998019, %c16_14) : (!vm.ref<!hal.allocator>, i32, i32, i64) -> !vm.ref<!hal.buffer> | |
%c17 = vm.const.i32 17 | |
%c3 = vm.const.i32 3 | |
%ref_16 = vm.call @hal.command_buffer.create(%ref_2, %c17, %c3) : (!vm.ref<!hal.device>, i32, i32) -> !vm.ref<!hal.command_buffer> | |
vm.call @hal.command_buffer.begin(%ref_16) : (!vm.ref<!hal.command_buffer>) -> () | |
vm.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%zero_17 = vm.const.i64.zero | |
%c16_18 = vm.const.i64 16 | |
%zero_19 = vm.const.i64.zero | |
%c16_20 = vm.const.i64 16 | |
%zero_21 = vm.const.i64.zero | |
%c16_22 = vm.const.i64 16 | |
vm.call.variadic @hal.command_buffer.push_descriptor_set(%ref_16, %_executable_layout_0, %zero, [(%zero, %ref, %zero_17, %c16_18), (%c1, %ref_9, %zero_19, %c16_20), (%c2, %ref_15, %zero_21, %c16_22)]) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable_layout>, i32, tuple<i32, !vm.ref<!hal.buffer>, i64, i64> ...) | |
%zero_23 = vm.const.i32.zero | |
vm.call @hal.command_buffer.dispatch(%ref_16, %_executable_simple_mul_dispatch_0, %zero_23, %c1, %c1, %c1) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32) -> () | |
%c28 = vm.const.i32 28 | |
%c13 = vm.const.i32 13 | |
%zero_24 = vm.const.i32.zero | |
vm.call @hal.command_buffer.execution_barrier(%ref_16, %c28, %c13, %zero_24) : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.end(%ref_16) : (!vm.ref<!hal.command_buffer>) -> () | |
vm.call @hal.ex.submit_and_wait(%ref_2, %ref_16) : (!vm.ref<!hal.device>, !vm.ref<!hal.command_buffer>) -> () | |
%c4_25 = vm.const.i64 4 | |
%ref_26 = vm.call.variadic @hal.buffer_view.create(%ref_15, %c553648160, %c1_0, [%c4_25]) {nosideeffects} : (!vm.ref<!hal.buffer>, i32, i32, i64 ...) -> !vm.ref<!hal.buffer_view> | |
vm.return %ref_26 : !vm.ref<!hal.buffer_view> | |
^bb2: // pred: ^bb0 | |
%c2_27 = vm.const.i32 2 | |
vm.fail %c2_27, "device not supported in the compiled configuration" | |
} | |
vm.export @simple_mul | |
vm.export @__init | |
vm.func private @__init() { | |
%ref = vm.call @hal.ex.shared_device() {nosideeffects} : () -> !vm.ref<!hal.device> | |
%_utf8_hal_executable_format_EAB228F999C2D3A1 = vm.const.ref.rodata @_utf8_hal_executable_format_EAB228F999C2D3A1 : !vm.buffer | |
%_utf8_embedded_elf_x86_64_9FD8733DA4A6F228 = vm.const.ref.rodata @_utf8_embedded_elf_x86_64_9FD8733DA4A6F228 : !vm.buffer | |
%0:2 = vm.call @hal.device.query.i32(%ref, %_utf8_hal_executable_format_EAB228F999C2D3A1, %_utf8_embedded_elf_x86_64_9FD8733DA4A6F228) {nosideeffects} : (!vm.ref<!hal.device>, !vm.buffer, !vm.buffer) -> (i32, i32) | |
%c1 = vm.const.i32 1 | |
%1 = vm.and.i32 %0#1, %c1 : i32 | |
%zero = vm.const.i32.zero | |
%2 = vm.select.i32 %0#0, %1, %zero : i32 | |
%c1_0 = vm.const.i32 1 | |
%c1_1 = vm.const.i32 1 | |
%zero_2 = vm.const.i32.zero | |
%c7 = vm.const.i32 7 | |
%c1_3 = vm.const.i32 1 | |
%c7_4 = vm.const.i32 7 | |
%c2 = vm.const.i32 2 | |
%c7_5 = vm.const.i32 7 | |
%ref_6 = vm.call.variadic @hal.descriptor_set_layout.create(%ref, %c1_1, [(%zero_2, %c7), (%c1_3, %c7_4), (%c2, %c7_5)]) {nosideeffects} : (!vm.ref<!hal.device>, i32, tuple<i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> | |
%zero_7 = vm.const.i32.zero | |
%ref_8 = vm.call.variadic @hal.executable_layout.create(%ref, %zero_7, [%ref_6]) {nosideeffects} : (!vm.ref<!hal.device>, i32, !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> | |
vm.global.store.i32 %2, @_device_query_0 : i32 | |
vm.global.store.ref %ref_8, @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
vm.cond_br %2, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%_utf8_embedded_elf_x86_64_9FD8733DA4A6F228_9 = vm.const.ref.rodata @_utf8_embedded_elf_x86_64_9FD8733DA4A6F228 : !vm.buffer | |
%simple_mul_dispatch_0_embedded_elf_x86_64 = vm.const.ref.rodata @simple_mul_dispatch_0_embedded_elf_x86_64 : !vm.buffer | |
%null = vm.const.ref.zero : !vm.buffer | |
%ref_10 = vm.call.variadic @hal.executable.create(%ref, %_utf8_embedded_elf_x86_64_9FD8733DA4A6F228_9, %simple_mul_dispatch_0_embedded_elf_x86_64, %null, [%_executable_layout_0]) {nosideeffects} : (!vm.ref<!hal.device>, !vm.buffer, !vm.buffer, !vm.buffer, !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> | |
vm.br ^bb3(%ref_10 : !vm.ref<!hal.executable>) | |
^bb2: // pred: ^bb0 | |
%null_11 = vm.const.ref.zero : !vm.ref<!hal.executable> | |
vm.br ^bb3(%null_11 : !vm.ref<!hal.executable>) | |
^bb3(%3: !vm.ref<!hal.executable>): // 2 preds: ^bb1, ^bb2 | |
vm.global.store.ref %3, @_executable_simple_mul_dispatch_0 : !vm.ref<!hal.executable> | |
vm.br ^bb4 | |
^bb4: // pred: ^bb3 | |
vm.return | |
} | |
vm.export @__deinit | |
vm.func private @__deinit() { | |
vm.return | |
} | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
vm.func private @__deinit() { | |
vm.return | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
vm.func private @__init() { | |
%null = vm.const.ref.zero : !vm.ref<!hal.executable> | |
%null_0 = vm.const.ref.zero : !vm.buffer | |
%c2 = vm.const.i32 2 | |
%c7 = vm.const.i32 7 | |
%zero = vm.const.i32.zero | |
%c1 = vm.const.i32 1 | |
%ref = vm.call @hal.ex.shared_device() {nosideeffects} : () -> !vm.ref<!hal.device> | |
%_utf8_hal_executable_format_EAB228F999C2D3A1 = vm.const.ref.rodata @_utf8_hal_executable_format_EAB228F999C2D3A1 : !vm.buffer | |
%_utf8_embedded_elf_x86_64_9FD8733DA4A6F228 = vm.const.ref.rodata @_utf8_embedded_elf_x86_64_9FD8733DA4A6F228 : !vm.buffer | |
%0:2 = vm.call @hal.device.query.i32(%ref, %_utf8_hal_executable_format_EAB228F999C2D3A1, %_utf8_embedded_elf_x86_64_9FD8733DA4A6F228) {nosideeffects} : (!vm.ref<!hal.device>, !vm.buffer, !vm.buffer) -> (i32, i32) | |
%1 = vm.and.i32 %0#1, %c1 : i32 | |
%2 = vm.select.i32 %0#0, %1, %zero : i32 | |
%ref_1 = vm.call.variadic @hal.descriptor_set_layout.create(%ref, %c1, [(%zero, %c7), (%c1, %c7), (%c2, %c7)]) {nosideeffects} : (!vm.ref<!hal.device>, i32, tuple<i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> | |
%ref_2 = vm.call.variadic @hal.executable_layout.create(%ref, %zero, [%ref_1]) {nosideeffects} : (!vm.ref<!hal.device>, i32, !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> | |
vm.global.store.i32 %2, @_device_query_0 : i32 | |
vm.global.store.ref %ref_2, @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
vm.cond_br %2, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%_utf8_embedded_elf_x86_64_9FD8733DA4A6F228_3 = vm.const.ref.rodata @_utf8_embedded_elf_x86_64_9FD8733DA4A6F228 : !vm.buffer | |
%simple_mul_dispatch_0_embedded_elf_x86_64 = vm.const.ref.rodata @simple_mul_dispatch_0_embedded_elf_x86_64 : !vm.buffer | |
%ref_4 = vm.call.variadic @hal.executable.create(%ref, %_utf8_embedded_elf_x86_64_9FD8733DA4A6F228_3, %simple_mul_dispatch_0_embedded_elf_x86_64, %null_0, [%_executable_layout_0]) {nosideeffects} : (!vm.ref<!hal.device>, !vm.buffer, !vm.buffer, !vm.buffer, !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> | |
vm.br ^bb3(%ref_4 : !vm.ref<!hal.executable>) | |
^bb2: // pred: ^bb0 | |
vm.br ^bb3(%null : !vm.ref<!hal.executable>) | |
^bb3(%3: !vm.ref<!hal.executable>): // 2 preds: ^bb1, ^bb2 | |
vm.global.store.ref %3, @_executable_simple_mul_dispatch_0 : !vm.ref<!hal.executable> | |
vm.return | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
vm.func private @simple_mul(%arg0: !vm.ref<!hal.buffer_view>, %arg1: !vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer_view> { | |
%c13 = vm.const.i32 13 | |
%c28 = vm.const.i32 28 | |
%zero = vm.const.i64.zero | |
%c3 = vm.const.i32 3 | |
%c17 = vm.const.i32 17 | |
%c150998019 = vm.const.i32 150998019 | |
%c50 = vm.const.i32 50 | |
%c3075 = vm.const.i32 3075 | |
%c16 = vm.const.i64 16 | |
%c4 = vm.const.i64 4 | |
%c2 = vm.const.i32 2 | |
%zero_0 = vm.const.i32.zero | |
%c16_1 = vm.const.i32 16 | |
%c1 = vm.const.i32 1 | |
%c553648160 = vm.const.i32 553648160 | |
%_device_query_0 = vm.global.load.i32 @_device_query_0 : i32 | |
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%_executable_simple_mul_dispatch_0 = vm.global.load.ref @_executable_simple_mul_dispatch_0 : !vm.ref<!hal.executable> | |
%_utf8_tensor_3C6209B4FD120BDC = vm.const.ref.rodata @_utf8_tensor_3C6209B4FD120BDC : !vm.buffer | |
vm.call.variadic @hal.buffer_view.assert(%arg0, %_utf8_tensor_3C6209B4FD120BDC, %c553648160, %c1, [%c4]) : (!vm.ref<!hal.buffer_view>, !vm.buffer, i32, i32, i64 ...) | |
%ref = vm.call @hal.buffer_view.buffer(%arg0) {nosideeffects} : (!vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer> | |
%ref_2 = vm.call @hal.ex.shared_device() {nosideeffects} : () -> !vm.ref<!hal.device> | |
%ref_3 = vm.call @hal.device.allocator(%ref_2) {nosideeffects} : (!vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> | |
%_utf8_tensor_3C6209B4FD120BDC_4 = vm.const.ref.rodata @_utf8_tensor_3C6209B4FD120BDC : !vm.buffer | |
vm.call @hal.buffer.assert(%ref, %_utf8_tensor_3C6209B4FD120BDC_4, %ref_3, %c16, %c16_1, %c3075) : (!vm.ref<!hal.buffer>, !vm.buffer, !vm.ref<!hal.allocator>, i64, i32, i32) -> () | |
%_utf8_tensor_3C6209B4FD120BDC_5 = vm.const.ref.rodata @_utf8_tensor_3C6209B4FD120BDC : !vm.buffer | |
vm.call.variadic @hal.buffer_view.assert(%arg1, %_utf8_tensor_3C6209B4FD120BDC_5, %c553648160, %c1, [%c4]) : (!vm.ref<!hal.buffer_view>, !vm.buffer, i32, i32, i64 ...) | |
%ref_6 = vm.call @hal.buffer_view.buffer(%arg1) {nosideeffects} : (!vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer> | |
%_utf8_tensor_3C6209B4FD120BDC_7 = vm.const.ref.rodata @_utf8_tensor_3C6209B4FD120BDC : !vm.buffer | |
vm.call @hal.buffer.assert(%ref_6, %_utf8_tensor_3C6209B4FD120BDC_7, %ref_3, %c16, %c16_1, %c3075) : (!vm.ref<!hal.buffer>, !vm.buffer, !vm.ref<!hal.allocator>, i64, i32, i32) -> () | |
%ref_8 = vm.call @hal.allocator.allocate(%ref_3, %c50, %c150998019, %c16) : (!vm.ref<!hal.allocator>, i32, i32, i64) -> !vm.ref<!hal.buffer> | |
%ref_9 = vm.call @hal.command_buffer.create(%ref_2, %c17, %c3) : (!vm.ref<!hal.device>, i32, i32) -> !vm.ref<!hal.command_buffer> | |
vm.call @hal.command_buffer.begin(%ref_9) : (!vm.ref<!hal.command_buffer>) -> () | |
vm.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
vm.call.variadic @hal.command_buffer.push_descriptor_set(%ref_9, %_executable_layout_0, %zero_0, [(%zero_0, %ref, %zero, %c16), (%c1, %ref_6, %zero, %c16), (%c2, %ref_8, %zero, %c16)]) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable_layout>, i32, tuple<i32, !vm.ref<!hal.buffer>, i64, i64> ...) | |
vm.call @hal.command_buffer.dispatch(%ref_9, %_executable_simple_mul_dispatch_0, %zero_0, %c1, %c1, %c1) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.execution_barrier(%ref_9, %c28, %c13, %zero_0) : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.end(%ref_9) : (!vm.ref<!hal.command_buffer>) -> () | |
vm.call @hal.ex.submit_and_wait(%ref_2, %ref_9) : (!vm.ref<!hal.device>, !vm.ref<!hal.command_buffer>) -> () | |
%ref_10 = vm.call.variadic @hal.buffer_view.create(%ref_8, %c553648160, %c1, [%c4]) {nosideeffects} : (!vm.ref<!hal.buffer>, i32, i32, i64 ...) -> !vm.ref<!hal.buffer_view> | |
vm.return %ref_10 : !vm.ref<!hal.buffer_view> | |
^bb2: // pred: ^bb0 | |
vm.fail %c2, "device not supported in the compiled configuration" | |
} | |
// -----// IR Dump After Inliner //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
module attributes {hal.device.targets = [#device_target_cpu], vm.toplevel} { | |
vm.module public @module { | |
vm.global.i32 private mutable @_device_query_0 : i32 | |
vm.global.ref private mutable @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
vm.global.ref private mutable @_executable_simple_mul_dispatch_0 : !vm.ref<!hal.executable> | |
vm.rodata private @simple_mul_dispatch_0_embedded_elf_x86_64 {alignment = 16 : i64, mime_type = "application/x-elf"} dense<"0x7F454C4602010100000000000000000003003E000100000030130000000000004000000000000000E00900000000000000000000400038000700400014001200060000000400000040000000000000004000000000000000400000000000000088010000000000008801000000000000080000000000000001000000040000000000000000000000000000000000000000000000000000002D030000000000002D030000000000000010000000000000010000000500000030030000000000003013000000000000301300000000000031000000000000003100000000000000001000000000000001000000060000007003000000000000702300000000000070230000000000003801000000000000380100000000000000100000000000000200000006000000E803000000000000E823000000000000E823000000000000C000000000000000C000000000000000080000000000000052E57464040000007003000000000000702300000000000070230000000000003801000000000000900C000000000000010000000000000051E574640600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000120006005013000000000000110000000000000002000000020000000000000001000000000000000000000000697265655F68616C5F65786563757461626C655F6C6962726172795F7175657279000000000000782300000000000008000000000000001003000000000000882300000000000008000000000000003013000000000000902300000000000008000000000000001003000000000000982300000000000008000000000000002C03000000000000A02300000000000008000000000000007023000000000000C02300000000000008000000000000008823000000000000C82300000000000008000000000000002803000000000000D02300000000000008000000000000009023000000000000D8230000000000000800000000000000982300000000000073696D706C655F6D756C5F64697370617463685F300000000000000000000000554889E5488B4620488B08488B5008488B40100F28010F59020F290031C05DC331C083FF02488D0D44100000480F44C1C3000000000000000000000000000000020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001E000000000000000800000000000000FBFFFF6F000000000100000000000000070000000000000038020000000000000800000000000000D80000000000000009000000000000001800000000000000F9FFFF6F0000000009000000000000000600000000000000C8010000000000000B000000000000001800000000000000050000000000000010020000000000000A0000000000000023000000000000000400000000000000F80100000000000000000000000000000000000000000000011101250E1305030E10171B0EB44219110112060000022E006E0E030E3F19200B0000032E0111011206401831130000041D00311311011206580B590B570B000000590000000400000000000801000000000200070000000000000005000000301300000000000020000000020700000007000000010330130000000000002000000001562A000000042A0000003F130000000000001100000002010100006D6C6972002F0073696D706C655F6D756C5F64697370617463685F3000280000000200000000005D0000002A00000073696D706C655F6D756C5F64697370617463685F3000000000000E0000000200000000005D0000000000000000000000000014000000FFFFFFFF040008000178100C0708900100000000240000000000000030130000000000002000000000000000410E108602430D065B0C070800000000140000000000000050130000000000001100000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000D300000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000D300000000000000140000000000000000000000000000000900000000000000140000000000000000000000000000002800000000000000140000000000000000000000000000001E00000000000000140000000000000000000000000000002800000000000000140000000000000000000000000000006F000000000000001400000000000000000000000000000003010000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003
00000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000B8000000000000001400000000000000000000000000000052010000000000001400000000000000000000000000000042030000000000009E00000004007F000000010101FB0E0D0001010101000000010000012F7573722F6C6F63616C2F676F6F676C652F686F6D652F62656E76616E696B2F7372632F6972656500003C756E6B6E6F776E3E0001000072756E74696D652F7372632F697265652F72756E74696D652F74657374646174612F73696D706C655F6D756C2E6D6C69720001000000000902301300000000000011040205010A4B0508AD080001014952454500000000000000000000000000000000000000000000000000002300000000020800E8230000000000000000000000000000010000001200060050130000000000001100000000000000002E64796E73796D002E68617368002E64796E737472002E72656C612E64796E002E726F64617461002E74657874002E646174612E72656C2E726F002E64796E616D6963002E64656275675F616262726576002E64656275675F696E666F002E64656275675F737472002E64656275675F7075626E616D6573002E64656275675F7075627479706573002E64656275675F6672616D65002E64656275675F6C696E65002E636F6D6D656E74002E73796D746162002E7368737472746162002E7374727461620000697265655F68616C5F65786563757461626C655F6C6962726172795F7175657279005F44594E414D49430000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000B0000000200000000000000C801000000000000C801000000000000300000000000000003000000010000000800000000000000180000000000000009000000050000000200000000000000F801000000000000F80100000000000018000000000000000100000000000000040000000000000004000000000000000F0000000300000002000000000000001002000000000000100200000000000023000000000000000000000000000000010000000000000000000000000000001700000004000000020000000000000038020000000000003802000000000000D80000000000000001000000000000000800000000000000180000000000000021000000010000000200000000000000100300000000000010030000000000001D0000000000000000000000000000000800000000000000000000000000000029000000010000000600000000000000301300000000000030
0300000000000031000000000000000000000000000000100000000000000000000000000000002F0000000100000003000000000000007023000000000000700300000000000078000000000000000000000000000000100000000000000000000000000000003C000000060000000300000000000000E823000000000000E803000000000000C000000000000000030000000000000008000000000000001000000000000000450000000100000000000000000000000000000000000000A8040000000000004200000000000000000000000000000001000000000000000000000000000000530000000100000000000000000000000000000000000000EA040000000000005D000000000000000000000000000000010000000000000000000000000000005F000000010000003000000000000000000000000000000047050000000000001D000000000000000000000000000000010000000000000001000000000000006A000000010000000000000000000000000000000000000064050000000000002C000000000000000000000000000000010000000000000000000000000000007A0000000100000000000000000000000000000000000000900500000000000012000000000000000000000000000000010000000000000000000000000000008A0000000100000000000000000000000000000000000000A8050000000000005002000000000000000000000000000008000000000000000000000000000000970000000100000000000000000000000000000000000000F807000000000000A200000000000000000000000000000001000000000000000000000000000000A300000001000000300000000000000000000000000000009A080000000000000500000000000000000000000000000001000000000000000100000000000000AC0000000200000000000000000000000000000000000000A0080000000000004800000000000000130000000200000008000000000000001800000000000000B40000000300000000000000000000000000000000000000E808000000000000C600000000000000000000000000000001000000000000000000000000000000BE0000000300000000000000000000000000000000000000AE090000000000002C00000000000000000000000000000001000000000000000000000000000000"> : vector<3808xi8> | |
vm.rodata private @_utf8_hal_executable_format_EAB228F999C2D3A1 {alignment = 1 : i64} dense<[104, 97, 108, 46, 101, 120, 101, 99, 117, 116, 97, 98, 108, 101, 46, 102, 111, 114, 109, 97, 116]> : vector<21xi8> | |
vm.rodata private @_utf8_embedded_elf_x86_64_9FD8733DA4A6F228 {alignment = 1 : i64} dense<[101, 109, 98, 101, 100, 100, 101, 100, 45, 101, 108, 102, 45, 120, 56, 54, 95, 54, 52]> : vector<19xi8> | |
vm.import @hal.ex.shared_device() -> !vm.ref<!hal.device> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.ex.submit_and_wait(%device : !vm.ref<!hal.device>, %command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.allocate(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %allocation_size : i64) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.map.byte_buffer(%allocator : !vm.ref<!hal.allocator>, %try : i32, %memory_types : i32, %buffer_usage : i32, %source : !vm.buffer, %offset : i64, %length : i64) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.wrap.byte_buffer(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %source : !vm.buffer, %offset : i64, %length : i64) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.assert(%buffer : !vm.ref<!hal.buffer>, %message : !vm.buffer, %allocator : !vm.ref<!hal.allocator>, %minimum_length : i64, %memory_types : i32, %buffer_usage : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.subspan(%source_buffer : !vm.ref<!hal.buffer>, %source_offset : i64, %length : i64) -> !vm.ref<!hal.buffer> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer.length(%buffer : !vm.ref<!hal.buffer>) -> i64 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer.load(%source_buffer : !vm.ref<!hal.buffer>, %source_offset : i64, %length : i32) -> i32 attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.store(%value : i32, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i64, %length : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer_view.create(%buffer : !vm.ref<!hal.buffer>, %element_type : i32, %encoding_type : i32, %shape : i64 ...) -> !vm.ref<!hal.buffer_view> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.assert(%buffer_view : !vm.ref<!hal.buffer_view>, %message : !vm.buffer, %element_type : i32, %encoding_type : i32, %shape : i64 ...) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer_view.buffer(%buffer_view : !vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.byte_length(%buffer_view : !vm.ref<!hal.buffer_view>) -> i64 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.element_type(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.encoding_type(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.rank(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.dim(%buffer_view : !vm.ref<!hal.buffer_view>, %index : i32) -> i64 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.trace(%key : !vm.buffer, %operands : !vm.ref<!hal.buffer_view> ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.create(%device : !vm.ref<!hal.device>, %modes : i32, %command_categories : i32) -> !vm.ref<!hal.command_buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.begin(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.end(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.begin_debug_group(%command_buffer : !vm.ref<!hal.command_buffer>, %label : !vm.buffer) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.end_debug_group(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.execution_barrier(%command_buffer : !vm.ref<!hal.command_buffer>, %source_stage_mask : i32, %target_stage_mask : i32, %flags : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.fill_buffer(%command_buffer : !vm.ref<!hal.command_buffer>, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i64, %length : i64, %pattern : i32, %pattern_length : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.copy_buffer(%command_buffer : !vm.ref<!hal.command_buffer>, %source_buffer : !vm.ref<!hal.buffer>, %source_offset : i64, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i64, %length : i64) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.push_constants(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %offset : i32, %values : i32 ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.push_descriptor_set(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %set : i32, %bindings : tuple<i32, !vm.ref<!hal.buffer>, i64, i64> ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.bind_descriptor_set(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %set : i32, %descriptor_set : !vm.ref<!hal.descriptor_set>, %dynamic_offsets : i64 ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.dispatch(%command_buffer : !vm.ref<!hal.command_buffer>, %executable : !vm.ref<!hal.executable>, %entry_point : i32, %workgroup_x : i32, %workgroup_y : i32, %workgroup_z : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.dispatch.indirect(%command_buffer : !vm.ref<!hal.command_buffer>, %executable : !vm.ref<!hal.executable>, %entry_point : i32, %workgroups_buffer : !vm.ref<!hal.buffer>, %workgroups_offset : i64) attributes {sym_visibility = "private"} | |
vm.import @hal.descriptor_set.create(%device : !vm.ref<!hal.device>, %set_layout : !vm.ref<!hal.descriptor_set_layout>, %bindings : tuple<i32, !vm.ref<!hal.buffer>, i64, i64> ...) -> !vm.ref<!hal.descriptor_set> attributes {sym_visibility = "private"} | |
vm.import @hal.descriptor_set_layout.create(%device : !vm.ref<!hal.device>, %usage_type : i32, %bindings : tuple<i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.device.allocator(%device : !vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.device.query.i32(%device : !vm.ref<!hal.device>, %category : !vm.buffer, %key : !vm.buffer) -> (i32, i32) attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.executable.create(%device : !vm.ref<!hal.device>, %executable_format : !vm.buffer, %executable_data : !vm.buffer, %constants : !vm.buffer, %executable_layouts : !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.executable_layout.create(%device : !vm.ref<!hal.device>, %push_constants : i32, %set_layouts : !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.semaphore.create(%device : !vm.ref<!hal.device>, %initial_value : i64) -> !vm.ref<!hal.semaphore> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.semaphore.query(%semaphore : !vm.ref<!hal.semaphore>) -> (i32, i64) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.signal(%semaphore : !vm.ref<!hal.semaphore>, %new_value : i64) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.fail(%semaphore : !vm.ref<!hal.semaphore>, %status : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.await(%semaphore : !vm.ref<!hal.semaphore>, %min_value : i64) -> i32 attributes {sym_visibility = "private"} | |
vm.rodata private @_utf8_tensor_3C6209B4FD120BDC {alignment = 1 : i64} dense<[116, 101, 110, 115, 111, 114]> : vector<6xi8> | |
vm.func private @simple_mul(%arg0: !vm.ref<!hal.buffer_view>, %arg1: !vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer_view> { | |
%c13 = vm.const.i32 13 | |
%c28 = vm.const.i32 28 | |
%zero = vm.const.i64.zero | |
%c3 = vm.const.i32 3 | |
%c17 = vm.const.i32 17 | |
%c150998019 = vm.const.i32 150998019 | |
%c50 = vm.const.i32 50 | |
%c3075 = vm.const.i32 3075 | |
%c16 = vm.const.i64 16 | |
%c4 = vm.const.i64 4 | |
%c2 = vm.const.i32 2 | |
%zero_0 = vm.const.i32.zero | |
%c16_1 = vm.const.i32 16 | |
%c1 = vm.const.i32 1 | |
%c553648160 = vm.const.i32 553648160 | |
%_device_query_0 = vm.global.load.i32 @_device_query_0 : i32 | |
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%_executable_simple_mul_dispatch_0 = vm.global.load.ref @_executable_simple_mul_dispatch_0 : !vm.ref<!hal.executable> | |
%_utf8_tensor_3C6209B4FD120BDC = vm.const.ref.rodata @_utf8_tensor_3C6209B4FD120BDC : !vm.buffer | |
vm.call.variadic @hal.buffer_view.assert(%arg0, %_utf8_tensor_3C6209B4FD120BDC, %c553648160, %c1, [%c4]) : (!vm.ref<!hal.buffer_view>, !vm.buffer, i32, i32, i64 ...) | |
%ref = vm.call @hal.buffer_view.buffer(%arg0) {nosideeffects} : (!vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer> | |
%ref_2 = vm.call @hal.ex.shared_device() {nosideeffects} : () -> !vm.ref<!hal.device> | |
%ref_3 = vm.call @hal.device.allocator(%ref_2) {nosideeffects} : (!vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> | |
%_utf8_tensor_3C6209B4FD120BDC_4 = vm.const.ref.rodata @_utf8_tensor_3C6209B4FD120BDC : !vm.buffer | |
vm.call @hal.buffer.assert(%ref, %_utf8_tensor_3C6209B4FD120BDC_4, %ref_3, %c16, %c16_1, %c3075) : (!vm.ref<!hal.buffer>, !vm.buffer, !vm.ref<!hal.allocator>, i64, i32, i32) -> () | |
%_utf8_tensor_3C6209B4FD120BDC_5 = vm.const.ref.rodata @_utf8_tensor_3C6209B4FD120BDC : !vm.buffer | |
vm.call.variadic @hal.buffer_view.assert(%arg1, %_utf8_tensor_3C6209B4FD120BDC_5, %c553648160, %c1, [%c4]) : (!vm.ref<!hal.buffer_view>, !vm.buffer, i32, i32, i64 ...) | |
%ref_6 = vm.call @hal.buffer_view.buffer(%arg1) {nosideeffects} : (!vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer> | |
%_utf8_tensor_3C6209B4FD120BDC_7 = vm.const.ref.rodata @_utf8_tensor_3C6209B4FD120BDC : !vm.buffer | |
vm.call @hal.buffer.assert(%ref_6, %_utf8_tensor_3C6209B4FD120BDC_7, %ref_3, %c16, %c16_1, %c3075) : (!vm.ref<!hal.buffer>, !vm.buffer, !vm.ref<!hal.allocator>, i64, i32, i32) -> () | |
%ref_8 = vm.call @hal.allocator.allocate(%ref_3, %c50, %c150998019, %c16) : (!vm.ref<!hal.allocator>, i32, i32, i64) -> !vm.ref<!hal.buffer> | |
%ref_9 = vm.call @hal.command_buffer.create(%ref_2, %c17, %c3) : (!vm.ref<!hal.device>, i32, i32) -> !vm.ref<!hal.command_buffer> | |
vm.call @hal.command_buffer.begin(%ref_9) : (!vm.ref<!hal.command_buffer>) -> () | |
vm.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
vm.call.variadic @hal.command_buffer.push_descriptor_set(%ref_9, %_executable_layout_0, %zero_0, [(%zero_0, %ref, %zero, %c16), (%c1, %ref_6, %zero, %c16), (%c2, %ref_8, %zero, %c16)]) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable_layout>, i32, tuple<i32, !vm.ref<!hal.buffer>, i64, i64> ...) | |
vm.call @hal.command_buffer.dispatch(%ref_9, %_executable_simple_mul_dispatch_0, %zero_0, %c1, %c1, %c1) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.execution_barrier(%ref_9, %c28, %c13, %zero_0) : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.end(%ref_9) : (!vm.ref<!hal.command_buffer>) -> () | |
vm.call @hal.ex.submit_and_wait(%ref_2, %ref_9) : (!vm.ref<!hal.device>, !vm.ref<!hal.command_buffer>) -> () | |
%ref_10 = vm.call.variadic @hal.buffer_view.create(%ref_8, %c553648160, %c1, [%c4]) {nosideeffects} : (!vm.ref<!hal.buffer>, i32, i32, i64 ...) -> !vm.ref<!hal.buffer_view> | |
vm.return %ref_10 : !vm.ref<!hal.buffer_view> | |
^bb2: // pred: ^bb0 | |
vm.fail %c2, "device not supported in the compiled configuration" | |
} | |
vm.export @simple_mul | |
vm.export @__init | |
vm.func private @__init() { | |
%null = vm.const.ref.zero : !vm.ref<!hal.executable> | |
%null_0 = vm.const.ref.zero : !vm.buffer | |
%c2 = vm.const.i32 2 | |
%c7 = vm.const.i32 7 | |
%zero = vm.const.i32.zero | |
%c1 = vm.const.i32 1 | |
%ref = vm.call @hal.ex.shared_device() {nosideeffects} : () -> !vm.ref<!hal.device> | |
%_utf8_hal_executable_format_EAB228F999C2D3A1 = vm.const.ref.rodata @_utf8_hal_executable_format_EAB228F999C2D3A1 : !vm.buffer | |
%_utf8_embedded_elf_x86_64_9FD8733DA4A6F228 = vm.const.ref.rodata @_utf8_embedded_elf_x86_64_9FD8733DA4A6F228 : !vm.buffer | |
%0:2 = vm.call @hal.device.query.i32(%ref, %_utf8_hal_executable_format_EAB228F999C2D3A1, %_utf8_embedded_elf_x86_64_9FD8733DA4A6F228) {nosideeffects} : (!vm.ref<!hal.device>, !vm.buffer, !vm.buffer) -> (i32, i32) | |
%1 = vm.and.i32 %0#1, %c1 : i32 | |
%2 = vm.select.i32 %0#0, %1, %zero : i32 | |
%ref_1 = vm.call.variadic @hal.descriptor_set_layout.create(%ref, %c1, [(%zero, %c7), (%c1, %c7), (%c2, %c7)]) {nosideeffects} : (!vm.ref<!hal.device>, i32, tuple<i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> | |
%ref_2 = vm.call.variadic @hal.executable_layout.create(%ref, %zero, [%ref_1]) {nosideeffects} : (!vm.ref<!hal.device>, i32, !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> | |
vm.global.store.i32 %2, @_device_query_0 : i32 | |
vm.global.store.ref %ref_2, @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
vm.cond_br %2, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%_utf8_embedded_elf_x86_64_9FD8733DA4A6F228_3 = vm.const.ref.rodata @_utf8_embedded_elf_x86_64_9FD8733DA4A6F228 : !vm.buffer | |
%simple_mul_dispatch_0_embedded_elf_x86_64 = vm.const.ref.rodata @simple_mul_dispatch_0_embedded_elf_x86_64 : !vm.buffer | |
%ref_4 = vm.call.variadic @hal.executable.create(%ref, %_utf8_embedded_elf_x86_64_9FD8733DA4A6F228_3, %simple_mul_dispatch_0_embedded_elf_x86_64, %null_0, [%_executable_layout_0]) {nosideeffects} : (!vm.ref<!hal.device>, !vm.buffer, !vm.buffer, !vm.buffer, !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> | |
vm.br ^bb3(%ref_4 : !vm.ref<!hal.executable>) | |
^bb2: // pred: ^bb0 | |
vm.br ^bb3(%null : !vm.ref<!hal.executable>) | |
^bb3(%3: !vm.ref<!hal.executable>): // 2 preds: ^bb1, ^bb2 | |
vm.global.store.ref %3, @_executable_simple_mul_dispatch_0 : !vm.ref<!hal.executable> | |
vm.return | |
} | |
vm.export @__deinit | |
vm.func private @__deinit() { | |
vm.return | |
} | |
} | |
} | |
// -----// IR Dump After Canonicalizer //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
module attributes {hal.device.targets = [#device_target_cpu], vm.toplevel} { | |
vm.module public @module { | |
vm.global.i32 private mutable @_device_query_0 : i32 | |
vm.global.ref private mutable @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
vm.global.ref private mutable @_executable_simple_mul_dispatch_0 : !vm.ref<!hal.executable> | |
vm.rodata private @simple_mul_dispatch_0_embedded_elf_x86_64 {alignment = 16 : i64, mime_type = "application/x-elf"} dense<"0x7F454C4602010100000000000000000003003E000100000030130000000000004000000000000000E00900000000000000000000400038000700400014001200060000000400000040000000000000004000000000000000400000000000000088010000000000008801000000000000080000000000000001000000040000000000000000000000000000000000000000000000000000002D030000000000002D030000000000000010000000000000010000000500000030030000000000003013000000000000301300000000000031000000000000003100000000000000001000000000000001000000060000007003000000000000702300000000000070230000000000003801000000000000380100000000000000100000000000000200000006000000E803000000000000E823000000000000E823000000000000C000000000000000C000000000000000080000000000000052E57464040000007003000000000000702300000000000070230000000000003801000000000000900C000000000000010000000000000051E574640600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000120006005013000000000000110000000000000002000000020000000000000001000000000000000000000000697265655F68616C5F65786563757461626C655F6C6962726172795F7175657279000000000000782300000000000008000000000000001003000000000000882300000000000008000000000000003013000000000000902300000000000008000000000000001003000000000000982300000000000008000000000000002C03000000000000A02300000000000008000000000000007023000000000000C02300000000000008000000000000008823000000000000C82300000000000008000000000000002803000000000000D02300000000000008000000000000009023000000000000D8230000000000000800000000000000982300000000000073696D706C655F6D756C5F64697370617463685F300000000000000000000000554889E5488B4620488B08488B5008488B40100F28010F59020F290031C05DC331C083FF02488D0D44100000480F44C1C3000000000000000000000000000000020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001E000000000000000800000000000000FBFFFF6F000000000100000000000000070000000000000038020000000000000800000000000000D80000000000000009000000000000001800000000000000F9FFFF6F0000000009000000000000000600000000000000C8010000000000000B000000000000001800000000000000050000000000000010020000000000000A0000000000000023000000000000000400000000000000F80100000000000000000000000000000000000000000000011101250E1305030E10171B0EB44219110112060000022E006E0E030E3F19200B0000032E0111011206401831130000041D00311311011206580B590B570B000000590000000400000000000801000000000200070000000000000005000000301300000000000020000000020700000007000000010330130000000000002000000001562A000000042A0000003F130000000000001100000002010100006D6C6972002F0073696D706C655F6D756C5F64697370617463685F3000280000000200000000005D0000002A00000073696D706C655F6D756C5F64697370617463685F3000000000000E0000000200000000005D0000000000000000000000000014000000FFFFFFFF040008000178100C0708900100000000240000000000000030130000000000002000000000000000410E108602430D065B0C070800000000140000000000000050130000000000001100000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000D300000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000D300000000000000140000000000000000000000000000000900000000000000140000000000000000000000000000002800000000000000140000000000000000000000000000001E00000000000000140000000000000000000000000000002800000000000000140000000000000000000000000000006F000000000000001400000000000000000000000000000003010000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003
00000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000B8000000000000001400000000000000000000000000000052010000000000001400000000000000000000000000000042030000000000009E00000004007F000000010101FB0E0D0001010101000000010000012F7573722F6C6F63616C2F676F6F676C652F686F6D652F62656E76616E696B2F7372632F6972656500003C756E6B6E6F776E3E0001000072756E74696D652F7372632F697265652F72756E74696D652F74657374646174612F73696D706C655F6D756C2E6D6C69720001000000000902301300000000000011040205010A4B0508AD080001014952454500000000000000000000000000000000000000000000000000002300000000020800E8230000000000000000000000000000010000001200060050130000000000001100000000000000002E64796E73796D002E68617368002E64796E737472002E72656C612E64796E002E726F64617461002E74657874002E646174612E72656C2E726F002E64796E616D6963002E64656275675F616262726576002E64656275675F696E666F002E64656275675F737472002E64656275675F7075626E616D6573002E64656275675F7075627479706573002E64656275675F6672616D65002E64656275675F6C696E65002E636F6D6D656E74002E73796D746162002E7368737472746162002E7374727461620000697265655F68616C5F65786563757461626C655F6C6962726172795F7175657279005F44594E414D49430000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000B0000000200000000000000C801000000000000C801000000000000300000000000000003000000010000000800000000000000180000000000000009000000050000000200000000000000F801000000000000F80100000000000018000000000000000100000000000000040000000000000004000000000000000F0000000300000002000000000000001002000000000000100200000000000023000000000000000000000000000000010000000000000000000000000000001700000004000000020000000000000038020000000000003802000000000000D80000000000000001000000000000000800000000000000180000000000000021000000010000000200000000000000100300000000000010030000000000001D0000000000000000000000000000000800000000000000000000000000000029000000010000000600000000000000301300000000000030
0300000000000031000000000000000000000000000000100000000000000000000000000000002F0000000100000003000000000000007023000000000000700300000000000078000000000000000000000000000000100000000000000000000000000000003C000000060000000300000000000000E823000000000000E803000000000000C000000000000000030000000000000008000000000000001000000000000000450000000100000000000000000000000000000000000000A8040000000000004200000000000000000000000000000001000000000000000000000000000000530000000100000000000000000000000000000000000000EA040000000000005D000000000000000000000000000000010000000000000000000000000000005F000000010000003000000000000000000000000000000047050000000000001D000000000000000000000000000000010000000000000001000000000000006A000000010000000000000000000000000000000000000064050000000000002C000000000000000000000000000000010000000000000000000000000000007A0000000100000000000000000000000000000000000000900500000000000012000000000000000000000000000000010000000000000000000000000000008A0000000100000000000000000000000000000000000000A8050000000000005002000000000000000000000000000008000000000000000000000000000000970000000100000000000000000000000000000000000000F807000000000000A200000000000000000000000000000001000000000000000000000000000000A300000001000000300000000000000000000000000000009A080000000000000500000000000000000000000000000001000000000000000100000000000000AC0000000200000000000000000000000000000000000000A0080000000000004800000000000000130000000200000008000000000000001800000000000000B40000000300000000000000000000000000000000000000E808000000000000C600000000000000000000000000000001000000000000000000000000000000BE0000000300000000000000000000000000000000000000AE090000000000002C00000000000000000000000000000001000000000000000000000000000000"> : vector<3808xi8> | |
vm.rodata private @_utf8_hal_executable_format_EAB228F999C2D3A1 {alignment = 1 : i64} dense<[104, 97, 108, 46, 101, 120, 101, 99, 117, 116, 97, 98, 108, 101, 46, 102, 111, 114, 109, 97, 116]> : vector<21xi8> | |
vm.rodata private @_utf8_embedded_elf_x86_64_9FD8733DA4A6F228 {alignment = 1 : i64} dense<[101, 109, 98, 101, 100, 100, 101, 100, 45, 101, 108, 102, 45, 120, 56, 54, 95, 54, 52]> : vector<19xi8> | |
vm.import @hal.ex.shared_device() -> !vm.ref<!hal.device> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.ex.submit_and_wait(%device : !vm.ref<!hal.device>, %command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.allocate(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %allocation_size : i64) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.map.byte_buffer(%allocator : !vm.ref<!hal.allocator>, %try : i32, %memory_types : i32, %buffer_usage : i32, %source : !vm.buffer, %offset : i64, %length : i64) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.wrap.byte_buffer(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %source : !vm.buffer, %offset : i64, %length : i64) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.assert(%buffer : !vm.ref<!hal.buffer>, %message : !vm.buffer, %allocator : !vm.ref<!hal.allocator>, %minimum_length : i64, %memory_types : i32, %buffer_usage : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.subspan(%source_buffer : !vm.ref<!hal.buffer>, %source_offset : i64, %length : i64) -> !vm.ref<!hal.buffer> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer.length(%buffer : !vm.ref<!hal.buffer>) -> i64 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer.load(%source_buffer : !vm.ref<!hal.buffer>, %source_offset : i64, %length : i32) -> i32 attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.store(%value : i32, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i64, %length : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer_view.create(%buffer : !vm.ref<!hal.buffer>, %element_type : i32, %encoding_type : i32, %shape : i64 ...) -> !vm.ref<!hal.buffer_view> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.assert(%buffer_view : !vm.ref<!hal.buffer_view>, %message : !vm.buffer, %element_type : i32, %encoding_type : i32, %shape : i64 ...) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer_view.buffer(%buffer_view : !vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.byte_length(%buffer_view : !vm.ref<!hal.buffer_view>) -> i64 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.element_type(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.encoding_type(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.rank(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.dim(%buffer_view : !vm.ref<!hal.buffer_view>, %index : i32) -> i64 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.trace(%key : !vm.buffer, %operands : !vm.ref<!hal.buffer_view> ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.create(%device : !vm.ref<!hal.device>, %modes : i32, %command_categories : i32) -> !vm.ref<!hal.command_buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.begin(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.end(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.begin_debug_group(%command_buffer : !vm.ref<!hal.command_buffer>, %label : !vm.buffer) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.end_debug_group(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.execution_barrier(%command_buffer : !vm.ref<!hal.command_buffer>, %source_stage_mask : i32, %target_stage_mask : i32, %flags : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.fill_buffer(%command_buffer : !vm.ref<!hal.command_buffer>, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i64, %length : i64, %pattern : i32, %pattern_length : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.copy_buffer(%command_buffer : !vm.ref<!hal.command_buffer>, %source_buffer : !vm.ref<!hal.buffer>, %source_offset : i64, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i64, %length : i64) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.push_constants(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %offset : i32, %values : i32 ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.push_descriptor_set(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %set : i32, %bindings : tuple<i32, !vm.ref<!hal.buffer>, i64, i64> ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.bind_descriptor_set(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executable_layout>, %set : i32, %descriptor_set : !vm.ref<!hal.descriptor_set>, %dynamic_offsets : i64 ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.dispatch(%command_buffer : !vm.ref<!hal.command_buffer>, %executable : !vm.ref<!hal.executable>, %entry_point : i32, %workgroup_x : i32, %workgroup_y : i32, %workgroup_z : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.dispatch.indirect(%command_buffer : !vm.ref<!hal.command_buffer>, %executable : !vm.ref<!hal.executable>, %entry_point : i32, %workgroups_buffer : !vm.ref<!hal.buffer>, %workgroups_offset : i64) attributes {sym_visibility = "private"} | |
vm.import @hal.descriptor_set.create(%device : !vm.ref<!hal.device>, %set_layout : !vm.ref<!hal.descriptor_set_layout>, %bindings : tuple<i32, !vm.ref<!hal.buffer>, i64, i64> ...) -> !vm.ref<!hal.descriptor_set> attributes {sym_visibility = "private"} | |
vm.import @hal.descriptor_set_layout.create(%device : !vm.ref<!hal.device>, %usage_type : i32, %bindings : tuple<i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.device.allocator(%device : !vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.device.query.i32(%device : !vm.ref<!hal.device>, %category : !vm.buffer, %key : !vm.buffer) -> (i32, i32) attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.executable.create(%device : !vm.ref<!hal.device>, %executable_format : !vm.buffer, %executable_data : !vm.buffer, %constants : !vm.buffer, %executable_layouts : !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.executable_layout.create(%device : !vm.ref<!hal.device>, %push_constants : i32, %set_layouts : !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.semaphore.create(%device : !vm.ref<!hal.device>, %initial_value : i64) -> !vm.ref<!hal.semaphore> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.semaphore.query(%semaphore : !vm.ref<!hal.semaphore>) -> (i32, i64) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.signal(%semaphore : !vm.ref<!hal.semaphore>, %new_value : i64) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.fail(%semaphore : !vm.ref<!hal.semaphore>, %status : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.semaphore.await(%semaphore : !vm.ref<!hal.semaphore>, %min_value : i64) -> i32 attributes {sym_visibility = "private"} | |
vm.rodata private @_utf8_tensor_3C6209B4FD120BDC {alignment = 1 : i64} dense<[116, 101, 110, 115, 111, 114]> : vector<6xi8> | |
vm.func private @simple_mul(%arg0: !vm.ref<!hal.buffer_view>, %arg1: !vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer_view> { | |
%c13 = vm.const.i32 13 | |
%c28 = vm.const.i32 28 | |
%zero = vm.const.i64.zero | |
%c3 = vm.const.i32 3 | |
%c17 = vm.const.i32 17 | |
%c150998019 = vm.const.i32 150998019 | |
%c50 = vm.const.i32 50 | |
%c3075 = vm.const.i32 3075 | |
%c16 = vm.const.i64 16 | |
%c4 = vm.const.i64 4 | |
%c2 = vm.const.i32 2 | |
%zero_0 = vm.const.i32.zero | |
%c16_1 = vm.const.i32 16 | |
%c1 = vm.const.i32 1 | |
%c553648160 = vm.const.i32 553648160 | |
%_device_query_0 = vm.global.load.i32 @_device_query_0 : i32 | |
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%_executable_simple_mul_dispatch_0 = vm.global.load.ref @_executable_simple_mul_dispatch_0 : !vm.ref<!hal.executable> | |
%_utf8_tensor_3C6209B4FD120BDC = vm.const.ref.rodata @_utf8_tensor_3C6209B4FD120BDC : !vm.buffer | |
vm.call.variadic @hal.buffer_view.assert(%arg0, %_utf8_tensor_3C6209B4FD120BDC, %c553648160, %c1, [%c4]) : (!vm.ref<!hal.buffer_view>, !vm.buffer, i32, i32, i64 ...) | |
%ref = vm.call @hal.buffer_view.buffer(%arg0) {nosideeffects} : (!vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer> | |
%ref_2 = vm.call @hal.ex.shared_device() {nosideeffects} : () -> !vm.ref<!hal.device> | |
%ref_3 = vm.call @hal.device.allocator(%ref_2) {nosideeffects} : (!vm.ref<!hal.device>) -> !vm.ref<!hal.allocator> | |
%_utf8_tensor_3C6209B4FD120BDC_4 = vm.const.ref.rodata @_utf8_tensor_3C6209B4FD120BDC : !vm.buffer | |
vm.call @hal.buffer.assert(%ref, %_utf8_tensor_3C6209B4FD120BDC_4, %ref_3, %c16, %c16_1, %c3075) : (!vm.ref<!hal.buffer>, !vm.buffer, !vm.ref<!hal.allocator>, i64, i32, i32) -> () | |
%_utf8_tensor_3C6209B4FD120BDC_5 = vm.const.ref.rodata @_utf8_tensor_3C6209B4FD120BDC : !vm.buffer | |
vm.call.variadic @hal.buffer_view.assert(%arg1, %_utf8_tensor_3C6209B4FD120BDC_5, %c553648160, %c1, [%c4]) : (!vm.ref<!hal.buffer_view>, !vm.buffer, i32, i32, i64 ...) | |
%ref_6 = vm.call @hal.buffer_view.buffer(%arg1) {nosideeffects} : (!vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer> | |
%_utf8_tensor_3C6209B4FD120BDC_7 = vm.const.ref.rodata @_utf8_tensor_3C6209B4FD120BDC : !vm.buffer | |
vm.call @hal.buffer.assert(%ref_6, %_utf8_tensor_3C6209B4FD120BDC_7, %ref_3, %c16, %c16_1, %c3075) : (!vm.ref<!hal.buffer>, !vm.buffer, !vm.ref<!hal.allocator>, i64, i32, i32) -> () | |
%ref_8 = vm.call @hal.allocator.allocate(%ref_3, %c50, %c150998019, %c16) : (!vm.ref<!hal.allocator>, i32, i32, i64) -> !vm.ref<!hal.buffer> | |
%ref_9 = vm.call @hal.command_buffer.create(%ref_2, %c17, %c3) : (!vm.ref<!hal.device>, i32, i32) -> !vm.ref<!hal.command_buffer> | |
vm.call @hal.command_buffer.begin(%ref_9) : (!vm.ref<!hal.command_buffer>) -> () | |
vm.cond_br %_device_query_0, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
vm.call.variadic @hal.command_buffer.push_descriptor_set(%ref_9, %_executable_layout_0, %zero_0, [(%zero_0, %ref, %zero, %c16), (%c1, %ref_6, %zero, %c16), (%c2, %ref_8, %zero, %c16)]) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable_layout>, i32, tuple<i32, !vm.ref<!hal.buffer>, i64, i64> ...) | |
vm.call @hal.command_buffer.dispatch(%ref_9, %_executable_simple_mul_dispatch_0, %zero_0, %c1, %c1, %c1) : (!vm.ref<!hal.command_buffer>, !vm.ref<!hal.executable>, i32, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.execution_barrier(%ref_9, %c28, %c13, %zero_0) : (!vm.ref<!hal.command_buffer>, i32, i32, i32) -> () | |
vm.call @hal.command_buffer.end(%ref_9) : (!vm.ref<!hal.command_buffer>) -> () | |
vm.call @hal.ex.submit_and_wait(%ref_2, %ref_9) : (!vm.ref<!hal.device>, !vm.ref<!hal.command_buffer>) -> () | |
%ref_10 = vm.call.variadic @hal.buffer_view.create(%ref_8, %c553648160, %c1, [%c4]) {nosideeffects} : (!vm.ref<!hal.buffer>, i32, i32, i64 ...) -> !vm.ref<!hal.buffer_view> | |
vm.return %ref_10 : !vm.ref<!hal.buffer_view> | |
^bb2: // pred: ^bb0 | |
vm.fail %c2, "device not supported in the compiled configuration" | |
} | |
vm.export @simple_mul | |
vm.export @__init | |
vm.func private @__init() { | |
%null = vm.const.ref.zero : !vm.ref<!hal.executable> | |
%null_0 = vm.const.ref.zero : !vm.buffer | |
%c2 = vm.const.i32 2 | |
%c7 = vm.const.i32 7 | |
%zero = vm.const.i32.zero | |
%c1 = vm.const.i32 1 | |
%ref = vm.call @hal.ex.shared_device() {nosideeffects} : () -> !vm.ref<!hal.device> | |
%_utf8_hal_executable_format_EAB228F999C2D3A1 = vm.const.ref.rodata @_utf8_hal_executable_format_EAB228F999C2D3A1 : !vm.buffer | |
%_utf8_embedded_elf_x86_64_9FD8733DA4A6F228 = vm.const.ref.rodata @_utf8_embedded_elf_x86_64_9FD8733DA4A6F228 : !vm.buffer | |
%0:2 = vm.call @hal.device.query.i32(%ref, %_utf8_hal_executable_format_EAB228F999C2D3A1, %_utf8_embedded_elf_x86_64_9FD8733DA4A6F228) {nosideeffects} : (!vm.ref<!hal.device>, !vm.buffer, !vm.buffer) -> (i32, i32) | |
%1 = vm.and.i32 %0#1, %c1 : i32 | |
%2 = vm.select.i32 %0#0, %1, %zero : i32 | |
%ref_1 = vm.call.variadic @hal.descriptor_set_layout.create(%ref, %c1, [(%zero, %c7), (%c1, %c7), (%c2, %c7)]) {nosideeffects} : (!vm.ref<!hal.device>, i32, tuple<i32, i32> ...) -> !vm.ref<!hal.descriptor_set_layout> | |
%ref_2 = vm.call.variadic @hal.executable_layout.create(%ref, %zero, [%ref_1]) {nosideeffects} : (!vm.ref<!hal.device>, i32, !vm.ref<!hal.descriptor_set_layout> ...) -> !vm.ref<!hal.executable_layout> | |
vm.global.store.i32 %2, @_device_query_0 : i32 | |
vm.global.store.ref %ref_2, @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
vm.cond_br %2, ^bb1, ^bb2 | |
^bb1: // pred: ^bb0 | |
%_executable_layout_0 = vm.global.load.ref @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
%_utf8_embedded_elf_x86_64_9FD8733DA4A6F228_3 = vm.const.ref.rodata @_utf8_embedded_elf_x86_64_9FD8733DA4A6F228 : !vm.buffer | |
%simple_mul_dispatch_0_embedded_elf_x86_64 = vm.const.ref.rodata @simple_mul_dispatch_0_embedded_elf_x86_64 : !vm.buffer | |
%ref_4 = vm.call.variadic @hal.executable.create(%ref, %_utf8_embedded_elf_x86_64_9FD8733DA4A6F228_3, %simple_mul_dispatch_0_embedded_elf_x86_64, %null_0, [%_executable_layout_0]) {nosideeffects} : (!vm.ref<!hal.device>, !vm.buffer, !vm.buffer, !vm.buffer, !vm.ref<!hal.executable_layout> ...) -> !vm.ref<!hal.executable> | |
vm.br ^bb3(%ref_4 : !vm.ref<!hal.executable>) | |
^bb2: // pred: ^bb0 | |
vm.br ^bb3(%null : !vm.ref<!hal.executable>) | |
^bb3(%3: !vm.ref<!hal.executable>): // 2 preds: ^bb1, ^bb2 | |
vm.global.store.ref %3, @_executable_simple_mul_dispatch_0 : !vm.ref<!hal.executable> | |
vm.return | |
} | |
vm.export @__deinit | |
vm.func private @__deinit() { | |
vm.return | |
} | |
} | |
} | |
// -----// IR Dump After CSE //----- // | |
#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}> | |
module attributes {hal.device.targets = [#device_target_cpu], vm.toplevel} { | |
vm.module public @module { | |
vm.global.i32 private mutable @_device_query_0 : i32 | |
vm.global.ref private mutable @_executable_layout_0 : !vm.ref<!hal.executable_layout> | |
vm.global.ref private mutable @_executable_simple_mul_dispatch_0 : !vm.ref<!hal.executable> | |
vm.rodata private @simple_mul_dispatch_0_embedded_elf_x86_64 {alignment = 16 : i64, mime_type = "application/x-elf"} dense<"0x7F454C4602010100000000000000000003003E000100000030130000000000004000000000000000E00900000000000000000000400038000700400014001200060000000400000040000000000000004000000000000000400000000000000088010000000000008801000000000000080000000000000001000000040000000000000000000000000000000000000000000000000000002D030000000000002D030000000000000010000000000000010000000500000030030000000000003013000000000000301300000000000031000000000000003100000000000000001000000000000001000000060000007003000000000000702300000000000070230000000000003801000000000000380100000000000000100000000000000200000006000000E803000000000000E823000000000000E823000000000000C000000000000000C000000000000000080000000000000052E57464040000007003000000000000702300000000000070230000000000003801000000000000900C000000000000010000000000000051E574640600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000120006005013000000000000110000000000000002000000020000000000000001000000000000000000000000697265655F68616C5F65786563757461626C655F6C6962726172795F7175657279000000000000782300000000000008000000000000001003000000000000882300000000000008000000000000003013000000000000902300000000000008000000000000001003000000000000982300000000000008000000000000002C03000000000000A02300000000000008000000000000007023000000000000C02300000000000008000000000000008823000000000000C82300000000000008000000000000002803000000000000D02300000000000008000000000000009023000000000000D8230000000000000800000000000000982300000000000073696D706C655F6D756C5F64697370617463685F300000000000000000000000554889E5488B4620488B08488B5008488B40100F28010F59020F290031C05DC331C083FF02488D0D44100000480F44C1C3000000000000000000000000000000020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001E000000000000000800000000000000FBFFFF6F000000000100000000000000070000000000000038020000000000000800000000000000D80000000000000009000000000000001800000000000000F9FFFF6F0000000009000000000000000600000000000000C8010000000000000B000000000000001800000000000000050000000000000010020000000000000A0000000000000023000000000000000400000000000000F80100000000000000000000000000000000000000000000011101250E1305030E10171B0EB44219110112060000022E006E0E030E3F19200B0000032E0111011206401831130000041D00311311011206580B590B570B000000590000000400000000000801000000000200070000000000000005000000301300000000000020000000020700000007000000010330130000000000002000000001562A000000042A0000003F130000000000001100000002010100006D6C6972002F0073696D706C655F6D756C5F64697370617463685F3000280000000200000000005D0000002A00000073696D706C655F6D756C5F64697370617463685F3000000000000E0000000200000000005D0000000000000000000000000014000000FFFFFFFF040008000178100C0708900100000000240000000000000030130000000000002000000000000000410E108602430D065B0C070800000000140000000000000050130000000000001100000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000D300000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000D300000000000000140000000000000000000000000000000900000000000000140000000000000000000000000000002800000000000000140000000000000000000000000000001E00000000000000140000000000000000000000000000002800000000000000140000000000000000000000000000006F000000000000001400000000000000000000000000000003010000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003000000000000001400000000000000000000000000000003
00000000000000140000000000000000000000000000006F0000000000000014000000000000000000000000000000B8000000000000001400000000000000000000000000000052010000000000001400000000000000000000000000000042030000000000009E00000004007F000000010101FB0E0D0001010101000000010000012F7573722F6C6F63616C2F676F6F676C652F686F6D652F62656E76616E696B2F7372632F6972656500003C756E6B6E6F776E3E0001000072756E74696D652F7372632F697265652F72756E74696D652F74657374646174612F73696D706C655F6D756C2E6D6C69720001000000000902301300000000000011040205010A4B0508AD080001014952454500000000000000000000000000000000000000000000000000002300000000020800E8230000000000000000000000000000010000001200060050130000000000001100000000000000002E64796E73796D002E68617368002E64796E737472002E72656C612E64796E002E726F64617461002E74657874002E646174612E72656C2E726F002E64796E616D6963002E64656275675F616262726576002E64656275675F696E666F002E64656275675F737472002E64656275675F7075626E616D6573002E64656275675F7075627479706573002E64656275675F6672616D65002E64656275675F6C696E65002E636F6D6D656E74002E73796D746162002E7368737472746162002E7374727461620000697265655F68616C5F65786563757461626C655F6C6962726172795F7175657279005F44594E414D49430000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000B0000000200000000000000C801000000000000C801000000000000300000000000000003000000010000000800000000000000180000000000000009000000050000000200000000000000F801000000000000F80100000000000018000000000000000100000000000000040000000000000004000000000000000F0000000300000002000000000000001002000000000000100200000000000023000000000000000000000000000000010000000000000000000000000000001700000004000000020000000000000038020000000000003802000000000000D80000000000000001000000000000000800000000000000180000000000000021000000010000000200000000000000100300000000000010030000000000001D0000000000000000000000000000000800000000000000000000000000000029000000010000000600000000000000301300000000000030
0300000000000031000000000000000000000000000000100000000000000000000000000000002F0000000100000003000000000000007023000000000000700300000000000078000000000000000000000000000000100000000000000000000000000000003C000000060000000300000000000000E823000000000000E803000000000000C000000000000000030000000000000008000000000000001000000000000000450000000100000000000000000000000000000000000000A8040000000000004200000000000000000000000000000001000000000000000000000000000000530000000100000000000000000000000000000000000000EA040000000000005D000000000000000000000000000000010000000000000000000000000000005F000000010000003000000000000000000000000000000047050000000000001D000000000000000000000000000000010000000000000001000000000000006A000000010000000000000000000000000000000000000064050000000000002C000000000000000000000000000000010000000000000000000000000000007A0000000100000000000000000000000000000000000000900500000000000012000000000000000000000000000000010000000000000000000000000000008A0000000100000000000000000000000000000000000000A8050000000000005002000000000000000000000000000008000000000000000000000000000000970000000100000000000000000000000000000000000000F807000000000000A200000000000000000000000000000001000000000000000000000000000000A300000001000000300000000000000000000000000000009A080000000000000500000000000000000000000000000001000000000000000100000000000000AC0000000200000000000000000000000000000000000000A0080000000000004800000000000000130000000200000008000000000000001800000000000000B40000000300000000000000000000000000000000000000E808000000000000C600000000000000000000000000000001000000000000000000000000000000BE0000000300000000000000000000000000000000000000AE090000000000002C00000000000000000000000000000001000000000000000000000000000000"> : vector<3808xi8> | |
vm.rodata private @_utf8_hal_executable_format_EAB228F999C2D3A1 {alignment = 1 : i64} dense<[104, 97, 108, 46, 101, 120, 101, 99, 117, 116, 97, 98, 108, 101, 46, 102, 111, 114, 109, 97, 116]> : vector<21xi8> | |
vm.rodata private @_utf8_embedded_elf_x86_64_9FD8733DA4A6F228 {alignment = 1 : i64} dense<[101, 109, 98, 101, 100, 100, 101, 100, 45, 101, 108, 102, 45, 120, 56, 54, 95, 54, 52]> : vector<19xi8> | |
vm.import @hal.ex.shared_device() -> !vm.ref<!hal.device> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.ex.submit_and_wait(%device : !vm.ref<!hal.device>, %command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.allocate(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %allocation_size : i64) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.map.byte_buffer(%allocator : !vm.ref<!hal.allocator>, %try : i32, %memory_types : i32, %buffer_usage : i32, %source : !vm.buffer, %offset : i64, %length : i64) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.allocator.wrap.byte_buffer(%allocator : !vm.ref<!hal.allocator>, %memory_types : i32, %buffer_usage : i32, %source : !vm.buffer, %offset : i64, %length : i64) -> !vm.ref<!hal.buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.assert(%buffer : !vm.ref<!hal.buffer>, %message : !vm.buffer, %allocator : !vm.ref<!hal.allocator>, %minimum_length : i64, %memory_types : i32, %buffer_usage : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.subspan(%source_buffer : !vm.ref<!hal.buffer>, %source_offset : i64, %length : i64) -> !vm.ref<!hal.buffer> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer.length(%buffer : !vm.ref<!hal.buffer>) -> i64 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer.load(%source_buffer : !vm.ref<!hal.buffer>, %source_offset : i64, %length : i32) -> i32 attributes {sym_visibility = "private"} | |
vm.import @hal.buffer.store(%value : i32, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i64, %length : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer_view.create(%buffer : !vm.ref<!hal.buffer>, %element_type : i32, %encoding_type : i32, %shape : i64 ...) -> !vm.ref<!hal.buffer_view> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.assert(%buffer_view : !vm.ref<!hal.buffer_view>, %message : !vm.buffer, %element_type : i32, %encoding_type : i32, %shape : i64 ...) attributes {sym_visibility = "private"} | |
vm.import @hal.buffer_view.buffer(%buffer_view : !vm.ref<!hal.buffer_view>) -> !vm.ref<!hal.buffer> attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.byte_length(%buffer_view : !vm.ref<!hal.buffer_view>) -> i64 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.element_type(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.encoding_type(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.rank(%buffer_view : !vm.ref<!hal.buffer_view>) -> i32 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.dim(%buffer_view : !vm.ref<!hal.buffer_view>, %index : i32) -> i64 attributes {nosideeffects, sym_visibility = "private"} | |
vm.import @hal.buffer_view.trace(%key : !vm.buffer, %operands : !vm.ref<!hal.buffer_view> ...) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.create(%device : !vm.ref<!hal.device>, %modes : i32, %command_categories : i32) -> !vm.ref<!hal.command_buffer> attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.begin(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.end(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.begin_debug_group(%command_buffer : !vm.ref<!hal.command_buffer>, %label : !vm.buffer) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.end_debug_group(%command_buffer : !vm.ref<!hal.command_buffer>) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.execution_barrier(%command_buffer : !vm.ref<!hal.command_buffer>, %source_stage_mask : i32, %target_stage_mask : i32, %flags : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.fill_buffer(%command_buffer : !vm.ref<!hal.command_buffer>, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i64, %length : i64, %pattern : i32, %pattern_length : i32) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.copy_buffer(%command_buffer : !vm.ref<!hal.command_buffer>, %source_buffer : !vm.ref<!hal.buffer>, %source_offset : i64, %target_buffer : !vm.ref<!hal.buffer>, %target_offset : i64, %length : i64) attributes {sym_visibility = "private"} | |
vm.import @hal.command_buffer.push_constants(%command_buffer : !vm.ref<!hal.command_buffer>, %executable_layout : !vm.ref<!hal.executa |
View raw
(Sorry about that, but we can’t show files that are this big right now.)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment