This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
➜ pingpong python generate_8192_inputs.py | |
Traceback (most recent call last): | |
File "/usr/lib/python3/dist-packages/numpy/core/__init__.py", line 22, in <module> | |
from . import multiarray | |
File "/usr/lib/python3/dist-packages/numpy/core/multiarray.py", line 12, in <module> | |
from . import overrides | |
File "/usr/lib/python3/dist-packages/numpy/core/overrides.py", line 7, in <module> | |
from numpy.core._multiarray_umath import ( | |
ModuleNotFoundError: No module named 'numpy.core._multiarray_umath' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
➜ pingpong export ROCPROF_ATT_LIBRARY_PATH=$PWD/att-decoder-v3-3.0.0-Linux/opt/rocm/lib | |
➜ pingpong ~/therock/bin/rocprofv3 -i ~/pingpong/att.json -d traces -- ~/iree-build/tools/testing/e2e/iree-e2e-matmul-test --device=hip \ | |
--module=tmp/dispatch.vmfb \ | |
--module=tmp/calls.vmfb \ | |
--acceptable_fp_delta=1e-02 | |
*** Aborted at 1746043263 (unix time) try "date -d @1746043263" if you are using GNU date *** | |
PC: @ 0x7d411c73bf8a std::ostream::sentry::sentry(std::ostream&) | |
*** SIGSEGV (@0xffffffffffffffe8) received by PID 3817537 (TID 0x7d411c8e1000) from PID 18446744073709551592; stack trace: *** | |
@ 0x7d411c299ee8 (unknown) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
builtin.module @calls attributes { | |
} { | |
func.func private @matmul_test.generate_random_matrix(%device: !hal.device, %dim0: i64, %dim1: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view | |
func.func private @matmul_test.check_matmul_results(%device: !hal.device, %m: i64, %k: i64, %n: i64, %transpose_rhs: i32, %lhs: !hal.buffer_view, %rhs: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view) | |
func.func private @module.matmul(%lhs: !hal.buffer_view, %rhs: !hal.buffer_view) -> !hal.buffer_view | |
func.func @matmul() attributes { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// -----// IR Dump After LoweringConfigInterpreterPass (iree-codegen-lowering-config-interpreter) //----- // | |
func.func @matmul_dispatch_0_matmul_like_Dx256x4096x4096_f8E4M3FNUZxf8E4M3FNUZxf32() attributes {translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUTileAndFuse workgroup_size = [512, 1, 1] subgroup_size = 64, {gpu_pipeline_options = #iree_gpu.pipeline_options<prefetch_shared_memory = false, no_reduce_shared_memory_bank_conflicts = true>, llvm_func_attrs = {"amdgpu-waves-per-eu" = "2"}}>} { | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%cst = arith.constant 0.000000e+00 : f32 | |
%0 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32 | |
%1 = hal.interface.constant.load layout(<constants = 2, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnl |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
+ __global__ __launch_bounds__(256) static void run(const void *A_data, | |
+ const void *B_data, | |
+ void *C_data, int M_outer, int N_outer, | |
+ int K_outer) { | |
+ int total_tiles = M_outer * N_outer; | |
+ int cu = blockIdx.x; | |
+ int tile_start = total_tiles * cu / CUs; | |
+ int tile_end = total_tiles * (cu + 1) / CUs; | |
+ int m_outer = tile_start / N_outer; | |
+ int n_outer = tile_start - m_outer * N_outer; |
This file has been truncated, but you can view the full file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// -----// IR Dump After CheckVHLOStableHloMixUsage (iree-check-vhlostablehlo-mix-usage) //----- // | |
module { | |
func.func @sort3D() { | |
%0 = util.unfoldable_constant dense<[[[1, 2, 3, 4], [4, 3, 2, 1]]]> : tensor<1x2x4xi32> | |
%1 = "stablehlo.sort"(%0) <{dimension = 2 : i64, is_stable = false}> ({ | |
^bb0(%arg0: tensor<i32>, %arg1: tensor<i32>): | |
%2 = stablehlo.compare LT, %arg0, %arg1 : (tensor<i32>, tensor<i32>) -> tensor<i1> | |
stablehlo.return %2 : tensor<i1> | |
}) : (tensor<1x2x4xi32>) -> tensor<1x2x4xi32> | |
check.expect_eq_const(%1, dense<[[[1, 2, 3, 4], [1, 2, 3, 4]]]> : tensor<1x2x4xi32>) : tensor<1x2x4xi32> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.amdgcn_target "amdgcn-amd-amdhsa--gfx942" | |
.amdhsa_code_object_version 5 | |
.text | |
.globl _sort3D_dispatch_0_sort_1x2x4xi32_dispatch_tensor_store | |
.p2align 8 | |
.type _sort3D_dispatch_0_sort_1x2x4xi32_dispatch_tensor_store,@function | |
_sort3D_dispatch_0_sort_1x2x4xi32_dispatch_tensor_store: | |
s_load_dwordx2 s[2:3], s[0:1], 0x0 | |
s_waitcnt lgkmcnt(0) | |
s_branch .LBB0_0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/tests/e2e/math/math_ops_llvm-cpu.json b/tests/e2e/math/math_ops_rocm.json | |
index 57e8c422a7..fc4a6c964a 100644 | |
--- a/tests/e2e/math/math_ops_llvm-cpu.json | |
+++ b/tests/e2e/math/math_ops_rocm.json | |
@@ -8,8 +8,8 @@ | |
{ | |
"op": "acos", | |
"type": "f16", | |
- "atol": 1.0e-02, | |
- "rtol": 1.0e-02 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
::mlir::ParseResult ExpectAlmostEqConstOp::parse(::mlir::OpAsmParser &parser, ::mlir::OperationState &result) { | |
::llvm::SmallVector<::mlir::OpAsmParser::UnresolvedOperand, 4> deviceOperands; | |
::llvm::SMLoc deviceOperandsLoc; | |
(void)deviceOperandsLoc; | |
::mlir::OpAsmParser::UnresolvedOperand lhsRawOperand{}; | |
::llvm::ArrayRef<::mlir::OpAsmParser::UnresolvedOperand> lhsOperands(&lhsRawOperand, 1); ::llvm::SMLoc lhsOperandsLoc; | |
(void)lhsOperandsLoc; | |
::mlir::ElementsAttr valueAttr; | |
::mlir::FloatAttr toleranceAttr; | |
::mlir::Type lhsRawType{}; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// -----// IR Dump After CSE (cse) //----- // | |
func.func @__builtin_fill_i64(%arg0: !util.buffer, %arg1: !util.buffer, %arg2: !util.list<!util.buffer>, %arg3: i32, %arg4: i32, %arg5: i32, %arg6: i32, %arg7: i32, %arg8: i32, %arg9: i32, %arg10: i32, %arg11: i32) attributes {translation_info = #iree_codegen.translation_info<pipeline = VMVXDefault>} { | |
%c12 = arith.constant 12 : index | |
%c8 = arith.constant 8 : index | |
%c4 = arith.constant 4 : index | |
%c0 = arith.constant 0 : index | |
%c32_i64 = arith.constant 32 : i64 | |
%c1 = arith.constant 1 : index | |
%buffer_size = util.buffer.size %arg1 : !util.buffer | |
%0 = util.buffer.load %arg1[%c0 for %c4] : !util.buffer{%buffer_size} -> i32 |
NewerOlder