This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ; ModuleID = '/home/mlevental/dev_projects/mlir-python-extras/examples/llvm.bc' | |
| source_filename = "LLVMDialectModule" | |
| target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" | |
| target triple = "amdgcn-amd-amdhsa" | |
| @__dynamic_shmem__0 = internal unnamed_addr addrspace(3) global [0 x i8] undef | |
| ; Function Attrs: nofree norecurse nounwind | |
| define amdgpu_kernel void @kernel2_lds_shared0(ptr readonly captures(none) %0, ptr readonly captures(none) %1, ptr writeonly captures(none) %2) local_unnamed_addr #0 { | |
| %.global3 = addrspacecast ptr %1 to ptr addrspace(1) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ; __CLANG_OFFLOAD_BUNDLE____START__ hip-amdgcn-amd-amdhsa--gfx1150 | |
| ; ModuleID = 'src/kernel2_lds.cpp' | |
| source_filename = "src/kernel2_lds.cpp" | |
| target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" | |
| target triple = "amdgcn-amd-amdhsa" | |
| @_ZZ11kernel2_ldsPKfS0_PfiE2As = internal unnamed_addr addrspace(3) global [32 x [32 x float]] undef, align 16 | |
| @_ZZ11kernel2_ldsPKfS0_PfiE2Bs = internal unnamed_addr addrspace(3) global [32 x [32 x float]] undef, align 16 | |
| @__hip_cuid_db26c5b7fc0b9bd = addrspace(1) global i8 0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ============================ ROCm System Management Interface ============================ | |
| ============================== Version of System Component =============================== | |
| Driver version: 6.10.5 | |
| ========================================================================================== | |
| =========================================== ID =========================================== | |
| GPU[0] : Device Name: Strix [Radeon 880M / 890M] | |
| GPU[0] : Device ID: 0x150e | |
| GPU[0] : Device Rev: 0xc1 | |
| GPU[0] : Subsystem ID: 0x1df3 | |
| GPU[0] : GUID: 39438 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import re | |
| import numpy as np | |
| build_times = open("log.bkup.txt").read() | |
| link_smt_times = open("log.link.smt.txt").read() | |
| link_no_smt_times = open("log.link.no.smt.txt").read() | |
| real_reg = re.compile(r"real\s+(.*?)s") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| %52 = arith.ceildivsi %arg5, %c64_i32_1 : i32 | |
| %53 = scf.forall (%arg12, %arg13) in (4, 1) shared_outs(%arg14 = %47) -> (tensor<64x64x!tt.ptr<f32>>) { | |
| %72 = affine.apply #map(%arg12) | |
| %73 = affine.apply #map1(%arg13) | |
| %extracted_slice = tensor.extract_slice %36[%72, %73] [16, 64] [1, 1] | |
| %extracted_slice_10 = tensor.extract_slice %arg14[%72, %73] [16, 64] [1, 1] | |
| %74 = linalg.generic { | |
| indexing_maps = [#map2, #map2], iterator_types = ["parallel", "parallel"] | |
| } ins(%extracted_slice : tensor<16x64x!tt.ptr<f32>>) outs(%extracted_slice_10 : tensor<16x64x!tt.ptr<f32>>) { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| BEFORE | |
| #map = affine_map<(d0, d1) -> (d0, d1)> | |
| module { | |
| module attributes {transform.target_tag = "payload"} { | |
| tt.func public @matmul_kernel_2(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32, tt.pointer_range = 32 : i32}, %arg1: !tt.ptr<f32> {tt.divisibility = 16 : i32, tt.pointer_range = 32 : i32}, %arg2: !tt.ptr<f32> {tt.divisibility = 16 : i32, tt.pointer_range = 32 : i32}, %arg3: i32, %arg4: i32, %arg5: i32, %arg6: i32, %arg7: i32, %arg8: i32, %arg9: i32, %arg10: i32, %arg11: i32) attributes {noinline = false} { | |
| %c64_i32 = arith.constant 64 : i32 | |
| %c64_i32_0 = arith.constant 64 : i32 | |
| %c64_i32_1 = arith.constant 64 : i32 | |
| %c1_i32 = arith.constant 1 : i32 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
| from triton_mlir.extras.context import RAIIMLIRContextModule | |
| from triton_mlir.dialects import tt as ttpp, scf, llvm, _tt_ops_gen as tt | |
| from triton_mlir.ir import Attribute, ArrayAttr, TypeAttr, Type | |
| from triton_mlir.extras.dialects.ext import arith | |
| ctx = RAIIMLIRContextModule() | |
| @ttpp.jit(arg_attrs=ArrayAttr.parse('[{tt.divisibility = 16 : i32, tt.pointer_range = 32 : i32}, {tt.divisibility = 16 : i32, tt.pointer_range = 32 : i32}, {tt.divisibility = 16 : i32, tt.pointer_range = 32 : i32}, {tt.divisibility = 16 : i32}, {tt.divisibility = 16 : i32}, {tt.divisibility = 16 : i32}, {tt.divisibility = 16 : i32}, {tt.divisibility = 16 : i32}, {tt.divisibility = 16 : i32}]'), function_type=TypeAttr.parse('(!tt.ptr<f16>, !tt.ptr<f16>, !tt.ptr<f16>, i32, i32, i32, i32, i32, i32) -> ()'), noinline=False, sym_name='matmul_kernel', sym_visibility='public') | |
| def matmul_kernel(arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| MLIR_CAPI_EXPORTED MlirAttribute mlirCallSiteLocAttrGet(Location callee, Location caller, MlirContext mlirContext); | |
| MLIR_CAPI_EXPORTED Location mlirCallSiteLocGetcallee(MlirAttribute mlirCallSiteLoc); | |
| MLIR_CAPI_EXPORTED Location mlirCallSiteLocGetcaller(MlirAttribute mlirCallSiteLoc); | |
| MLIR_CAPI_EXPORTED MlirAttribute mlirFileLineColRangeAttrGet(StringAttr filename, unsigned start_line, unsigned start_column, unsigned end_line, unsigned end_column, MlirContext mlirContext); | |
| MLIR_CAPI_EXPORTED StringAttr mlirFileLineColRangeGetfilename(MlirAttribute mlirFileLineColRange); | |
| MLIR_CAPI_EXPORTED unsigned mlirFileLineColRangeGetstart_line(MlirAttribute mlirFileLineColRange); | |
| MLIR_CAPI_EXPORTED unsigned mlirFileLineColRangeGetstart_column(MlirAttribute mlirFileLineColRange); | |
| MLIR_CAPI_EXPORTED unsigned mlirFileLineColRangeGetend_line(MlirAttribute mlirFileLineColRange); | |
| MLIR_CAPI_EXPORTED unsigned mlirFileLineColRangeGetend_column(MlirAttribute mlirFileLineColRange); | |
| MLIR_CAPI_EXPORTED MlirAttribute mlirFusedLocAttrGe |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| diff --git a/CMakeLists.txt b/CMakeLists.txt | |
| index de6ed2393..db776c0fc 100644 | |
| --- a/CMakeLists.txt | |
| +++ b/CMakeLists.txt | |
| @@ -47,6 +47,8 @@ if (TRITON_PARALLEL_LINK_JOBS) | |
| set(CMAKE_JOB_POOL_LINK link_job_pool) | |
| endif() | |
| +string(REPLACE "-Wl,-z,defs", "" CMAKE_MODULE_LINKER_FLAGS ${CMAKE_MODULE_LINKER_FLAGS}) | |
| +string(REPLACE "-Wl,-z,defs", "" CMAKE_SHARED_LINKER_FLAGS ${CMAKE_SHARED_LINKER_FLAGS}) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| source_filename = "LLVMDialectModule" | |
| target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" | |
| target triple = "amdgcn-amd-amdhsa" | |
| @global_smem = external addrspace(3) global [0 x i8], align 16 | |
| ; Function Attrs: alwaysinline nofree norecurse nounwind | |
| define amdgpu_kernel void @matmul_kernel(ptr addrspace(1) inreg readonly captures(none) %0, ptr addrspace(1) inreg readonly captures(none) %1, ptr addrspace(1) inreg writeonly %2, i32 inreg %3, i32 inreg %4, i32 inreg %5, i32 inreg %6, i32 inreg %7, i32 inreg %8, ptr addrspace(1) inreg readnone captures(none) %9) local_unnamed_addr #0 !dbg !4 { | |
| %11 = tail call i32 @llvm.amdgcn.workgroup.id.x(), !dbg !7 | |
| %12 = add i32 %4, 255, !dbg !8 |