Created
July 21, 2025 19:49
-
-
Save Artem-B/0fd96044926baa90ea83180f9a00c1b4 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; NOTE(review): every line of this listing ends with ` | |`, which is not LLVM IR
; syntax -- presumably table residue from a web-page copy. Strip it before feeding
; the module to opt/llc.
; ModuleID = 'LLVMDialectModule' | |
source_filename = "LLVMDialectModule" | |
; Data layout: little-endian ("e"); pointers in address space 6 are 32 bits wide
; with 32-bit alignment; i64/i128 are naturally aligned; v16/v32 vectors are
; aligned to their size; native integer widths are 16, 32 and 64 bits.
target datalayout = "e-p6:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64" | |
; Externally defined, zero-length byte array in address space 3, 16-byte aligned.
; @kernel indexes it as i32 slots and accesses it through st.shared/ld.shared
; inline asm and one plain load.
@global_smem = external addrspace(3) global [0 x i8], align 16 | |
; @kernel -- signed-minimum reduction over bytes, written as a PTX kernel.
;
; Parameters (all pointers in address space 1, accessed with ld.global/st.global):
;   %0  input buffer; each thread reads one 32-bit word from it.
;   %1  output; thread 0 stores a single byte here.
;   %2  marked readnone/captures(none) and never referenced in the body.
;
; Flow:
;   1. Each thread loads 4 bytes at byte offset (tid.x * 4) & 508 and takes the
;      signed minimum of those four i8 values.
;   2. redux.sync.min combines the per-thread minima across the lanes named by
;      member mask -1.
;   3. Lane 0 of each 32-thread group stores its result into an i32 slot of
;      @global_smem selected by (tid.x >> 5) & 3.
;   4. After a CTA barrier, threads with tid.x < 4 reload those slots and combine
;      them with two butterfly shuffles (xor offsets 2 and 1) plus smin.
;   5. Thread 0 writes the result to slot 0; after a second barrier every thread
;      reads slot 0, and thread 0 stores its low 8 bits to %1.
;
; Attribute group #0 on this function carries "nvvm.reqntid"="128".
define ptx_kernel void @kernel(ptr addrspace(1) %0, ptr addrspace(1) %1, ptr addrspace(1) readnone captures(none) %2) local_unnamed_addr #0 !dbg !6 { | |
; %4 = tid.x (the intrinsic is declared with range [0, 1024)).
%4 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !dbg !9 | |
; %5 = low 5 bits of tid.x: position within a 32-thread group.
%5 = and i32 %4, 31, !dbg !9 | |
; %6 = tid.x / 32: index of the 32-thread group.
%6 = lshr i32 %4, 5, !dbg !9 | |
; %8 = (tid.x * 4) & 508: a 4-byte-aligned byte offset in [0, 508].
%7 = shl nuw nsw i32 %4, 2, !dbg !9 | |
%8 = and i32 %7, 508, !dbg !9 | |
%9 = zext nneg i32 %8 to i64, !dbg !10 | |
; %10 = %0 + byte offset.
%10 = getelementptr i8, ptr addrspace(1) %0, i64 %9, !dbg !10 | |
; Inline PTX: zero the destination register, then unconditionally load 32 bits
; from global memory at %10.
%11 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09ld.global.b32 { $0 }, [ $1 + 0 ];", "=r,l"(ptr addrspace(1) %10) #5, !dbg !11 | |
; Reinterpret the loaded word as four i8 lanes.
%12 = bitcast i32 %11 to <4 x i8>, !dbg !11 | |
; Per-thread signed minimum of the four bytes, sign-extended to i32.
%13 = tail call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> %12), !dbg !12 | |
%14 = sext i8 %13 to i32, !dbg !12 | |
; Min reduction across the lanes selected by member mask -1 (all bits set).
%15 = tail call i32 @llvm.nvvm.redux.sync.min(i32 %14, i32 -1), !dbg !16 | |
; %16 = group index masked to 0..3: selects one of four i32 slots in @global_smem.
%16 = and i32 %6, 3, !dbg !16 | |
; %17 = true only for the thread at position 0 of its 32-thread group.
%17 = icmp eq i32 %5, 0, !dbg !16 | |
%18 = zext nneg i32 %16 to i64, !dbg !16 | |
; %19 = &((i32*)@global_smem)[%16].
%19 = getelementptr i32, ptr addrspace(3) @global_smem, i64 %18, !dbg !16 | |
; The store asm takes its data operand as <1 x i32>.
%20 = insertelement <1 x i32> poison, i32 %15, i64 0, !dbg !16 | |
; Predicated shared-memory store: executes only where %17 is true, writing the
; reduced value %15 into the group's slot.
tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %19, <1 x i32> %20, i1 %17) #5, !dbg !16 | |
; CTA-wide barrier (id 0) between the slot stores above and the loads below.
tail call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 0), !dbg !16 | |
; %21 = tid.x < 4: only the first four threads reload a slot.
%21 = icmp samesign ult i32 %4, 4, !dbg !16 | |
%22 = zext nneg i32 %4 to i64, !dbg !16 | |
; %23 = &((i32*)@global_smem)[tid.x]; dereferenced only under the %21 / %29
; predicates below.
%23 = getelementptr i32, ptr addrspace(3) @global_smem, i64 %22, !dbg !16 | |
; Predicated shared-memory load. Unlike the global load above, the asm does not
; initialise $0 first, so %24 is only meaningful where %21 is true.
%24 = tail call i32 asm sideeffect "@$2 ld.shared.b32 $0, [ $1 + 0 ];", "=r,r,b"(ptr addrspace(3) %23, i1 %21) #5, !dbg !16 | |
; Butterfly exchange with xor offset 2 (member mask -1, clamp operand 31), then
; signed min of own and received value.
%25 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %24, i32 2, i32 31), !dbg !16 | |
%26 = tail call i32 @llvm.smin.i32(i32 %24, i32 %25), !dbg !12 | |
; Same again with xor offset 1; afterwards lanes 0..3 hold the min of slots 0..3.
%27 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %26, i32 1, i32 31), !dbg !16 | |
%28 = tail call i32 @llvm.smin.i32(i32 %26, i32 %27), !dbg !12 | |
; %29 = tid.x == 0: the single thread that publishes the final result.
%29 = icmp eq i32 %4, 0, !dbg !16 | |
%30 = insertelement <1 x i32> poison, i32 %28, i64 0, !dbg !16 | |
; Predicated store of %28 to %23; because %29 implies tid.x == 0 this is slot 0.
tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %23, <1 x i32> %30, i1 %29) #5, !dbg !16 | |
; Second CTA-wide barrier before slot 0 is read back.
tail call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 0), !dbg !16 | |
; Every thread reads slot 0 with an ordinary load.
%31 = load i32, ptr addrspace(3) @global_smem, align 16, !dbg !16 | |
; Keep only the low 8 bits.
%32 = trunc i32 %31 to i8, !dbg !17 | |
; Predicated global store: only thread 0 writes the result byte to %1.
tail call void asm sideeffect "@$2 st.global.b8 [ $1 + 0 ], { $0 };", "c,l,b"(i8 %32, ptr addrspace(1) %1, i1 %29) #5, !dbg !17 | |
ret void, !dbg !18 | |
} | |
; Reads the tid.x special register; the result is declared noundef with
; range [0, 1024).
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) | |
declare noundef range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.x() #1 | |
; Signed minimum of two i32 values.
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) | |
declare i32 @llvm.smin.i32(i32, i32) #1 | |
; Min reduction across participating lanes; @kernel calls it as (value, -1),
; where the second operand is the member mask.
; Function Attrs: convergent nocallback nounwind memory(inaccessiblemem: readwrite) | |
declare i32 @llvm.nvvm.redux.sync.min(i32, i32) #2 | |
; CTA-level barrier; @kernel passes 0 as its single i32 operand (the barrier id).
; Function Attrs: convergent nocallback nounwind | |
declare void @llvm.nvvm.barrier.cta.sync.aligned.all(i32) #3 | |
; Butterfly (xor) shuffle; @kernel calls it as (member mask, value, xor offset,
; clamp).
; Function Attrs: convergent nocallback nounwind memory(inaccessiblemem: readwrite) | |
declare i32 @llvm.nvvm.shfl.sync.bfly.i32(i32, i32, i32, i32) #2 | |
; Signed minimum across the four i8 elements of the vector operand.
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) | |
declare i8 @llvm.vector.reduce.smin.v4i8(<4 x i8>) #4 | |
; Attribute groups referenced by the definitions and declarations in this module.
; #0: attached to @kernel.
attributes #0 = { "nvvm.reqntid"="128" } | |
; #1: attached to @llvm.nvvm.read.ptx.sreg.tid.x and @llvm.smin.i32.
attributes #1 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) } | |
; #2: attached to the convergent redux.sync.min and shfl.sync.bfly intrinsics.
attributes #2 = { convergent nocallback nounwind memory(inaccessiblemem: readwrite) } | |
; #3: attached to @llvm.nvvm.barrier.cta.sync.aligned.all.
attributes #3 = { convergent nocallback nounwind } | |
; #4: attached to @llvm.vector.reduce.smin.v4i8.
attributes #4 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } | |
; #5: attached to every inline-asm call site inside @kernel.
attributes #5 = { nounwind } | |
; Named metadata: debug compile unit, module flags, producer ident, NVVM IR version.
!llvm.dbg.cu = !{!0} | |
!llvm.module.flags = !{!2, !3} | |
!llvm.ident = !{!4} | |
!nvvmir.version = !{!5} | |
; Compile unit: producer "triton", debug info limited to line tables.
!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "triton", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly) | |
; Primary source file for @kernel's debug locations.
!1 = !DIFile(filename: "test_core.py", directory: "/usr/local/google/_blaze_tra/055757613a7bee1fd9750a4a19b01463/execroot/google3/blaze-out/k8-fastbuild-cuda/bin/third_party/triton/python/test/unit/language/test_core_h100.runfiles/google3/third_party/triton/python/test/unit/language") | |
; Module flags: debug-info version 3 and nvvm-reflect-ftz = 1.
!2 = !{i32 2, !"Debug Info Version", i32 3} | |
!3 = !{i32 4, !"nvvm-reflect-ftz", i32 1} | |
; Payload of !llvm.ident.
!4 = !{!"clang version 3.8.0 (tags/RELEASE_380/final)"} | |
; Payload of !nvvmir.version: 2, 0.
!5 = !{i32 2, i32 0} | |
; Subprogram for @kernel, defined at test_core.py line 2421; its subroutine type
; (!7) has an empty type list (!8).
!6 = distinct !DISubprogram(name: "kernel", linkageName: "kernel", scope: !1, file: !1, line: 2421, type: !7, scopeLine: 2421, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) | |
!7 = !DISubroutineType(cc: DW_CC_normal, types: !8) | |
!8 = !{} | |
; !9-!11: locations on test_core.py line 2422 (used by the tid.x math, the
; address computation and the global load in @kernel).
!9 = !DILocation(line: 2422, column: 33, scope: !6) | |
!10 = !DILocation(line: 2422, column: 20, scope: !6) | |
!11 = !DILocation(line: 2422, column: 16, scope: !6) | |
; !12 and !16: locations in standard.py (scope !13, file !14), both inlined at
; !15, i.e. test_core.py line 2423.
!12 = !DILocation(line: 226, column: 27, scope: !13, inlinedAt: !15) | |
!13 = distinct !DILexicalBlockFile(scope: !6, file: !14, discriminator: 0) | |
!14 = !DIFile(filename: "standard.py", directory: "/usr/local/google/_blaze_tra/055757613a7bee1fd9750a4a19b01463/execroot/google3/blaze-out/k8-fastbuild-cuda/bin/third_party/triton/python/test/unit/language/test_core_h100.runfiles/google3/third_party/py/triton/language") | |
!15 = !DILocation(line: 2423, column: 15, scope: !6) | |
!16 = !DILocation(line: 247, column: 40, scope: !13, inlinedAt: !15) | |
; !17-!18: locations on test_core.py line 2424 (the final trunc/store and the ret).
!17 = !DILocation(line: 2424, column: 16, scope: !6) | |
!18 = !DILocation(line: 2424, column: 4, scope: !6) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment