Skip to content

Instantly share code, notes, and snippets.

@raphlinus
Last active June 9, 2023 16:41
Show Gist options
  • Save raphlinus/a8e0a3a3683127149b746eb37822bdc8 to your computer and use it in GitHub Desktop.
Save raphlinus/a8e0a3a3683127149b746eb37822bdc8 to your computer and use it in GitHub Desktop.
MetalLibraryExplorer dump of metal-shaderconverter output: the HLSL compute shader input, followed by the LLVM IR (AIR) generated from it
// Read-only raw buffer; main() loads one 32-bit value per thread from it.
ByteAddressBuffer input;
// Writable raw buffer; main() stores the reduced maximum into it.
RWByteAddressBuffer output;
// Thread-group shared accumulator that holds the running maximum.
groupshared uint max_value;
// Compute entry point: reduces 256 consecutive uints from `input` to their
// maximum with a group-shared atomic, then thread 0 writes the result to
// `output`.
//
// index: flattened thread index inside the thread group (SV_GroupIndex),
//        so it ranges over 0..255 for the 256-thread group declared below.
[numthreads(256, 1, 1)]
void main(uint index: SV_GroupIndex) {
// Exactly one thread initializes the shared accumulator.
if (index == 0) {
max_value = 0;
}
// Every thread must observe the zero-initialization before any atomic runs.
GroupMemoryBarrierWithGroupSync();
// Each thread contributes the uint found at byte offset index * 4.
InterlockedMax(max_value, input.Load(index * 4));
// Wait until all threads have contributed before the result is read.
GroupMemoryBarrierWithGroupSync();
if (index == 0) {
// NOTE(review): index is 0 inside this branch, so (index / 256) * 4 is
// always byte offset 0. Because index is group-local, every dispatched group
// would read the same 256 inputs and write the same output slot; presumably a
// per-group id (e.g. SV_GroupID) was intended here - confirm before reusing
// this shader for multi-group dispatches.
output.Store((index / 256) * 4, max_value);
}
}
; LLVM IR module for the compute shader, as dumped from the compiled Metal
; library.
; ModuleID = 'shader.air'
source_filename = "main"
; Data layout: little-endian ("e"), 64-bit pointers, 32-bit native integer
; width ("n32"), 64-bit natural stack alignment ("S64").
target datalayout = "e-p:64:64:64-f32:32:32-i64:64:64-i32:32:32-i16:16:16-v32:32:32-n32-S64"
; 64-bit AIR target; the OS component names macosx14.0.0.
target triple = "air64-apple-macosx14.0.0"
; Top-level argument block passed to @main: two resource descriptors.
; @main reads the first pointer of each one (the buffer it loads from, then
; the buffer it stores to).
%struct.top_level_global_ab.1 = type { %struct__desc, %struct__desc }
; Resource descriptor: an addrspace(1) byte pointer, an addrspace(1) pointer to
; an opaque texture-buffer type, and an i64. Only the first field is read by
; @main. NOTE(review): the i64 is presumably buffer metadata such as a length;
; it is never loaded in this module - confirm against the converter docs.
%struct__desc = type { i8 addrspace(1)*, %struct._texture_buffer_1d_t addrspace(1)*, i64 }
%struct._texture_buffer_1d_t = type opaque
; Opaque types for the two additional pointer arguments of @main; both
; arguments are tagged "air.arg_unused" in the kernel metadata.
%struct.res_desc_heap_ab.2 = type opaque
%struct.smp_desc_heap_ab.3 = type opaque
; The HLSL groupshared `max_value` (MSVC-style mangled name), placed in
; addrspace(3). Its initializer is undef, which is why @main stores 0 to it
; explicitly before use.
@"\01?max_value@@3IA" = internal addrspace(3) global i32 undef, align 4
; Kernel entry point generated from the HLSL main().
;   %0 - thread index within the threadgroup (see kernel metadata !17).
;   %1 - top-level argument block holding the two buffer descriptors.
;   %2, %3 - descriptor-heap pointers; not referenced in the body.
define void @main(i32 %0, %struct.top_level_global_ab.1 addrspace(2)* %1, %struct.res_desc_heap_ab.2 addrspace(2)* %2, %struct.smp_desc_heap_ab.3 addrspace(2)* %3) local_unnamed_addr {
; Load the first field of descriptor 0, reinterpreted as an i32 pointer. This
; is the buffer the kernel reads from (HLSL `input`).
%5 = bitcast %struct.top_level_global_ab.1 addrspace(2)* %1 to i32 addrspace(1)* addrspace(2)*
%.1.unpack6 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %5, align 8
; Load the first field of descriptor 1 the same way. This is the buffer the
; kernel writes to (HLSL `output`).
%6 = getelementptr inbounds %struct.top_level_global_ab.1, %struct.top_level_global_ab.1 addrspace(2)* %1, i64 0, i32 1
%7 = bitcast %struct__desc addrspace(2)* %6 to i32 addrspace(1)* addrspace(2)*
%..unpack12 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %7, align 8
; Thread 0 takes the initialization branch.
%8 = icmp eq i32 %0, 0
br i1 %8, label %9, label %10
9: ; preds = %4
; max_value = 0 (plain, non-atomic store).
store i32 0, i32 addrspace(3)* @"\01?max_value@@3IA", align 4, !tbaa !22
br label %10
10: ; preds = %9, %4
; %11 is true only when control arrived from block %9, i.e. only for thread
; index 0; it is reused below for the final write-back branch.
%11 = phi i1 [ %8, %9 ], [ false, %4 ]
; First barrier: corresponds to the GroupMemoryBarrierWithGroupSync() that
; follows the initialization in the HLSL. NOTE(review): the (2, 1) arguments
; are presumably memory-flags and scope selectors - confirm against AIR docs.
call void @air.wg.barrier(i32 2, i32 1)
; 1073741823 == 0x3FFFFFFF: keeps the low 30 bits of the thread index, which
; equals (index * 4) / 4 in 32-bit unsigned arithmetic, i.e. the dword index
; for the byte offset index * 4 used by input.Load.
%12 = and i32 %0, 1073741823
%13 = zext i32 %12 to i64
%14 = getelementptr i32, i32 addrspace(1)* %.1.unpack6, i64 %13
; No bounds check is emitted before this load.
%.unpack = load i32, i32 addrspace(1)* %14, align 4
; InterlockedMax: sequentially consistent unsigned atomic max on max_value;
; the returned previous value (%15) is unused.
%15 = atomicrmw umax i32 addrspace(3)* @"\01?max_value@@3IA", i32 %.unpack seq_cst, align 4
; Second barrier: all threads have contributed before the result is read.
call void @air.wg.barrier(i32 2, i32 1)
br i1 %11, label %16, label %18
16: ; preds = %10
; Thread 0 only: copy the reduced maximum to element 0 of the output buffer.
; The HLSL offset (index / 256) * 4 has been folded away; the store uses the
; base pointer directly.
%17 = load i32, i32 addrspace(3)* @"\01?max_value@@3IA", align 4, !tbaa !22
store i32 %17, i32 addrspace(1)* %..unpack12, align 4
br label %18
18: ; preds = %16, %10
ret void
}
; Barrier intrinsic, declared here and defined outside this module. @main
; calls it twice, both times with (i32 2, i32 1), at the points where the HLSL
; source calls GroupMemoryBarrierWithGroupSync().
; NOTE(review): the two i32 parameters are presumably memory-flags and scope
; selectors - confirm against AIR documentation.
; Function Attrs: convergent nounwind
declare void @air.wg.barrier(i32, i32) local_unnamed_addr #0
; "convergent" restricts transformations that would change which threads reach
; the call together; "nounwind" states the call never unwinds.
attributes #0 = { convergent nounwind }
; ---- Named metadata: module flags, producer ident, AIR/Metal versions, and
; ---- the list of kernel entry points.
!llvm.module.flags = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10}
!llvm.ident = !{!11}
!air.version = !{!12}
!air.language_version = !{!13}
!air.kernel = !{!14}
; ---- Module flags. The leading i32 is the LLVM merge behavior
; ---- (1 = Error on mismatch, 7 = Max); then the flag name and its value.
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"air.max_device_buffers", i32 31}
!2 = !{i32 7, !"air.max_constant_buffers", i32 31}
!3 = !{i32 7, !"air.max_threadgroup_buffers", i32 31}
!4 = !{i32 7, !"air.max_textures", i32 128}
!5 = !{i32 7, !"air.max_read_write_textures", i32 8}
!6 = !{i32 7, !"air.max_samplers", i32 16}
!7 = !{i32 7, !"agx.allow_mismatched_component_counts", i32 1}
!8 = !{i32 1, !"irc.version_major", i32 0}
!9 = !{i32 1, !"irc.version_minor", i32 1}
!10 = !{i32 1, !"irc.version_patch", i32 0}
; Producer identification string.
!11 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
; AIR version 2.6.0 and Metal language version 3.1.0.
!12 = !{i32 2, i32 6, i32 0}
!13 = !{!"Metal", i32 3, i32 1, i32 0}
; ---- Kernel descriptor for @main: function pointer, an empty node (!15),
; ---- the per-argument descriptions (!16), and kernel attributes (!21).
!14 = !{void (i32, %struct.top_level_global_ab.1 addrspace(2)*, %struct.res_desc_heap_ab.2 addrspace(2)*, %struct.smp_desc_heap_ab.3 addrspace(2)*)* @main, !15, !16, !21}
!15 = !{}
!16 = !{!17, !18, !19, !20}
; Argument 0: the thread index within the threadgroup.
; NOTE(review): the recorded argument name is "tindex" while the HLSL parameter
; is named `index`; presumably the converter assigns its own name - confirm.
!17 = !{i32 0, !"air.thread_index_in_threadgroup", !"air.arg_type_name", !"uint", !"air.arg_name", !"tindex"}
; Argument 1: read-only indirect buffer at location index 16 carrying the
; top-level argument block.
!18 = !{i32 1, !"air.indirect_buffer", !"air.location_index", i32 16, i32 1, !"air.read", !"air.address_space", i32 2, !"air.struct_type_info", !15, !"air.arg_type_name", !"struct.top_level_global_ab.1", !"air.arg_name", !"top_level_global_ab"}
; Arguments 2 and 3: descriptor-heap buffers at location indices 18 and 19,
; both flagged "air.arg_unused".
!19 = !{i32 2, !"air.indirect_buffer", !"air.location_index", i32 18, i32 1, !"air.read", !"air.address_space", i32 2, !"air.struct_type_info", !15, !"air.arg_type_name", !"struct.res_desc_heap_ab.2", !"air.arg_name", !"res_desc_heap_ab", !"air.arg_unused"}
!20 = !{i32 3, !"air.indirect_buffer", !"air.location_index", i32 19, i32 1, !"air.read", !"air.address_space", i32 2, !"air.struct_type_info", !15, !"air.arg_type_name", !"struct.smp_desc_heap_ab.3", !"air.arg_name", !"smp_desc_heap_ab", !"air.arg_unused"}
; Maximum work-group size of 256, matching [numthreads(256, 1, 1)] in the HLSL.
!21 = !{!"air.max_work_group_size", i32 256}
; ---- TBAA nodes attached to the non-atomic load/store of max_value in @main:
; ---- access tag (!22) -> scalar type "int" -> "omnipotent char" -> root.
!22 = !{!23, !23, i64 0}
!23 = !{!"int", !24, i64 0}
!24 = !{!"omnipotent char", !25, i64 0}
!25 = !{!"Simple C/C++ TBAA"}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment