Created
October 18, 2021 21:54
-
-
Save Artem-B/628359518f4eebccecac281c746a8090 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; Reproducer for a bad performance regression triggered by the switch to the new pass manager (PM). | |
; `barney` ended up with its local variables not being optimized away, and that | |
; had a rather dramatic effect on some GPU code. See | |
; https://bugs.llvm.org/show_bug.cgi?id=52037 for the gory details. | |
; | |
; NOTE that opt -O3 produces different IR. | |
; | |
; RUN: opt -mtriple=nvptx64-nvidia-cuda -passes='default<O3>' -S %s -o - \ | |
; RUN: | llc -mtriple=nvptx64-nvidia-cuda -mcpu=sm_70 -O3 -o - \ | |
; RUN: | FileCheck %s | |
; CHECK-LABEL: .visible .entry barney( | |
; CHECK-NOT: .local{{.*}}__local_depot | |
; CHECK: ret; | |
; Module header: source file name, named struct types, globals and external
; declarations referenced by @pluto and @barney below.
source_filename = "reduced.1.ll" | |
; Small aggregates: three i8, four floats, three floats, three i32.
%char3 = type { i8, i8, i8 } | |
%float4 = type { float, float, float, float } | |
%float3 = type { float, float, float } | |
%int3 = type { i32, i32, i32 } | |
; Parameter type of @pluto. Its first field is a pointer to function pointers;
; @pluto loads that slot with "vtable pointer" TBAA.
%struct.wwwww = type { i32 (...)**, [8 x i8], i32, [12 x i8] } | |
; Packed struct (<{ ... }>) that @pluto and @barney access by field index:
;   0: i32*   1: i16*   2: %float4   3: %int3   4: i32   5: %float3
;   6: [4 x i8]   7: i64   8: i32   9: i8   10: [3 x i8]   11: i32   12: [12 x i8]
%struct.blam = type <{ i32*, i16*, %float4, %int3, i32, %float3, [4 x i8], i64, i32, i8, [3 x i8], i32, [12 x i8] }> | |
; Pointee type of @barney's kernel argument; field 1 (i16*) is what @barney
; copies into its local object.
%struct.spam.2 = type { %struct.foo.3, i16*, float, float, i32, float } | |
%struct.foo.3 = type <{ %float4*, %float4*, %float4*, i32*, i32*, i32, i32, float }> | |
; Type of the local object allocated in @barney. @barney reinterprets it as
; %struct.wwwww and, at byte offset 16, as %struct.blam.
%struct.zot = type { %struct.bar, [8 x i8], %struct.foo, [12 x i8] } | |
%struct.bar = type { i32 (...)** } | |
%struct.foo = type <{ i16*, %float4, %int3, i32, %float3, [4 x i8], i64, i32, i8, [3 x i8], i32 }> | |
; External, externally_initialized array of 27 %char3 in address space 4.
; @pluto indexes it through an addrspacecast to the default address space.
@global = external addrspace(4) externally_initialized global [27 x %char3], align 1 | |
; Three-slot constant table: { inttoptr(16), null, null }. @barney stores the
; address of element index 3 (one past the last slot) at the start of its local
; object; the "pointer - 24 bytes" loads in @pluto/@barney therefore read
; slot 0, assuming 8-byte pointers (the RUN lines use an nvptx64 triple).
@global.1 = linkonce_odr unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* inttoptr (i64 16 to i8*), i8* null, i8* null] }, align 8 | |
; Function Attrs: argmemonly nofree nounwind willreturn | |
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #0 | |
; External functions with no body in this module. @snork is called from
; @barney; @bar.2, @zot and @hoge are called from @pluto.
declare %float4 @snork(float) | |
declare %float3 @bar.2(float, float) | |
declare %float3 @zot() | |
declare %int3 @hoge(i32, i32, i32) | |
; @pluto(%arg) -> i1
;
; Internal helper called from the loop in @barney. Every access to the
; %struct.blam sub-object goes through the same address computation, which is
; repeated (not reused) throughout the body:
;   p   = load i8* from offset 0 of %arg
;   off = load i64 from (p - 24 bytes)
;   obj = (%struct.blam*)((i8*)%arg + off)
;
; Behaviour as written:
;   * adds the result of @foo() to field 7 (i64) of obj and stores it back;
;   * loads an i16 from field1[field7 + zext(field4)];
;   * if that i16 equals 65535, returns false without further work;
;   * otherwise (bb49) updates fields 9, 5 and 8 of obj, calls @zot, @bar.2
;     and @hoge, and returns true.
;
; NOTE(review): most struct GEPs use `i32 undef` as their leading index; this
; is presumably an artifact of test-case reduction - confirm before relying on
; any concrete address.
define internal i1 @pluto(%struct.wwwww* %arg) { | |
bb: | |
%tmp = call i64 @foo() | |
; Locate obj and do field7 += @foo().
%tmp1 = bitcast %struct.wwwww* %arg to i8** | |
%tmp2 = load i8*, i8** %tmp1, align 8, !tbaa !1 | |
%tmp3 = getelementptr i8, i8* %tmp2, i64 -24 | |
%tmp4 = bitcast i8* %tmp3 to i64* | |
%tmp5 = load i64, i64* %tmp4, align 8 | |
%tmp6 = bitcast %struct.wwwww* %arg to i8* | |
%tmp7 = getelementptr inbounds i8, i8* %tmp6, i64 %tmp5 | |
%tmp8 = bitcast i8* %tmp7 to %struct.blam* | |
%tmp9 = getelementptr inbounds %struct.blam, %struct.blam* %tmp8, i32 undef, i32 7 | |
%tmp10 = load i64, i64* %tmp9, align 16 | |
%tmp11 = add i64 %tmp10, %tmp | |
store i64 %tmp11, i64* %tmp9, align 16, !tbaa !4 | |
; Recompute obj and load field 1 (the i16* base pointer).
%tmp12 = bitcast %struct.wwwww* %arg to i8** | |
%tmp13 = load i8*, i8** %tmp12, align 8, !tbaa !1 | |
%tmp14 = getelementptr i8, i8* %tmp13, i64 -24 | |
%tmp15 = bitcast i8* %tmp14 to i64* | |
%tmp16 = load i64, i64* %tmp15, align 8 | |
%tmp17 = bitcast %struct.wwwww* %arg to i8* | |
%tmp18 = getelementptr inbounds i8, i8* %tmp17, i64 %tmp16 | |
%tmp19 = bitcast i8* %tmp18 to %struct.blam* | |
%tmp20 = getelementptr inbounds %struct.blam, %struct.blam* %tmp19, i32 undef, i32 1 | |
%tmp21 = load i16*, i16** %tmp20, align 8 | |
; Recompute obj and reload field 7 (the value just stored above).
%tmp22 = bitcast %struct.wwwww* %arg to i8** | |
%tmp23 = load i8*, i8** %tmp22, align 8 | |
%tmp24 = getelementptr i8, i8* %tmp23, i64 -24 | |
%tmp25 = bitcast i8* %tmp24 to i64* | |
%tmp26 = load i64, i64* %tmp25, align 8 | |
%tmp27 = bitcast %struct.wwwww* %arg to i8* | |
%tmp28 = getelementptr inbounds i8, i8* %tmp27, i64 %tmp26 | |
%tmp29 = bitcast i8* %tmp28 to %struct.blam* | |
%tmp30 = getelementptr inbounds %struct.blam, %struct.blam* %tmp29, i32 undef, i32 7 | |
%tmp31 = load i64, i64* %tmp30, align 16 | |
; Recompute obj and load field 4 (i32), zero-extended to form the index.
%tmp32 = bitcast %struct.wwwww* %arg to i8** | |
%tmp33 = load i8*, i8** %tmp32, align 8 | |
%tmp34 = getelementptr i8, i8* %tmp33, i64 -24 | |
%tmp35 = bitcast i8* %tmp34 to i64* | |
%tmp36 = load i64, i64* %tmp35, align 8 | |
%tmp37 = bitcast %struct.wwwww* %arg to i8* | |
%tmp38 = getelementptr inbounds i8, i8* %tmp37, i64 %tmp36 | |
%tmp39 = bitcast i8* %tmp38 to %struct.blam* | |
%tmp40 = getelementptr inbounds %struct.blam, %struct.blam* %tmp39, i32 undef, i32 4 | |
%tmp41 = load i32, i32* %tmp40, align 4 | |
%tmp42 = zext i32 %tmp41 to i64 | |
; Load field1[field7 + field4]; the value 65535 takes the early exit to bb67.
%tmp43 = add i64 %tmp31, %tmp42 | |
%tmp44 = getelementptr inbounds i16, i16* %tmp21, i64 %tmp43 | |
%tmp45 = load i16, i16* %tmp44, align 2 | |
%tmp46 = zext i16 %tmp45 to i32 | |
%tmp47 = icmp eq i32 %tmp46, 65535 | |
br i1 %tmp47, label %bb67, label %bb49 | |
bb49: ; preds = %bb | |
; Recompute obj once more and form pointers to fields 2, 5, 0, 3, 9 and 8.
%tmp50 = bitcast %struct.wwwww* %arg to i8** | |
%tmp51 = load i8*, i8** %tmp50, align 8 | |
%tmp52 = getelementptr i8, i8* %tmp51, i64 -24 | |
%tmp53 = bitcast i8* %tmp52 to i64* | |
%tmp54 = load i64, i64* %tmp53, align 8 | |
%tmp55 = bitcast %struct.wwwww* %arg to i8* | |
%tmp56 = getelementptr inbounds i8, i8* %tmp55, i64 %tmp54 | |
%tmp57 = bitcast i8* %tmp56 to %struct.blam* | |
%tmp58 = getelementptr inbounds %struct.blam, %struct.blam* %tmp57, i32 undef, i32 2 | |
%tmp59 = getelementptr inbounds %struct.blam, %struct.blam* %tmp57, i32 undef, i32 5 | |
%tmp60 = getelementptr inbounds %struct.blam, %struct.blam* %tmp57, i32 undef, i32 0 | |
%tmp61 = load i32*, i32** %tmp60, align 16 | |
%tmp62 = getelementptr inbounds %struct.blam, %struct.blam* %tmp57, i32 undef, i32 3 | |
%tmp63 = getelementptr inbounds %struct.blam, %struct.blam* %tmp57, i32 undef, i32 9 | |
%tmp64 = getelementptr inbounds %struct.blam, %struct.blam* %tmp57, i32 undef, i32 8 | |
; field 9 (i8) = trunc((zext(loaded i16) >> 11) - 1).
%tmp8.i = zext i16 %tmp45 to i32 | |
%tmp9.i = ashr i32 %tmp8.i, 11 | |
%tmp10.i = sub nsw i32 %tmp9.i, 1 | |
%tmp11.i = trunc i32 %tmp10.i to i8 | |
store i8 %tmp11.i, i8* %tmp63, align 1 | |
; field 5 (%float3) = @bar.2(field2[0] - zot[0], field2[2] - zot[2]), where
; field 2 (%float4) is also viewed as a %float3 to read its third component.
%tmp12.i = bitcast %float4* %tmp58 to %float3* | |
%tmp13.i = call %float3 @zot() #1 | |
%tmp15.i = extractvalue %float3 %tmp13.i, 0 | |
%tmp18.i = getelementptr inbounds %float4, %float4* %tmp58, i32 undef, i32 0 | |
%tmp19.i = load float, float* %tmp18.i, align 4 | |
%tmp22.i = fsub contract float %tmp19.i, %tmp15.i | |
%tmp23.i = getelementptr inbounds %float3, %float3* %tmp12.i, i32 undef, i32 2 | |
%tmp24.i = load float, float* %tmp23.i, align 4 | |
%tmp17.i = extractvalue %float3 %tmp13.i, 2 | |
%tmp27.i = fsub contract float %tmp24.i, %tmp17.i | |
%tmp28.i = call %float3 @bar.2(float %tmp22.i, float %tmp27.i) #1 | |
store %float3 %tmp28.i, %float3* %tmp59, align 4 | |
; Reload field 9 and use it to index @global; add the three sign-extended i8
; components of that entry to the three i32 components of field 3 and pass
; the sums to @hoge (whose result is not used).
%tmp37.i = load i8, i8* %tmp63, align 1 | |
%tmp38.i = zext i8 %tmp37.i to i64 | |
%tmp39.i = getelementptr inbounds [27 x %char3], [27 x %char3]* addrspacecast ([27 x %char3] addrspace(4)* @global to [27 x %char3]*), i64 0, i64 %tmp38.i | |
%tmp40.i = getelementptr inbounds %int3, %int3* %tmp62, i32 undef, i32 0 | |
%tmp41.i = load i32, i32* %tmp40.i, align 4 | |
%tmp42.i = getelementptr inbounds %char3, %char3* %tmp39.i, i32 undef, i32 0 | |
%tmp43.i = load i8, i8* %tmp42.i, align 1 | |
%tmp44.i = sext i8 %tmp43.i to i32 | |
%tmp45.i = add nsw i32 %tmp41.i, %tmp44.i | |
%tmp48.i = getelementptr inbounds %int3, %int3* %tmp62, i32 undef, i32 1 | |
%tmp49.i = load i32, i32* %tmp48.i, align 4 | |
%tmp50.i = getelementptr inbounds %char3, %char3* %tmp39.i, i32 undef, i32 1 | |
%tmp51.i = load i8, i8* %tmp50.i, align 1 | |
%tmp52.i = sext i8 %tmp51.i to i32 | |
%tmp53.i = add nsw i32 %tmp49.i, %tmp52.i | |
%tmp54.i = getelementptr inbounds %int3, %int3* %tmp62, i32 undef, i32 2 | |
%tmp55.i = load i32, i32* %tmp54.i, align 4 | |
%tmp56.i = getelementptr inbounds %char3, %char3* %tmp39.i, i32 undef, i32 2 | |
%tmp57.i = load i8, i8* %tmp56.i, align 1 | |
%tmp58.i = sext i8 %tmp57.i to i32 | |
%tmp59.i = add nsw i32 %tmp55.i, %tmp58.i | |
%tmp60.i = call %int3 @hoge(i32 %tmp45.i, i32 %tmp53.i, i32 %tmp59.i) #1 | |
; field 8 (i32) = i32 loaded through the field 0 pointer at an undef index.
%tmp61.i = getelementptr inbounds i32, i32* %tmp61, i64 undef | |
%tmp62.i = load i32, i32* %tmp61.i, align 4 | |
store i32 %tmp62.i, i32* %tmp64, align 4 | |
br label %bb67 | |
bb67: ; preds = %bb49, %bb | |
; true when control arrives from bb49, false on the early exit from bb.
%tmp68 = phi i1 [ true, %bb49 ], [ false, %bb ] | |
ret i1 %tmp68 | |
} | |
; External function with no body in this module; @pluto adds its i64 result
; to field 7 of the %struct.blam object it operates on.
declare i64 @foo() | |
; @barney(%arg)
;
; The function under test: it is marked as a kernel via !nvvm.annotations, and
; the FileCheck lines at the top of the file require that the code generated
; for it contains no `.local ... __local_depot` declaration before `ret;`,
; i.e. that the allocas below do not survive as local-memory storage.
;
; Shape of the function as written:
;   bb/bb3 : allocate a %struct.zot, copy field 1 of %arg into it, and store
;            the address of element 3 of @global.1 at its start;
;   bb11   : loop header - call @pluto on the local object, exit to bb14 when
;            it returns false;
;   bb15   : loop body - read fields of the local object, call memcpy and
;            @snork, then loop back through bb49.
define void @barney(%struct.spam.2* %arg) { | |
bb: | |
; Side-effecting inline asm whose text is just the comment "// KEEP".
call void asm sideeffect "// KEEP", ""() | |
%tmp = alloca %struct.zot, align 16 | |
; These two allocas have an undef element count.
%tmp1 = alloca i32, i32 undef, align 4 | |
%tmp2 = alloca %float4, i32 undef, align 16 | |
br label %bb3 | |
bb3: ; preds = %bb | |
; Copy the i16* in field 1 of %arg into field 1 of the %struct.blam view that
; starts 16 bytes into the local object.
%tmp4 = getelementptr inbounds %struct.spam.2, %struct.spam.2* %arg, i32 undef, i32 1 | |
%tmp5 = load i16*, i16** %tmp4, align 8 | |
%tmp6 = bitcast %struct.zot* %tmp to i8* | |
%tmp7 = getelementptr inbounds i8, i8* %tmp6, i64 16 | |
%tmp8 = bitcast i8* %tmp7 to %struct.blam* | |
%tmp9 = getelementptr inbounds %struct.blam, %struct.blam* %tmp8, i32 undef, i32 1 | |
store i16* %tmp5, i16** %tmp9, align 8 | |
; Store the address of element 3 of @global.1 into the first slot of the
; local object; @pluto and bb15 load it back and read the i64 24 bytes before it.
%tmp10 = bitcast %struct.zot* %tmp to i32 (...)*** | |
store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @global.1, i32 0, inrange i32 0, i32 3) to i32 (...)**), i32 (...)*** %tmp10, align 8 | |
br label %bb11 | |
bb11: ; preds = %bb49, %bb3 | |
; Loop header: continue while @pluto returns true.
%tmp12 = bitcast %struct.zot* %tmp to %struct.wwwww* | |
%tmp13 = call i1 @pluto(%struct.wwwww* %tmp12) | |
br i1 %tmp13, label %bb15, label %bb14 | |
bb14: ; preds = %bb11 | |
ret void | |
bb15: ; preds = %bb11 | |
; Same address computation as in @pluto: first-slot pointer -> i64 at -24
; bytes -> byte offset from the start of the local object to a %struct.blam.
%tmp16 = bitcast %struct.zot* %tmp to i8** | |
%tmp17 = load i8*, i8** %tmp16, align 16 | |
%tmp18 = getelementptr i8, i8* %tmp17, i64 -24 | |
%tmp19 = bitcast i8* %tmp18 to i64* | |
%tmp20 = load i64, i64* %tmp19, align 8 | |
%tmp21 = bitcast %struct.zot* %tmp to i8* | |
%tmp22 = getelementptr inbounds i8, i8* %tmp21, i64 %tmp20 | |
%tmp23 = bitcast i8* %tmp22 to %struct.blam* | |
; Spill field 11 (i32) of that object into the %tmp1 alloca.
%tmp24 = getelementptr inbounds %struct.blam, %struct.blam* %tmp23, i32 undef, i32 11 | |
%tmp25 = load i32, i32* %tmp24, align 4 | |
store i32 %tmp25, i32* %tmp1, align 4 | |
; Recompute the %struct.blam pointer.
%tmp26 = bitcast %struct.zot* %tmp to i8** | |
%tmp27 = load i8*, i8** %tmp26, align 16 | |
%tmp28 = getelementptr i8, i8* %tmp27, i64 -24 | |
%tmp29 = bitcast i8* %tmp28 to i64* | |
%tmp30 = load i64, i64* %tmp29, align 8 | |
%tmp31 = bitcast %struct.zot* %tmp to i8* | |
%tmp32 = getelementptr inbounds i8, i8* %tmp31, i64 %tmp30 | |
%tmp33 = bitcast i8* %tmp32 to %struct.blam* | |
; Reload the spilled index and use it to index the %float4* read from the
; start of %arg; the resulting pointer is the memcpy source. The memcpy
; destination and length are both undef.
%tmp34 = bitcast %struct.spam.2* %arg to %float4** | |
%tmp35 = load i32, i32* %tmp1, align 4 | |
%tmp36 = load %float4*, %float4** %tmp34, align 8 | |
%tmp37 = zext i32 %tmp35 to i64 | |
%tmp38 = getelementptr inbounds %float4, %float4* %tmp36, i64 %tmp37 | |
%tmp39 = bitcast %float4* %tmp38 to i8* | |
call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* %tmp39, i64 undef, i1 false) | |
; Subtract component 2 of the %tmp2 alloca from component 2 of field 5
; (%float3) and pass the difference to @snork; its result is not used.
%tmp40 = getelementptr inbounds %struct.blam, %struct.blam* %tmp33, i32 undef, i32 5 | |
%tmp41 = getelementptr inbounds %float3, %float3* %tmp40, i32 undef, i32 2 | |
%tmp42 = load float, float* %tmp41, align 4 | |
%tmp43 = getelementptr inbounds %float4, %float4* %tmp2, i32 undef, i32 2 | |
%tmp44 = load float, float* %tmp43, align 8 | |
%tmp45 = fsub contract float %tmp42, %tmp44 | |
%tmp46 = call %float4 @snork(float %tmp45) | |
br label %bb49 | |
bb49: ; preds = %bb15 | |
; Back edge to the loop header.
br label %bb11 | |
bb50: ; No predecessors! (bb49 branches only to %bb11) | |
unreachable | |
} | |
; Attribute groups: #0 is attached to the llvm.memcpy declaration, #1 to the
; calls to @zot, @bar.2 and @hoge inside @pluto.
attributes #0 = { argmemonly nofree nounwind willreturn } | |
attributes #1 = { nounwind } | |
; Marks @barney as a kernel; the CHECK-LABEL line expects it to be emitted as
; `.visible .entry barney(`.
!nvvm.annotations = !{!0} | |
!0 = !{void (%struct.spam.2*)* @barney, !"kernel", i32 1} | |
; TBAA metadata. !1 tags the first two first-slot pointer loads in @pluto as
; "vtable pointer" accesses; !4 tags the store to field 7 of %struct.blam as an
; access to the "long" member at offset 64 of struct type !5.
!1 = !{!2, !2, i64 0} | |
!2 = !{!"vtable pointer", !3, i64 0} | |
!3 = !{!"Simple C++ TBAA"} | |
!4 = !{!5, !13, i64 64} | |
!5 = !{!"_ZTSN7cuneibs22neiblist_iterator_coreE", !6, i64 0, !6, i64 8, !8, i64 16, !10, i64 32, !11, i64 44, !12, i64 48, !13, i64 64, !11, i64 72, !7, i64 76, !11, i64 80} | |
!6 = !{!"any pointer", !7, i64 0} | |
!7 = !{!"omnipotent char", !3, i64 0} | |
!8 = !{!"_ZTS6float4", !9, i64 0, !9, i64 4, !9, i64 8, !9, i64 12} | |
!9 = !{!"float", !7, i64 0} | |
!10 = !{!"_ZTS4int3", !11, i64 0, !11, i64 4, !11, i64 8} | |
!11 = !{!"int", !7, i64 0} | |
!12 = !{!"_ZTS6float3", !9, i64 0, !9, i64 4, !9, i64 8} | |
!13 = !{!"long", !7, i64 0} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment