Created
June 26, 2016 05:12
-
-
Save briansp2020/31e440743dafdaccb8afe92282583e14 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; ModuleID = '<stdin>' | |
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | |
target triple = "x86_64-unknown-linux-gnu" | |
; Named aggregate types used by this module.
; %struct.LocalQueues has three fields: 8 x i32, 8 arrays of 400 x i32, and 8 x i32.
; The kernels in this file index field 1 as [queue][slot] and fields 0 and 2 as [queue].
%struct.LocalQueues = type { [8 x i32], [8 x [400 x i32]], [8 x i32] } | |
; Pair of i32 values.
%"class.hc::short_vector::int_2.0" = type { i32, i32 } | |
; %struct.grid_launch_parm: four %struct.gl_dim3 values, one i32, then a pointer to
; hc::accelerator_view and a pointer to hc::completion_future.
%struct.grid_launch_parm = type { %struct.gl_dim3, %struct.gl_dim3, %struct.gl_dim3, %struct.gl_dim3, i32, %"class.hc::accelerator_view"*, %"class.hc::completion_future"* } | |
%struct.gl_dim3 = type { i32, i32, i32 } | |
; The remaining types are the ones reachable from the two pointer fields of
; %struct.grid_launch_parm (accelerator_view first, completion_future from
; %"class.hc::completion_future" onward).
%"class.hc::accelerator_view" = type { %"class.std::__1::shared_ptr" } | |
%"class.std::__1::shared_ptr" = type { %"class.Kalmar::KalmarQueue"*, %"class.std::__1::__shared_weak_count"* } | |
%"class.Kalmar::KalmarQueue" = type { i32 (...)**, %"class.Kalmar::KalmarDevice"*, i32, i32 } | |
%"class.Kalmar::KalmarDevice" = type { i32 (...)**, i32, %"class.std::__1::map", %"class.std::__1::mutex" } | |
%"class.std::__1::map" = type { %"class.std::__1::__tree.13" } | |
%"class.std::__1::__tree.13" = type { %"class.std::__1::__tree_node.14"*, %"class.std::__1::__compressed_pair", %"class.std::__1::__compressed_pair.20" } | |
%"class.std::__1::__tree_node.14" = type { %"class.std::__1::__tree_node_base.base", %"union.std::__1::__value_type" } | |
; Packed (<{ ... }>) variant of __tree_node_base with the same field list.
%"class.std::__1::__tree_node_base.base" = type <{ %"class.std::__1::__tree_end_node", %"class.std::__1::__tree_node_base"*, %"class.std::__1::__tree_node_base"*, i8 }> | |
%"class.std::__1::__tree_end_node" = type { %"class.std::__1::__tree_node_base"* } | |
%"class.std::__1::__tree_node_base" = type { %"class.std::__1::__tree_end_node", %"class.std::__1::__tree_node_base"*, %"class.std::__1::__tree_node_base"*, i8 } | |
%"union.std::__1::__value_type" = type { %"struct.std::__1::pair" } | |
%"struct.std::__1::pair" = type { %"class.std::__1::__thread_id", %"class.std::__1::shared_ptr" } | |
%"class.std::__1::__thread_id" = type { i64 } | |
%"class.std::__1::__compressed_pair" = type { %"class.std::__1::__libcpp_compressed_pair_imp" } | |
%"class.std::__1::__libcpp_compressed_pair_imp" = type { %"class.std::__1::__tree_end_node" } | |
%"class.std::__1::__compressed_pair.20" = type { %"class.std::__1::__libcpp_compressed_pair_imp.21" } | |
%"class.std::__1::__libcpp_compressed_pair_imp.21" = type { i64 } | |
%"class.std::__1::mutex" = type { %union.pthread_mutex_t } | |
%union.pthread_mutex_t = type { %"struct.(anonymous union)::__pthread_mutex_s" } | |
%"struct.(anonymous union)::__pthread_mutex_s" = type { i32, i32, i32, i32, i32, i16, i16, %struct.__pthread_internal_list } | |
%struct.__pthread_internal_list = type { %struct.__pthread_internal_list*, %struct.__pthread_internal_list* } | |
%"class.std::__1::__shared_weak_count" = type { %"class.std::__1::__shared_count", i64 } | |
%"class.std::__1::__shared_count" = type { i32 (...)**, i64 } | |
%"class.hc::completion_future" = type { %"class.std::__1::shared_future", %"class.std::__1::thread"*, %"class.std::__1::shared_ptr.24" } | |
%"class.std::__1::shared_future" = type { %"class.std::__1::__assoc_sub_state"* } | |
%"class.std::__1::__assoc_sub_state" = type { %"class.std::__1::__shared_count", %"class.std::exception_ptr", %"class.std::__1::mutex", %"class.std::__1::condition_variable", i32 } | |
%"class.std::exception_ptr" = type { i8* } | |
%"class.std::__1::condition_variable" = type { %union.pthread_cond_t } | |
%union.pthread_cond_t = type { %struct.anon } | |
%struct.anon = type { i32, i32, i64, i64, i64, i8*, i32, i32 } | |
%"class.std::__1::thread" = type { i64 } | |
%"class.std::__1::shared_ptr.24" = type { %"class.Kalmar::KalmarAsyncOp"*, %"class.std::__1::__shared_weak_count"* } | |
%"class.Kalmar::KalmarAsyncOp" = type { i32 (...)** } | |
; Three zero-initialized, externally visible i32 globals in address space 1.
; NOTE(review): none of them is referenced in the portion of the module visible here;
; presumably they are used by kernels further down - confirm.
@count24 = addrspace(1) global i32 0, align 4 | |
@no_of_nodes_vol27 = addrspace(1) global i32 0, align 4 | |
@stay_vol29 = addrspace(1) global i32 0, align 4 | |
; Internal address-space-3 globals whose names combine the BFS_in_GPU_kernel trampoline
; symbol with a variable name (local_q, prefix_q, next_wf, tot_sum) and a numeric suffix.
; All are initialized to undef and placed in section "clamp_opencl_local".
; NOTE(review): presumably these are the kernel's work-group-local variables - confirm
; against the original C++ source.
@ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245 = internal addrspace(3) global %struct.LocalQueues undef, section "clamp_opencl_local", align 4 | |
@ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q346 = internal addrspace(3) global [8 x i32] undef, section "clamp_opencl_local", align 16 | |
@ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7next_wf447 = internal addrspace(3) global [512 x i32] undef, section "clamp_opencl_local", align 16 | |
@ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7tot_sum548 = internal unnamed_addr addrspace(3) global i32 undef, section "clamp_opencl_local", align 4 | |
; Internal address-space-3 globals whose names combine the BFS_kernel_multi_blk_inGPU
; trampoline symbol with a variable name (local_q, prefix_q, shift, no_of_nodes_sm,
; odd_time) and a numeric suffix. All are initialized to undef and placed in section
; "clamp_opencl_local".
; NOTE(review): presumably these are the kernel's work-group-local variables - confirm
; against the original C++ source.
@ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E7local_q631 = internal addrspace(3) global %struct.LocalQueues undef, section "clamp_opencl_local", align 4 | |
@ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8prefix_q732 = internal addrspace(3) global [8 x i32] undef, section "clamp_opencl_local", align 16 | |
@ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E5shift833 = internal unnamed_addr addrspace(3) global i32 undef, section "clamp_opencl_local", align 4 | |
@ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E14no_of_nodes_sm934 = internal unnamed_addr addrspace(3) global i32 undef, section "clamp_opencl_local", align 4 | |
@ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8odd_time1035 = internal unnamed_addr addrspace(3) global i32 undef, section "clamp_opencl_local", align 4 | |
; Internal address-space-3 globals whose names combine the BFS_kernel trampoline symbol
; with a variable name (local_q, prefix_q, shift) and a numeric suffix. All are
; initialized to undef and placed in section "clamp_opencl_local".
; The BFS_kernel trampoline defined in this file reads and writes all three.
@ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116 = internal addrspace(3) global %struct.LocalQueues undef, section "clamp_opencl_local", align 4 | |
@ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q1217 = internal addrspace(3) global [8 x i32] undef, section "clamp_opencl_local", align 16 | |
@ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E5shift1318 = internal unnamed_addr addrspace(3) global i32 undef, section "clamp_opencl_local", align 4 | |
; External declarations: work-item/work-group id and size queries, the barrier, and the
; LLVM memcpy/lifetime intrinsics. Attribute groups #0-#3 are defined outside this view.
; NOTE(review): these declarations carry no calling convention, while the call sites in
; this file are written as `call spir_func ...`. LLVM requires caller and callee
; conventions to match - confirm this is reconciled when the builtins are linked in.
; NOTE(review): the atomic_* helpers called by the BFS kernel are not declared in this
; part of the module; presumably their declarations appear further down.
; Function Attrs: nounwind readnone | |
declare i64 @amp_get_global_id(i32) #0 | |
; Function Attrs: nounwind | |
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1 | |
; Function Attrs: nounwind | |
declare void @llvm.lifetime.start(i64, i8* nocapture) #1 | |
; Function Attrs: nounwind | |
declare void @llvm.lifetime.end(i64, i8* nocapture) #1 | |
; Function Attrs: nounwind readnone | |
declare i64 @amp_get_local_size(i32) #0 | |
; Function Attrs: nounwind readnone | |
declare i64 @hc_get_num_groups(i32) #0 | |
; Function Attrs: noduplicate | |
declare void @hc_barrier(i32) #2 | |
; The next two use attribute group #3 rather than #0, although both groups are
; summarized as "nounwind readnone".
; Function Attrs: nounwind readnone | |
declare i64 @amp_get_local_id(i32) #3 | |
; Function Attrs: nounwind readnone | |
declare i64 @hc_get_group_id(i32) #3 | |
; Function Attrs: nounwind readnone | |
declare i64 @amp_get_group_id(i32) #0 | |
; Function Attrs: nounwind uwtable | |
; Byte-fill kernel (the mangled name identifies it as the trampoline for a lambda in
; ihipMemsetKernel<char>).
; Arguments: %0 = i64 upper bound, %1 = i8 addrspace(1)* destination, %2 = i8 fill value.
; Each work-item stores %2 to %1[i] for i = start, start + stride, ... while i <u %0.
define spir_kernel void @ZZ16ihipMemsetKernelIcEN2hc17completion_futureEP12ihipStream_tPT_S4_mENUlNS0_11tiled_indexILi1EEEE_19__cxxamp_trampolineEmPcc(i64, i8 addrspace(1)*, i8 signext) #4 align 2 { | |
; start (%5): low 32 bits of amp_get_global_id(0), sign-extended back to i64 by the
; shl/ashr pair.
%4 = tail call spir_func i64 @amp_get_global_id(i32 0) #7 | |
%sext.i = shl i64 %4, 32 | |
%5 = ashr exact i64 %sext.i, 32 | |
; Unsigned comparison against the bound; work-items whose start is not below %0 skip
; straight to the exit block.
%6 = icmp ult i64 %5, %0 | |
br i1 %6, label %.lr.ph.i, label %_ZZ16ihipMemsetKernelIcEN2hc17completion_futureEP12ihipStream_tPT_S4_mENKUlNS0_11tiled_indexILi1EEEE_clES7_.exit | |
.lr.ph.i: ; preds = %3 | |
; stride (%10): low 32 bits of amp_get_local_size(0) * hc_get_num_groups(0),
; sign-extended to i64 (the shift by 32 is applied before the multiply).
%7 = tail call spir_func i64 @amp_get_local_size(i32 0) #7 | |
%8 = tail call spir_func i64 @hc_get_num_groups(i32 0) #7 | |
%9 = shl i64 %7, 32 | |
%sext3.i = mul i64 %9, %8 | |
%10 = ashr exact i64 %sext3.i, 32 | |
br label %11 | |
; Store loop: write one byte at the current index, then advance by the stride.
; <label>:11 ; preds = %11, %.lr.ph.i | |
%indvars.iv.i = phi i64 [ %5, %.lr.ph.i ], [ %indvars.iv.next.i, %11 ] | |
%12 = getelementptr inbounds i8 addrspace(1)* %1, i64 %indvars.iv.i | |
store i8 %2, i8 addrspace(1)* %12, align 1, !tbaa !23 | |
%indvars.iv.next.i = add nsw i64 %indvars.iv.i, %10 | |
%13 = icmp ult i64 %indvars.iv.next.i, %0 | |
br i1 %13, label %11, label %_ZZ16ihipMemsetKernelIcEN2hc17completion_futureEP12ihipStream_tPT_S4_mENKUlNS0_11tiled_indexILi1EEEE_clES7_.exit.loopexit | |
_ZZ16ihipMemsetKernelIcEN2hc17completion_futureEP12ihipStream_tPT_S4_mENKUlNS0_11tiled_indexILi1EEEE_clES7_.exit.loopexit: ; preds = %11 | |
br label %_ZZ16ihipMemsetKernelIcEN2hc17completion_futureEP12ihipStream_tPT_S4_mENKUlNS0_11tiled_indexILi1EEEE_clES7_.exit | |
_ZZ16ihipMemsetKernelIcEN2hc17completion_futureEP12ihipStream_tPT_S4_mENKUlNS0_11tiled_indexILi1EEEE_clES7_.exit: ; preds = %_ZZ16ihipMemsetKernelIcEN2hc17completion_futureEP12ihipStream_tPT_S4_mENKUlNS0_11tiled_indexILi1EEEE_clES7_.exit.loopexit, %3 | |
ret void | |
} | |
; Function Attrs: nounwind uwtable | |
; 32-bit word fill kernel (the mangled name identifies it as the trampoline for a lambda
; in ihipMemsetKernel<unsigned int>).
; Arguments: %0 = i64 upper bound on the element index, %1 = i32 addrspace(1)*
; destination, %2 = i32 fill value.
; Each work-item stores %2 to %1[i] for i = start, start + stride, ... while i <u %0.
define spir_kernel void @ZZ16ihipMemsetKernelIjEN2hc17completion_futureEP12ihipStream_tPT_S4_mENUlNS0_11tiled_indexILi1EEEE_19__cxxamp_trampolineEmPjj(i64, i32 addrspace(1)*, i32) #4 align 2 { | |
; start (%5): low 32 bits of amp_get_global_id(0), sign-extended back to i64 by the
; shl/ashr pair.
%4 = tail call spir_func i64 @amp_get_global_id(i32 0) #7 | |
%sext.i = shl i64 %4, 32 | |
%5 = ashr exact i64 %sext.i, 32 | |
; Unsigned comparison against the bound; work-items whose start is not below %0 skip
; straight to the exit block.
%6 = icmp ult i64 %5, %0 | |
br i1 %6, label %.lr.ph.i, label %_ZZ16ihipMemsetKernelIjEN2hc17completion_futureEP12ihipStream_tPT_S4_mENKUlNS0_11tiled_indexILi1EEEE_clES7_.exit | |
.lr.ph.i: ; preds = %3 | |
; stride (%10): low 32 bits of amp_get_local_size(0) * hc_get_num_groups(0),
; sign-extended to i64 (the shift by 32 is applied before the multiply).
%7 = tail call spir_func i64 @amp_get_local_size(i32 0) #7 | |
%8 = tail call spir_func i64 @hc_get_num_groups(i32 0) #7 | |
%9 = shl i64 %7, 32 | |
%sext3.i = mul i64 %9, %8 | |
%10 = ashr exact i64 %sext3.i, 32 | |
br label %11 | |
; Store loop: write one i32 element at the current index, then advance by the stride.
; <label>:11 ; preds = %11, %.lr.ph.i | |
%indvars.iv.i = phi i64 [ %5, %.lr.ph.i ], [ %indvars.iv.next.i, %11 ] | |
%12 = getelementptr inbounds i32 addrspace(1)* %1, i64 %indvars.iv.i | |
store i32 %2, i32 addrspace(1)* %12, align 4, !tbaa !26 | |
%indvars.iv.next.i = add nsw i64 %indvars.iv.i, %10 | |
%13 = icmp ult i64 %indvars.iv.next.i, %0 | |
br i1 %13, label %11, label %_ZZ16ihipMemsetKernelIjEN2hc17completion_futureEP12ihipStream_tPT_S4_mENKUlNS0_11tiled_indexILi1EEEE_clES7_.exit.loopexit | |
_ZZ16ihipMemsetKernelIjEN2hc17completion_futureEP12ihipStream_tPT_S4_mENKUlNS0_11tiled_indexILi1EEEE_clES7_.exit.loopexit: ; preds = %11 | |
br label %_ZZ16ihipMemsetKernelIjEN2hc17completion_futureEP12ihipStream_tPT_S4_mENKUlNS0_11tiled_indexILi1EEEE_clES7_.exit | |
_ZZ16ihipMemsetKernelIjEN2hc17completion_futureEP12ihipStream_tPT_S4_mENKUlNS0_11tiled_indexILi1EEEE_clES7_.exit: ; preds = %_ZZ16ihipMemsetKernelIjEN2hc17completion_futureEP12ihipStream_tPT_S4_mENKUlNS0_11tiled_indexILi1EEEE_clES7_.exit.loopexit, %3 | |
ret void | |
} | |
; Function Attrs: uwtable | |
define spir_kernel void @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 addrspace(1)*, i32 addrspace(1)*, %"class.hc::short_vector::int_2.0" addrspace(1)*, %"class.hc::short_vector::int_2.0" addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32, i32 addrspace(1)*, i32, i32, i32 addrspace(1)*) #5 align 2 { | |
%25 = alloca %struct.grid_launch_parm, align 8 | |
%26 = alloca %struct.grid_launch_parm, align 16 | |
%.sroa.23 = alloca [20 x i8], align 4 | |
%27 = insertelement <4 x i32> undef, i32 %0, i32 0 | |
%28 = insertelement <4 x i32> %27, i32 %1, i32 1 | |
%29 = insertelement <4 x i32> %28, i32 %2, i32 2 | |
%30 = insertelement <4 x i32> %29, i32 %3, i32 3 | |
%31 = insertelement <4 x i32> undef, i32 %4, i32 0 | |
%32 = insertelement <4 x i32> %31, i32 %5, i32 1 | |
%33 = tail call spir_func i64 @amp_get_local_id(i32 2) #7 | |
%34 = tail call spir_func i64 @amp_get_local_id(i32 1) #7 | |
%35 = tail call spir_func i64 @amp_get_local_id(i32 0) #7 | |
%36 = trunc i64 %33 to i32 | |
%37 = trunc i64 %34 to i32 | |
%38 = trunc i64 %35 to i32 | |
%39 = tail call spir_func i64 @amp_get_group_id(i32 2) #7 | |
%40 = tail call spir_func i64 @amp_get_group_id(i32 1) #7 | |
%41 = tail call spir_func i64 @amp_get_group_id(i32 0) #7 | |
%42 = trunc i64 %39 to i32 | |
%43 = trunc i64 %40 to i32 | |
%44 = trunc i64 %41 to i32 | |
%45 = bitcast %struct.grid_launch_parm* %26 to i8* | |
call spir_func void @llvm.lifetime.start(i64 72, i8* %45) | |
%46 = insertelement <4 x i32> %32, i32 %44, i32 2 | |
%47 = insertelement <4 x i32> %46, i32 %43, i32 3 | |
%48 = insertelement <4 x i32> undef, i32 %42, i32 0 | |
%49 = insertelement <4 x i32> %48, i32 %38, i32 1 | |
%50 = insertelement <4 x i32> %49, i32 %37, i32 2 | |
%51 = insertelement <4 x i32> %50, i32 %36, i32 3 | |
%52 = bitcast %struct.grid_launch_parm* %26 to <4 x i32>* | |
store <4 x i32> %30, <4 x i32>* %52, align 16 | |
%53 = getelementptr inbounds %struct.grid_launch_parm* %26, i64 0, i32 1, i32 1 | |
%54 = bitcast i32* %53 to <4 x i32>* | |
store <4 x i32> %47, <4 x i32>* %54, align 8 | |
%55 = getelementptr inbounds %struct.grid_launch_parm* %26, i64 0, i32 2, i32 2 | |
%56 = bitcast i32* %55 to <4 x i32>* | |
store <4 x i32> %51, <4 x i32>* %56, align 8 | |
%57 = getelementptr inbounds %struct.grid_launch_parm* %26, i64 0, i32 4 | |
store i32 %12, i32* %57, align 16 | |
%58 = getelementptr inbounds i8* %45, i64 52 | |
%59 = getelementptr inbounds [20 x i8]* %.sroa.23, i64 0, i64 0 | |
call spir_func void @llvm.memcpy.p0i8.p0i8.i64(i8* %58, i8* %59, i64 20, i32 4, i1 false) | |
%60 = bitcast %struct.grid_launch_parm* %25 to i8* | |
call spir_func void @llvm.lifetime.start(i64 72, i8* %60) | |
%tmp = bitcast %struct.grid_launch_parm* %25 to i8* | |
%tmp1 = bitcast %struct.grid_launch_parm* %26 to i8* | |
call spir_func void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %tmp1, i64 72, i32 1, i1 false) | |
%61 = call spir_func i64 @amp_get_local_id(i32 0) #7 | |
%62 = icmp slt i64 %61, 8 | |
br i1 %62, label %63, label %74 | |
; <label>:63 ; preds = %24 | |
%sext2.i = shl i64 %61, 32 | |
%64 = ashr exact i64 %sext2.i, 32 | |
%65 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116, i64 0, i32 0, i64 %64 | |
store i32 0, i32 addrspace(3)* %65, align 4, !tbaa !26 | |
%66 = call spir_func i64 @amp_get_local_size(i32 0) #7 | |
%67 = lshr i64 %66, 3 | |
%68 = and i64 %66, 7 | |
%69 = icmp slt i64 %61, %68 | |
%70 = zext i1 %69 to i64 | |
%71 = add nuw nsw i64 %70, %67 | |
%72 = trunc i64 %71 to i32 | |
%73 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116, i64 0, i32 2, i64 %64 | |
store i32 %72, i32 addrspace(3)* %73, align 4, !tbaa !26 | |
br label %74 | |
; <label>:74 ; preds = %63, %24 | |
call spir_func void @hc_barrier(i32 1) #8 | |
%75 = call spir_func i64 @hc_get_group_id(i32 0) #7 | |
%76 = shl nsw i64 %75, 9 | |
%77 = add nsw i64 %76, %61 | |
%78 = trunc i64 %77 to i32 | |
%79 = icmp slt i32 %78, %19 | |
br i1 %79, label %80, label %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i | |
; <label>:80 ; preds = %74 | |
%sext.i = shl i64 %77, 32 | |
%81 = ashr exact i64 %sext.i, 32 | |
%82 = getelementptr inbounds i32 addrspace(1)* %13, i64 %81 | |
%83 = load i32 addrspace(1)* %82, align 4, !tbaa !26 | |
%84 = sext i32 %83 to i64 | |
%85 = getelementptr inbounds i32 addrspace(1)* %17, i64 %84 | |
store i32 16677221, i32 addrspace(1)* %85, align 4, !tbaa !26 | |
%86 = getelementptr inbounds i32 addrspace(1)* %18, i64 %84 | |
%87 = load i32 addrspace(1)* %86, align 4, !tbaa !26 | |
%88 = getelementptr inbounds %"class.hc::short_vector::int_2.0" addrspace(1)* %15, i64 %84, i32 0 | |
%89 = load i32 addrspace(1)* %88, align 4, !tbaa !28 | |
%90 = getelementptr inbounds %"class.hc::short_vector::int_2.0" addrspace(1)* %15, i64 %84, i32 1 | |
%91 = load i32 addrspace(1)* %90, align 4, !tbaa !30 | |
%92 = add nsw i32 %91, %89 | |
%93 = icmp sgt i32 %91, 0 | |
br i1 %93, label %.lr.ph.i.i, label %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i | |
.lr.ph.i.i: ; preds = %80 | |
%94 = and i64 %61, 7 | |
%95 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116, i64 0, i32 0, i64 %94 | |
%96 = sext i32 %89 to i64 | |
br label %97 | |
; <label>:97 ; preds = %_ZN11LocalQueues6appendEiPii.exit.i.i, %.lr.ph.i.i | |
%indvars.iv.i.i = phi i64 [ %96, %.lr.ph.i.i ], [ %indvars.iv.next.i.i, %_ZN11LocalQueues6appendEiPii.exit.i.i ] | |
%98 = getelementptr inbounds %"class.hc::short_vector::int_2.0" addrspace(1)* %16, i64 %indvars.iv.i.i, i32 0 | |
%99 = load i32 addrspace(1)* %98, align 4, !tbaa !28 | |
%100 = getelementptr inbounds %"class.hc::short_vector::int_2.0" addrspace(1)* %16, i64 %indvars.iv.i.i, i32 1 | |
%101 = load i32 addrspace(1)* %100, align 4, !tbaa !30 | |
%102 = add nsw i32 %101, %87 | |
%103 = sext i32 %99 to i64 | |
%104 = getelementptr inbounds i32 addrspace(1)* %18, i64 %103 | |
%105 = call spir_func i32 @atomic_min_int_global(i32 addrspace(1)* %104, i32 %102) #9 | |
%106 = icmp sgt i32 %105, %102 | |
br i1 %106, label %107, label %_ZN11LocalQueues6appendEiPii.exit.i.i | |
; <label>:107 ; preds = %97 | |
%108 = getelementptr inbounds i32 addrspace(1)* %17, i64 %103 | |
%109 = call spir_func i32 @atomic_exchange_int_global(i32 addrspace(1)* %108, i32 %21) #9 | |
%110 = icmp eq i32 %109, %21 | |
br i1 %110, label %_ZN11LocalQueues6appendEiPii.exit.i.i, label %111 | |
; <label>:111 ; preds = %107 | |
%112 = call spir_func i32 @atomic_add_int_local(i32 addrspace(3)* %95, i32 1) #9 | |
%113 = icmp sgt i32 %112, 399 | |
br i1 %113, label %114, label %115 | |
; <label>:114 ; preds = %111 | |
store i32 1, i32 addrspace(1)* %23, align 4, !tbaa !26 | |
br label %_ZN11LocalQueues6appendEiPii.exit.i.i | |
; <label>:115 ; preds = %111 | |
%116 = sext i32 %112 to i64 | |
%117 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116, i64 0, i32 1, i64 %94, i64 %116 | |
store i32 %99, i32 addrspace(3)* %117, align 4, !tbaa !26 | |
br label %_ZN11LocalQueues6appendEiPii.exit.i.i | |
_ZN11LocalQueues6appendEiPii.exit.i.i: ; preds = %115, %114, %107, %97 | |
%indvars.iv.next.i.i = add nsw i64 %indvars.iv.i.i, 1 | |
%118 = trunc i64 %indvars.iv.next.i.i to i32 | |
%119 = icmp slt i32 %118, %92 | |
br i1 %119, label %97, label %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.loopexit.i | |
_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.loopexit.i: ; preds = %_ZN11LocalQueues6appendEiPii.exit.i.i | |
br label %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i | |
_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i: ; preds = %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.loopexit.i, %80, %74 | |
call spir_func void @hc_barrier(i32 1) #8 | |
%120 = icmp eq i64 %61, 0 | |
br i1 %120, label %121, label %154 | |
; <label>:121 ; preds = %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i | |
%122 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q1217, i64 0, i64 0 | |
store i32 0, i32 addrspace(3)* %122, align 16, !tbaa !26 | |
%123 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116, i64 0, i32 0, i64 0 | |
%124 = load i32 addrspace(3)* %123, align 4, !tbaa !26 | |
%125 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q1217, i64 0, i64 1 | |
store i32 %124, i32 addrspace(3)* %125, align 4, !tbaa !26 | |
%126 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116, i64 0, i32 0, i64 1 | |
%127 = load i32 addrspace(3)* %126, align 4, !tbaa !26 | |
%128 = add nsw i32 %127, %124 | |
%129 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q1217, i64 0, i64 2 | |
store i32 %128, i32 addrspace(3)* %129, align 8, !tbaa !26 | |
%130 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116, i64 0, i32 0, i64 2 | |
%131 = load i32 addrspace(3)* %130, align 4, !tbaa !26 | |
%132 = add nsw i32 %131, %128 | |
%133 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q1217, i64 0, i64 3 | |
store i32 %132, i32 addrspace(3)* %133, align 4, !tbaa !26 | |
%134 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116, i64 0, i32 0, i64 3 | |
%135 = load i32 addrspace(3)* %134, align 4, !tbaa !26 | |
%136 = add nsw i32 %135, %132 | |
%137 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q1217, i64 0, i64 4 | |
store i32 %136, i32 addrspace(3)* %137, align 16, !tbaa !26 | |
%138 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116, i64 0, i32 0, i64 4 | |
%139 = load i32 addrspace(3)* %138, align 4, !tbaa !26 | |
%140 = add nsw i32 %139, %136 | |
%141 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q1217, i64 0, i64 5 | |
store i32 %140, i32 addrspace(3)* %141, align 4, !tbaa !26 | |
%142 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116, i64 0, i32 0, i64 5 | |
%143 = load i32 addrspace(3)* %142, align 4, !tbaa !26 | |
%144 = add nsw i32 %143, %140 | |
%145 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q1217, i64 0, i64 6 | |
store i32 %144, i32 addrspace(3)* %145, align 8, !tbaa !26 | |
%146 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116, i64 0, i32 0, i64 6 | |
%147 = load i32 addrspace(3)* %146, align 4, !tbaa !26 | |
%148 = add nsw i32 %147, %144 | |
%149 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q1217, i64 0, i64 7 | |
store i32 %148, i32 addrspace(3)* %149, align 4, !tbaa !26 | |
%150 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116, i64 0, i32 0, i64 7 | |
%151 = load i32 addrspace(3)* %150, align 4, !tbaa !26 | |
%152 = add nsw i32 %151, %148 | |
%153 = call spir_func i32 @atomic_add_int_global(i32 addrspace(1)* %20, i32 %152) #9 | |
store i32 %153, i32 addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E5shift1318, align 4, !tbaa !26 | |
br label %154 | |
; <label>:154 ; preds = %121, %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i | |
call spir_func void @hc_barrier(i32 1) #8 | |
%155 = load i32 addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E5shift1318, align 4, !tbaa !26 | |
%156 = sext i32 %155 to i64 | |
%157 = and i64 %61, 7 | |
%158 = lshr i64 %61, 3 | |
%159 = trunc i64 %158 to i32 | |
%160 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116, i64 0, i32 0, i64 %157 | |
%161 = load i32 addrspace(3)* %160, align 4, !tbaa !26 | |
%162 = icmp slt i32 %159, %161 | |
br i1 %162, label %.lr.ph.i1.i, label %_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_.exit | |
.lr.ph.i1.i: ; preds = %154 | |
%163 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q1217, i64 0, i64 %157 | |
%164 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116, i64 0, i32 2, i64 %157 | |
br label %165 | |
; <label>:165 ; preds = %165, %.lr.ph.i1.i | |
%local_shift.01.i.i = phi i32 [ %159, %.lr.ph.i1.i ], [ %174, %165 ] | |
%166 = sext i32 %local_shift.01.i.i to i64 | |
%167 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116, i64 0, i32 1, i64 %157, i64 %166 | |
%168 = load i32 addrspace(3)* %167, align 4, !tbaa !26 | |
%169 = load i32 addrspace(3)* %163, align 4, !tbaa !26 | |
%170 = add nsw i32 %169, %local_shift.01.i.i | |
%171 = sext i32 %170 to i64 | |
%.sum.i = add nsw i64 %171, %156 | |
%172 = getelementptr inbounds i32 addrspace(1)* %14, i64 %.sum.i | |
store i32 %168, i32 addrspace(1)* %172, align 4, !tbaa !26 | |
%173 = load i32 addrspace(3)* %164, align 4, !tbaa !26 | |
%174 = add nsw i32 %173, %local_shift.01.i.i | |
%175 = load i32 addrspace(3)* %160, align 4, !tbaa !26 | |
%176 = icmp slt i32 %174, %175 | |
br i1 %176, label %165, label %_ZN11LocalQueues11concatenateEPiRA8_i.exit.loopexit.i | |
_ZN11LocalQueues11concatenateEPiRA8_i.exit.loopexit.i: ; preds = %165 | |
br label %_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_.exit | |
_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_.exit: ; preds = %_ZN11LocalQueues11concatenateEPiRA8_i.exit.loopexit.i, %154 | |
%177 = bitcast %struct.grid_launch_parm* %25 to i8* | |
call spir_func void @llvm.lifetime.end(i64 72, i8* %177) | |
call spir_func void @llvm.lifetime.end(i64 72, i8* %45) | |
ret void | |
} | |
; External helper declarations (no bodies in this module). Each takes a pointer
; and an i32 operand and returns an i32. The *_global variants take an
; addrspace(1) pointer, the *_local variant an addrspace(3) pointer.
; NOTE(review): call sites in this file use the returned i32 as if it were the
; value held in memory before the operation (compared against the new value,
; or used as a slot index) -- confirm against the runtime that provides them.
declare i32 @atomic_min_int_global(i32 addrspace(1)*, i32) #6 | |
declare i32 @atomic_exchange_int_global(i32 addrspace(1)*, i32) #6 | |
declare i32 @atomic_add_int_local(i32 addrspace(3)*, i32) #6 | |
declare i32 @atomic_add_int_global(i32 addrspace(1)*, i32) #6 | |
; Function Attrs: uwtable | |
; SPIR kernel whose mangled name embeds BFS_kernel_multi_blk_inGPU and
; __cxxamp_trampoline. It takes 27 unnamed arguments (%0..%26); the entry
; block is the implicit label %27.
;
; Argument usage as seen in this body:
;   %0..%5, %12  packed into a stack grid_launch_parm (never read back here)
;   %6..%11, %22, %24  not referenced in this body
;   %13, %14     two i32 buffers; one is read and the other written per
;                iteration, chosen by the addrspace(3) odd_time flag
;   %15          int_2 array indexed by node id: field 0 is the first edge
;                index, field 1 the edge count (loop runs [f0, f0+f1))
;   %16          int_2 array indexed by edge index: field 0 indexes %17/%18
;                and is what gets queued, field 1 is added to the node's
;                %18 value
;   %17          i32 array: 16677221 is stored for the node being expanded,
;                and the current mark (%.0.i) is exchanged in for neighbours
;   %18          i32 array: read for the node, atomic_min'ed for neighbours
;   %19          i32*: read once at entry (group 0), written at exit
;   %20          i32*: running total of queued elements across groups
;   %21          i32: initial value of the mark %.0.i
;   %23, %25     i32*: written at exit; %25 is also read at entry
;   %26          i32*: set to 1 when a local queue slot index exceeds 399
;
; The addrspace(3) local_q global has type %struct.LocalQueues =
; { [8 x i32], [8 x [400 x i32]], [8 x i32] }. Below, field 0 is used as
; per-queue element counts, field 1 as per-queue storage and field 2 as a
; per-queue stride.
;
; NOTE(review): every line of this listing ends in " | |", which looks like a
; copy/paste artifact of the page it was taken from -- confirm before feeding
; this text to an IR parser.
define spir_kernel void @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 addrspace(1)*, i32 addrspace(1)*, %"class.hc::short_vector::int_2.0" addrspace(1)*, %"class.hc::short_vector::int_2.0" addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32, i32, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*) #5 align 2 { | |
; --- Entry: build a grid_launch_parm in %29 and copy it to %28. ---
; Neither alloca is loaded from anywhere in this body; they are only written,
; memcpy'd and passed to the lifetime intrinsics.
%28 = alloca %struct.grid_launch_parm, align 8 | |
%29 = alloca %struct.grid_launch_parm, align 16 | |
%.sroa.23 = alloca [20 x i8], align 4 | |
%30 = insertelement <4 x i32> undef, i32 %0, i32 0 | |
%31 = insertelement <4 x i32> %30, i32 %1, i32 1 | |
%32 = insertelement <4 x i32> %31, i32 %2, i32 2 | |
%33 = insertelement <4 x i32> %32, i32 %3, i32 3 | |
%34 = insertelement <4 x i32> undef, i32 %4, i32 0 | |
%35 = insertelement <4 x i32> %34, i32 %5, i32 1 | |
%36 = tail call spir_func i64 @amp_get_local_id(i32 2) #7 | |
%37 = tail call spir_func i64 @amp_get_local_id(i32 1) #7 | |
%38 = tail call spir_func i64 @amp_get_local_id(i32 0) #7 | |
%39 = trunc i64 %36 to i32 | |
%40 = trunc i64 %37 to i32 | |
%41 = trunc i64 %38 to i32 | |
%42 = tail call spir_func i64 @amp_get_group_id(i32 2) #7 | |
%43 = tail call spir_func i64 @amp_get_group_id(i32 1) #7 | |
%44 = tail call spir_func i64 @amp_get_group_id(i32 0) #7 | |
%45 = trunc i64 %42 to i32 | |
%46 = trunc i64 %43 to i32 | |
%47 = trunc i64 %44 to i32 | |
%48 = bitcast %struct.grid_launch_parm* %29 to i8* | |
call spir_func void @llvm.lifetime.start(i64 72, i8* %48) | |
%49 = insertelement <4 x i32> %35, i32 %47, i32 2 | |
%50 = insertelement <4 x i32> %49, i32 %46, i32 3 | |
%51 = insertelement <4 x i32> undef, i32 %45, i32 0 | |
%52 = insertelement <4 x i32> %51, i32 %41, i32 1 | |
%53 = insertelement <4 x i32> %52, i32 %40, i32 2 | |
%54 = insertelement <4 x i32> %53, i32 %39, i32 3 | |
; Three <4 x i32> stores fill the leading gl_dim3 fields of %29.
%55 = bitcast %struct.grid_launch_parm* %29 to <4 x i32>* | |
store <4 x i32> %33, <4 x i32>* %55, align 16 | |
%56 = getelementptr inbounds %struct.grid_launch_parm* %29, i64 0, i32 1, i32 1 | |
%57 = bitcast i32* %56 to <4 x i32>* | |
store <4 x i32> %50, <4 x i32>* %57, align 8 | |
%58 = getelementptr inbounds %struct.grid_launch_parm* %29, i64 0, i32 2, i32 2 | |
%59 = bitcast i32* %58 to <4 x i32>* | |
store <4 x i32> %54, <4 x i32>* %59, align 8 | |
%60 = getelementptr inbounds %struct.grid_launch_parm* %29, i64 0, i32 4 | |
store i32 %12, i32* %60, align 16 | |
; NOTE(review): %.sroa.23 is never stored to in this body, so the 20 bytes
; copied to byte offset 52 of %29 are undefined.
%61 = getelementptr inbounds i8* %48, i64 52 | |
%62 = getelementptr inbounds [20 x i8]* %.sroa.23, i64 0, i64 0 | |
call spir_func void @llvm.memcpy.p0i8.p0i8.i64(i8* %61, i8* %62, i64 20, i32 4, i1 false) | |
%63 = bitcast %struct.grid_launch_parm* %28 to i8* | |
call spir_func void @llvm.lifetime.start(i64 72, i8* %63) | |
%tmp = bitcast %struct.grid_launch_parm* %28 to i8* | |
%tmp1 = bitcast %struct.grid_launch_parm* %29 to i8* | |
call spir_func void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %tmp1, i64 72, i32 1, i1 false) | |
; %64 = local id in dimension 0; %65 = "local id is 0". %65 gates every
; single-thread section below.
%64 = call spir_func i64 @amp_get_local_id(i32 0) #7 | |
%65 = icmp eq i64 %64, 0 | |
br i1 %65, label %66, label %71 | |
; <label>:66 ; preds = %27 | |
; Local thread 0: set odd_time to 1. If the group id is also 0, copy *%19
; into the global @no_of_nodes_vol27.
store i32 1, i32 addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8odd_time1035, align 4, !tbaa !26 | |
%67 = call spir_func i64 @hc_get_group_id(i32 0) #7 | |
%68 = icmp eq i64 %67, 0 | |
br i1 %68, label %69, label %71 | |
; <label>:69 ; preds = %66 | |
%70 = load i32 addrspace(1)* %19, align 4, !tbaa !26 | |
store volatile i32 %70, i32 addrspace(1)* @no_of_nodes_vol27, align 4, !tbaa !26 | |
br label %71 | |
; <label>:71 ; preds = %69, %66, %27 | |
; OR-ing with 0 cannot change *%25; the returned value seeds %kt.0.i.
; NOTE(review): presumably used as an atomic read -- confirm helper semantics.
%72 = call spir_func i32 @atomic_or_int_global(i32 addrspace(1)* %25, i32 0) #9 | |
; Loop-invariant values derived from the local id:
;   %73 = id < 8
;   %74 = low 32 bits of id, sign-extended
;   %75/%76 = &local_q.field0[%74] / &local_q.field2[%74]
;   %77 = id & 7 (queue index this thread uses)
;   %80 = id >> 3 (this thread's starting element within that queue)
;   %78/%82 = &local_q.field0[%77] / &local_q.field2[%77]
;   %81 = &prefix_q[%77]
%73 = icmp slt i64 %64, 8 | |
%sext7.i = shl i64 %64, 32 | |
%74 = ashr exact i64 %sext7.i, 32 | |
%75 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E7local_q631, i64 0, i32 0, i64 %74 | |
%76 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E7local_q631, i64 0, i32 2, i64 %74 | |
%77 = and i64 %64, 7 | |
%78 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E7local_q631, i64 0, i32 0, i64 %77 | |
%79 = lshr i64 %64, 3 | |
%80 = trunc i64 %79 to i32 | |
%81 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8prefix_q732, i64 0, i64 %77 | |
%82 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E7local_q631, i64 0, i32 2, i64 %77 | |
br label %83 | |
; <label>:83 ; preds = %_Z20start_global_barrieri.exit2.i, %71 | |
; --- Main loop header; repeats while @stay_vol29 is non-zero. ---
;   %kt.0.i : counter used by the global-barrier spins, +2 per iteration
;   %.0.i   : mark value exchanged into %17 for newly reached neighbours
%kt.0.i = phi i32 [ %72, %71 ], [ %215, %_Z20start_global_barrieri.exit2.i ] | |
%.0.i = phi i32 [ %21, %71 ], [ %.1.i, %_Z20start_global_barrieri.exit2.i ] | |
br i1 %73, label %84, label %92 | |
; <label>:84 ; preds = %83 | |
; Threads with local id < 8 reset one queue each: count = 0 and
; stride = local_size / 8, plus 1 when id < local_size % 8.
store i32 0, i32 addrspace(3)* %75, align 4, !tbaa !26 | |
%85 = call spir_func i64 @amp_get_local_size(i32 0) #7 | |
%86 = lshr i64 %85, 3 | |
%87 = and i64 %85, 7 | |
%88 = icmp slt i64 %64, %87 | |
%89 = zext i1 %88 to i64 | |
%90 = add nuw nsw i64 %89, %86 | |
%91 = trunc i64 %90 to i32 | |
store i32 %91, i32 addrspace(3)* %76, align 4, !tbaa !26 | |
br label %92 | |
; <label>:92 ; preds = %84, %83 | |
br i1 %65, label %93, label %95 | |
; <label>:93 ; preds = %92 | |
; Local thread 0 copies @no_of_nodes_vol27 into the addrspace(3)
; no_of_nodes_sm variable.
%94 = load volatile i32 addrspace(1)* @no_of_nodes_vol27, align 4, !tbaa !26 | |
store i32 %94, i32 addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E14no_of_nodes_sm934, align 4, !tbaa !26 | |
br label %95 | |
; <label>:95 ; preds = %93, %92 | |
; After the barrier, compute %98 = group_id * 512 + local_id (shl by 9) and
; only expand a node when its truncated value is below no_of_nodes_sm.
; NOTE(review): the factor 512 is hard-coded -- presumably the work-group
; size the kernel is launched with; confirm against the host code.
call spir_func void @hc_barrier(i32 1) #8 | |
%96 = call spir_func i64 @hc_get_group_id(i32 0) #7 | |
%97 = shl nsw i64 %96, 9 | |
%98 = add nsw i64 %97, %64 | |
%99 = trunc i64 %98 to i32 | |
%100 = load i32 addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E14no_of_nodes_sm934, align 4, !tbaa !26 | |
%101 = icmp slt i32 %99, %100 | |
br i1 %101, label %102, label %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i | |
; <label>:102 ; preds = %95 | |
; Select the input buffer (%13 when odd_time != 0, else %14), fetch this
; thread's node id %108 from it via an OR-with-0 call, store 16677221 into
; %17[node], load %18[node] into %112, and load the node's int_2 record from
; %15 (%114 = field 0, %116 = field 1). The edge loop runs only if %116 > 0.
%103 = load i32 addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8odd_time1035, align 4, !tbaa !26 | |
%104 = icmp ne i32 %103, 0 | |
%105 = select i1 %104, i32 addrspace(1)* %13, i32 addrspace(1)* %14 | |
%sext.i = shl i64 %98, 32 | |
%106 = ashr exact i64 %sext.i, 32 | |
%107 = getelementptr inbounds i32 addrspace(1)* %105, i64 %106 | |
%108 = call spir_func i32 @atomic_or_int_global(i32 addrspace(1)* %107, i32 0) #9 | |
%109 = sext i32 %108 to i64 | |
%110 = getelementptr inbounds i32 addrspace(1)* %17, i64 %109 | |
store i32 16677221, i32 addrspace(1)* %110, align 4, !tbaa !26 | |
%111 = getelementptr inbounds i32 addrspace(1)* %18, i64 %109 | |
%112 = load i32 addrspace(1)* %111, align 4, !tbaa !26 | |
%113 = getelementptr inbounds %"class.hc::short_vector::int_2.0" addrspace(1)* %15, i64 %109, i32 0 | |
%114 = load i32 addrspace(1)* %113, align 4, !tbaa !28 | |
%115 = getelementptr inbounds %"class.hc::short_vector::int_2.0" addrspace(1)* %15, i64 %109, i32 1 | |
%116 = load i32 addrspace(1)* %115, align 4, !tbaa !30 | |
%117 = add nsw i32 %116, %114 | |
%118 = icmp sgt i32 %116, 0 | |
br i1 %118, label %.lr.ph.i.i, label %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i | |
.lr.ph.i.i: ; preds = %102 | |
%119 = sext i32 %114 to i64 | |
br label %120 | |
; <label>:120 ; preds = %_ZN11LocalQueues6appendEiPii.exit.i.i, %.lr.ph.i.i | |
; Edge loop over indices [%114, %117). For each entry of %16:
;   %122 = field 0 (neighbour id), %124 = field 1
;   %125 = %124 + %112 is the candidate value passed to atomic_min on
;   %18[neighbour]; the append path is taken only if the returned value
;   is greater than the candidate.
%indvars.iv.i.i = phi i64 [ %119, %.lr.ph.i.i ], [ %indvars.iv.next.i.i, %_ZN11LocalQueues6appendEiPii.exit.i.i ] | |
%121 = getelementptr inbounds %"class.hc::short_vector::int_2.0" addrspace(1)* %16, i64 %indvars.iv.i.i, i32 0 | |
%122 = load i32 addrspace(1)* %121, align 4, !tbaa !28 | |
%123 = getelementptr inbounds %"class.hc::short_vector::int_2.0" addrspace(1)* %16, i64 %indvars.iv.i.i, i32 1 | |
%124 = load i32 addrspace(1)* %123, align 4, !tbaa !30 | |
%125 = add nsw i32 %124, %112 | |
%126 = sext i32 %122 to i64 | |
%127 = getelementptr inbounds i32 addrspace(1)* %18, i64 %126 | |
%128 = call spir_func i32 @atomic_min_int_global(i32 addrspace(1)* %127, i32 %125) #9 | |
%129 = icmp sgt i32 %128, %125 | |
br i1 %129, label %130, label %_ZN11LocalQueues6appendEiPii.exit.i.i | |
; <label>:130 ; preds = %120 | |
; Exchange the current mark into %17[neighbour]; if the returned value
; already equals the mark, the neighbour is not queued again.
%131 = getelementptr inbounds i32 addrspace(1)* %17, i64 %126 | |
%132 = call spir_func i32 @atomic_exchange_int_global(i32 addrspace(1)* %131, i32 %.0.i) #9 | |
%133 = icmp eq i32 %132, %.0.i | |
br i1 %133, label %_ZN11LocalQueues6appendEiPii.exit.i.i, label %134 | |
; <label>:134 ; preds = %130 | |
; Add 1 to queue %77's count and use the returned value %135 as the slot.
; Slots above 399 are rejected, matching the [400 x i32] row size.
; NOTE(review): the count is bumped before the capacity check, so after an
; overflow it can exceed 400 while the copy-out loop below still uses it as
; its bound -- confirm callers treat *%26 == 1 as "results invalid".
%135 = call spir_func i32 @atomic_add_int_local(i32 addrspace(3)* %78, i32 1) #9 | |
%136 = icmp sgt i32 %135, 399 | |
br i1 %136, label %137, label %138 | |
; <label>:137 ; preds = %134 | |
; Overflow: raise the flag at *%26 and drop this neighbour.
store i32 1, i32 addrspace(1)* %26, align 4, !tbaa !26 | |
br label %_ZN11LocalQueues6appendEiPii.exit.i.i | |
; <label>:138 ; preds = %134 | |
; Store the neighbour id into local_q.field1[%77][slot].
%139 = sext i32 %135 to i64 | |
%140 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E7local_q631, i64 0, i32 1, i64 %77, i64 %139 | |
store i32 %122, i32 addrspace(3)* %140, align 4, !tbaa !26 | |
br label %_ZN11LocalQueues6appendEiPii.exit.i.i | |
_ZN11LocalQueues6appendEiPii.exit.i.i: ; preds = %138, %137, %130, %120 | |
; Advance to the next edge index; continue while it is below %117.
%indvars.iv.next.i.i = add nsw i64 %indvars.iv.i.i, 1 | |
%141 = trunc i64 %indvars.iv.next.i.i to i32 | |
%142 = icmp slt i32 %141, %117 | |
br i1 %142, label %120, label %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.loopexit.i | |
_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.loopexit.i: ; preds = %_ZN11LocalQueues6appendEiPii.exit.i.i | |
br label %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i | |
_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i: ; preds = %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.loopexit.i, %102, %95 | |
call spir_func void @hc_barrier(i32 1) #8 | |
br i1 %65, label %143, label %176 | |
; <label>:143 ; preds = %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i | |
; Local thread 0 only: write the exclusive prefix sums of the eight queue
; counts into prefix_q[0..7] (prefix_q[0] = 0, prefix_q[k] = sum of counts
; 0..k-1), add the grand total %174 to *%20, and keep the value returned by
; that call in the addrspace(3) shift variable.
%144 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8prefix_q732, i64 0, i64 0 | |
store i32 0, i32 addrspace(3)* %144, align 16, !tbaa !26 | |
%145 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E7local_q631, i64 0, i32 0, i64 0 | |
%146 = load i32 addrspace(3)* %145, align 4, !tbaa !26 | |
%147 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8prefix_q732, i64 0, i64 1 | |
store i32 %146, i32 addrspace(3)* %147, align 4, !tbaa !26 | |
%148 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E7local_q631, i64 0, i32 0, i64 1 | |
%149 = load i32 addrspace(3)* %148, align 4, !tbaa !26 | |
%150 = add nsw i32 %149, %146 | |
%151 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8prefix_q732, i64 0, i64 2 | |
store i32 %150, i32 addrspace(3)* %151, align 8, !tbaa !26 | |
%152 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E7local_q631, i64 0, i32 0, i64 2 | |
%153 = load i32 addrspace(3)* %152, align 4, !tbaa !26 | |
%154 = add nsw i32 %153, %150 | |
%155 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8prefix_q732, i64 0, i64 3 | |
store i32 %154, i32 addrspace(3)* %155, align 4, !tbaa !26 | |
%156 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E7local_q631, i64 0, i32 0, i64 3 | |
%157 = load i32 addrspace(3)* %156, align 4, !tbaa !26 | |
%158 = add nsw i32 %157, %154 | |
%159 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8prefix_q732, i64 0, i64 4 | |
store i32 %158, i32 addrspace(3)* %159, align 16, !tbaa !26 | |
%160 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E7local_q631, i64 0, i32 0, i64 4 | |
%161 = load i32 addrspace(3)* %160, align 4, !tbaa !26 | |
%162 = add nsw i32 %161, %158 | |
%163 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8prefix_q732, i64 0, i64 5 | |
store i32 %162, i32 addrspace(3)* %163, align 4, !tbaa !26 | |
%164 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E7local_q631, i64 0, i32 0, i64 5 | |
%165 = load i32 addrspace(3)* %164, align 4, !tbaa !26 | |
%166 = add nsw i32 %165, %162 | |
%167 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8prefix_q732, i64 0, i64 6 | |
store i32 %166, i32 addrspace(3)* %167, align 8, !tbaa !26 | |
%168 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E7local_q631, i64 0, i32 0, i64 6 | |
%169 = load i32 addrspace(3)* %168, align 4, !tbaa !26 | |
%170 = add nsw i32 %169, %166 | |
%171 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8prefix_q732, i64 0, i64 7 | |
store i32 %170, i32 addrspace(3)* %171, align 4, !tbaa !26 | |
%172 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E7local_q631, i64 0, i32 0, i64 7 | |
%173 = load i32 addrspace(3)* %172, align 4, !tbaa !26 | |
%174 = add nsw i32 %173, %170 | |
%175 = call spir_func i32 @atomic_add_int_global(i32 addrspace(1)* %20, i32 %174) #9 | |
store i32 %175, i32 addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E5shift833, align 4, !tbaa !26 | |
br label %176 | |
; <label>:176 ; preds = %143, %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i | |
; --- Copy the local queues out to the output buffer. ---
; The output buffer %179 is the opposite choice from the input select above
; (%14 when odd_time != 0, else %13). Each thread handles queue %77,
; starting at element %80; the loop is skipped if %80 is not below the
; queue's count.
call spir_func void @hc_barrier(i32 1) #8 | |
%177 = load i32 addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8odd_time1035, align 4, !tbaa !26 | |
%178 = icmp ne i32 %177, 0 | |
%179 = select i1 %178, i32 addrspace(1)* %14, i32 addrspace(1)* %13 | |
%180 = load i32 addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E5shift833, align 4, !tbaa !26 | |
%181 = sext i32 %180 to i64 | |
%182 = load i32 addrspace(3)* %78, align 4, !tbaa !26 | |
%183 = icmp slt i32 %80, %182 | |
br i1 %183, label %.lr.ph.i1.preheader.i, label %_ZN11LocalQueues11concatenateEPiRA8_i.exit.i | |
.lr.ph.i1.preheader.i: ; preds = %176 | |
br label %.lr.ph.i1.i | |
.lr.ph.i1.i: ; preds = %.lr.ph.i1.i, %.lr.ph.i1.preheader.i | |
; Copy loop: output[shift + prefix_q[%77] + i] = local_q.field1[%77][i],
; then i += local_q.field2[%77]; repeat while i < local_q.field0[%77].
; prefix_q, the stride and the count are reloaded on every iteration.
%local_shift.01.i.i = phi i32 [ %192, %.lr.ph.i1.i ], [ %80, %.lr.ph.i1.preheader.i ] | |
%184 = sext i32 %local_shift.01.i.i to i64 | |
%185 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E7local_q631, i64 0, i32 1, i64 %77, i64 %184 | |
%186 = load i32 addrspace(3)* %185, align 4, !tbaa !26 | |
%187 = load i32 addrspace(3)* %81, align 4, !tbaa !26 | |
%188 = add nsw i32 %187, %local_shift.01.i.i | |
%189 = sext i32 %188 to i64 | |
%.sum.i = add nsw i64 %189, %181 | |
%190 = getelementptr inbounds i32 addrspace(1)* %179, i64 %.sum.i | |
store i32 %186, i32 addrspace(1)* %190, align 4, !tbaa !26 | |
%191 = load i32 addrspace(3)* %82, align 4, !tbaa !26 | |
%192 = add nsw i32 %191, %local_shift.01.i.i | |
%193 = load i32 addrspace(3)* %78, align 4, !tbaa !26 | |
%194 = icmp slt i32 %192, %193 | |
br i1 %194, label %.lr.ph.i1.i, label %_ZN11LocalQueues11concatenateEPiRA8_i.exit.loopexit.i | |
_ZN11LocalQueues11concatenateEPiRA8_i.exit.loopexit.i: ; preds = %.lr.ph.i1.i | |
br label %_ZN11LocalQueues11concatenateEPiRA8_i.exit.i | |
_ZN11LocalQueues11concatenateEPiRA8_i.exit.i: ; preds = %_ZN11LocalQueues11concatenateEPiRA8_i.exit.loopexit.i, %176 | |
br i1 %65, label %195, label %200 | |
; <label>:195 ; preds = %_ZN11LocalQueues11concatenateEPiRA8_i.exit.i | |
; Local thread 0 only: odd_time = (odd_time + 1) % 2, and pick the next mark:
; 16677220 if the current mark is 16677219, otherwise 16677219.
%196 = load i32 addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8odd_time1035, align 4, !tbaa !26 | |
%197 = add nsw i32 %196, 1 | |
%198 = srem i32 %197, 2 | |
store i32 %198, i32 addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8odd_time1035, align 4, !tbaa !26 | |
%199 = icmp eq i32 %.0.i, 16677219 | |
%..i = select i1 %199, i32 16677220, i32 16677219 | |
br label %200 | |
; <label>:200 ; preds = %195, %_ZN11LocalQueues11concatenateEPiRA8_i.exit.i | |
; NOTE(review): %.1.i takes the toggled mark only on the path through block
; %195 (local thread 0); every other thread carries %.0.i forward unchanged
; into the next iteration. Confirm this matches the intent of the source.
%.1.i = phi i32 [ %.0.i, %_ZN11LocalQueues11concatenateEPiRA8_i.exit.i ], [ %..i, %195 ] | |
call spir_func void @hc_barrier(i32 1) #8 | |
br i1 %65, label %201, label %_Z20start_global_barrieri.exit.i | |
; <label>:201 ; preds = %200 | |
; --- First cross-group barrier (label names mention start_global_barrier). ---
; Local thread 0 adds 1 to @count24, then spins on a volatile load until
; @count24 >= 14 * (kt + 1).
; NOTE(review): the constant 14 is hard-coded; presumably the number of
; work-groups the kernel is launched with -- confirm against the host code.
%202 = call spir_func i32 @atomic_add_int_global(i32 addrspace(1)* @count24, i32 1) #9 | |
%203 = mul i32 %kt.0.i, 14 | |
%204 = add i32 %203, 14 | |
br label %205 | |
; <label>:205 ; preds = %205, %201 | |
%206 = load volatile i32 addrspace(1)* @count24, align 4, !tbaa !26 | |
%207 = icmp slt i32 %206, %204 | |
br i1 %207, label %205, label %_Z20start_global_barrieri.exit.loopexit.i | |
_Z20start_global_barrieri.exit.loopexit.i: ; preds = %205 | |
br label %_Z20start_global_barrieri.exit.i | |
_Z20start_global_barrieri.exit.i: ; preds = %_Z20start_global_barrieri.exit.loopexit.i, %200 | |
; %208 == 0 only when both the group id and the local id are 0, so the
; continue/stop decision below is made by a single thread.
call spir_func void @hc_barrier(i32 1) #8 | |
%208 = or i64 %96, %64 | |
%brmerge.i = icmp eq i64 %208, 0 | |
br i1 %brmerge.i, label %209, label %214 | |
; <label>:209 ; preds = %_Z20start_global_barrieri.exit.i | |
; Default to "stop" (@stay_vol29 = 0). The unsigned compare of (*%20 - 513)
; against 6655 is true exactly when 513 <= *%20 <= 7167.
store volatile i32 0, i32 addrspace(1)* @stay_vol29, align 4, !tbaa !26 | |
%210 = load i32 addrspace(1)* %20, align 4, !tbaa !26 | |
%.off.i = add i32 %210, -513 | |
%211 = icmp ult i32 %.off.i, 6655 | |
br i1 %211, label %212, label %214 | |
; <label>:212 ; preds = %209 | |
; In range: request another iteration, publish *%20 through
; @no_of_nodes_vol27, and reset *%20 to 0.
store volatile i32 1, i32 addrspace(1)* @stay_vol29, align 4, !tbaa !26 | |
%213 = load i32 addrspace(1)* %20, align 4, !tbaa !26 | |
store volatile i32 %213, i32 addrspace(1)* @no_of_nodes_vol27, align 4, !tbaa !26 | |
store i32 0, i32 addrspace(1)* %20, align 4, !tbaa !26 | |
br label %214 | |
; <label>:214 ; preds = %212, %209, %_Z20start_global_barrieri.exit.i | |
; %215 = kt + 2 becomes the next iteration's counter.
%215 = add nsw i32 %kt.0.i, 2 | |
call spir_func void @hc_barrier(i32 1) #8 | |
br i1 %65, label %216, label %_Z20start_global_barrieri.exit2.i | |
; <label>:216 ; preds = %214 | |
; --- Second cross-group barrier: spin until @count24 >= 14 * (kt + 2). ---
%217 = call spir_func i32 @atomic_add_int_global(i32 addrspace(1)* @count24, i32 1) #9 | |
%218 = mul nsw i32 %215, 14 | |
br label %219 | |
; <label>:219 ; preds = %219, %216 | |
%220 = load volatile i32 addrspace(1)* @count24, align 4, !tbaa !26 | |
%221 = icmp slt i32 %220, %218 | |
br i1 %221, label %219, label %_Z20start_global_barrieri.exit2.loopexit.i | |
_Z20start_global_barrieri.exit2.loopexit.i: ; preds = %219 | |
br label %_Z20start_global_barrieri.exit2.i | |
_Z20start_global_barrieri.exit2.i: ; preds = %_Z20start_global_barrieri.exit2.loopexit.i, %214 | |
; Every thread reads @stay_vol29: non-zero loops back to %83, zero exits.
call spir_func void @hc_barrier(i32 1) #8 | |
%222 = load volatile i32 addrspace(1)* @stay_vol29, align 4, !tbaa !26 | |
%223 = icmp eq i32 %222, 0 | |
br i1 %223, label %224, label %83 | |
; <label>:224 ; preds = %_Z20start_global_barrieri.exit2.i | |
; Only the thread with group id 0 and local id 0 falls through to %225.
%.not4.i = icmp ne i64 %96, 0 | |
%.not5.i = xor i1 %65, true | |
%brmerge6.i = or i1 %.not4.i, %.not5.i | |
br i1 %brmerge6.i, label %_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_.exit, label %225 | |
; <label>:225 ; preds = %224 | |
; Write results back: *%25 = final counter, *%23 = (odd_time + 1) % 2,
; *%19 = @no_of_nodes_vol27.
store i32 %215, i32 addrspace(1)* %25, align 4, !tbaa !26 | |
%226 = load i32 addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8odd_time1035, align 4, !tbaa !26 | |
%227 = add nsw i32 %226, 1 | |
%228 = srem i32 %227, 2 | |
store i32 %228, i32 addrspace(1)* %23, align 4, !tbaa !26 | |
%229 = load volatile i32 addrspace(1)* @no_of_nodes_vol27, align 4, !tbaa !26 | |
store i32 %229, i32 addrspace(1)* %19, align 4, !tbaa !26 | |
br label %_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_.exit | |
_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_.exit: ; preds = %225, %224 | |
; End the lifetimes of both stack grid_launch_parm copies and return.
%230 = bitcast %struct.grid_launch_parm* %28 to i8* | |
call spir_func void @llvm.lifetime.end(i64 72, i8* %230) | |
call spir_func void @llvm.lifetime.end(i64 72, i8* %48) | |
ret void | |
} | |
; External declaration (no body in this module): takes an i32 pointer in addrspace(1)
; and an i32, returns i32. Carries attribute group #6.
declare i32 @atomic_or_int_global(i32 addrspace(1)*, i32) #6 | |
; Kernel entry point (spir_kernel) with 24 parameters: thirteen i32 values (%0-%12)
; followed by addrspace(1) pointers and i32 scalars (%13-%23).
; The entry block (%24) packs launch values into a stack grid_launch_parm, copies it
; to a second stack slot, and then branches on whether amp_get_local_id(0) is zero.
; Function Attrs: uwtable | |
define spir_kernel void @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 addrspace(1)*, i32 addrspace(1)*, %"class.hc::short_vector::int_2.0" addrspace(1)*, %"class.hc::short_vector::int_2.0" addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32, i32 addrspace(1)*, i32, i32, i32 addrspace(1)*) #5 align 2 { | |
; Two 72-byte-lifetime grid_launch_parm slots (%26 is filled first, %25 receives a copy)
; and a 20-byte scratch array.
%25 = alloca %struct.grid_launch_parm, align 8 | |
%26 = alloca %struct.grid_launch_parm, align 16 | |
%.sroa.23 = alloca [20 x i8], align 4 | |
; %30 = <%0, %1, %2, %3>
%27 = insertelement <4 x i32> undef, i32 %0, i32 0 | |
%28 = insertelement <4 x i32> %27, i32 %1, i32 1 | |
%29 = insertelement <4 x i32> %28, i32 %2, i32 2 | |
%30 = insertelement <4 x i32> %29, i32 %3, i32 3 | |
; %32 = <%4, %5, undef, undef>; lanes 2 and 3 are filled in at %46/%47.
%31 = insertelement <4 x i32> undef, i32 %4, i32 0 | |
%32 = insertelement <4 x i32> %31, i32 %5, i32 1 | |
; Query amp_get_local_id / amp_get_group_id for dimensions 2, 1, 0 and truncate to i32.
; Presumably these return the work-item's local id and work-group id per dimension
; (helpers are declared elsewhere; verify against their definitions).
%33 = tail call spir_func i64 @amp_get_local_id(i32 2) #7 | |
%34 = tail call spir_func i64 @amp_get_local_id(i32 1) #7 | |
%35 = tail call spir_func i64 @amp_get_local_id(i32 0) #7 | |
%36 = trunc i64 %33 to i32 | |
%37 = trunc i64 %34 to i32 | |
%38 = trunc i64 %35 to i32 | |
%39 = tail call spir_func i64 @amp_get_group_id(i32 2) #7 | |
%40 = tail call spir_func i64 @amp_get_group_id(i32 1) #7 | |
%41 = tail call spir_func i64 @amp_get_group_id(i32 0) #7 | |
%42 = trunc i64 %39 to i32 | |
%43 = trunc i64 %40 to i32 | |
%44 = trunc i64 %41 to i32 | |
%45 = bitcast %struct.grid_launch_parm* %26 to i8* | |
call spir_func void @llvm.lifetime.start(i64 72, i8* %45) | |
; %47 = <%4, %5, group_id(0), group_id(1)>
%46 = insertelement <4 x i32> %32, i32 %44, i32 2 | |
%47 = insertelement <4 x i32> %46, i32 %43, i32 3 | |
; %51 = <group_id(2), local_id(0), local_id(1), local_id(2)>
%48 = insertelement <4 x i32> undef, i32 %42, i32 0 | |
%49 = insertelement <4 x i32> %48, i32 %38, i32 1 | |
%50 = insertelement <4 x i32> %49, i32 %37, i32 2 | |
%51 = insertelement <4 x i32> %50, i32 %36, i32 3 | |
; Three 16-byte vector stores fill the four gl_dim3 members of %26 (twelve i32 values):
; starting at the struct base, at member 1 element 1, and at member 2 element 2.
%52 = bitcast %struct.grid_launch_parm* %26 to <4 x i32>* | |
store <4 x i32> %30, <4 x i32>* %52, align 16 | |
%53 = getelementptr inbounds %struct.grid_launch_parm* %26, i64 0, i32 1, i32 1 | |
%54 = bitcast i32* %53 to <4 x i32>* | |
store <4 x i32> %47, <4 x i32>* %54, align 8 | |
%55 = getelementptr inbounds %struct.grid_launch_parm* %26, i64 0, i32 2, i32 2 | |
%56 = bitcast i32* %55 to <4 x i32>* | |
store <4 x i32> %51, <4 x i32>* %56, align 8 | |
; Member 4 (the lone i32 field) receives argument %12.
%57 = getelementptr inbounds %struct.grid_launch_parm* %26, i64 0, i32 4 | |
store i32 %12, i32* %57, align 16 | |
; Bytes 52..71 of %26 are copied from %.sroa.23.
; NOTE(review): no store to %.sroa.23 is visible in this function, so these 20 bytes
; appear to be uninitialized; confirm nothing reads them.
%58 = getelementptr inbounds i8* %45, i64 52 | |
%59 = getelementptr inbounds [20 x i8]* %.sroa.23, i64 0, i64 0 | |
call spir_func void @llvm.memcpy.p0i8.p0i8.i64(i8* %58, i8* %59, i64 20, i32 4, i1 false) | |
; Duplicate the whole 72-byte struct from %26 into %25.
%60 = bitcast %struct.grid_launch_parm* %25 to i8* | |
call spir_func void @llvm.lifetime.start(i64 72, i8* %60) | |
%tmp = bitcast %struct.grid_launch_parm* %25 to i8* | |
%tmp1 = bitcast %struct.grid_launch_parm* %26 to i8* | |
call spir_func void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %tmp1, i64 72, i32 1, i1 false) | |
; %61 is re-queried (non-tail call) and reused throughout the body; %62 is true only
; when it equals zero and gates the single-work-item sections below.
%61 = call spir_func i64 @amp_get_local_id(i32 0) #7 | |
%62 = icmp eq i64 %61, 0 | |
br i1 %62, label %63, label %.preheader.i | |
; Reached only when %62 is true (amp_get_local_id(0) == 0): reset the addrspace(3)
; tot_sum counter to zero before the main loop.
; <label>:63 ; preds = %24 | |
store i32 0, i32 addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7tot_sum548, align 4, !tbaa !26 | |
br label %.preheader.i | |
; Loop-invariant values and addresses derived from %61. local_q has type
; %struct.LocalQueues = { [8 x i32], [8 x [400 x i32]], [8 x i32] }.
.preheader.i: ; preds = %63, %24 | |
; %64: true for the work-items with %61 < 8 (they run the per-queue reset in block %76).
%64 = icmp slt i64 %61, 8 | |
; %65: low 32 bits of %61, sign-extended back to i64.
%sext6.i = shl i64 %61, 32 | |
%65 = ashr exact i64 %sext6.i, 32 | |
; %66 / %67: element %65 of local_q member 0 and member 2 (only dereferenced under %64).
%66 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245, i64 0, i32 0, i64 %65 | |
%67 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245, i64 0, i32 2, i64 %65 | |
; %68 = %61 & 7 selects which of the eight queues this work-item uses.
%68 = and i64 %61, 7 | |
; %69: member 0 entry for queue %68 (used later as that queue's append counter).
%69 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245, i64 0, i32 0, i64 %68 | |
; %70 = %61 >> 3 is this work-item's starting offset inside queue %68 for the copy loops.
%70 = lshr i64 %61, 3 | |
%71 = trunc i64 %70 to i32 | |
; %72: prefix_q[%68]; %73: local_q member 2 entry for queue %68 (the copy-loop stride).
%72 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q346, i64 0, i64 %68 | |
%73 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245, i64 0, i32 2, i64 %68 | |
; %74: low 32 bits of %70, sign-extended to i64 (initial induction value of the %177 loop).
%sext18.i = shl i64 %70, 32 | |
%74 = ashr exact i64 %sext18.i, 32 | |
br label %.outer.i | |
; Outer loop header. %.01.ph.i starts as argument %21 and is replaced by the toggled
; value %..i on the back-edge from %185; %.0.ph.i starts as argument %19 and is
; replaced by the reloaded tot_sum (%184).
.outer.i: ; preds = %185, %.preheader.i | |
%.01.ph.i = phi i32 [ %21, %.preheader.i ], [ %..i, %185 ] | |
%.0.ph.i = phi i32 [ %19, %.preheader.i ], [ %184, %185 ] | |
br label %75 | |
; Inner loop header: %.0.i is the bound compared against this work-item's index below.
; <label>:75 ; preds = %_ZN11LocalQueues11concatenateEPiRA8_i.exit.i, %.outer.i | |
%.0.i = phi i32 [ %184, %_ZN11LocalQueues11concatenateEPiRA8_i.exit.i ], [ %.0.ph.i, %.outer.i ] | |
br i1 %64, label %76, label %84 | |
; Only work-items with %61 < 8: reset queue %61's member-0 counter to zero and set its
; member-2 entry to (local_size >> 3) + (%61 < (local_size & 7) ? 1 : 0).
; <label>:76 ; preds = %75 | |
store i32 0, i32 addrspace(3)* %66, align 4, !tbaa !26 | |
%77 = call spir_func i64 @amp_get_local_size(i32 0) #7 | |
%78 = lshr i64 %77, 3 | |
%79 = and i64 %77, 7 | |
%80 = icmp slt i64 %61, %79 | |
%81 = zext i1 %80 to i64 | |
%82 = add nuw nsw i64 %81, %78 | |
%83 = trunc i64 %82 to i32 | |
store i32 %83, i32 addrspace(3)* %67, align 4, !tbaa !26 | |
br label %84 | |
; hc_barrier is declared elsewhere; presumably a work-group barrier so the resets above
; are visible to every work-item (verify against its definition).
; <label>:84 ; preds = %76, %75 | |
call spir_func void @hc_barrier(i32 1) #8 | |
; %87 = hc_get_group_id(0) * 512 + %61; its low 32 bits (%88) are compared with the bound.
%85 = call spir_func i64 @hc_get_group_id(i32 0) #7 | |
%86 = shl nsw i64 %85, 9 | |
%87 = add nsw i64 %86, %61 | |
%88 = trunc i64 %87 to i32 | |
%89 = icmp slt i32 %88, %.0.i | |
; Work-items whose index is not below the bound skip the node visit entirely.
br i1 %89, label %90, label %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i | |
; Choose where this work-item reads its node id from: the addrspace(1) buffer %13 when
; tot_sum is zero, otherwise the addrspace(3) next_wf array. Both are indexed by %93.
; <label>:90 ; preds = %84 | |
%91 = load i32 addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7tot_sum548, align 4, !tbaa !26 | |
%92 = icmp eq i32 %91, 0 | |
; %93: low 32 bits of %87, sign-extended to i64.
%sext.i = shl i64 %87, 32 | |
%93 = ashr exact i64 %sext.i, 32 | |
br i1 %92, label %94, label %96 | |
; The two candidate pointers live in different address spaces (1 vs 3), so they cannot
; be merged by a pointer-typed phi: every phi operand must have the phi's own type.
; Each predecessor therefore performs its own load with the matching address space and
; the join block merges the loaded i32 values instead.
; <label>:94 ; preds = %90 | |
%95 = getelementptr inbounds i32 addrspace(1)* %13, i64 %93 | |
%pid.global.i = load i32 addrspace(1)* %95, align 4 | |
br label %98 | |
; <label>:96 ; preds = %90 | |
%97 = getelementptr inbounds [512 x i32] addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7next_wf447, i64 0, i64 %93 | |
%pid.local.i = load i32 addrspace(3)* %97, align 4 | |
br label %98 | |
; <label>:98 ; preds = %96, %94 | |
; %pid.0.i is the selected node id; %99 is its i64 index form.
%pid.0.i = phi i32 [ %pid.global.i, %94 ], [ %pid.local.i, %96 ] | |
%99 = sext i32 %pid.0.i to i64 | |
; Write the constant 16677221 into %17[pid].
%100 = getelementptr inbounds i32 addrspace(1)* %17, i64 %99 | |
store i32 16677221, i32 addrspace(1)* %100, align 4, !tbaa !26 | |
; %102 = %18[pid]; it is added to each neighbour's second field in the loop that follows.
%101 = getelementptr inbounds i32 addrspace(1)* %18, i64 %99 | |
%102 = load i32 addrspace(1)* %101, align 4, !tbaa !26 | |
; %104 / %106 are the two i32 fields of %15[pid]; %107 = %104 + %106 is the loop's end
; bound, and the loop is skipped when %106 is not positive.
%103 = getelementptr inbounds %"class.hc::short_vector::int_2.0" addrspace(1)* %15, i64 %99, i32 0 | |
%104 = load i32 addrspace(1)* %103, align 4, !tbaa !28 | |
%105 = getelementptr inbounds %"class.hc::short_vector::int_2.0" addrspace(1)* %15, i64 %99, i32 1 | |
%106 = load i32 addrspace(1)* %105, align 4, !tbaa !30 | |
%107 = add nsw i32 %106, %104 | |
%108 = icmp sgt i32 %106, 0 | |
br i1 %108, label %.lr.ph.i.i, label %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i | |
; Loop over the entries of %16 with indices [%104, %107), one iteration per entry.
.lr.ph.i.i: ; preds = %98 | |
%109 = sext i32 %104 to i64 | |
br label %110 | |
; <label>:110 ; preds = %_ZN11LocalQueues6appendEiPii.exit.i.i, %.lr.ph.i.i | |
%indvars.iv.i.i = phi i64 [ %109, %.lr.ph.i.i ], [ %indvars.iv.next.i.i, %_ZN11LocalQueues6appendEiPii.exit.i.i ] | |
; %112 / %114: first and second i32 fields of %16[iv].
%111 = getelementptr inbounds %"class.hc::short_vector::int_2.0" addrspace(1)* %16, i64 %indvars.iv.i.i, i32 0 | |
%112 = load i32 addrspace(1)* %111, align 4, !tbaa !28 | |
%113 = getelementptr inbounds %"class.hc::short_vector::int_2.0" addrspace(1)* %16, i64 %indvars.iv.i.i, i32 1 | |
%114 = load i32 addrspace(1)* %113, align 4, !tbaa !30 | |
; Candidate value %115 = %114 + %102 is passed to atomic_min_int_global on %18[%112].
; The append path is taken only when the returned value %118 is greater than %115.
; (Presumably %118 is the previous contents of %18[%112]; helper is declared elsewhere.)
%115 = add nsw i32 %114, %102 | |
%116 = sext i32 %112 to i64 | |
%117 = getelementptr inbounds i32 addrspace(1)* %18, i64 %116 | |
%118 = call spir_func i32 @atomic_min_int_global(i32 addrspace(1)* %117, i32 %115) #9 | |
%119 = icmp sgt i32 %118, %115 | |
br i1 %119, label %120, label %_ZN11LocalQueues6appendEiPii.exit.i.i | |
; Exchange %.01.ph.i into %17[%112]; if the returned value already equals %.01.ph.i
; the entry is not appended again.
; <label>:120 ; preds = %110 | |
%121 = getelementptr inbounds i32 addrspace(1)* %17, i64 %116 | |
%122 = call spir_func i32 @atomic_exchange_int_global(i32 addrspace(1)* %121, i32 %.01.ph.i) #9 | |
%123 = icmp eq i32 %122, %.01.ph.i | |
br i1 %123, label %_ZN11LocalQueues6appendEiPii.exit.i.i, label %124 | |
; Reserve a slot in queue %68 by atomically adding 1 to its member-0 counter; the
; returned value %125 is used as the slot index.
; <label>:124 ; preds = %120 | |
%125 = call spir_func i32 @atomic_add_int_local(i32 addrspace(3)* %69, i32 1) #9 | |
%126 = icmp sgt i32 %125, 399 | |
br i1 %126, label %127, label %128 | |
; Index beyond the 400-entry queue: nothing is stored in the queue; 1 is written to
; the location pointed to by argument %23 instead.
; <label>:127 ; preds = %124 | |
store i32 1, i32 addrspace(1)* %23, align 4, !tbaa !26 | |
br label %_ZN11LocalQueues6appendEiPii.exit.i.i | |
; In range: store %112 into local_q member 1, queue %68, slot %125.
; <label>:128 ; preds = %124 | |
%129 = sext i32 %125 to i64 | |
%130 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245, i64 0, i32 1, i64 %68, i64 %129 | |
store i32 %112, i32 addrspace(3)* %130, align 4, !tbaa !26 | |
br label %_ZN11LocalQueues6appendEiPii.exit.i.i | |
; Loop latch: advance the index by one and continue while its low 32 bits are below %107.
_ZN11LocalQueues6appendEiPii.exit.i.i: ; preds = %128, %127, %120, %110 | |
%indvars.iv.next.i.i = add nsw i64 %indvars.iv.i.i, 1 | |
%131 = trunc i64 %indvars.iv.next.i.i to i32 | |
%132 = icmp slt i32 %131, %107 | |
br i1 %132, label %110, label %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.loopexit.i | |
_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.loopexit.i: ; preds = %_ZN11LocalQueues6appendEiPii.exit.i.i | |
br label %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i | |
; Join point after the node visit. hc_barrier is called before the counters are read
; (presumably a work-group barrier; helper is declared elsewhere).
_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i: ; preds = %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.loopexit.i, %98, %84 | |
call spir_func void @hc_barrier(i32 1) #8 | |
br i1 %62, label %133, label %165 | |
; Executed only when %62 is true (amp_get_local_id(0) == 0). Fully unrolled running sum
; over the eight member-0 counters of local_q:
;   prefix_q[0] = 0, prefix_q[k] = counter[0] + ... + counter[k-1] for k = 1..7.
; The grand total of all eight counters is then stored to tot_sum and to *%20.
; <label>:133 ; preds = %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i | |
%134 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q346, i64 0, i64 0 | |
store i32 0, i32 addrspace(3)* %134, align 16, !tbaa !26 | |
%135 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245, i64 0, i32 0, i64 0 | |
%136 = load i32 addrspace(3)* %135, align 4, !tbaa !26 | |
%137 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q346, i64 0, i64 1 | |
store i32 %136, i32 addrspace(3)* %137, align 4, !tbaa !26 | |
%138 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245, i64 0, i32 0, i64 1 | |
%139 = load i32 addrspace(3)* %138, align 4, !tbaa !26 | |
%140 = add nsw i32 %139, %136 | |
%141 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q346, i64 0, i64 2 | |
store i32 %140, i32 addrspace(3)* %141, align 8, !tbaa !26 | |
%142 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245, i64 0, i32 0, i64 2 | |
%143 = load i32 addrspace(3)* %142, align 4, !tbaa !26 | |
%144 = add nsw i32 %143, %140 | |
%145 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q346, i64 0, i64 3 | |
store i32 %144, i32 addrspace(3)* %145, align 4, !tbaa !26 | |
%146 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245, i64 0, i32 0, i64 3 | |
%147 = load i32 addrspace(3)* %146, align 4, !tbaa !26 | |
%148 = add nsw i32 %147, %144 | |
%149 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q346, i64 0, i64 4 | |
store i32 %148, i32 addrspace(3)* %149, align 16, !tbaa !26 | |
%150 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245, i64 0, i32 0, i64 4 | |
%151 = load i32 addrspace(3)* %150, align 4, !tbaa !26 | |
%152 = add nsw i32 %151, %148 | |
%153 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q346, i64 0, i64 5 | |
store i32 %152, i32 addrspace(3)* %153, align 4, !tbaa !26 | |
%154 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245, i64 0, i32 0, i64 5 | |
%155 = load i32 addrspace(3)* %154, align 4, !tbaa !26 | |
%156 = add nsw i32 %155, %152 | |
%157 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q346, i64 0, i64 6 | |
store i32 %156, i32 addrspace(3)* %157, align 8, !tbaa !26 | |
%158 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245, i64 0, i32 0, i64 6 | |
%159 = load i32 addrspace(3)* %158, align 4, !tbaa !26 | |
%160 = add nsw i32 %159, %156 | |
%161 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q346, i64 0, i64 7 | |
store i32 %160, i32 addrspace(3)* %161, align 4, !tbaa !26 | |
%162 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245, i64 0, i32 0, i64 7 | |
%163 = load i32 addrspace(3)* %162, align 4, !tbaa !26 | |
; %164 is the sum of all eight counters.
%164 = add nsw i32 %163, %160 | |
store i32 %164, i32 addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7tot_sum548, align 4, !tbaa !26 | |
store i32 %164, i32 addrspace(1)* %20, align 4, !tbaa !26 | |
br label %165 | |
; After another hc_barrier call, every work-item reloads tot_sum (%166) and dispatches:
;   tot_sum == 0   -> leave the kernel;
;   tot_sum <  513 -> copy the queues into the addrspace(3) next_wf array and loop again;
;   otherwise      -> block %187, which copies the queues out through argument %14.
; <label>:165 ; preds = %133, %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i | |
call spir_func void @hc_barrier(i32 1) #8 | |
%166 = load i32 addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7tot_sum548, align 4, !tbaa !26 | |
%167 = icmp eq i32 %166, 0 | |
br i1 %167, label %_ZN11LocalQueues11concatenateEPiRA8_i.exit5.loopexit22.i, label %168 | |
; %170 is queue %68's element counter; %171 tells whether this work-item's starting
; offset %71 is below it, i.e. whether it has anything to copy in either copy loop.
; <label>:168 ; preds = %165 | |
%169 = icmp slt i32 %166, 513 | |
%170 = load i32 addrspace(3)* %69, align 4, !tbaa !26 | |
%171 = icmp slt i32 %71, %170 | |
br i1 %169, label %172, label %187 | |
; <label>:172 ; preds = %168 | |
br i1 %171, label %.lr.ph.i2.i, label %_ZN11LocalQueues11concatenateEPiRA8_i.exit.i | |
; %176 = prefix_q[%68] is the destination base; %175 = member-2 entry of queue %68 is
; the loop stride. Both are loaded once, before the loop.
.lr.ph.i2.i: ; preds = %172 | |
%173 = load i32 addrspace(3)* %72, align 4, !tbaa !26 | |
%174 = load i32 addrspace(3)* %73, align 4, !tbaa !26 | |
%175 = sext i32 %174 to i64 | |
%176 = sext i32 %173 to i64 | |
br label %177 | |
; Strided copy: next_wf[%176 + i] = local_q member 1 [%68][i], for i starting at %74
; and advancing by %175 while the low 32 bits of i stay below %170.
; <label>:177 ; preds = %177, %.lr.ph.i2.i | |
%indvars.iv.i = phi i64 [ %indvars.iv.next.i, %177 ], [ %74, %.lr.ph.i2.i ] | |
%178 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245, i64 0, i32 1, i64 %68, i64 %indvars.iv.i | |
%179 = load i32 addrspace(3)* %178, align 4, !tbaa !26 | |
%180 = add nsw i64 %176, %indvars.iv.i | |
%181 = getelementptr inbounds [512 x i32] addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7next_wf447, i64 0, i64 %180 | |
store i32 %179, i32 addrspace(3)* %181, align 4, !tbaa !26 | |
%indvars.iv.next.i = add nsw i64 %indvars.iv.i, %175 | |
%182 = trunc i64 %indvars.iv.next.i to i32 | |
%183 = icmp slt i32 %182, %170 | |
br i1 %183, label %177, label %_ZN11LocalQueues11concatenateEPiRA8_i.exit.loopexit.i | |
_ZN11LocalQueues11concatenateEPiRA8_i.exit.loopexit.i: ; preds = %177 | |
br label %_ZN11LocalQueues11concatenateEPiRA8_i.exit.i | |
; hc_barrier call, then tot_sum is reloaded as %184, the bound for the next pass.
; NOTE(review): only the work-item with %62 true passes through %185 and re-enters
; .outer.i, so only it sees the toggled %.01.ph.i; all others branch straight to %75
; and keep the previous %.01.ph.i. Confirm against the kernel source that this is intended.
_ZN11LocalQueues11concatenateEPiRA8_i.exit.i: ; preds = %_ZN11LocalQueues11concatenateEPiRA8_i.exit.loopexit.i, %172 | |
call spir_func void @hc_barrier(i32 1) #8 | |
%184 = load i32 addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7tot_sum548, align 4, !tbaa !26 | |
br i1 %62, label %185, label %75 | |
; Toggle the value fed back into %.01.ph.i: 16677219 becomes 16677220, anything else
; becomes 16677219.
; <label>:185 ; preds = %_ZN11LocalQueues11concatenateEPiRA8_i.exit.i | |
%186 = icmp eq i32 %.01.ph.i, 16677219 | |
%..i = select i1 %186, i32 16677220, i32 16677219 | |
br label %.outer.i | |
; Reached when tot_sum is 513 or more (%169 false). Work-items with nothing to copy
; (%171 false) go straight to the function exit.
; <label>:187 ; preds = %168 | |
br i1 %171, label %.lr.ph.i3.preheader.i, label %_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_.exit | |
.lr.ph.i3.preheader.i: ; preds = %187 | |
br label %.lr.ph.i3.i | |
; Strided copy of queue %68 to the addrspace(1) buffer %14:
;   %14[prefix_q[%68] + s] = local_q member 1 [%68][s]
; for s starting at %71. Unlike the next_wf copy loop, the base (%72), stride (%73)
; and limit (%69) are reloaded from addrspace(3) memory on every iteration.
.lr.ph.i3.i: ; preds = %.lr.ph.i3.i, %.lr.ph.i3.preheader.i | |
%local_shift.01.i4.i = phi i32 [ %196, %.lr.ph.i3.i ], [ %71, %.lr.ph.i3.preheader.i ] | |
%188 = sext i32 %local_shift.01.i4.i to i64 | |
%189 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245, i64 0, i32 1, i64 %68, i64 %188 | |
%190 = load i32 addrspace(3)* %189, align 4, !tbaa !26 | |
%191 = load i32 addrspace(3)* %72, align 4, !tbaa !26 | |
%192 = add nsw i32 %191, %local_shift.01.i4.i | |
%193 = sext i32 %192 to i64 | |
%194 = getelementptr inbounds i32 addrspace(1)* %14, i64 %193 | |
store i32 %190, i32 addrspace(1)* %194, align 4, !tbaa !26 | |
; Advance by the queue's member-2 entry and continue while below its counter.
%195 = load i32 addrspace(3)* %73, align 4, !tbaa !26 | |
%196 = add nsw i32 %195, %local_shift.01.i4.i | |
%197 = load i32 addrspace(3)* %69, align 4, !tbaa !26 | |
%198 = icmp slt i32 %196, %197 | |
br i1 %198, label %.lr.ph.i3.i, label %_ZN11LocalQueues11concatenateEPiRA8_i.exit5.loopexit.i | |
_ZN11LocalQueues11concatenateEPiRA8_i.exit5.loopexit.i: ; preds = %.lr.ph.i3.i | |
br label %_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_.exit | |
; Landing block for the tot_sum == 0 exit taken from block %165.
_ZN11LocalQueues11concatenateEPiRA8_i.exit5.loopexit22.i: ; preds = %165 | |
br label %_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_.exit | |
; Common exit: end the lifetimes of both stack grid_launch_parm slots (%25, and %26
; through its i8* alias %45) and return.
_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_.exit: ; preds = %_ZN11LocalQueues11concatenateEPiRA8_i.exit5.loopexit22.i, %_ZN11LocalQueues11concatenateEPiRA8_i.exit5.loopexit.i, %187 | |
%199 = bitcast %struct.grid_launch_parm* %25 to i8* | |
call spir_func void @llvm.lifetime.end(i64 72, i8* %199) | |
call spir_func void @llvm.lifetime.end(i64 72, i8* %45) | |
ret void | |
} | |
; Attribute groups, referenced elsewhere in the module as #N.
; Uses visible near this point in the file: #5 on the BFS_in_GPU_kernel trampoline
; definition, #6 on the atomic_or_int_global declaration, #7 on amp_get_* and
; hc_get_group_id calls, #8 on hc_barrier calls, #9 on atomic_* calls.
attributes #0 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | |
attributes #1 = { nounwind } | |
attributes #2 = { noduplicate "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" } | |
attributes #3 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" } | |
attributes #4 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | |
attributes #5 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | |
attributes #6 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | |
; Call-site groups: #8 carries noduplicate, so calls tagged with it must not be duplicated.
attributes #7 = { nobuiltin nounwind readnone } | |
attributes #8 = { nobuiltin noduplicate } | |
attributes #9 = { nobuiltin } | |
!llvm.ident = !{!0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !1, !1, !1, !1} | |
!opencl.kernels = !{!2, !8, !10, !16, !22} | |
!0 = metadata !{metadata !"HCC clang version 3.5.0 (based on HCC 0.10.16186-d14f969-7461349 LLVM 3.5.0svn)"} | |
!1 = metadata !{metadata !"HCC clang version 3.5.0 (based on HCC 0.10.16256-042a253-061e735 LLVM 3.5.0svn)"} | |
!2 = metadata !{void (i64, i8 addrspace(1)*, i8)* @ZZ16ihipMemsetKernelIcEN2hc17completion_futureEP12ihipStream_tPT_S4_mENUlNS0_11tiled_indexILi1EEEE_19__cxxamp_trampolineEmPcc, metadata !3, metadata !4, metadata !5, metadata !6, metadata !7} | |
!3 = metadata !{metadata !"kernel_arg_addr_space", i32 0, i32 0, i32 0} | |
!4 = metadata !{metadata !"kernel_arg_access_qual", metadata !"none", metadata !"none", metadata !"none"} | |
!5 = metadata !{metadata !"kernel_arg_type", metadata !"size_t", metadata !"char*", metadata !"char"} | |
!6 = metadata !{metadata !"kernel_arg_type_qual", metadata !"", metadata !"", metadata !""} | |
!7 = metadata !{metadata !"kernel_arg_name", metadata !"", metadata !"", metadata !""} | |
!8 = metadata !{void (i64, i32 addrspace(1)*, i32)* @ZZ16ihipMemsetKernelIjEN2hc17completion_futureEP12ihipStream_tPT_S4_mENUlNS0_11tiled_indexILi1EEEE_19__cxxamp_trampolineEmPjj, metadata !3, metadata !4, metadata !9, metadata !6, metadata !7} | |
!9 = metadata !{metadata !"kernel_arg_type", metadata !"size_t", metadata !"uint*", metadata !"uint"} | |
!10 = metadata !{void (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 addrspace(1)*, i32 addrspace(1)*, %"class.hc::short_vector::int_2.0" addrspace(1)*, %"class.hc::short_vector::int_2.0" addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32, i32 addrspace(1)*, i32, i32, i32 addrspace(1)*)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_, metadata !11, metadata !12, metadata !13, metadata !14, metadata !15} | |
!11 = metadata !{metadata !"kernel_arg_addr_space", i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0} | |
!12 = metadata !{metadata !"kernel_arg_access_qual", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none"} | |
!13 = metadata !{metadata !"kernel_arg_type", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"uint", metadata !"int*", metadata !"int*", metadata !"class_hc__short_vector__int_2_gl*", metadata !"class_hc__short_vector__int_2_gl*", metadata !"int*", metadata !"int*", metadata !"int", metadata !"int*", metadata !"int", metadata !"int", metadata !"int*"} | |
!14 = metadata !{metadata !"kernel_arg_type_qual", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"const", metadata !"", metadata !"const", metadata !"const", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !""} | |
!15 = metadata !{metadata !"kernel_arg_name", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !""} | |
!16 = metadata !{void (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 addrspace(1)*, i32 addrspace(1)*, %"class.hc::short_vector::int_2.0" addrspace(1)*, %"class.hc::short_vector::int_2.0" addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32, i32, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1, metadata !17, metadata !18, metadata !19, metadata !20, metadata !21} | |
!17 = metadata !{metadata !"kernel_arg_addr_space", i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0} | |
!18 = metadata !{metadata !"kernel_arg_access_qual", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none"} | |
!19 = metadata !{metadata !"kernel_arg_type", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"uint", metadata !"int*", metadata !"int*", metadata !"class_hc__short_vector__int_2_gl*", metadata !"class_hc__short_vector__int_2_gl*", metadata !"int*", metadata !"int*", metadata !"int*", metadata !"int*", metadata !"int", metadata !"int", metadata !"int*", metadata !"int*", metadata !"int*", metadata !"int*"} | |
!20 = metadata !{metadata !"kernel_arg_type_qual", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"const", metadata !"const", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"const", metadata !"", metadata !""} | |
!21 = metadata !{metadata !"kernel_arg_name", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !""} | |
; Kernel descriptor for the BFS_in_GPU_kernel trampoline. Its function type matches the one in !10,
; so it reuses that descriptor's argument lists !11–!15 rather than defining its own.
!22 = metadata !{void (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 addrspace(1)*, i32 addrspace(1)*, %"class.hc::short_vector::int_2.0" addrspace(1)*, %"class.hc::short_vector::int_2.0" addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32, i32 addrspace(1)*, i32, i32, i32 addrspace(1)*)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_, metadata !11, metadata !12, metadata !13, metadata !14, metadata !15} | |
; Type-based alias analysis (TBAA) nodes.
; NOTE(review): the three-operand nodes (!23, !26, !28, !30) look like struct-path access tags of the
; form (base type, access type, byte offset) — confirm against the LLVM version that produced this file.
; Access tag: base and access type are both "omnipotent char", offset 0.
!23 = metadata !{metadata !24, metadata !24, i64 0} | |
; Type node "omnipotent char", parented directly to the root !25.
!24 = metadata !{metadata !"omnipotent char", metadata !25, i64 0} | |
; Root node of this TBAA tree.
!25 = metadata !{metadata !"Simple C/C++ TBAA"} | |
; Access tag: base and access type are both "int", offset 0.
!26 = metadata !{metadata !27, metadata !27, i64 0} | |
; Type node "int", parented to "omnipotent char" (!24).
!27 = metadata !{metadata !"int", metadata !24, i64 0} | |
; Access tag: "int" member at offset 0 of the struct described by !29.
!28 = metadata !{metadata !29, metadata !27, i64 0} | |
; Struct type node named by the mangled typeinfo string for hc::short_vector::int_2:
; two "int" (!27) members, at offsets 0 and 4.
!29 = metadata !{metadata !"_ZTSN2hc12short_vector5int_2E", metadata !27, i64 0, metadata !27, i64 4} | |
; Access tag: "int" member at offset 4 of the struct described by !29.
!30 = metadata !{metadata !29, metadata !27, i64 4} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment