Created
June 13, 2017 10:48
-
-
Save hughperkins/9176720df4c6f189ca72c73997a6209d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; ModuleID = './fill_copy_sequence-device-noopt.ll' | |
source_filename = "./fill_copy_sequence.cu" | |
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" | |
target triple = "nvptx64-nvidia-cuda" | |
%"class.thrust::system::cuda::detail::bulk_::uninitialized" = type { %"union.thrust::system::cuda::detail::bulk_::detail::aligned_storage<24, 8>::type" } | |
%"union.thrust::system::cuda::detail::bulk_::detail::aligned_storage<24, 8>::type" = type { [24 x i8] } | |
%"class.thrust::system::cuda::detail::bulk_::detail::cuda_task" = type <{ %"class.thrust::system::cuda::detail::bulk_::detail::task_base", i32, [4 x i8] }> | |
%"class.thrust::system::cuda::detail::bulk_::detail::task_base" = type { %"class.thrust::system::cuda::detail::bulk_::detail::closure", %"class.thrust::system::cuda::detail::bulk_::parallel_group" } | |
%"class.thrust::system::cuda::detail::bulk_::detail::closure" = type { %"struct.thrust::system::cuda::detail::for_each_n_detail::for_each_kernel", %"class.thrust::tuple" } | |
%"struct.thrust::system::cuda::detail::for_each_n_detail::for_each_kernel" = type { i8 } | |
%"class.thrust::tuple" = type { %"struct.thrust::detail::cons" } | |
%"struct.thrust::detail::cons" = type { %"struct.thrust::system::cuda::detail::bulk_::detail::cursor", %"struct.thrust::detail::cons.33" } | |
%"struct.thrust::system::cuda::detail::bulk_::detail::cursor" = type { %"struct.thrust::system::cuda::detail::bulk_::detail::cursor.27" } | |
%"struct.thrust::system::cuda::detail::bulk_::detail::cursor.27" = type { %"struct.thrust::system::cuda::detail::bulk_::detail::cursor.28" } | |
%"struct.thrust::system::cuda::detail::bulk_::detail::cursor.28" = type { %"struct.thrust::system::cuda::detail::bulk_::detail::cursor.29" } | |
%"struct.thrust::system::cuda::detail::bulk_::detail::cursor.29" = type { i8 } | |
%"struct.thrust::detail::cons.33" = type { %"class.thrust::device_ptr", %"struct.thrust::detail::cons.34" } | |
%"class.thrust::device_ptr" = type { %"class.thrust::pointer" } | |
%"class.thrust::pointer" = type { %"class.thrust::iterator_adaptor" } | |
%"class.thrust::iterator_adaptor" = type { i32* } | |
%"struct.thrust::detail::cons.34" = type { %"struct.thrust::detail::wrapped_function", %"struct.thrust::detail::cons.35" } | |
%"struct.thrust::detail::wrapped_function" = type { %"struct.thrust::detail::device_generate_functor" } | |
%"struct.thrust::detail::device_generate_functor" = type { %"struct.thrust::detail::fill_functor" } | |
%"struct.thrust::detail::fill_functor" = type { i32 } | |
%"struct.thrust::detail::cons.35" = type { i32 } | |
%"class.thrust::system::cuda::detail::bulk_::parallel_group" = type { %"class.thrust::system::cuda::detail::bulk_::detail::group_detail::group_base" } | |
%"class.thrust::system::cuda::detail::bulk_::detail::group_detail::group_base" = type { %"class.thrust::system::cuda::detail::bulk_::concurrent_group", i32, i32 } | |
%"class.thrust::system::cuda::detail::bulk_::concurrent_group" = type { %"class.thrust::system::cuda::detail::bulk_::parallel_group.36", i32 } | |
%"class.thrust::system::cuda::detail::bulk_::parallel_group.36" = type { %"class.thrust::system::cuda::detail::bulk_::detail::group_detail::group_base.37" } | |
%"class.thrust::system::cuda::detail::bulk_::detail::group_detail::group_base.37" = type { %"class.thrust::system::cuda::detail::bulk_::agent", i32, i32 } | |
%"class.thrust::system::cuda::detail::bulk_::agent" = type { i32 } | |
%"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.41" = type <{ %"class.thrust::system::cuda::detail::bulk_::detail::task_base.42", i32, [4 x i8] }> | |
%"class.thrust::system::cuda::detail::bulk_::detail::task_base.42" = type { %"class.thrust::system::cuda::detail::bulk_::detail::closure.43", %"class.thrust::system::cuda::detail::bulk_::parallel_group" } | |
%"class.thrust::system::cuda::detail::bulk_::detail::closure.43" = type { %"struct.thrust::system::cuda::detail::for_each_n_detail::for_each_kernel", %"class.thrust::tuple.44" } | |
%"class.thrust::tuple.44" = type { %"struct.thrust::detail::cons.45" } | |
%"struct.thrust::detail::cons.45" = type { %"struct.thrust::system::cuda::detail::bulk_::detail::cursor", %"struct.thrust::detail::cons.46" } | |
%"struct.thrust::detail::cons.46" = type { %"class.thrust::device_ptr", %"struct.thrust::detail::cons.47" } | |
%"struct.thrust::detail::cons.47" = type { %"struct.thrust::detail::wrapped_function", %"struct.thrust::detail::cons.48" } | |
%"struct.thrust::detail::cons.48" = type { i64 } | |
%"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.53" = type <{ %"class.thrust::system::cuda::detail::bulk_::detail::task_base.54", i32, [4 x i8] }> | |
%"class.thrust::system::cuda::detail::bulk_::detail::task_base.54" = type { %"class.thrust::system::cuda::detail::bulk_::detail::closure.55", %"class.thrust::system::cuda::detail::bulk_::parallel_group" } | |
%"class.thrust::system::cuda::detail::bulk_::detail::closure.55" = type { %"struct.thrust::system::cuda::detail::for_each_n_detail::for_each_kernel", %"class.thrust::tuple.56" } | |
%"class.thrust::tuple.56" = type { %"struct.thrust::detail::cons.57" } | |
%"struct.thrust::detail::cons.57" = type { %"struct.thrust::system::cuda::detail::bulk_::detail::cursor", %"struct.thrust::detail::cons.58" } | |
%"struct.thrust::detail::cons.58" = type { %"class.thrust::zip_iterator", %"struct.thrust::detail::cons.63" } | |
%"class.thrust::zip_iterator" = type { %"class.thrust::tuple.60" } | |
%"class.thrust::tuple.60" = type { %"struct.thrust::detail::cons.61" } | |
%"struct.thrust::detail::cons.61" = type { %"class.thrust::device_ptr", %"struct.thrust::detail::cons.62" } | |
%"struct.thrust::detail::cons.62" = type { i32* } | |
%"struct.thrust::detail::cons.63" = type { %"struct.thrust::detail::wrapped_function.64", %"struct.thrust::detail::cons.35" } | |
%"struct.thrust::detail::wrapped_function.64" = type { %"struct.thrust::detail::unary_transform_functor" } | |
%"struct.thrust::detail::unary_transform_functor" = type { %"struct.thrust::identity" } | |
%"struct.thrust::identity" = type { i8 } | |
%"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.75" = type <{ %"class.thrust::system::cuda::detail::bulk_::detail::task_base.76", i32, [4 x i8] }> | |
%"class.thrust::system::cuda::detail::bulk_::detail::task_base.76" = type { %"class.thrust::system::cuda::detail::bulk_::detail::closure.77", %"class.thrust::system::cuda::detail::bulk_::parallel_group" } | |
%"class.thrust::system::cuda::detail::bulk_::detail::closure.77" = type { %"struct.thrust::system::cuda::detail::for_each_n_detail::for_each_kernel", %"class.thrust::tuple.78" } | |
%"class.thrust::tuple.78" = type { %"struct.thrust::detail::cons.79" } | |
%"struct.thrust::detail::cons.79" = type { %"struct.thrust::system::cuda::detail::bulk_::detail::cursor", %"struct.thrust::detail::cons.80" } | |
%"struct.thrust::detail::cons.80" = type { %"class.thrust::zip_iterator", %"struct.thrust::detail::cons.81" } | |
%"struct.thrust::detail::cons.81" = type { %"struct.thrust::detail::wrapped_function.64", %"struct.thrust::detail::cons.82" } | |
%"struct.thrust::detail::cons.82" = type { i64 } | |
$_ZN6thrust6system4cuda6detail5bulk_6detail15launch_by_valueILj0ENS4_9cuda_taskINS3_14parallel_groupINS3_16concurrent_groupINS3_5agentILm1EEELm0EEELm0EEENS4_7closureINS2_17for_each_n_detail15for_each_kernelENS_5tupleINS4_6cursorILj0EEENS_10device_ptrIiEENS_6detail16wrapped_functionINSL_23device_generate_functorINSL_12fill_functorIiEEEEvEEjNS_9null_typeESS_SS_SS_SS_SS_EEEEEEEEvT0_ = comdat any | |
$_ZN6thrust6system4cuda6detail5bulk_6detail15launch_by_valueILj0ENS4_9cuda_taskINS3_14parallel_groupINS3_16concurrent_groupINS3_5agentILm1EEELm0EEELm0EEENS4_7closureINS2_17for_each_n_detail15for_each_kernelENS_5tupleINS4_6cursorILj0EEENS_10device_ptrIiEENS_6detail16wrapped_functionINSL_23device_generate_functorINSL_12fill_functorIiEEEEvEEmNS_9null_typeESS_SS_SS_SS_SS_EEEEEEEEvT0_ = comdat any | |
$_ZN6thrust6system4cuda6detail5bulk_6detail15launch_by_valueILj0ENS4_9cuda_taskINS3_14parallel_groupINS3_16concurrent_groupINS3_5agentILm1EEELm0EEELm0EEENS4_7closureINS2_17for_each_n_detail15for_each_kernelENS_5tupleINS4_6cursorILj0EEENS_12zip_iteratorINSG_INS_10device_ptrIiEEPiNS_9null_typeESN_SN_SN_SN_SN_SN_SN_EEEENS_6detail16wrapped_functionINSQ_23unary_transform_functorINS_8identityIiEEEEvEEjSN_SN_SN_SN_SN_SN_EEEEEEEEvT0_ = comdat any | |
$_ZN6thrust6system4cuda6detail5bulk_6detail15launch_by_valueILj0ENS4_9cuda_taskINS3_14parallel_groupINS3_16concurrent_groupINS3_5agentILm1EEELm0EEELm0EEENS4_7closureINS2_17for_each_n_detail15for_each_kernelENS_5tupleINS4_6cursorILj0EEENS_12zip_iteratorINSG_INS_10device_ptrIiEEPiNS_9null_typeESN_SN_SN_SN_SN_SN_SN_EEEENS_6detail16wrapped_functionINSQ_23unary_transform_functorINS_8identityIiEEEEvEElSN_SN_SN_SN_SN_SN_EEEEEEEEvT0_ = comdat any | |
@_ZN6thrust6system4cuda6detail5bulk_6detail12_GLOBAL__N_119s_on_chip_allocatorE = internal addrspace(3) global %"class.thrust::system::cuda::detail::bulk_::uninitialized" undef, align 8 | |
@_ZN6thrust6system4cuda6detail5bulk_6detail20s_data_segment_beginE = external addrspace(3) global [0 x i32], align 4 | |
@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 ()* @_ZL21__nvvm_reflect_anchorv to i8*)], section "llvm.metadata" | |
; Function Attrs: norecurse nounwind readnone | |
define internal i32 @_ZL21__nvvm_reflect_anchorv() #0 { | |
ret i32 0 | |
} | |
; Function Attrs: convergent nounwind | |
define void @_ZN6thrust6system4cuda6detail5bulk_6detail15launch_by_valueILj0ENS4_9cuda_taskINS3_14parallel_groupINS3_16concurrent_groupINS3_5agentILm1EEELm0EEELm0EEENS4_7closureINS2_17for_each_n_detail15for_each_kernelENS_5tupleINS4_6cursorILj0EEENS_10device_ptrIiEENS_6detail16wrapped_functionINSL_23device_generate_functorINSL_12fill_functorIiEEEEvEEjNS_9null_typeESS_SS_SS_SS_SS_EEEEEEEEvT0_(%"class.thrust::system::cuda::detail::bulk_::detail::cuda_task"* byval nocapture readonly align 8) local_unnamed_addr #1 comdat { | |
%2 = getelementptr inbounds %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task", %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task"* %0, i64 0, i32 0, i32 1, i32 0, i32 1 | |
%3 = load i32, i32* %2, align 8, !tbaa !7 | |
%4 = tail call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() #4, !range !13 | |
%5 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #4, !range !14 | |
%6 = getelementptr inbounds %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task", %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task"* %0, i64 0, i32 1 | |
%7 = load i32, i32* %6, align 8, !tbaa !15 | |
%8 = tail call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #4, !range !17 | |
%9 = add i32 %8, %7 | |
%10 = icmp eq i32 %5, 0 | |
br i1 %10, label %11, label %15 | |
; <label>:11: ; preds = %1 | |
%12 = getelementptr inbounds %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task", %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task"* %0, i64 0, i32 0, i32 1, i32 0, i32 0, i32 1 | |
%13 = load i32, i32* %12, align 4, !tbaa !18 | |
%14 = sext i32 %13 to i64 | |
store i32 0, i32* addrspacecast (i32 addrspace(3)* bitcast (%"class.thrust::system::cuda::detail::bulk_::uninitialized" addrspace(3)* @_ZN6thrust6system4cuda6detail5bulk_6detail12_GLOBAL__N_119s_on_chip_allocatorE to i32 addrspace(3)*) to i32*), align 8, !tbaa !19 | |
store i8* addrspacecast (i8 addrspace(3)* bitcast ([0 x i32] addrspace(3)* @_ZN6thrust6system4cuda6detail5bulk_6detail20s_data_segment_beginE to i8 addrspace(3)*) to i8*), i8** bitcast (i8* getelementptr (%"class.thrust::system::cuda::detail::bulk_::uninitialized", %"class.thrust::system::cuda::detail::bulk_::uninitialized"* addrspacecast (%"class.thrust::system::cuda::detail::bulk_::uninitialized" addrspace(3)* @_ZN6thrust6system4cuda6detail5bulk_6detail12_GLOBAL__N_119s_on_chip_allocatorE to %"class.thrust::system::cuda::detail::bulk_::uninitialized"*), i64 0, i32 0, i32 0, i64 8) to i8**), align 8, !tbaa !21 | |
store i64 %14, i64* bitcast (i8* getelementptr (%"class.thrust::system::cuda::detail::bulk_::uninitialized", %"class.thrust::system::cuda::detail::bulk_::uninitialized"* addrspacecast (%"class.thrust::system::cuda::detail::bulk_::uninitialized" addrspace(3)* @_ZN6thrust6system4cuda6detail5bulk_6detail12_GLOBAL__N_119s_on_chip_allocatorE to %"class.thrust::system::cuda::detail::bulk_::uninitialized"*), i64 0, i32 0, i32 0, i64 16) to i64*), align 8, !tbaa !25 | |
br label %15 | |
; <label>:15: ; preds = %11, %1 | |
tail call void @llvm.nvvm.barrier0() #4 | |
%16 = getelementptr inbounds %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task", %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task"* %0, i64 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0 | |
%17 = load i32, i32* %16, align 8 | |
%18 = getelementptr inbounds %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task", %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task"* %0, i64 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 1, i32 1, i32 0 | |
%19 = load i32, i32* %18, align 4 | |
%20 = mul nsw i32 %4, %3 | |
%21 = mul nsw i32 %9, %4 | |
%22 = add nsw i32 %21, %5 | |
%23 = icmp ult i32 %22, %19 | |
br i1 %23, label %24, label %37 | |
; <label>:24: ; preds = %15 | |
%25 = getelementptr inbounds %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task", %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task"* %0, i64 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0 | |
%26 = load i32*, i32** %25, align 8 | |
%27 = zext i32 %22 to i64 | |
%28 = getelementptr inbounds i32, i32* %26, i64 %27 | |
%29 = zext i32 %20 to i64 | |
br label %30 | |
; <label>:30: ; preds = %30, %24 | |
%31 = phi i32* [ %28, %24 ], [ %34, %30 ] | |
%32 = phi i32 [ %22, %24 ], [ %33, %30 ] | |
store i32 %17, i32* %31, align 4, !tbaa !26 | |
%33 = add i32 %32, %20 | |
%34 = getelementptr inbounds i32, i32* %31, i64 %29 | |
%35 = icmp ult i32 %33, %19 | |
br i1 %35, label %30, label %36 | |
; <label>:36: ; preds = %30 | |
br label %37 | |
; <label>:37: ; preds = %36, %15 | |
ret void | |
} | |
; Function Attrs: nounwind readnone | |
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() #2 | |
; Function Attrs: nounwind readnone | |
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #2 | |
; Function Attrs: nounwind readnone | |
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #2 | |
; Function Attrs: convergent nounwind | |
declare void @llvm.nvvm.barrier0() #3 | |
; Function Attrs: convergent nounwind | |
define void @_ZN6thrust6system4cuda6detail5bulk_6detail15launch_by_valueILj0ENS4_9cuda_taskINS3_14parallel_groupINS3_16concurrent_groupINS3_5agentILm1EEELm0EEELm0EEENS4_7closureINS2_17for_each_n_detail15for_each_kernelENS_5tupleINS4_6cursorILj0EEENS_10device_ptrIiEENS_6detail16wrapped_functionINSL_23device_generate_functorINSL_12fill_functorIiEEEEvEEmNS_9null_typeESS_SS_SS_SS_SS_EEEEEEEEvT0_(%"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.41"* byval nocapture readonly align 8) local_unnamed_addr #1 comdat { | |
%2 = getelementptr inbounds %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.41", %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.41"* %0, i64 0, i32 0, i32 1, i32 0, i32 1 | |
%3 = load i32, i32* %2, align 8, !tbaa !7 | |
%4 = tail call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() #4, !range !13 | |
%5 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #4, !range !14 | |
%6 = getelementptr inbounds %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.41", %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.41"* %0, i64 0, i32 1 | |
%7 = load i32, i32* %6, align 8, !tbaa !27 | |
%8 = tail call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #4, !range !17 | |
%9 = add i32 %8, %7 | |
%10 = icmp eq i32 %5, 0 | |
br i1 %10, label %11, label %15 | |
; <label>:11: ; preds = %1 | |
%12 = getelementptr inbounds %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.41", %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.41"* %0, i64 0, i32 0, i32 1, i32 0, i32 0, i32 1 | |
%13 = load i32, i32* %12, align 4, !tbaa !18 | |
%14 = sext i32 %13 to i64 | |
store i32 0, i32* addrspacecast (i32 addrspace(3)* bitcast (%"class.thrust::system::cuda::detail::bulk_::uninitialized" addrspace(3)* @_ZN6thrust6system4cuda6detail5bulk_6detail12_GLOBAL__N_119s_on_chip_allocatorE to i32 addrspace(3)*) to i32*), align 8, !tbaa !19 | |
store i8* addrspacecast (i8 addrspace(3)* bitcast ([0 x i32] addrspace(3)* @_ZN6thrust6system4cuda6detail5bulk_6detail20s_data_segment_beginE to i8 addrspace(3)*) to i8*), i8** bitcast (i8* getelementptr (%"class.thrust::system::cuda::detail::bulk_::uninitialized", %"class.thrust::system::cuda::detail::bulk_::uninitialized"* addrspacecast (%"class.thrust::system::cuda::detail::bulk_::uninitialized" addrspace(3)* @_ZN6thrust6system4cuda6detail5bulk_6detail12_GLOBAL__N_119s_on_chip_allocatorE to %"class.thrust::system::cuda::detail::bulk_::uninitialized"*), i64 0, i32 0, i32 0, i64 8) to i8**), align 8, !tbaa !21 | |
store i64 %14, i64* bitcast (i8* getelementptr (%"class.thrust::system::cuda::detail::bulk_::uninitialized", %"class.thrust::system::cuda::detail::bulk_::uninitialized"* addrspacecast (%"class.thrust::system::cuda::detail::bulk_::uninitialized" addrspace(3)* @_ZN6thrust6system4cuda6detail5bulk_6detail12_GLOBAL__N_119s_on_chip_allocatorE to %"class.thrust::system::cuda::detail::bulk_::uninitialized"*), i64 0, i32 0, i32 0, i64 16) to i64*), align 8, !tbaa !25 | |
br label %15 | |
; <label>:15: ; preds = %11, %1 | |
tail call void @llvm.nvvm.barrier0() #4 | |
%16 = getelementptr inbounds %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.41", %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.41"* %0, i64 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0 | |
%17 = load i32, i32* %16, align 8 | |
%18 = getelementptr inbounds %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.41", %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.41"* %0, i64 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 1, i32 1, i32 0 | |
%19 = load i64, i64* %18, align 8 | |
%20 = mul nsw i32 %4, %3 | |
%21 = sext i32 %20 to i64 | |
%22 = mul nsw i32 %9, %4 | |
%23 = add nsw i32 %22, %5 | |
%24 = sext i32 %23 to i64 | |
%25 = icmp ult i64 %24, %19 | |
br i1 %25, label %26, label %37 | |
; <label>:26: ; preds = %15 | |
%27 = getelementptr inbounds %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.41", %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.41"* %0, i64 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0 | |
%28 = load i32*, i32** %27, align 8 | |
%29 = getelementptr inbounds i32, i32* %28, i64 %24 | |
br label %30 | |
; <label>:30: ; preds = %30, %26 | |
%31 = phi i32* [ %34, %30 ], [ %29, %26 ] | |
%32 = phi i64 [ %33, %30 ], [ %24, %26 ] | |
store i32 %17, i32* %31, align 4, !tbaa !26 | |
%33 = add i64 %32, %21 | |
%34 = getelementptr inbounds i32, i32* %31, i64 %21 | |
%35 = icmp ult i64 %33, %19 | |
br i1 %35, label %30, label %36 | |
; <label>:36: ; preds = %30 | |
br label %37 | |
; <label>:37: ; preds = %36, %15 | |
ret void | |
} | |
; Function Attrs: convergent nounwind | |
define void @_ZN6thrust6system4cuda6detail5bulk_6detail15launch_by_valueILj0ENS4_9cuda_taskINS3_14parallel_groupINS3_16concurrent_groupINS3_5agentILm1EEELm0EEELm0EEENS4_7closureINS2_17for_each_n_detail15for_each_kernelENS_5tupleINS4_6cursorILj0EEENS_12zip_iteratorINSG_INS_10device_ptrIiEEPiNS_9null_typeESN_SN_SN_SN_SN_SN_SN_EEEENS_6detail16wrapped_functionINSQ_23unary_transform_functorINS_8identityIiEEEEvEEjSN_SN_SN_SN_SN_SN_EEEEEEEEvT0_(%"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.53"* byval nocapture readonly align 8) local_unnamed_addr #1 comdat { | |
%2 = getelementptr inbounds %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.53", %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.53"* %0, i64 0, i32 0, i32 1, i32 0, i32 1 | |
%3 = load i32, i32* %2, align 8, !tbaa !7 | |
%4 = tail call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() #4, !range !13 | |
%5 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #4, !range !14 | |
%6 = getelementptr inbounds %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.53", %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.53"* %0, i64 0, i32 1 | |
%7 = load i32, i32* %6, align 8, !tbaa !29 | |
%8 = tail call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #4, !range !17 | |
%9 = add i32 %8, %7 | |
%10 = icmp eq i32 %5, 0 | |
br i1 %10, label %11, label %15 | |
; <label>:11: ; preds = %1 | |
%12 = getelementptr inbounds %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.53", %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.53"* %0, i64 0, i32 0, i32 1, i32 0, i32 0, i32 1 | |
%13 = load i32, i32* %12, align 4, !tbaa !18 | |
%14 = sext i32 %13 to i64 | |
store i32 0, i32* addrspacecast (i32 addrspace(3)* bitcast (%"class.thrust::system::cuda::detail::bulk_::uninitialized" addrspace(3)* @_ZN6thrust6system4cuda6detail5bulk_6detail12_GLOBAL__N_119s_on_chip_allocatorE to i32 addrspace(3)*) to i32*), align 8, !tbaa !19 | |
store i8* addrspacecast (i8 addrspace(3)* bitcast ([0 x i32] addrspace(3)* @_ZN6thrust6system4cuda6detail5bulk_6detail20s_data_segment_beginE to i8 addrspace(3)*) to i8*), i8** bitcast (i8* getelementptr (%"class.thrust::system::cuda::detail::bulk_::uninitialized", %"class.thrust::system::cuda::detail::bulk_::uninitialized"* addrspacecast (%"class.thrust::system::cuda::detail::bulk_::uninitialized" addrspace(3)* @_ZN6thrust6system4cuda6detail5bulk_6detail12_GLOBAL__N_119s_on_chip_allocatorE to %"class.thrust::system::cuda::detail::bulk_::uninitialized"*), i64 0, i32 0, i32 0, i64 8) to i8**), align 8, !tbaa !21 | |
store i64 %14, i64* bitcast (i8* getelementptr (%"class.thrust::system::cuda::detail::bulk_::uninitialized", %"class.thrust::system::cuda::detail::bulk_::uninitialized"* addrspacecast (%"class.thrust::system::cuda::detail::bulk_::uninitialized" addrspace(3)* @_ZN6thrust6system4cuda6detail5bulk_6detail12_GLOBAL__N_119s_on_chip_allocatorE to %"class.thrust::system::cuda::detail::bulk_::uninitialized"*), i64 0, i32 0, i32 0, i64 16) to i64*), align 8, !tbaa !25 | |
br label %15 | |
; <label>:15: ; preds = %11, %1 | |
tail call void @llvm.nvvm.barrier0() #4 | |
%16 = getelementptr inbounds %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.53", %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.53"* %0, i64 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 1, i32 1, i32 0 | |
%17 = load i32, i32* %16, align 4 | |
%18 = mul nsw i32 %4, %3 | |
%19 = mul nsw i32 %9, %4 | |
%20 = add nsw i32 %19, %5 | |
%21 = zext i32 %20 to i64 | |
%22 = icmp ult i32 %20, %17 | |
br i1 %22, label %23, label %41 | |
; <label>:23: ; preds = %15 | |
%24 = getelementptr inbounds %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.53", %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.53"* %0, i64 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 | |
%25 = load i32*, i32** %24, align 8 | |
%26 = getelementptr inbounds %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.53", %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.53"* %0, i64 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, i32 1, i32 0 | |
%27 = load i32*, i32** %26, align 8 | |
%28 = getelementptr inbounds i32, i32* %27, i64 %21 | |
%29 = getelementptr inbounds i32, i32* %25, i64 %21 | |
%30 = zext i32 %18 to i64 | |
br label %31 | |
; <label>:31: ; preds = %31, %23 | |
%32 = phi i32* [ %28, %23 ], [ %38, %31 ] | |
%33 = phi i32* [ %29, %23 ], [ %37, %31 ] | |
%34 = phi i32 [ %20, %23 ], [ %36, %31 ] | |
%35 = load i32, i32* %33, align 4, !tbaa !26 | |
store i32 %35, i32* %32, align 4, !tbaa !26 | |
%36 = add i32 %34, %18 | |
%37 = getelementptr inbounds i32, i32* %33, i64 %30 | |
%38 = getelementptr inbounds i32, i32* %32, i64 %30 | |
%39 = icmp ult i32 %36, %17 | |
br i1 %39, label %31, label %40 | |
; <label>:40: ; preds = %31 | |
br label %41 | |
; <label>:41: ; preds = %40, %15 | |
ret void | |
} | |
; Function Attrs: convergent nounwind | |
define void @_ZN6thrust6system4cuda6detail5bulk_6detail15launch_by_valueILj0ENS4_9cuda_taskINS3_14parallel_groupINS3_16concurrent_groupINS3_5agentILm1EEELm0EEELm0EEENS4_7closureINS2_17for_each_n_detail15for_each_kernelENS_5tupleINS4_6cursorILj0EEENS_12zip_iteratorINSG_INS_10device_ptrIiEEPiNS_9null_typeESN_SN_SN_SN_SN_SN_SN_EEEENS_6detail16wrapped_functionINSQ_23unary_transform_functorINS_8identityIiEEEEvEElSN_SN_SN_SN_SN_SN_EEEEEEEEvT0_(%"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.75"* byval nocapture readonly align 8) local_unnamed_addr #1 comdat { | |
%2 = getelementptr inbounds %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.75", %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.75"* %0, i64 0, i32 0, i32 1, i32 0, i32 1 | |
%3 = load i32, i32* %2, align 8, !tbaa !7 | |
%4 = tail call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() #4, !range !13 | |
%5 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #4, !range !14 | |
%6 = getelementptr inbounds %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.75", %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.75"* %0, i64 0, i32 1 | |
%7 = load i32, i32* %6, align 8, !tbaa !31 | |
%8 = tail call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #4, !range !17 | |
%9 = add i32 %8, %7 | |
%10 = icmp eq i32 %5, 0 | |
br i1 %10, label %11, label %15 | |
; <label>:11: ; preds = %1 | |
%12 = getelementptr inbounds %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.75", %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.75"* %0, i64 0, i32 0, i32 1, i32 0, i32 0, i32 1 | |
%13 = load i32, i32* %12, align 4, !tbaa !18 | |
%14 = sext i32 %13 to i64 | |
store i32 0, i32* addrspacecast (i32 addrspace(3)* bitcast (%"class.thrust::system::cuda::detail::bulk_::uninitialized" addrspace(3)* @_ZN6thrust6system4cuda6detail5bulk_6detail12_GLOBAL__N_119s_on_chip_allocatorE to i32 addrspace(3)*) to i32*), align 8, !tbaa !19 | |
store i8* addrspacecast (i8 addrspace(3)* bitcast ([0 x i32] addrspace(3)* @_ZN6thrust6system4cuda6detail5bulk_6detail20s_data_segment_beginE to i8 addrspace(3)*) to i8*), i8** bitcast (i8* getelementptr (%"class.thrust::system::cuda::detail::bulk_::uninitialized", %"class.thrust::system::cuda::detail::bulk_::uninitialized"* addrspacecast (%"class.thrust::system::cuda::detail::bulk_::uninitialized" addrspace(3)* @_ZN6thrust6system4cuda6detail5bulk_6detail12_GLOBAL__N_119s_on_chip_allocatorE to %"class.thrust::system::cuda::detail::bulk_::uninitialized"*), i64 0, i32 0, i32 0, i64 8) to i8**), align 8, !tbaa !21 | |
store i64 %14, i64* bitcast (i8* getelementptr (%"class.thrust::system::cuda::detail::bulk_::uninitialized", %"class.thrust::system::cuda::detail::bulk_::uninitialized"* addrspacecast (%"class.thrust::system::cuda::detail::bulk_::uninitialized" addrspace(3)* @_ZN6thrust6system4cuda6detail5bulk_6detail12_GLOBAL__N_119s_on_chip_allocatorE to %"class.thrust::system::cuda::detail::bulk_::uninitialized"*), i64 0, i32 0, i32 0, i64 16) to i64*), align 8, !tbaa !25 | |
br label %15 | |
; <label>:15: ; preds = %11, %1 | |
tail call void @llvm.nvvm.barrier0() #4 | |
%16 = getelementptr inbounds %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.75", %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.75"* %0, i64 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 1, i32 1, i32 0 | |
%17 = load i64, i64* %16, align 8 | |
%18 = mul nsw i32 %4, %3 | |
%19 = sext i32 %18 to i64 | |
%20 = mul nsw i32 %9, %4 | |
%21 = add nsw i32 %20, %5 | |
%22 = sext i32 %21 to i64 | |
%23 = icmp slt i64 %22, %17 | |
br i1 %23, label %24, label %41 | |
; <label>:24: ; preds = %15 | |
%25 = getelementptr inbounds %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.75", %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.75"* %0, i64 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 | |
%26 = load i32*, i32** %25, align 8 | |
%27 = getelementptr inbounds %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.75", %"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.75"* %0, i64 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, i32 1, i32 0 | |
%28 = load i32*, i32** %27, align 8 | |
%29 = getelementptr inbounds i32, i32* %28, i64 %22 | |
%30 = getelementptr inbounds i32, i32* %26, i64 %22 | |
br label %31 | |
; <label>:31: ; preds = %31, %24 | |
%32 = phi i32* [ %38, %31 ], [ %29, %24 ] | |
%33 = phi i32* [ %37, %31 ], [ %30, %24 ] | |
%34 = phi i64 [ %36, %31 ], [ %22, %24 ] | |
%35 = load i32, i32* %33, align 4, !tbaa !26 | |
store i32 %35, i32* %32, align 4, !tbaa !26 | |
%36 = add nsw i64 %34, %19 | |
%37 = getelementptr inbounds i32, i32* %33, i64 %19 | |
%38 = getelementptr inbounds i32, i32* %32, i64 %19 | |
%39 = icmp slt i64 %36, %17 | |
br i1 %39, label %31, label %40 | |
; <label>:40: ; preds = %31 | |
br label %41 | |
; <label>:41: ; preds = %40, %15 | |
ret void | |
} | |
attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_30" "target-features"="-satom" "unsafe-fp-math"="false" "use-soft-float"="false" } | |
attributes #1 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_30" "target-features"="-satom" "unsafe-fp-math"="false" "use-soft-float"="false" } | |
attributes #2 = { nounwind readnone } | |
attributes #3 = { convergent nounwind } | |
attributes #4 = { nounwind } | |
!nvvm.annotations = !{!0, !1, !2, !3} | |
!llvm.module.flags = !{!4, !5} | |
!llvm.ident = !{!6} | |
!0 = !{void (%"class.thrust::system::cuda::detail::bulk_::detail::cuda_task"*)* @_ZN6thrust6system4cuda6detail5bulk_6detail15launch_by_valueILj0ENS4_9cuda_taskINS3_14parallel_groupINS3_16concurrent_groupINS3_5agentILm1EEELm0EEELm0EEENS4_7closureINS2_17for_each_n_detail15for_each_kernelENS_5tupleINS4_6cursorILj0EEENS_10device_ptrIiEENS_6detail16wrapped_functionINSL_23device_generate_functorINSL_12fill_functorIiEEEEvEEjNS_9null_typeESS_SS_SS_SS_SS_EEEEEEEEvT0_, !"kernel", i32 1} | |
!1 = !{void (%"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.41"*)* @_ZN6thrust6system4cuda6detail5bulk_6detail15launch_by_valueILj0ENS4_9cuda_taskINS3_14parallel_groupINS3_16concurrent_groupINS3_5agentILm1EEELm0EEELm0EEENS4_7closureINS2_17for_each_n_detail15for_each_kernelENS_5tupleINS4_6cursorILj0EEENS_10device_ptrIiEENS_6detail16wrapped_functionINSL_23device_generate_functorINSL_12fill_functorIiEEEEvEEmNS_9null_typeESS_SS_SS_SS_SS_EEEEEEEEvT0_, !"kernel", i32 1} | |
!2 = !{void (%"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.53"*)* @_ZN6thrust6system4cuda6detail5bulk_6detail15launch_by_valueILj0ENS4_9cuda_taskINS3_14parallel_groupINS3_16concurrent_groupINS3_5agentILm1EEELm0EEELm0EEENS4_7closureINS2_17for_each_n_detail15for_each_kernelENS_5tupleINS4_6cursorILj0EEENS_12zip_iteratorINSG_INS_10device_ptrIiEEPiNS_9null_typeESN_SN_SN_SN_SN_SN_SN_EEEENS_6detail16wrapped_functionINSQ_23unary_transform_functorINS_8identityIiEEEEvEEjSN_SN_SN_SN_SN_SN_EEEEEEEEvT0_, !"kernel", i32 1} | |
!3 = !{void (%"class.thrust::system::cuda::detail::bulk_::detail::cuda_task.75"*)* @_ZN6thrust6system4cuda6detail5bulk_6detail15launch_by_valueILj0ENS4_9cuda_taskINS3_14parallel_groupINS3_16concurrent_groupINS3_5agentILm1EEELm0EEELm0EEENS4_7closureINS2_17for_each_n_detail15for_each_kernelENS_5tupleINS4_6cursorILj0EEENS_12zip_iteratorINSG_INS_10device_ptrIiEEPiNS_9null_typeESN_SN_SN_SN_SN_SN_SN_EEEENS_6detail16wrapped_functionINSQ_23unary_transform_functorINS_8identityIiEEEEvEElSN_SN_SN_SN_SN_SN_EEEEEEEEvT0_, !"kernel", i32 1} | |
!4 = !{i32 4, !"nvvm-reflect-ftz", i32 0} | |
!5 = !{i32 1, !"PIC Level", i32 2} | |
!6 = !{!"clang version 4.0.0 (tags/RELEASE_400/final)"} | |
!7 = !{!8, !10, i64 16} | |
!8 = !{!"_ZTSN6thrust6system4cuda6detail5bulk_6detail12group_detail10group_baseINS3_16concurrent_groupINS3_5agentILm1EEELm0EEELm0EEE", !9, i64 0, !10, i64 16, !10, i64 20} | |
!9 = !{!"_ZTSN6thrust6system4cuda6detail5bulk_16concurrent_groupINS3_5agentILm1EEELm0EEE", !10, i64 12} | |
!10 = !{!"int", !11, i64 0} | |
!11 = !{!"omnipotent char", !12, i64 0} | |
!12 = !{!"Simple C++ TBAA"} | |
!13 = !{i32 1, i32 1025} | |
!14 = !{i32 0, i32 1024} | |
!15 = !{!16, !10, i64 56} | |
!16 = !{!"_ZTSN6thrust6system4cuda6detail5bulk_6detail9cuda_taskINS3_14parallel_groupINS3_16concurrent_groupINS3_5agentILm1EEELm0EEELm0EEENS4_7closureINS2_17for_each_n_detail15for_each_kernelENS_5tupleINS4_6cursorILj0EEENS_10device_ptrIiEENS_6detail16wrapped_functionINSK_23device_generate_functorINSK_12fill_functorIiEEEEvEEjNS_9null_typeESR_SR_SR_SR_SR_EEEEEE", !10, i64 56} | |
!17 = !{i32 0, i32 2147483647} | |
!18 = !{!9, !10, i64 12} | |
!19 = !{!20, !10, i64 0} | |
!20 = !{!"_ZTSN6thrust6system4cuda6detail5bulk_6detail27singleton_on_chip_allocator5mutexE", !10, i64 0} | |
!21 = !{!22, !23, i64 0} | |
!22 = !{!"_ZTSN6thrust6system4cuda6detail5bulk_6detail2osE", !23, i64 0, !24, i64 8} | |
!23 = !{!"any pointer", !11, i64 0} | |
!24 = !{!"long", !11, i64 0} | |
!25 = !{!22, !24, i64 8} | |
!26 = !{!10, !10, i64 0} | |
!27 = !{!28, !10, i64 64} | |
!28 = !{!"_ZTSN6thrust6system4cuda6detail5bulk_6detail9cuda_taskINS3_14parallel_groupINS3_16concurrent_groupINS3_5agentILm1EEELm0EEELm0EEENS4_7closureINS2_17for_each_n_detail15for_each_kernelENS_5tupleINS4_6cursorILj0EEENS_10device_ptrIiEENS_6detail16wrapped_functionINSK_23device_generate_functorINSK_12fill_functorIiEEEEvEEmNS_9null_typeESR_SR_SR_SR_SR_EEEEEE", !10, i64 64} | |
!29 = !{!30, !10, i64 64} | |
!30 = !{!"_ZTSN6thrust6system4cuda6detail5bulk_6detail9cuda_taskINS3_14parallel_groupINS3_16concurrent_groupINS3_5agentILm1EEELm0EEELm0EEENS4_7closureINS2_17for_each_n_detail15for_each_kernelENS_5tupleINS4_6cursorILj0EEENS_12zip_iteratorINSF_INS_10device_ptrIiEEPiNS_9null_typeESM_SM_SM_SM_SM_SM_SM_EEEENS_6detail16wrapped_functionINSP_23unary_transform_functorINS_8identityIiEEEEvEEjSM_SM_SM_SM_SM_SM_EEEEEE", !10, i64 64} | |
!31 = !{!32, !10, i64 72} | |
!32 = !{!"_ZTSN6thrust6system4cuda6detail5bulk_6detail9cuda_taskINS3_14parallel_groupINS3_16concurrent_groupINS3_5agentILm1EEELm0EEELm0EEENS4_7closureINS2_17for_each_n_detail15for_each_kernelENS_5tupleINS4_6cursorILj0EEENS_12zip_iteratorINSF_INS_10device_ptrIiEEPiNS_9null_typeESM_SM_SM_SM_SM_SM_SM_EEEENS_6detail16wrapped_functionINSP_23unary_transform_functorINS_8identityIiEEEEvEElSM_SM_SM_SM_SM_SM_EEEEEE", !10, i64 72} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment