Created
October 14, 2018 23:46
-
-
Save rharriso/df5854fea77b91cb7af382cf3f0b2fae to your computer and use it in GitHub Desktop.
Looking at Thrust: Thrust Prof Output
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
==24803== NVPROF is profiling process 24803, command: ./main-thrust | |
==24803== Profiling application: ./main-thrust | |
==24803== Profiling result: | |
Type Time(%) Time Calls Avg Min Max Name | |
GPU activities: 74.98% 157.91ms 2 78.954ms 77.478ms 80.430ms void thrust::cuda_cub::core::_kernel_agent<thrust::cuda_cub::__parallel_for::ParallelForAgent<thrust::cuda_cub::__transform::unary_transform_f<thrust::counting_iterator<unsigned int, thrust::use_default, thrust::use_default, thrust::use_default>, thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::cuda_cub::__transform::no_stencil_tag, initRandomPrg, thrust::cuda_cub::__transform::always_true_predicate>, long>, thrust::cuda_cub::__transform::unary_transform_f<thrust::counting_iterator<unsigned int, thrust::use_default, thrust::use_default, thrust::use_default>, thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::cuda_cub::__transform::no_stencil_tag, initRandomPrg, thrust::cuda_cub::__transform::always_true_predicate>, long>(thrust::use_default, thrust::use_default) | |
22.78% 47.984ms 10 4.7984ms 4.7934ms 4.8072ms void thrust::cuda_cub::core::_kernel_agent<thrust::cuda_cub::__parallel_for::ParallelForAgent<thrust::cuda_cub::__transform::binary_transform_f<thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::cuda_cub::__transform::no_stencil_tag, addPrg, thrust::cuda_cub::__transform::always_true_predicate>, long>, thrust::cuda_cub::__transform::binary_transform_f<thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::cuda_cub::__transform::no_stencil_tag, addPrg, thrust::cuda_cub::__transform::always_true_predicate>, long>(thrust::device_ptr<float>, thrust::detail::normal_iterator<thrust::device_ptr<float>>) | |
2.24% 4.7185ms 3 1.5728ms 1.5658ms 1.5834ms void thrust::cuda_cub::core::_kernel_agent<thrust::cuda_cub::__parallel_for::ParallelForAgent<thrust::cuda_cub::__uninitialized_fill::functor<thrust::device_ptr<float>, float>, unsigned long>, thrust::cuda_cub::__uninitialized_fill::functor<thrust::device_ptr<float>, float>, unsigned long>(thrust::device_ptr<float>, float) | |
API calls: 58.03% 216.79ms 3 72.265ms 2.3659ms 210.11ms cudaFree | |
41.66% 155.63ms 3 51.877ms 310.45us 155.00ms cudaMalloc | |
0.14% 525.39us 94 5.5890us 600ns 211.77us cuDeviceGetAttribute | |
0.08% 287.94us 1 287.94us 287.94us 287.94us cuDeviceTotalMem | |
0.04% 131.16us 15 8.7440us 6.1100us 25.190us cudaLaunch | |
0.02% 75.650us 1 75.650us 75.650us 75.650us cuDeviceGetName | |
0.02% 72.710us 15 4.8470us 4.4100us 7.9100us cudaFuncGetAttributes | |
0.00% 10.040us 15 669ns 560ns 1.4200us cudaGetDevice | |
0.00% 9.3600us 15 624ns 530ns 1.2200us cudaDeviceGetAttribute | |
0.00% 5.8600us 30 195ns 140ns 480ns cudaSetupArgument | |
0.00% 4.9900us 30 166ns 130ns 330ns cudaPeekAtLastError | |
0.00% 4.1000us 3 1.3660us 650ns 2.5200us cuDeviceGetCount | |
0.00% 3.6800us 15 245ns 180ns 780ns cudaConfigureCall | |
0.00% 2.1800us 2 1.0900us 640ns 1.5400us cuDeviceGet |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment