Last active
October 23, 2022 03:21
-
-
Save neoblizz/12b4934543cb7e4b47611aaad8156afd to your computer and use it in GitHub Desktop.
C++ wrapper around cooperative groups launch API.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma once | |
// Includes CUDA | |
#include <cuda_runtime.h>
#include <cooperative_groups.h>
#include <memory>
#include <utility>
namespace cg = cooperative_groups; | |
// Helper functions. | |
/**
 * @brief Recursion terminator for for_each_argument_address: no arguments are
 * left, so nothing is written to the output array.
 */
inline void for_each_argument_address(void**) {}

/**
 * @brief Writes the address of every argument, in the order given, into
 * consecutive slots of collected_addresses.
 *
 * The addresses refer to the caller's own objects (nothing is copied), so
 * they are only valid for as long as those objects are alive.
 *
 * @tparam arg_t type of the first argument
 * @tparam args_t types of the remaining arguments (variadic)
 * @param collected_addresses output array; must have room for one pointer per
 * argument
 * @param arg first argument whose address is stored in slot 0
 * @param args remaining arguments, stored in slots 1, 2, ...
 */
template <typename arg_t, typename... args_t>
inline void for_each_argument_address(void** collected_addresses,
                                      arg_t&& arg,
                                      args_t&&... args) {
  // std::addressof yields the real object address even when the argument's
  // type overloads unary operator&, which a plain `&arg` would invoke instead.
  // The const_cast strips constness only to fit the void** output array.
  collected_addresses[0] =
      const_cast<void*>(static_cast<const void*>(::std::addressof(arg)));
  for_each_argument_address(collected_addresses + 1,
                            ::std::forward<args_t>(args)...);
}
/** | |
* @brief Launch a given kernel using cudaLaunchCooperativeKernel API for | |
* Cooperative Groups (CG). This is a C++ wrapper that makes the C-based API of | |
* CG more accessible. See the example below for use: | |
* | |
* @note For an example use see the commented code below. | |
* @note GodBolt.org link: https://tinyurl.com/cgeg289hw | |
* | |
* @tparam func_t type of kernel | |
* @tparam args_t types of the arguments (variadic) | |
* @param stream cuda stream | |
* @param f function name/identifier | |
* @param block_dimensions block dimension (dim3) | |
* @param grid_dimensions grid dimension (dim3) | |
* @param shared_memory_bytes allocated dynamic shared memory in bytes | |
* @param args all the arguments to the function f | |
*/ | |
template <typename func_t, typename... args_t> | |
void launch_cooperative(cudaStream_t& stream, | |
const func_t& f, | |
dim3 block_dimensions, | |
dim3 grid_dimensions, | |
std::size_t shared_memory_bytes, | |
args_t&&... args) { | |
constexpr const auto non_zero_num_params = | |
sizeof...(args_t) == 0 ? 1 : sizeof...(args_t); | |
void* argument_ptrs[non_zero_num_params]; | |
for_each_argument_address(argument_ptrs, ::std::forward<args_t>(args)...); | |
cudaLaunchCooperativeKernel<func_t>( | |
&f, grid_dimensions, block_dimensions, | |
argument_ptrs, shared_memory_bytes, stream); | |
} | |
// EOF |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uncommented example code, which basically uses the functions above to launch a simple kernel. https://tinyurl.com/cgeg289hw