Created
March 16, 2017 20:10
-
-
Save killeent/de9ee574a5e7e03c5f38f958387181a1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Block-wide reduction over the values stored in `smem` (presumably shared
// memory, going by the name -- confirm against the caller).
//
// Each thread first folds up to N entries of `smem` into a private partial
// result, visiting indices threadIdx.x, threadIdx.x + blockDim.x, ...,
// threadIdx.x + (N - 1) * blockDim.x. Any index at or beyond `numVals`
// contributes `init` instead of a buffer read. The per-thread partials are
// then handed to reduceBlock (defined elsewhere), which produces the value
// returned here.
//
//   smem      buffer holding the input values; also forwarded to reduceBlock
//   numVals   number of valid entries in `smem`
//   reduceOp  binary functor invoked as reduceOp(accumulated, next)
//   init      value substituted for out-of-range entries
//
// NOTE(review): no barrier is issued before `smem` is read, so the caller
// presumably synchronizes after filling the buffer -- verify at call sites.
template <typename T, typename ReduceOp, int N>
__device__ T reduceBlockN(T *smem,
                          int numVals,
                          ReduceOp reduceOp,
                          T init) {
  const unsigned int tid = threadIdx.x;
  const unsigned int threadsInBlock = blockDim.x;

  // The first entry of this thread's strided sequence seeds the partial.
  T partial = tid < numVals ? smem[tid] : init;

#pragma unroll
  for (int step = 1; step < N; ++step) {
    const int slot = tid + (step * threadsInBlock);
    T operand = slot < numVals ? smem[slot] : init;
    partial = reduceOp(partial, operand);
  }

  // At most one partial per thread remains, so the count passed on is capped
  // at the block width.
  const unsigned int partialCount =
      threadsInBlock < numVals ? threadsInBlock : numVals;

  return reduceBlock<T, ReduceOp>(smem, partialCount, partial, reduceOp, init);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment