Skip to content

Instantly share code, notes, and snippets.

@JamesMenetrey
Created November 30, 2018 12:10
Show Gist options
  • Save JamesMenetrey/729b9e6488928ddf5aa69b5b5ec32c97 to your computer and use it in GitHub Desktop.
Save JamesMenetrey/729b9e6488928ddf5aa69b5b5ec32c97 to your computer and use it in GitHub Desktop.
Reduction tools in CUDA
#pragma once
#include "cudaTools.h"
#include "Device.h"
#include "Indice1D.h"
#include "Indice2D.h"
/**
 * Device-side helpers that sum one value per thread of a block and then
 * accumulate the per-block total into a single result cell.
 *
 * Usage contract (applies to reductionADD):
 *  - It must be called by EVERY thread of the block, outside of any divergent
 *    branch, because it executes __syncthreads() internally.
 *  - tabSM must hold one element per thread of the block; its content is
 *    overwritten by the reduction (only tabSM[0] is meaningful afterwards).
 *    Presumably it lives in shared memory, as the name suggests -- confirm
 *    against the calling kernel.
 *  - *ptrDevResultatGM is accumulated into with atomicAdd, so it must be
 *    initialised (typically to zero) before the kernel is launched.
 *  - T must be a type for which CUDA provides an atomicAdd overload
 *    (e.g. int, unsigned int, float; double requires SM60+).
 *
 * NOTE(review): assumes Indice2D::tidLocal() returns the linear index of the
 * thread inside its block and Indice2D::nbThreadBlock() the number of threads
 * per block -- confirm against Indice2D.h.
 */
class ReductionAddTools
{
public:
    /**
     * Sums tabSM over the block, then adds the block total to *ptrDevResultatGM.
     *
     * @param tabSM             per-block working array, one entry per thread
     *                          (clobbered by the call)
     * @param ptrDevResultatGM  cell receiving the sum of all block totals
     */
    template <typename T>
    __device__ void reductionADD(T* tabSM, T* ptrDevResultatGM)
    {
        reductionIntraBlock<T>(tabSM);
        reductionInterBlock<T>(tabSM, ptrDevResultatGM);
    }

private:
    /**
     * One folding step: adds the upper part [halfSize, size) of the live range
     * onto the lower part [0, size - halfSize).
     *
     * Within a step, written indices (< size - halfSize) and read partner
     * indices (>= halfSize) never overlap, so no barrier is needed inside.
     *
     * @param tabSM     working array being reduced
     * @param halfSize  offset between an element and its partner
     * @param size      number of elements still live before this step
     */
    template <typename T>
    __device__ static void squash(T* tabSM, int halfSize, int size)
    {
        const int TID_LOCALE = Indice2D::tidLocal();
        const int PARTNER = TID_LOCALE + halfSize;

        // The second test skips the middle element left without a partner
        // when size is odd; it simply survives into the next step.
        if (TID_LOCALE < halfSize && PARTNER < size)
        {
            tabSM[TID_LOCALE] += tabSM[PARTNER];
        }
    }

    /**
     * Reduces tabSM in place so that tabSM[0] ends up holding the sum of the
     * block's entries. Works for any thread count, not only powers of two.
     */
    template <typename T>
    __device__ static void reductionIntraBlock(T* tabSM)
    {
        // Make every thread's contribution visible before the first fold, so
        // correctness does not depend on the caller having synchronised.
        __syncthreads();

        int size = Indice2D::nbThreadBlock();
        while (size > 1)
        {
            // Ceiling half: with an odd size the unpaired middle element is kept
            // instead of being dropped (which plain size / 2 would do).
            const int halfSize = (size + 1) / 2;
            squash(tabSM, halfSize, size);

            // Barrier between steps: the next fold reads what this one wrote.
            __syncthreads();
            size = halfSize;
        }
    }

    /**
     * Adds the block total (tabSM[0]) to the result cell. Only one thread per
     * block issues the atomic, so contention is one atomicAdd per block.
     */
    template <typename T>
    __device__ static void reductionInterBlock(T* tabSM,
                                               T* ptrDevResultatGM)
    {
        const int TID_LOCALE = Indice2D::tidLocal();
        if (TID_LOCALE == 0)
        {
            atomicAdd(ptrDevResultatGM, tabSM[0]);
        }
    }
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment