Created
November 30, 2018 12:10
-
-
Save JamesMenetrey/729b9e6488928ddf5aa69b5b5ec32c97 to your computer and use it in GitHub Desktop.
Reduction tools in CUDA
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma once | |
#include "cudaTools.h" | |
#include "Device.h" | |
#include "Indice1D.h" | |
#include "Indice2D.h" | |
/**
 * Additive reduction helpers meant to be called from device code.
 *
 * Contract (as established by the implementation below):
 *  - Every thread of the block must call reductionADD(): the implementation
 *    executes __syncthreads() barriers, which all threads of the block must reach.
 *  - tabSM must hold one partial value per thread of the block, i.e.
 *    Indice2D::nbThreadBlock() elements indexed by Indice2D::tidLocal().
 *    NOTE(review): presumably a __shared__ array, judging by the name - confirm
 *    against the callers.
 *  - The content of tabSM is overwritten during the reduction.
 *  - ptrDevResultatGM is only ever accumulated into (atomicAdd); the caller is
 *    responsible for initialising it (e.g. to zero) before the kernel runs.
 *  - T must be a type for which an atomicAdd overload exists
 *    (e.g. int, unsigned int, float; double requires SM60+).
 */
class ReductionAddTools
{
public:

    /**
     * Sums the per-thread values stored in tabSM and adds the block total to
     * *ptrDevResultatGM.
     *
     * @param tabSM            per-block array, one element per thread (destroyed)
     * @param ptrDevResultatGM accumulator receiving the total of each block
     */
    template <typename T>
    __device__ static void reductionADD(T* tabSM, T* ptrDevResultatGM)
    {
        reductionIntraBlock<T>(tabSM);
        reductionInterBlock<T>(tabSM, ptrDevResultatGM);
    }

private:

    /**
     * One folding step: each of the first nbPairs threads adds the element
     * located halfSize slots further into its own slot.
     *
     * @param tabSM    array being reduced
     * @param halfSize distance between the two elements of a pair
     * @param nbPairs  number of pairs to add (<= halfSize)
     */
    template <typename T>
    __device__ static void squash(T* tabSM, int halfSize, int nbPairs)
    {
        const int TID_LOCALE = Indice2D::tidLocal();

        if (TID_LOCALE < nbPairs)
        {
            tabSM[TID_LOCALE] = tabSM[TID_LOCALE] + tabSM[TID_LOCALE + halfSize];
        }
    }

    /**
     * Reduces the nbThreadBlock() elements of tabSM into tabSM[0].
     *
     * Works for any block size: when the current size is odd, the middle
     * element is left in place and folded during a later step instead of
     * being dropped.
     */
    template <typename T>
    __device__ static void reductionIntraBlock(T* tabSM)
    {
        // Make sure every thread has finished writing its own slot before any
        // thread reads a slot that belongs to another thread.
        __syncthreads();

        int size = Indice2D::nbThreadBlock();

        while (size > 1)
        {
            // Ceil division: with an odd size the element at index halfSize - 1
            // has no partner and simply stays where it is for the next round.
            const int halfSize = (size + 1) / 2;

            // Writers touch [0, size / 2), readers touch [halfSize, size):
            // the two ranges never overlap, so one barrier per step is enough.
            squash(tabSM, halfSize, size / 2);
            __syncthreads();

            size = halfSize;
        }
    }

    /**
     * Adds the block total (tabSM[0]) to *ptrDevResultatGM.
     * Only the thread whose local id is 0 performs the atomicAdd, so there is
     * a single atomic operation per block.
     */
    template <typename T>
    __device__ static void reductionInterBlock(T* tabSM,
            T* ptrDevResultatGM)
    {
        const int TID_LOCALE = Indice2D::tidLocal();

        if (TID_LOCALE == 0)
        {
            atomicAdd(ptrDevResultatGM, tabSM[TID_LOCALE]);
        }
    }
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment