Skip to content

Instantly share code, notes, and snippets.

@elfrank
Created January 17, 2013 18:08
Show Gist options
  • Save elfrank/4558120 to your computer and use it in GitHub Desktop.
Save elfrank/4558120 to your computer and use it in GitHub Desktop.
// Read-only inputs bound to t0/t1. Neither buffer is referenced by the
// entry points visible in this file.
StructuredBuffer<int> g_Permutations : register(t0);
StructuredBuffer<int> g_List : register(t1);
// Working buffer: sorted in place by the SortChunks* kernels and read by MergeChunks.
RWStructuredBuffer<int> g_ChunksBuffer : register (u0);
// Destination buffer written by MergeChunks.
RWStructuredBuffer<int> g_MergeBuffer : register (u1);
// GROUP_SIZE_X/Y feed the row stride and the chunk count used by the kernels.
// NOTE(review): presumably the number of thread groups dispatched per axis - confirm against the host code.
#define GROUP_SIZE_X 2
#define GROUP_SIZE_Y 2
// Threads per group along each axis, used in the [numthreads] attributes.
#define NUM_THREADS_X 2
#define NUM_THREADS_Y 2
// Number of consecutive elements that make up one chunk.
#define TILE_SIZE 4
/*[numthreads( NUM_THREADS_X, NUM_THREADS_Y, 1)]
void SortChunksEven( uint3 DTid : SV_DispatchThreadID )
{
}
[numthreads( NUM_THREADS_X, NUM_THREADS_Y, 1)]
void SortChunksOdd( uint3 DTid : SV_DispatchThreadID )
{
}*/
// SortChunks2
// Runs one even phase followed by one odd phase of neighbouring
// compare-exchange on g_ChunksBuffer. Each thread owns the flat element index
// DTid.y*rowPitch + DTid.x and, when eligible, orders the pair
// (index, index+1) so the smaller value ends up at the lower index.
// Pairs never straddle a boundary of TILE_SIZE*TILE_SIZE elements.
// A single invocation performs only one even/odd round.
// NOTE(review): presumably the host dispatches this repeatedly to finish the sort - confirm.
[numthreads( NUM_THREADS_X*NUM_THREADS_X, NUM_THREADS_Y*NUM_THREADS_Y, 1)]
void SortChunks2( uint3 DTid : SV_DispatchThreadID )
{
    const int rowPitch = (NUM_THREADS_X*NUM_THREADS_X) * (GROUP_SIZE_X/2);
    const int index = DTid.y*rowPitch + DTid.x;
    const int next = index + 1;

    // parity selects the phase this thread takes part in; hasPartner is false
    // for the last element of a TILE_SIZE*TILE_SIZE span.
    const int parity = index % 2;
    const bool hasPartner = (next % (TILE_SIZE*TILE_SIZE)) > 0;

    // Even phase: pairs (0,1), (2,3), ...
    if( parity == 0 && hasPartner )
    {
        const int a = g_ChunksBuffer[index];
        const int b = g_ChunksBuffer[next];
        g_ChunksBuffer[index] = min(a, b);
        g_ChunksBuffer[next] = max(a, b);
    }

    // Make the even-phase writes visible to the other threads of this group
    // before the odd phase reads them.
    DeviceMemoryBarrierWithGroupSync();

    // Odd phase: pairs (1,2), (3,4), ...
    if( parity == 1 && hasPartner )
    {
        const int a = g_ChunksBuffer[index];
        const int b = g_ChunksBuffer[next];
        g_ChunksBuffer[index] = min(a, b);
        g_ChunksBuffer[next] = max(a, b);
    }
}
// Orders the pair (first, first+1) of g_ChunksBuffer so that the smaller
// value is stored at the lower index.
void SortChunks_OrderPair( int first )
{
    const int a = g_ChunksBuffer[first];
    const int b = g_ChunksBuffer[first+1];
    g_ChunksBuffer[first] = min(a, b);
    g_ChunksBuffer[first+1] = max(a, b);
}

// SortChunks
// Runs one even phase followed by one odd phase of neighbouring
// compare-exchange inside every TILE_SIZE-element chunk of g_ChunksBuffer.
// Each thread owns the flat element index DTid.y*rowPitch + DTid.x; the last
// element of a chunk never pairs with the first element of the next chunk.
// A single invocation performs only one even/odd round.
// NOTE(review): presumably the host dispatches this repeatedly to finish the sort - confirm.
// NOTE(review): with NUM_THREADS_X == 2 the odd-phase pair (DTid.x == 1,
// DTid.x == 2) spans two thread groups, while the barrier below only
// synchronizes threads of one group - confirm how the host orders the phases.
[numthreads( NUM_THREADS_X, NUM_THREADS_Y, 1)]
void SortChunks( uint3 DTid : SV_DispatchThreadID )
{
    const int rowPitch = NUM_THREADS_X * GROUP_SIZE_X;
    const int index = DTid.y*rowPitch + DTid.x;

    // False only for the last element of a chunk, which has no right-hand
    // partner inside the same chunk.
    const bool hasPartner = ((index+1) % TILE_SIZE) > 0;
    const int parity = index % 2;

    // Even phase: pairs (0,1), (2,3) of each chunk.
    if( parity == 0 )
    {
        if( hasPartner )
        {
            SortChunks_OrderPair( index );
        }
    }

    DeviceMemoryBarrierWithGroupSync();

    // Odd phase: pair (1,2) of each chunk.
    if( parity == 1 )
    {
        if( hasPartner )
        {
            SortChunks_OrderPair( index );
        }
    }
}
// Binary-searches the chunk g_ChunksBuffer[first .. first+TILE_SIZE) and
// returns how many of its elements are smaller than `value` (or smaller than
// or equal to `value` when countEqual is true). The chunk must already be in
// ascending order. The search range is half-open, so the indices never leave
// the chunk regardless of TILE_SIZE.
int MergeChunks_CountBelow( int first, int value, bool countEqual )
{
    int lo = first;
    int hi = first + TILE_SIZE;
    [allow_uav_condition]while( lo < hi )
    {
        const int mid = (lo + hi) / 2;
        const int probe = g_ChunksBuffer[mid];
        const bool isBelow = countEqual ? (probe <= value) : (probe < value);
        if( isBelow )
        {
            lo = mid + 1;
        }
        else
        {
            hi = mid;
        }
    }
    return lo - first;
}

// MergeChunks
// Rank-based merge of the sorted chunks in g_ChunksBuffer into g_MergeBuffer.
// Each thread owns one element (flat index DTid.y*stride + DTid.x), computes
// the element's final position as
//   (position inside its own chunk) + (number of elements in every other
//   chunk that must precede it)
// and writes the element to that position in g_MergeBuffer.
//
// Ties are broken by chunk order so that every element gets a distinct
// destination: equal values in earlier chunks are counted as preceding the
// element, equal values in later chunks are not. Without this, duplicate
// values in different chunks would be written to the same slot.
//
// Every chunk is assumed to be sorted in ascending order on entry.
[numthreads( NUM_THREADS_X, NUM_THREADS_Y, 1)]
void MergeChunks( uint3 DTid : SV_DispatchThreadID )
{
    const int stride = NUM_THREADS_X * GROUP_SIZE_X;
    const int offset = DTid.y*stride + DTid.x;
    const int ownChunk = offset / TILE_SIZE;
    const int chunkCount = GROUP_SIZE_X*GROUP_SIZE_Y;
    const int value = g_ChunksBuffer[offset];

    // Elements of the same chunk that precede this one.
    int rank = offset % TILE_SIZE;
    int c;

    // Earlier chunks: equal values go first.
    for( c = 0; c < ownChunk; c++ )
    {
        rank += MergeChunks_CountBelow( c*TILE_SIZE, value, true );
    }

    // Later chunks: only strictly smaller values go first.
    for( c = ownChunk+1; c < chunkCount; c++ )
    {
        rank += MergeChunks_CountBelow( c*TILE_SIZE, value, false );
    }

    g_MergeBuffer[rank] = value;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment