Created
January 17, 2013 18:08
-
-
Save elfrank/4558120 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Resource bindings and compile-time layout constants shared by the chunk
// sorting and merging compute kernels.

// Read-only integer inputs bound to SRV slots t0 and t1.
StructuredBuffer<int> g_Permutations : register(t0);
StructuredBuffer<int> g_List : register(t1);

// Read/write integer buffers bound to UAV slots u0 and u1.
// g_ChunksBuffer is compare-exchanged in place by the SortChunks* kernels and
// read by MergeChunks, which scatters its elements into g_MergeBuffer.
RWStructuredBuffer<int> g_ChunksBuffer : register(u0);
RWStructuredBuffer<int> g_MergeBuffer : register(u1);

// Used together with NUM_THREADS_X to compute the row stride of the flattened
// element index, and (X*Y) as the number of chunks visited by MergeChunks.
// NOTE(review): presumably the number of thread groups dispatched along each
// axis -- confirm against the host-side Dispatch() call.
#define GROUP_SIZE_X 2
#define GROUP_SIZE_Y 2

// Threads per group along X and Y, as used in the [numthreads] attributes.
#define NUM_THREADS_X 2
#define NUM_THREADS_Y 2

// Number of consecutive buffer elements that form one chunk.
#define TILE_SIZE 4
/*[numthreads( NUM_THREADS_X, NUM_THREADS_Y, 1)]
void SortChunksEven( uint3 DTid : SV_DispatchThreadID )
{
}
[numthreads( NUM_THREADS_X, NUM_THREADS_Y, 1)]
void SortChunksOdd( uint3 DTid : SV_DispatchThreadID )
{
}*/
// SortChunks2: one even/odd compare-exchange pass over g_ChunksBuffer, using
// chunks of TILE_SIZE*TILE_SIZE consecutive elements.
//
// Each thread maps its dispatch-thread id to a flat element index and, in the
// phase matching the parity of that index, orders the pair
// (index, index + 1) ascending in place. The pair is skipped when index is the
// last element of its chunk, so values never cross a chunk boundary.
//
// This is a single pass only; it does not by itself leave a chunk sorted.
// NOTE(review): presumably the host dispatches this kernel repeatedly until the
// chunks are sorted -- confirm against the calling code.
//
// DTid: dispatch-thread id; x and y select the element, z is unused.
[numthreads( NUM_THREADS_X*NUM_THREADS_X, NUM_THREADS_Y*NUM_THREADS_Y, 1)]
void SortChunks2( uint3 DTid : SV_DispatchThreadID )
{
    // Unsigned index math: DTid is unsigned, and unsigned % avoids the
    // negative remainders signed modulo can produce.
    const uint stride = (NUM_THREADS_X*NUM_THREADS_X) * (GROUP_SIZE_X/2);
    const uint offset = DTid.y*stride + DTid.x;
    const uint chunkSize = TILE_SIZE*TILE_SIZE;

    const bool isEven = (offset % 2) == 0;
    // False for the last element of a chunk, whose right-hand neighbour
    // belongs to the next chunk.
    const bool hasNeighbour = ((offset + 1) % chunkSize) != 0;

    // Even phase: pairs (0,1), (2,3), ...
    if (isEven && hasNeighbour)
    {
        const int left = g_ChunksBuffer[offset];
        const int right = g_ChunksBuffer[offset + 1];
        g_ChunksBuffer[offset] = min(left, right);
        g_ChunksBuffer[offset + 1] = max(left, right);
    }

    // Make the even-phase writes visible before the odd phase reads them.
    // This only synchronises threads of the same thread group.
    // NOTE(review): correctness therefore assumes every chunk is processed by a
    // single thread group -- confirm against the dispatch dimensions.
    DeviceMemoryBarrierWithGroupSync();

    // Odd phase: pairs (1,2), (3,4), ...
    if (!isEven && hasNeighbour)
    {
        const int left = g_ChunksBuffer[offset];
        const int right = g_ChunksBuffer[offset + 1];
        g_ChunksBuffer[offset] = min(left, right);
        g_ChunksBuffer[offset + 1] = max(left, right);
    }
}
// SortChunks: one even/odd compare-exchange pass over g_ChunksBuffer, using
// chunks of TILE_SIZE consecutive elements.
//
// Each thread maps its dispatch-thread id to a flat element index and, in the
// phase matching the parity of that index, orders the pair
// (index, index + 1) ascending in place. The pair is skipped when index is the
// last element of its chunk, so values never cross a chunk boundary.
//
// This is a single pass only; it does not by itself leave a chunk sorted.
// NOTE(review): presumably the host dispatches this kernel repeatedly until the
// chunks are sorted -- confirm against the calling code.
//
// DTid: dispatch-thread id; x and y select the element, z is unused.
[numthreads( NUM_THREADS_X, NUM_THREADS_Y, 1)]
void SortChunks( uint3 DTid : SV_DispatchThreadID )
{
    // Unsigned index math: DTid is unsigned, and unsigned % avoids the
    // negative remainders signed modulo can produce.
    const uint stride = NUM_THREADS_X * GROUP_SIZE_X;
    const uint offset = DTid.y*stride + DTid.x;

    const bool isEven = (offset % 2) == 0;
    // False for the last element of a chunk, whose right-hand neighbour
    // belongs to the next chunk.
    const bool hasNeighbour = ((offset + 1) % TILE_SIZE) != 0;

    // Even phase: pairs (0,1), (2,3), ...
    if (isEven && hasNeighbour)
    {
        const int left = g_ChunksBuffer[offset];
        const int right = g_ChunksBuffer[offset + 1];
        g_ChunksBuffer[offset] = min(left, right);
        g_ChunksBuffer[offset + 1] = max(left, right);
    }

    // Make the even-phase writes visible before the odd phase reads them.
    // This only synchronises threads of the same thread group.
    // NOTE(review): a group is NUM_THREADS_X threads wide while a row of the
    // index space is NUM_THREADS_X * GROUP_SIZE_X wide, so with the current
    // constants a TILE_SIZE chunk spans two thread groups. The odd-phase pair
    // (1,2) then reads an element written by the neighbouring group's even
    // phase, which this barrier does not order. Verify the dispatch layout, or
    // split the phases into separate dispatches.
    DeviceMemoryBarrierWithGroupSync();

    // Odd phase: pairs (1,2), (3,4), ...
    if (!isEven && hasNeighbour)
    {
        const int left = g_ChunksBuffer[offset];
        const int right = g_ChunksBuffer[offset + 1];
        g_ChunksBuffer[offset] = min(left, right);
        g_ChunksBuffer[offset + 1] = max(left, right);
    }
}
// Counts how many elements of one chunk of g_ChunksBuffer must precede `value`
// in the merged output.
//
// chunkIndex: index of the chunk to search; it covers the TILE_SIZE elements
//             starting at chunkIndex * TILE_SIZE and must be sorted ascending.
// value:      the element being ranked.
// countEqual: when true, elements equal to `value` are counted as preceding
//             it (upper bound); when false only strictly smaller elements are
//             counted (lower bound).
// Returns a count in [0, TILE_SIZE].
uint MergeChunks_RankInChunk( uint chunkIndex, int value, bool countEqual )
{
    const uint first = chunkIndex * TILE_SIZE;
    // Half-open search range [lo, hi): it never leaves the chunk, so no
    // out-of-range element is ever read.
    uint lo = first;
    uint hi = first + TILE_SIZE;

    // The loop condition depends on values read from a UAV.
    [allow_uav_condition]
    while (lo < hi)
    {
        const uint mid = (lo + hi) / 2;
        const int probe = g_ChunksBuffer[mid];
        const bool precedes = countEqual ? (probe <= value) : (probe < value);
        if (precedes)
        {
            lo = mid + 1;
        }
        else
        {
            hi = mid;
        }
    }

    // lo is now the index of the first element that does not precede `value`.
    return lo - first;
}

// MergeChunks: merges the sorted chunks of g_ChunksBuffer into g_MergeBuffer
// by rank.
//
// Each thread owns one element. Its destination index is its position inside
// its own chunk plus, for every other chunk, the number of elements of that
// chunk that precede it. Ties between chunks are broken by chunk index: equal
// elements of lower-numbered chunks come first. This gives every element a
// distinct destination, so duplicates do not overwrite each other.
//
// Requires every chunk of TILE_SIZE elements to be sorted ascending already.
//
// DTid: dispatch-thread id; x and y select the element, z is unused.
[numthreads( NUM_THREADS_X, NUM_THREADS_Y, 1)]
void MergeChunks( uint3 DTid : SV_DispatchThreadID )
{
    const uint stride = NUM_THREADS_X * GROUP_SIZE_X;
    const uint offset = DTid.y*stride + DTid.x;
    const uint chunk = offset / TILE_SIZE;
    const uint chunkCount = GROUP_SIZE_X * GROUP_SIZE_Y;

    const int value = g_ChunksBuffer[offset];

    // Start from the element's position inside its own (sorted) chunk.
    uint rank = offset % TILE_SIZE;

    for (uint i = 0; i < chunkCount; i++)
    {
        if (i == chunk)
        {
            continue;
        }
        // Earlier chunks win ties, later chunks lose them.
        rank += MergeChunks_RankInChunk(i, value, i < chunk);
    }

    g_MergeBuffer[rank] = value;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment