Created
August 7, 2020 01:46
-
-
Save kose-y/afe9f14fb4075ce276cb752d6391928b to your computer and use it in GitHub Desktop.
CUDA bitonic sort with Julia
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Code based on: https://discourse.julialang.org/t/gpu-sort-wip-gpu-1000x-faster-than-cpu-i-must-be-doing-something-wrong/20310/4 | |
# bitonic sorts a vector of any length. | |
using CUDA | |
function bisort!(shared, j, k) | |
tid = UInt(((blockIdx().x - 1) * blockDim().x + threadIdx().x)-1) | |
ixj = tid⊻UInt(j) | |
if ixj > tid | |
if (tid & k) == 0 | |
if shared[tid+1] > shared[ixj+1] | |
tmp = shared[ixj+1] | |
shared[ixj+1] = shared[tid+1] | |
shared[tid+1] = tmp | |
end | |
else | |
if shared[tid+1] < shared[ixj+1] | |
tmp = shared[ixj+1] | |
shared[ixj+1] = shared[tid+1] | |
shared[tid+1] = tmp | |
end | |
end | |
end | |
return | |
end | |
function bitonicsort_pow2!(cushared) | |
CUDA.@sync begin | |
k = UInt(2) | |
NUM = length(cushared) | |
nblocks = ceil(NUM/256) |> Int | |
while (k <= NUM) | |
j = div(k, 2) | |
while j >= 1 | |
@cuda threads = 256 blocks = nblocks bisort!(cushared, j, k) | |
j = div(j,2) | |
end | |
k = UInt(k*2) | |
end | |
end | |
end | |
function bitonicsort!(cushared) | |
N = length(cushared) | |
K = convert(Int, ceil(log2(N))) | |
paddedlen = 1<<K | |
cushared2 = similar(cushared, paddedlen) | |
fill!(cushared2, Inf) | |
cushared2[1:N] .= @view cushared[1:N] | |
bitonicsort_pow2!(cushared2) | |
cushared .= @view cushared2[1:N] | |
cushared | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment