Created
November 14, 2020 01:12
-
-
Save antimon2/7a014c29c263f2f2188bec28224ec8ba to your computer and use it in GitHub Desktop.
BitonicSorter.jl.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-11-14T09:39:58.388000+09:00", | |
"start_time": "2020-11-14T00:39:58.021Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "versioninfo()", | |
"execution_count": 1, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": "Julia Version 1.5.3\nCommit 788b2c77c1 (2020-11-09 13:37 UTC)\nPlatform Info:\n OS: Linux (x86_64-pc-linux-gnu)\n CPU: Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz\n WORD_SIZE: 64\n LIBM: libopenlibm\n LLVM: libLLVM-9.0.1 (ORCJIT, skylake)\nEnvironment:\n JULIA_NUM_THREADS = 4\n" | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "## BitonicSorter" | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "ref: \n\n+ [Bitonic sorter : Wikipedia(en)](https://en.wikipedia.org/wiki/Bitonic_sorter)\n+ [バイトニックソート : Wikipedia(ja)](https://ja.wikipedia.org/wiki/%E3%83%90%E3%82%A4%E3%83%88%E3%83%8B%E3%83%83%E3%82%AF%E3%82%BD%E3%83%BC%E3%83%88)" | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-11-14T09:39:59.923000+09:00", | |
"start_time": "2020-11-14T00:40:00.721Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "module BitonicSorter\n\nusing Base.Sort: Algorithm\nusing Base.Order: Ordering, lt\n\nexport BitonicSort\n\nstruct BitonicSortAlg <: Algorithm end\nconst BitonicSort = BitonicSortAlg()\n\nfunction Base.sort!(x::AbstractVector, lo::Integer, hi::Integer, ::BitonicSortAlg, o::Ordering)\n lo ≥ hi && return x\n\n fullsize::Int = hi - lo\n d = sizeof(Int) * 8 - leading_zeros(fullsize - 1) # == ceil(Int, log(2, fullsize))\n\n for p = 1:d, q = 1:p\n _sort_kernel!(x, lo, hi, p, q, o)\n end\n return x\nend\n\nfunction _sort_kernel!(x::AbstractVector, lo, hi, p, q, o)\n # @assert p ≥ q\n fullsize_1 = hi - lo\n d = 1 << UInt(p - q)\n for s = 0:2d:fullsize_1, ioff = 0:d-1\n joff = q == 1 ? (2d - ioff - 1) : ioff + d\n i = lo + s + ioff\n j = lo + s + joff\n if !lt(o, x[i], x[j])\n x[i], x[j] = x[j], x[i]\n end\n end\nend\n\nend # module", | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "Main.BitonicSorter" | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-11-14T09:40:03.037000+09:00", | |
"start_time": "2020-11-14T00:40:03.969Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "module MTBitonicSorter\n\nusing Base.Sort: Algorithm\nusing Base.Order: Ordering, lt\nusing Base.Threads\n\nexport MTBitonicSort\n\nstruct MTBitonicSortAlg <: Algorithm end\nconst MTBitonicSort = MTBitonicSortAlg()\nconst PARALLEL_THRESHOLD = 4096\nconst PARALLEL_THRESHOLD_D = 12 # == log(2, PARALLEL_THRESHOLD)\n\nfunction Base.sort!(x::AbstractVector, lo::Integer, hi::Integer, ::MTBitonicSortAlg, o::Ordering)\n lo ≥ hi && return x\n\n fullsize_1::Int = hi - lo\n d = sizeof(Int) * 8 - leading_zeros(fullsize_1) # == ceil(Int, log(2, fullsize)\n\n if nthreads() > 2 && fullsize_1 + 1 ≥ PARALLEL_THRESHOLD\n # multithreading\n for p = 1:d, q = 1:p\n if p - q ≥ PARALLEL_THRESHOLD_D\n bsz = 1 << UInt(p - q + 1)\n # @threads for off = lo:bsz:hi-1, boff = 0:PARALLEL_THRESHOLD:((bsz>>0x01)-1)\n @threads for (off, boff) in [(off, boff) for boff = 0:PARALLEL_THRESHOLD:((bsz>>0x01)-1), off = lo:bsz:hi-1]\n _sort_kernel!(x, off, off + PARALLEL_THRESHOLD - 1, p, q, o, boff)\n end\n else\n @threads for off = lo:PARALLEL_THRESHOLD:hi-1\n _sort_kernel!(x, off, off + PARALLEL_THRESHOLD - 1, p, q, o)\n end\n end\n end\n else\n # single threading\n for p = 1:d, q = 1:p\n _sort_kernel!(x, lo, hi, p, q, o)\n end\n end\n return x\nend\n\nfunction _sort_kernel!(x::AbstractVector, lo, hi, p, q, o, boff=0)\n # @assert p ≥ q\n fullsize_1 = hi - lo\n d = 1 << UInt(p - q)\n sz_1 = min(d - 1, fullsize_1)\n for s = 0:2d:fullsize_1, ioff = boff:boff+sz_1\n joff = q == 1 ? (2d - ioff - 1) : ioff + d\n i = lo + s + ioff\n j = lo + s + joff\n if !lt(o, x[i], x[j])\n x[i], x[j] = x[j], x[i]\n end\n end\nend\n\nend # module", | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "Main.MTBitonicSorter" | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-11-14T09:40:04.880000+09:00", | |
"start_time": "2020-11-14T00:40:06.076Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "using .BitonicSorter", | |
"execution_count": 4, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-11-14T09:40:05.831000+09:00", | |
"start_time": "2020-11-14T00:40:07.027Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "using .MTBitonicSorter", | |
"execution_count": 5, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-11-14T09:40:07.588000+09:00", | |
"start_time": "2020-11-14T00:40:08.069Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "x = [10, 30, 11, 20, 4, 330, 21, 110]", | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "8-element Array{Int64,1}:\n 10\n 30\n 11\n 20\n 4\n 330\n 21\n 110" | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-11-14T09:40:08.972000+09:00", | |
"start_time": "2020-11-14T00:40:10.150Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "sort(x)", | |
"execution_count": 7, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "8-element Array{Int64,1}:\n 4\n 10\n 11\n 20\n 21\n 30\n 110\n 330" | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-11-14T09:40:10.371000+09:00", | |
"start_time": "2020-11-14T00:40:11.468Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "sort(x; alg=BitonicSort)", | |
"execution_count": 8, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "8-element Array{Int64,1}:\n 4\n 10\n 11\n 20\n 21\n 30\n 110\n 330" | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-11-14T09:40:18.452000+09:00", | |
"start_time": "2020-11-14T00:40:19.479Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "sort(x; alg=MTBitonicSort)", | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "8-element Array{Int64,1}:\n 4\n 10\n 11\n 20\n 21\n 30\n 110\n 330" | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "### Benchmark" | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-11-14T09:41:00.925000+09:00", | |
"start_time": "2020-11-14T00:41:02.101Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "using BenchmarkTools", | |
"execution_count": 10, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-11-14T09:41:43.387000+09:00", | |
"start_time": "2020-11-14T00:41:32.890Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "@benchmark sort!(x) setup=(x=rand(Float64, 2^16))", | |
"execution_count": 11, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "BenchmarkTools.Trial: \n memory estimate: 0 bytes\n allocs estimate: 0\n --------------\n minimum time: 2.887 ms (0.00% GC)\n median time: 3.042 ms (0.00% GC)\n mean time: 3.051 ms (0.00% GC)\n maximum time: 4.414 ms (0.00% GC)\n --------------\n samples: 1607\n evals/sample: 1" | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-11-14T09:42:12.776000+09:00", | |
"start_time": "2020-11-14T00:42:03.228Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "@benchmark sort!(x; alg=BitonicSort) setup=(x=rand(Float64, 2^16))", | |
"execution_count": 12, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "BenchmarkTools.Trial: \n memory estimate: 0 bytes\n allocs estimate: 0\n --------------\n minimum time: 11.832 ms (0.00% GC)\n median time: 12.349 ms (0.00% GC)\n mean time: 12.380 ms (0.00% GC)\n maximum time: 14.479 ms (0.00% GC)\n --------------\n samples: 402\n evals/sample: 1" | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-11-14T09:42:12.776000+09:00", | |
"start_time": "2020-11-14T00:42:03.228Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "@benchmark sort!(x; alg=MTBitonicSort) setup=(x=rand(Float64, 2^16))", | |
"execution_count": 13, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "BenchmarkTools.Trial: \n memory estimate: 412.19 KiB\n allocs estimate: 2872\n --------------\n minimum time: 3.348 ms (0.00% GC)\n median time: 3.459 ms (0.00% GC)\n mean time: 3.621 ms (0.68% GC)\n maximum time: 6.591 ms (20.88% GC)\n --------------\n samples: 1359\n evals/sample: 1" | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-11-14T09:42:52.949000+09:00", | |
"start_time": "2020-11-14T00:42:39.970Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "@benchmark sort!(x) setup=(x=rand(Float64, 2^24))", | |
"execution_count": 14, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "BenchmarkTools.Trial: \n memory estimate: 0 bytes\n allocs estimate: 0\n --------------\n minimum time: 1.193 s (0.00% GC)\n median time: 1.195 s (0.00% GC)\n mean time: 1.199 s (0.00% GC)\n maximum time: 1.215 s (0.00% GC)\n --------------\n samples: 4\n evals/sample: 1" | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-11-14T09:43:20.434000+09:00", | |
"start_time": "2020-11-14T00:42:39.972Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "@benchmark sort!(x; alg=BitonicSort) setup=(x=rand(Float64, 2^24))", | |
"execution_count": 15, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "BenchmarkTools.Trial: \n memory estimate: 0 bytes\n allocs estimate: 0\n --------------\n minimum time: 6.633 s (0.00% GC)\n median time: 6.633 s (0.00% GC)\n mean time: 6.633 s (0.00% GC)\n maximum time: 6.633 s (0.00% GC)\n --------------\n samples: 1\n evals/sample: 1" | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-11-14T09:43:39.759000+09:00", | |
"start_time": "2020-11-14T00:42:39.973Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "@benchmark sort!(x; alg=MTBitonicSort) setup=(x=rand(Float64, 2^24))", | |
"execution_count": 16, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "BenchmarkTools.Trial: \n memory estimate: 3.34 MiB\n allocs estimate: 6903\n --------------\n minimum time: 2.384 s (0.00% GC)\n median time: 2.397 s (0.00% GC)\n mean time: 2.395 s (0.01% GC)\n maximum time: 2.405 s (0.02% GC)\n --------------\n samples: 3\n evals/sample: 1" | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"name": "julia-(4-threads)-1.5", | |
"display_name": "Julia (4 threads) 1.5.3", | |
"language": "julia" | |
}, | |
"language_info": { | |
"file_extension": ".jl", | |
"name": "julia", | |
"mimetype": "application/julia", | |
"version": "1.5.3" | |
}, | |
"gist": { | |
"id": "", | |
"data": { | |
"description": "BitonicSorter.jl.ipynb", | |
"public": true | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment