Last active
November 6, 2016 15:45
-
-
Save c910335/0bf3433aa0f792267725c3e3090d3aab to your computer and use it in GitHub Desktop.
I have no idea why this is faster than std::sort in cpp.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Array | |
def sort! | |
Array.intro_sort!(@buffer, @size) | |
self | |
end | |
def sort!(&block : T, T -> Int32) | |
Array.intro_sort!(@buffer, @size, block) | |
self | |
end | |
protected def self.intro_sort!(a, n) | |
return if n < 2 | |
quick_sort_for_intro_sort!(a, n, Math.log2(n).to_i * 2) | |
insertion_sort!(a, n) | |
end | |
protected def self.quick_sort_for_intro_sort!(a, n, d) | |
while n > 16 | |
if d == 0 | |
heap_sort!(a, n) | |
return | |
end | |
d -= 1 | |
shift_median!(a, n) | |
c = partition_for_quick_sort!(a, n) | |
quick_sort_for_intro_sort!(c, n - (c - a), d) | |
n = c - a | |
end | |
end | |
protected def self.heap_sort!(a, n) | |
(n / 2).downto 0 do |p| | |
heapify!(a, p, n) | |
end | |
while n > 1 | |
n -= 1 | |
a.value, a[n] = a[n], a.value | |
heapify!(a, 0, n) | |
end | |
end | |
protected def self.heapify!(a, p, n) | |
v, c = a[p], p | |
while c < (n - 1) / 2 | |
c = 2 * (c + 1) | |
c -= 1 if a[c] < a[c - 1] | |
break unless v < a[c] | |
a[p] = a[c] | |
p = c | |
end | |
if n & 1 == 0 && c == n / 2 - 1 | |
c = 2 * c + 1 | |
if v < a[c] | |
a[p] = a[c] | |
p = c | |
end | |
end | |
a[p] = v | |
end | |
protected def self.shift_median!(a, n) | |
b, c = a + n / 2, a + n - 1 | |
if a.value < b.value | |
if b.value < c.value | |
a.value, b.value = b.value, a.value | |
elsif a.value < c.value | |
a.value, c.value = c.value, a.value | |
end | |
elsif a.value < c.value | |
return | |
elsif b.value < c.value | |
a.value, c.value = c.value, a.value | |
else | |
a.value, b.value = b.value, a.value | |
end | |
end | |
protected def self.partition_for_quick_sort!(a, n) | |
v, l, r = a.value, a + 1, a + n | |
loop do | |
while l.value < v | |
l += 1 | |
end | |
r -= 1 | |
while v < r.value | |
r -= 1 | |
end | |
return l unless l < r | |
l.value, r.value = r.value, l.value | |
l += 1 | |
end | |
end | |
protected def self.insertion_sort!(a, n) | |
(1...n).each do |i| | |
l = a + i | |
v = l.value | |
p = l - 1 | |
while l > a && v < p.value | |
l.value = p.value | |
l, p = p, p - 1 | |
end | |
l.value = v | |
end | |
end | |
protected def self.intro_sort!(a, n, comp) | |
return if n < 2 | |
quick_sort_for_intro_sort!(a, n, Math.log2(n).to_i * 2, comp) | |
insertion_sort!(a, n, comp) | |
end | |
protected def self.quick_sort_for_intro_sort!(a, n, d, comp) | |
while n > 16 | |
if d == 0 | |
heap_sort!(a, n, comp) | |
return | |
end | |
d -= 1 | |
shift_median!(a, n, comp) | |
c = partition_for_quick_sort!(a, n, comp) | |
quick_sort_for_intro_sort!(c, n - (c - a), d, comp) | |
n = c - a | |
end | |
end | |
protected def self.heap_sort!(a, n, comp) | |
(n / 2).downto 0 do |p| | |
heapify!(a, p, n, comp) | |
end | |
while n > 1 | |
n -= 1 | |
a.value, a[n] = a[n], a.value | |
heapify!(a, 0, n, comp) | |
end | |
end | |
protected def self.heapify!(a, p, n, comp) | |
v, c = a[p], p | |
while c < (n - 1) / 2 | |
c = 2 * (c + 1) | |
c -= 1 if comp.call(a[c], a[c - 1]) < 0 | |
break unless comp.call(v, a[c]) < 0 | |
a[p] = a[c] | |
p = c | |
end | |
if n & 1 == 0 && c == n / 2 - 1 | |
c = 2 * c + 1 | |
if comp.call(v, a[c]) < 0 | |
a[p] = a[c] | |
p = c | |
end | |
end | |
a[p] = v | |
end | |
protected def self.shift_median!(a, n, comp) | |
b, c = a + n / 2, a + n - 1 | |
if comp.call(a.value, b.value) < 0 | |
if comp.call(b.value, c.value) < 0 | |
a.value, b.value = b.value, a.value | |
elsif comp.call(a.value, c.value) < 0 | |
a.value, c.value = c.value, a.value | |
end | |
elsif comp.call(a.value, c.value) < 0 | |
return | |
elsif comp.call(b.value, c.value) < 0 | |
a.value, c.value = c.value, a.value | |
else | |
a.value, b.value = b.value, a.value | |
end | |
end | |
protected def self.partition_for_quick_sort!(a, n, comp) | |
v, l, r = a.value, a + 1, a + n | |
loop do | |
while comp.call(l.value, v) < 0 | |
l += 1 | |
end | |
r -= 1 | |
while comp.call(v, r.value) < 0 | |
r -= 1 | |
end | |
return l unless l < r | |
l.value, r.value = r.value, l.value | |
l += 1 | |
end | |
end | |
protected def self.insertion_sort!(a, n, comp) | |
(1...n).each do |i| | |
l = a + i | |
v = l.value | |
p = l - 1 | |
while l > a && comp.call(v, p.value) < 0 | |
l.value = p.value | |
l, p = p, p - 1 | |
end | |
l.value = v | |
end | |
end | |
end | |
# 256 elements with original quick sort 320.24k (± 6.41%) 1.43× slower | |
# 256 elements with std::sort in cpp 409.92k (± 0.47%) 1.12× slower | |
# 256 elements with intro sort 457.77k (± 0.38%) fastest | |
# 512 elements with original quick sort 147.88k (± 0.29%) 1.40× slower | |
# 512 elements with std::sort in cpp 175.08k (±10.52%) 1.18× slower | |
# 512 elements with intro sort 206.98k (± 4.39%) fastest | |
# 1024 elements with original quick sort 65.9k (± 9.92%) 1.47× slower | |
# 1024 elements with std::sort in cpp 82.29k (± 0.40%) 1.18× slower | |
# 1024 elements with intro sort 97.01k (± 6.08%) fastest | |
# 2048 elements with original quick sort 30.36k (±12.18%) 1.53× slower | |
# 2048 elements with std::sort in cpp 36.14k (±10.70%) 1.28× slower | |
# 2048 elements with intro sort 46.42k (± 0.29%) fastest | |
# 4096 elements with original quick sort 14.87k (± 0.55%) 1.46× slower | |
# 4096 elements with std::sort in cpp 17.42k (± 0.27%) 1.25× slower | |
# 4096 elements with intro sort 21.72k (± 7.04%) fastest | |
# 8192 elements with original quick sort 7.01k (± 0.51%) 1.46× slower | |
# 8192 elements with std::sort in cpp 8.11k (± 0.28%) 1.26× slower | |
# 8192 elements with intro sort 10.24k (± 8.49%) fastest | |
# 16384 elements with original quick sort 3.27k (± 5.59%) 1.52× slower | |
# 16384 elements with std::sort in cpp 3.72k (± 6.07%) 1.33× slower | |
# 16384 elements with intro sort 4.96k (± 5.91%) fastest | |
# 32768 elements with original quick sort 1.57k (± 0.40%) 1.54× slower | |
# 32768 elements with std::sort in cpp 1.78k (± 0.24%) 1.35× slower | |
# 32768 elements with intro sort 2.41k (± 0.21%) fastest | |
# 65536 elements with original quick sort 744.71 (± 0.31%) 1.50× slower | |
# 65536 elements with std::sort in cpp 836.95 (± 0.28%) 1.34× slower | |
# 65536 elements with intro sort 1.12k (± 9.48%) fastest | |
# 131072 elements with original quick sort 354.39 (± 0.29%) 1.53× slower | |
# 131072 elements with std::sort in cpp 393.97 (± 0.31%) 1.38× slower | |
# 131072 elements with intro sort 543.47 (± 6.47%) fastest | |
# 262144 elements with original quick sort 169.09 (± 0.29%) 1.57× slower | |
# 262144 elements with std::sort in cpp 186.74 (± 0.31%) 1.42× slower | |
# 262144 elements with intro sort 265.07 (± 0.27%) fastest | |
# 524288 elements with original quick sort 80.71 (± 0.26%) 1.56× slower | |
# 524288 elements with std::sort in cpp 87.66 (± 6.17%) 1.43× slower | |
# 524288 elements with intro sort 125.71 (± 7.13%) fastest | |
# 1048576 elements with original quick sort 38.52 (± 0.29%) 1.54× slower | |
# 1048576 elements with std::sort in cpp 40.22 (±11.09%) 1.47× slower | |
# 1048576 elements with intro sort 59.27 (±10.47%) fastest | |
# 2097152 elements with original quick sort 18.16 (± 7.18%) 1.57× slower | |
# 2097152 elements with std::sort in cpp 19.61 (± 7.09%) 1.46× slower | |
# 2097152 elements with intro sort 28.54 (± 9.07%) fastest | |
# 4194304 elements with original quick sort 8.63 (± 8.65%) 1.59× slower | |
# 4194304 elements with std::sort in cpp 9.13 (± 9.25%) 1.50× slower | |
# 4194304 elements with intro sort 13.7 (± 7.14%) fastest | |
# 8388608 elements with original quick sort 4.23 (± 0.12%) 1.51× slower | |
# 8388608 elements with std::sort in cpp 4.41 (± 5.48%) 1.45× slower | |
# 8388608 elements with intro sort 6.4 (± 9.92%) fastest |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment