|
// Copyright 2024 Igor Burago |
|
// SPDX-License-Identifier: ISC |
|
|
|
#include <math.h> // for rank_partition() |
|
#include <stddef.h> // ptrdiff_t |
|
#include <stdint.h> |
|
#include <string.h> // memcpy(), memmove(), memset() |
|
|
|
// Built-in u128 is not strictly required, but it makes mcg64 simpler and faster.
typedef __uint128_t uint128_t;

// Inlining control attributes (GCC/Clang).
#define must_inline inline __attribute__((always_inline))
#define must_not_inline __attribute__((noinline))
// flatten: inline calls made *within* the annotated function, where possible.
#define inline_within __attribute__((flatten))

// Branch annotations (GCC/Clang built-ins).
// fickle: the condition has no dominant outcome; prefer branchless codegen.
#define fickle(x) (__builtin_unpredictable((x)))
// usually: the expression (x) most often evaluates to (c).
#define usually(x, c) (__builtin_expect((x), (c)))
#define likely(x) (usually(!!(x), 1))
#define unlikely(x) (usually(!!(x), 0))

// Token pasting that macro-expands its arguments first.
#define glue_raw(x, y) x##y
#define glue(x, y) glue_raw(x, y)
// dub: per-source-line unique identifier, for hygienic macro locals (see swap()).
#define dub(name) glue(name, glue(__dub_L, __LINE__))

// Compile-time assertion: when (x) is false the bit-field width is -1,
// which makes the declaration ill-formed.
#define apriori(x) extern void *(*compile_time_check_dummy(void))[ \
	sizeof(struct { unsigned compile_time_check : (x) ? 1 : -1; })]
|
|
|
// #define DEBUG_BUILD
#if defined(DEBUG_BUILD)
#include <stdio.h>
// Runtime assertion: on failure, report file/line/condition and trap.
#define surely(x) (likely(x) ? (void)0 : \
	(fprintf(stderr, "FATAL: %s:%d: surely(%s) does not hold!\n", __FILE__, __LINE__, #x), \
	fflush(NULL), \
	__builtin_trap()))
#define unreachable() surely(0 && "This code path must never be reached.")
#else
#define unreachable() (__builtin_unreachable())
// In release builds a failed surely() is treated as impossible, so the
// optimizer may assume the asserted condition always holds.
static must_inline void surely_failure_deadend(void) { for (;;) { unreachable(); } }
#define surely(x) (likely(x) ? (void)0 : surely_failure_deadend())
#endif

// Byte-wise swap of two equally-sized lvalues. dub() gives the macro's locals
// unique names; memmove in the middle tolerates x and y aliasing.
#define swap(x, y) do { \
	void *dub(xp) = &(x), *dub(yp) = &(y); \
	apriori(sizeof(x) == sizeof(y)); \
	unsigned char dub(tmp)[sizeof(x)]; \
	memcpy(dub(tmp), dub(xp), sizeof(dub(tmp))); \
	memmove(dub(xp), dub(yp), sizeof(dub(tmp))); \
	memcpy(dub(yp), dub(tmp), sizeof(dub(tmp))); \
} while (0)
|
|
|
static must_inline uint64_t |
|
seed_mix_u64(uint64_t seed, uint64_t x) { |
|
x += seed * 1122334455667788991u; |
|
return x ^ (x >> 32); |
|
} |
|
|
|
static must_inline uint64_t |
|
splitmix64_next(uint64_t *s) { |
|
uint64_t x = *s += 0x9E3779B97F4A7C15u; |
|
x = (x ^ (x>>30)) * 0xBF58476D1CE4E5B9u; |
|
x = (x ^ (x>>27)) * 0x94D049BB133111EBu; |
|
return x ^ (x>>31); |
|
} |
|
|
|
struct mcg64 { uint128_t s; }; |
|
static must_inline struct mcg64 |
|
mcg64(uint64_t seed) { |
|
uint64_t a0 = splitmix64_next(&seed); |
|
uint64_t a1 = splitmix64_next(&seed); |
|
struct mcg64 r = {0}; |
|
r.s = ((uint128_t)a1 << 64) | a0; |
|
return r; |
|
} |
|
static must_inline uint64_t |
|
mcg64_next(struct mcg64 *r) { |
|
return (r->s *= 0xDA942042E4DD58B5u) >> 64; |
|
} |
|
|
|
struct pcg32 { uint64_t s, t; }; |
|
static must_inline struct pcg32 |
|
pcg32(uint64_t seed) { |
|
struct pcg32 r = {0}; |
|
r.s = splitmix64_next(&seed); |
|
r.t = splitmix64_next(&seed) | 1; |
|
return r; |
|
} |
|
static must_inline uint32_t |
|
pcg32_next(struct pcg32 *r) { |
|
uint64_t s = r->s; |
|
r->s = r->s*0x5851F42D4C957F2Du + r->t; |
|
uint32_t x = (s ^ (s>>18)) >> 27, k = s>>59; |
|
return (x >> k) | (x << (-k & 31)); |
|
} |
|
|
|
// Fast and only slightly biased for (end-beg) approaching u32/u64 max. |
|
static must_inline uint32_t |
|
onto_range_u32(uint32_t x, uint32_t beg, uint32_t end) { |
|
surely(beg <= end); |
|
x = ((uint64_t)x * (end - beg)) >> 32; |
|
return beg + x; |
|
} |
|
static must_inline uint64_t |
|
onto_range_u64(uint64_t x, uint64_t beg, uint64_t end) { |
|
surely(beg <= end); |
|
x = ((uint128_t)x * (end - beg)) >> 64; |
|
return beg + x; |
|
} |
|
|
|
|
|
//-----------------------------------------------------------------------------* |
|
// ARRAY VALUE AND INDEX TYPES |
|
//-----------------------------------------------------------------------------* |
|
|
|
// Array element type and its strict ordering predicate.
typedef int item_t;
#define item_less(x, y) ((x) < (y))

// Both signed and unsigned types are supported:
typedef uint32_t index_t;

// RNG used for pivot selection; selectable at compile time.
// #define INDEX_RNG_USING_MCG
#if defined(INDEX_RNG_USING_MCG)
// MCG is faster than LCG or PCG, and is usually good enough for quicksort.
typedef struct mcg64 index_rng_t;
#define index_rng(seed) (mcg64(seed))
#define index_rand(rng, len) (onto_range_u64(mcg64_next(rng), 0, (len)))
#else
typedef struct pcg32 index_rng_t;
#define index_rng(seed) (pcg32(seed))
#define index_rand(rng, len) (onto_range_u32(pcg32_next(rng), 0, (len)))
#endif
|
|
|
|
|
//-----------------------------------------------------------------------------* |
|
// OPTION 1: One-off partial quicksort (no state shared between calls) |
|
//-----------------------------------------------------------------------------* |
|
|
|
#define max(x, y) ((x) > (y) ? (x) : (y))
enum {
	// Below this length, always finish with insertion sort.
	QUICKSORT_CUTOFF_HARD = 4,
	// Below this length, finish with insertion sort when the whole subrange
	// lies inside the requested rank interval (scaled by item size so the
	// cutoff covers roughly one cache line).
	QUICKSORT_CUTOFF_SOFT = max(8, 64/sizeof(item_t)),
};
apriori(QUICKSORT_CUTOFF_HARD >= 3); // We use the median-of-three pivoting.
|
|
|
// Insertion sort.
static inline void
quicksort_cutoff_sort(item_t *arr, index_t len) {
	surely(len >= 0);
	// Start at 1 for non-empty input (the `len != 0` term); each iteration
	// inserts arr[i] into the already-sorted prefix arr[0..i).
	for (index_t i = (len != 0); i != len; i++) {
		if fickle(!item_less(arr[i], arr[i-1])) { continue; }
		item_t x = arr[i];
		index_t j = i;
		// Shift greater elements one slot right until x's position is found.
		do { arr[j] = arr[j-1]; } while (--j != 0 && fickle(item_less(x, arr[j-1])));
		arr[j] = x;
	}
}
|
// Hoare-partition elements in [0,len[ using the item at pivot_idx for pivot.
static index_t
partition(item_t *arr, index_t len, index_t pivot_idx) {
	surely(len >= 2), surely(0 <= pivot_idx), surely(pivot_idx < len);

	// Park the pivot value at whichever end of the range it sorts toward, so
	// both inner scans below are stopped by a sentinel and need no bounds
	// checks. pivot_idx is reused to remember which end holds the pivot.
	item_t piv = arr[pivot_idx];
	arr[pivot_idx] = arr[0];
	pivot_idx = item_less(arr[len-1], piv) ? len-1 : 0;
	arr[0] = arr[pivot_idx];
	arr[pivot_idx] = piv;

	// Classic Hoare crossing scans.
	index_t i = 0, j = len-1;
	for (;;) {
		do { i++; } while (item_less(arr[i], piv));
		do { j--; } while (item_less(piv, arr[j]));
		if (i >= j) { break; }
		swap(arr[i], arr[j]);
	}
	// Move the pivot from its parking end to its final position and return it.
	j += (pivot_idx != 0);
	swap(arr[j], arr[pivot_idx]);
	return j;
}
|
// Pick the index of the median of three distinct random elements of arr[0..len).
static must_inline index_t
median_rand_3(index_rng_t *rng, item_t *arr, index_t len) {
	surely(len >= 3);

	// Sample three distinct indices uniformly: draw from ranges shrinking by
	// one, then shift the later draws past the earlier picks they would
	// otherwise collide with.
	index_t i[3];
	i[0] = index_rand(rng, len-0);
	i[1] = index_rand(rng, len-1);
	i[2] = index_rand(rng, len-2);
	unsigned i1_past_i0 = (i[1] >= i[0]);
	i[1] += i1_past_i0; // Count i[1]-th item, jumping over i[0]-th.
	i[2] += (i[2] >= i[!i1_past_i0]); // Count i[2]-th, skipping min(i[0],i[1])-th...
	i[2] += (i[2] >= i[i1_past_i0]); // ...and then max(i[0],i[1])-th.

	// Branchless median-of-three from the three pairwise comparison outcomes.
	unsigned gt01 = !!item_less(arr[i[1]], arr[i[0]]);
	unsigned lt02 = !!item_less(arr[i[0]], arr[i[2]]);
	unsigned lt12 = !!item_less(arr[i[1]], arr[i[2]]);
	return i[(gt01^lt02) + (lt02^lt12)];
}
|
// Partially quick-sort elements from [l,r[ so that those whose ranks in
// the sorted sequence belong to [lo,hi[ take their correct positions.
static inline_within void
partial_quicksort_rand_rec(index_rng_t *rng, item_t *arr,
		index_t lo, index_t hi, index_t l, index_t r) {
	for (;;) {
		// Small subranges: finish with insertion sort. The soft cutoff applies
		// only when the whole subrange lies inside the requested interval.
		if unlikely(r-l <= QUICKSORT_CUTOFF_HARD ||
				(r-l <= QUICKSORT_CUTOFF_SOFT && lo <= l && r <= hi)) {
			quicksort_cutoff_sort(arr+l, r-l);
			return;
		}
		index_t pivot_idx = median_rand_3(rng, arr+l, r-l);
		index_t r_left = l + partition(arr+l, r-l, pivot_idx);
		index_t l_right = r_left + 1;
		// Descend only into halves that overlap [lo,hi[ and hold >1 element.
		unsigned go_left = (lo < r_left && r_left-l > 1);
		unsigned go_right = (l_right < hi && r-l_right > 1);

		switch fickle((go_left<<1) | go_right) {
		case 0: { return; } break;
		case 1: right: { l = l_right; } break;
		case 2: left: { r = r_left; } break;
		case 3: {
			// Both halves needed: recurse into the smaller one and iterate on
			// the larger one, keeping the stack depth logarithmic.
			if (r_left-l <= r-l_right) {
				partial_quicksort_rand_rec(rng, arr, lo, hi, l, r_left);
				goto right;
			} else {
				partial_quicksort_rand_rec(rng, arr, lo, hi, l_right, r);
				goto left;
			}
			unreachable();
		} break;
		default: { unreachable(); } break;
		}
	}
}
|
static inline void |
|
partial_quicksort_rand(item_t *arr, index_t len, index_t lo, index_t hi) { |
|
surely(len >= 0), surely(0 <= lo), surely(lo <= hi), surely(hi <= len); |
|
if (len <= 1 || lo >= hi) { return; } |
|
|
|
uint64_t seed = -1; |
|
seed = seed_mix_u64(seed, (uintptr_t)&memset); |
|
seed = seed_mix_u64(seed, (uintptr_t)&partial_quicksort_rand); |
|
seed = seed_mix_u64(seed, (uintptr_t)&seed); |
|
seed = seed_mix_u64(seed, (uintptr_t)arr); |
|
index_rng_t rng = index_rng(seed); |
|
|
|
partial_quicksort_rand_rec(&rng, arr, lo, hi, 0, len); |
|
} |
|
|
|
|
|
//-----------------------------------------------------------------------------* |
|
// OPTION 2: Incremental quicksort (pivot indices are shared between calls) |
|
//-----------------------------------------------------------------------------* |
|
|
|
// Partially quick-sort elements from [l,r[ so that those whose ranks in the
// sorted sequence belong to [lo,hi[ end up in their correct positions, taking
// into account the history of partitions from previous invocations.
//
// The pivot indices of past partitions are stored in the part_idx array in
// the order of preorder traversal of the quicksort tree. Special index values
// SORTED and CHAOS signify that an interval is, respectively, fully sorted and
// has not been partitioned yet (all part_idx must initially be set to CHAOS).
#define INC_QUICKSORT_CHAOS_MARK ((index_t)-1)
#define INC_QUICKSORT_SORTED_MARK ((index_t)-2)
static inline_within void
inc_quicksort_rand_rec(index_rng_t *rng, item_t *arr, index_t lo, index_t hi,
		index_t l, index_t r, index_t *part_idx) {
	for (;;) {
		int will_be_sorted = (lo <= l && r <= hi);
		// Small subranges: insertion-sort, record the node as fully sorted.
		if unlikely(r-l <= QUICKSORT_CUTOFF_HARD ||
				(r-l <= QUICKSORT_CUTOFF_SOFT && will_be_sorted)) {
			quicksort_cutoff_sort(arr+l, r-l);
			*part_idx = INC_QUICKSORT_SORTED_MARK;
			return;
		}
		// Reuse the pivot position recorded by an earlier call, if any.
		index_t r_left = *part_idx;
		if likely(r_left == INC_QUICKSORT_CHAOS_MARK) {
			index_t pivot_idx = median_rand_3(rng, arr+l, r-l);
			r_left = l + partition(arr+l, r-l, pivot_idx);
		}
		*part_idx = will_be_sorted ? INC_QUICKSORT_SORTED_MARK : r_left;
		index_t l_right = r_left + 1;

		// Descend only into halves that overlap [lo,hi[, hold >1 element,
		// and are not already marked as fully sorted.
		unsigned go_left = (lo < r_left && r_left-l > 1);
		unsigned go_right = (l_right < hi && r-l_right > 1);
		// Preorder layout: left child is adjacent; right child is offset by
		// the size of the left subtree.
		index_t *part_idx_left = part_idx + 1;
		index_t *part_idx_right = part_idx + (l_right - l);
		go_left = (go_left && *part_idx_left != INC_QUICKSORT_SORTED_MARK);
		go_right = (go_right && *part_idx_right != INC_QUICKSORT_SORTED_MARK);

		switch fickle((go_left<<1) | go_right) {
		case 0: { return; } break;
		case 1: right: { l = l_right, part_idx = part_idx_right; } break;
		case 2: left: { r = r_left, part_idx = part_idx_left; } break;
		case 3: {
			// Recurse into the smaller half, iterate on the larger one.
			if (r_left-l <= r-l_right) {
				inc_quicksort_rand_rec(rng, arr, lo, hi, l, r_left, part_idx_left);
				goto right;
			} else {
				inc_quicksort_rand_rec(rng, arr, lo, hi, l_right, r, part_idx_right);
				goto left;
			}
			unreachable();
		} break;
		default: { unreachable(); } break;
		}
	}
}
|
static inline void |
|
inc_quicksort_rand(item_t *arr, index_t *part_idxs, index_t len, index_t lo, index_t hi) { |
|
surely(len >= 0), surely(0 <= lo), surely(lo <= hi), surely(hi <= len); |
|
if (len <= 1 || lo >= hi || *part_idxs == INC_QUICKSORT_SORTED_MARK) { return; } |
|
|
|
uint64_t seed = -1; |
|
seed = seed_mix_u64(seed, (uintptr_t)&memset); |
|
seed = seed_mix_u64(seed, (uintptr_t)&inc_quicksort_rand); |
|
seed = seed_mix_u64(seed, (uintptr_t)&seed); |
|
seed = seed_mix_u64(seed, (uintptr_t)arr); |
|
index_rng_t rng = index_rng(seed); |
|
|
|
inc_quicksort_rand_rec(&rng, arr, lo, hi, 0, len, part_idxs); |
|
} |
|
|
|
|
|
//-----------------------------------------------------------------------------* |
|
// OPTION 3: Floyd-Rivest partitioning + N largest scan using binary heap |
|
//-----------------------------------------------------------------------------* |
|
|
|
// Binary heap implementation. Supports both min- and max-heaps, as well as
// both left-to-right and right-to-left storage layout in memory. The unifying
// moniker of "lighter" is used for the heap property predicate: If item X is
// lighter than item Y, it means that X cannot be a child of Y in the heap
// (i.e., the former metaphorically wants to bubble up above the latter).
#define heap_lighter(x, y) ((min_heap) ? item_less((x), (y)) : item_less((y), (x)))
#define heap_at(i) (*((stored_backwards) ? (root)-(i) : (root)+(i)))
// Sift the item at `node` down until the heap property holds below it.
static inline void
heap_fix_down(int min_heap, int stored_backwards, item_t *root, index_t node, index_t len) {
	surely(len > 0), surely(0 <= node), surely(node < len);
	item_t x = heap_at(node);
	for (index_t child; child = (node<<1)|1, child < len; node = child) {
		// Prefer the lighter of the two children (when the right one exists).
		child += (likely(child+1 != len) && heap_lighter(heap_at(child+1), heap_at(child)));
		if fickle(!heap_lighter(heap_at(child), x)) { break; }
		// Pull the lighter child up; x's final slot is assigned after the loop.
		heap_at(node) = heap_at(child);
	}
	heap_at(node) = x;
}
#undef heap_lighter
#undef heap_at
|
static inline_within void |
|
heap_build(int min_heap, int stored_backwards, item_t *root, index_t len) { |
|
surely(len >= 0); |
|
for (index_t i = len/2; i != 0; i--) { |
|
heap_fix_down(min_heap, stored_backwards, root, i-1, len); |
|
} |
|
} |
|
// Convenience wrappers for the two layouts actually used below:
// lr_* — left-to-right max-heap; rl_* — right-to-left min-heap.
#define lr_max_heap_fix_down(root, node, len) (heap_fix_down(0,0, (root), (node), (len)))
#define rl_min_heap_fix_down(root, node, len) (heap_fix_down(1,1, (root), (node), (len)))
#define lr_max_heap_build(root, len) (heap_build(0,0, (root), (len)))
#define rl_min_heap_build(root, len) (heap_build(1,1, (root), (len)))
|
|
|
// Use Floyd-Rivest Select algorithm to partition elements in [beg,end[ using
// the pivot_rank-th largest element in that interval (counting from zero) for
// the pivot, which hence ends up at exactly the pivot_rank index in the array
// when partitioning completes.
//
// - Floyd, R. W., Rivest, R. L. (1975). Expected time bounds for selection.
// Communications of the ACM, 18(3), 165-172.
// - Kiwiel, K. C. (2005). On Floyd and Rivest's SELECT algorithm.
// Theoretical Computer Science, 347(1-2), 214-238.
static inline_within void
rank_partition(item_t *arr, index_t beg, index_t end, index_t pivot_rank) {
	enum { CUTOFF_LEN = 600 }; // At most 4 levels of recursion for 32-bit ranges.
	surely(beg <= end), surely(beg <= pivot_rank), surely(pivot_rank < end);
	// Signed arithmetic below: the sample-window math can go negative.
	ptrdiff_t l = beg, r = end-1, k = pivot_rank;
	while (l < r) {
		ptrdiff_t n = r - l + 1;
		ptrdiff_t m = k - l + 1;
		if (n > CUTOFF_LEN) {
			// Recursively select the pivot from a small sample of ~n^(2/3)/2
			// elements centered near the target rank (shifted by `gap` to
			// bias the split), so most elements are discarded in one pass.
			double log_n = log(n);
			ptrdiff_t sample_len = (ptrdiff_t)exp(log_n*(2/3.0)) / 2;
			double gap_sq = log_n * (n - sample_len) * sample_len / n;
			ptrdiff_t gap = (1 - 2*(m < n/2)) * (ptrdiff_t)sqrt(gap_sq) / 2;
			ptrdiff_t l_sample = k + gap - m*sample_len/n;
			ptrdiff_t r_sample = k + gap + (n-m)*sample_len/n;
			if unlikely(l_sample < l) { l_sample = l; }
			if unlikely(r_sample > r) { r_sample = r; }
			rank_partition(arr, l_sample, r_sample+1, k);
		}
		// Partition around the element now at rank k, then shrink [l,r] to
		// whichever side still contains the target rank.
		ptrdiff_t p = l + partition(arr+l, n, m-1);
		if fickle(k <= p) { r = p-1; }
		if fickle(p <= k) { l = p+1; }
	}
}
|
// Partially heap-sort elements from [0,len[ so that those whose ranks in |
|
// the sorted sequence belong to [lo,hi[ take their correct positions. |
|
// |
|
// The implementation bifurcates based on whether the requested sorting interval |
|
// is closer to the beginning or the end of the array. In the former case, we: |
|
// |
|
// L1. Partition the array around the (hi-1)-st largest element as a pivot. |
|
// L2. Build a right-to-left min-heap out of elements in [lo,hi[ so that |
|
// the root node holding the minimum value is stored at index hi-1. |
|
// L3. For each element in [0,lo[, if it is exceeding the current heap |
|
// minimum, exchange the former with the latter and fix the heap. |
|
// L4. Fill [lo,hi[ going left to right with successively removed heap |
|
// minimums, simultaneously shortening (and maintaining) the heap. |
|
// |
|
// Otherwise, if the interval is closer to the end of the array, we: |
|
// |
|
// R1. Partition the array around the lo-th largest element as a pivot. |
|
// R2. Build a left-to-right max-heap out of elements in [lo,hi[ so that |
|
// the root node holding the maximum value is stored at index lo. |
|
// R3. For each element in [hi,len[, if it is inferior to the current heap |
|
// maximum, exchange the former with the latter and fix the heap. |
|
// R4. Fill [lo,hi[ going right to left with successively removed heap |
|
// maximums, simultaneously shortening (and maintaining) the heap. |
|
static void |
|
rank_partitioned_heapsort(item_t *arr, index_t len, index_t lo, index_t hi) { |
|
surely(len >= 0), surely(0 <= lo), surely(lo < hi), surely(hi <= len); |
|
if unlikely(len <= 1 || lo >= hi) { return; } |
|
|
|
if (lo < len-hi) { |
|
rank_partition(arr, 0, len, hi-1); |
|
|
|
index_t heap_len = hi - lo; |
|
item_t *heap_root = arr + hi - 1; |
|
rl_min_heap_build(heap_root, heap_len); |
|
|
|
for (index_t i = 0; i != lo; i++) { |
|
if (!item_less(*heap_root, arr[i])) { continue; } |
|
swap(*heap_root, arr[i]); |
|
rl_min_heap_fix_down(heap_root, 0, heap_len); |
|
} |
|
|
|
for (index_t i = lo; i != hi-1; i++) { |
|
swap(arr[i], *heap_root); |
|
rl_min_heap_fix_down(heap_root, 0, --heap_len); |
|
} |
|
} else { |
|
if likely(lo != 0) { rank_partition(arr, 0, len, lo); } |
|
|
|
index_t heap_len = hi - lo; |
|
item_t *heap_root = arr + lo; |
|
lr_max_heap_build(heap_root, heap_len); |
|
|
|
for (index_t i = hi; i != len; i++) { |
|
if (!item_less(arr[i], *heap_root)) { continue; } |
|
swap(*heap_root, arr[i]); |
|
lr_max_heap_fix_down(heap_root, 0, heap_len); |
|
} |
|
|
|
for (index_t i = hi-1; i != lo; i--) { |
|
swap(arr[i], *heap_root); |
|
lr_max_heap_fix_down(heap_root, 0, --heap_len); |
|
} |
|
} |
|
} |
|
|
|
|
|
//-----------------------------------------------------------------------------* |
|
// BENCHMARKS |
|
//-----------------------------------------------------------------------------* |
|
|
|
#include <stdio.h> // printf() |
|
#include <stdlib.h> // calloc(), free() |
|
#include <time.h> // clock_gettime() |
|
|
|
static must_inline uint64_t |
|
nsec(void) { |
|
struct timespec t; |
|
clock_gettime(CLOCK_MONOTONIC_RAW, &t); |
|
return (uint64_t)t.tv_sec*1000*1000*1000 + (uint64_t)t.tv_nsec; |
|
} |
|
|
|
static inline void |
|
shuffle(struct pcg32 *r, item_t *arr, uint32_t n) { |
|
for (uint32_t i = 0; i < n; i++) { |
|
swap(arr[i], arr[onto_range_u32(pcg32_next(r), i, n)]); |
|
} |
|
} |
|
|
|
// Adapter giving partial_quicksort_rand() the common benchmark signature;
// the part_idxs argument is unused by this algorithm.
static inline_within void
call_partial_quicksort_rand(item_t *arr, index_t *part_idxs, index_t len, index_t lo, index_t hi) {
	(void)part_idxs;
	partial_quicksort_rand(arr, len, lo, hi);
}
|
// Adapter giving rank_partitioned_heapsort() the common benchmark signature;
// the part_idxs argument is unused by this algorithm.
static inline_within void
call_rank_partitioned_heapsort(item_t *arr, index_t *part_idxs, index_t len, index_t lo, index_t hi) {
	(void)part_idxs;
	rank_partitioned_heapsort(arr, len, lo, hi);
}
|
|
|
// Benchmark driver: runs each partial-sort implementation over a table of
// test configurations, verifying after every sort that [lo,hi[ holds the
// expected values (the array is a permutation of its own indices).
extern int
main(void) {
	enum {
		RANDOMIZE = 0, // 1: seed from the clock; 0: fixed seed (reproducible).
		VERBOSE = 0,   // 1: per-run and per-sort reporting.
	};
	// Common signature shared by all benchmarked implementations.
	typedef void partial_sort_test_func(item_t *, index_t *, index_t, index_t, index_t);
	static struct test_func {
		partial_sort_test_func *func;
		char const *name;
	} const funcs[] = {
#define X(p,f) {glue(p,f), #f}
		X(,inc_quicksort_rand),
		X(call_,partial_quicksort_rand),
		X(call_,rank_partitioned_heapsort),
#undef X
	}; enum { N_FUNCS = sizeof(funcs)/sizeof(funcs[0]) };
	static struct test_params {
		int n_runs;            // Fresh shuffles per test.
		int n_sorts;           // Partial sorts per shuffle.
		index_t arr_len;
		index_t min_sort_len;  // Bounds on the sorted interval's length.
		index_t max_sort_len;
	} const tests[] = {
		{100, 5, 100, 5, 20},
		{7000, 1, 100*1000, 100, 1000},
		{500, 100, 100*1000, 100, 1000},
		{30, 500, 1000*1000, 100, 5000},
		{8, 2000, 1000*1000, 100, 5000},
		{4, 500, 10*1000*1000, 100, 10*1000},
		{4, 25, 100*1000*1000, 100, 10*1000},
	}; enum { N_TESTS = sizeof(tests)/sizeof(tests[0]) };

	// Allocate once, sized for the largest test.
	index_t max_arr_len = 0;
	for (int i = 0; i < N_TESTS; i++) {
		if (max_arr_len < tests[i].arr_len) {
			max_arr_len = tests[i].arr_len;
		}
	}
	item_t *arr = calloc(max_arr_len, sizeof(arr[0]));
	index_t *part_idxs = calloc(max_arr_len, sizeof(part_idxs[0]));
	surely(arr != NULL), surely(part_idxs != NULL);
	if (!arr || !part_idxs) { return 1; }

	uint64_t const rng_seed = RANDOMIZE ? nsec() : 809;

	for (int i_func = 0; i_func < N_FUNCS; i_func++) {
		partial_sort_test_func *const partial_sort = funcs[i_func].func;
		printf("\nFunction %s\n", funcs[i_func].name);

		// Re-seed per function so every implementation sees identical inputs.
		struct pcg32 rng = pcg32(rng_seed);

		uint64_t all_tests_dt = 0;
		for (int i_test = 0; i_test < N_TESTS; i_test++) {
			struct test_params const T = tests[i_test];
			if (VERBOSE) {
				printf("Test %3d. Parameters: %d x %d, %td, %td..%td\n",
						i_test+1, T.n_runs, T.n_sorts, (ptrdiff_t)T.arr_len,
						(ptrdiff_t)T.min_sort_len, (ptrdiff_t)T.max_sort_len);
			}

			uint64_t test_dt = 0;
			for (int i_run = 0; i_run < T.n_runs; i_run++) {
				// Initialize the array to a random permutation (of its indices).
				for (index_t i = 0; i < T.arr_len; i++) { arr[i] = (item_t)i; }
				shuffle(&rng, arr, T.arr_len);
				// Set pivot indices to INC_QUICKSORT_CHAOS_MARK for inc_quicksort_rand():
				apriori(INC_QUICKSORT_CHAOS_MARK == (index_t)-1);
				memset(part_idxs, 0xFF, T.arr_len*sizeof(part_idxs[0]));

				uint64_t run_dt = 0;
				for (int i_sort = 0; i_sort < T.n_sorts; i_sort++) {
					// Pick a random interval [lo,hi[ of length in
					// [min_sort_len, max_sort_len].
					index_t span = onto_range_u32(pcg32_next(&rng), T.min_sort_len, T.max_sort_len+1);
					index_t lo = onto_range_u32(pcg32_next(&rng), 0, T.arr_len-span+1);
					index_t hi = lo + span;

					uint64_t t0 = nsec();
					(*partial_sort)(arr, part_idxs, T.arr_len, lo, hi);
					uint64_t dt = nsec() - t0;
					run_dt += dt;

					// Verify: after a correct partial sort, arr[i] == i on [lo,hi[.
					int ok = 1; index_t first_diff = 0;
					for (index_t i = lo; i < hi; i++) {
						if (arr[i] != (item_t)i) {
							ok = 0, first_diff = i;
							break;
						}
					}
					if (VERBOSE || !ok) {
						printf("Test %3d, run %3d, sort %3d. [%12td %12td] (%6td): %4s %.9f\n",
								i_test+1, i_run+1, i_sort+1, (ptrdiff_t)lo, (ptrdiff_t)hi-1, (ptrdiff_t)span,
								ok ? "OK" : "FAIL", dt*1e-9/span);
					}
					if (!ok) {
						// Dump a window of values around the first mismatch.
						enum { BEFORE = 5, AFTER = 10 };
						index_t l = first_diff-lo >= BEFORE ? first_diff-BEFORE : lo;
						index_t r = hi-first_diff >= AFTER+1 ? first_diff+AFTER+1 : hi;
						printf("    [%td..%td]:", (ptrdiff_t)l, (ptrdiff_t)r-1);
						for (index_t i = l; i < r; i++) {
							printf(" %llu", (unsigned long long)arr[i]);
						}
						printf("\n");
					}
				}
				test_dt += run_dt;

				if (VERBOSE) {
					printf("Test %3d, run %3d. Elapsed: %9.6f\n",
							i_test+1, i_run+1, run_dt*1e-9);
				}
			}
			all_tests_dt += test_dt;

			printf("Test %3d. Elapsed: %7.3f total, %9.6f per run\n",
					i_test+1, test_dt*1e-9, test_dt*1e-9/T.n_runs);
			fflush(stdout);
		}

		printf("Total test time: %7.3f\n", all_tests_dt*1e-9);
		fflush(stdout);
	}

	free(arr), arr = NULL;
	free(part_idxs), part_idxs = NULL;

	return 0;
}