|
// Copyright 2024 Igor Burago |
|
// SPDX-License-Identifier: ISC |
|
|
|
#include <math.h> // for partition_floyd_rivest() |
|
#include <stddef.h> // ptrdiff_t |
|
#include <stdint.h> |
|
#include <string.h> // memcpy(), memmove(), memset() |
|
|
|
// Built-in u128 is not strictly required, but it makes mcg64 simpler and faster.
typedef __uint128_t uint128_t;

// Inlining controls (GCC/Clang attribute spellings):
#define must_inline inline __attribute__((always_inline))
#define must_not_inline __attribute__((noinline))
// flatten: inline all calls made *inside* the annotated function into its body.
#define inline_within __attribute__((flatten))

// fickle(x): the truth value of x follows no predictable pattern, so the
// compiler should prefer branchless code over relying on branch prediction.
#if __has_builtin(__builtin_unpredictable)
#define fickle(x) (__builtin_unpredictable((x)))
#else
#define fickle(x) (x)
#endif
// usually(x, c): x almost always evaluates to c (branch-layout hint).
#if __has_builtin(__builtin_expect)
#define usually(x, c) (__builtin_expect((x), (c)))
#else
#define usually(x, c) (x)
#endif
#define likely(x) (usually(!!(x), 1))
#define unlikely(x) (usually(!!(x), 0))

// Token pasting with macro-argument expansion; dub(name) yields an
// identifier unique per source line (used for hygienic locals in macros).
#define glue_raw(x, y) x##y
#define glue(x, y) glue_raw(x, y)
#define dub(name) glue(name, glue(__L, __LINE__))
|
|
|
// apriori(x): compile-time assertion usable at file or block scope; a false
// x gives the bit-field a negative width, which is a constraint violation.
#define apriori(x) extern void *(*compile_time_check_dummy(void))[ \
	sizeof(struct { unsigned compile_time_check : (x) ? 1 : -1; })]

// surely(x): runtime assertion. Debug builds report the failing location and
// trap; release builds instead tell the optimizer that !x is impossible
// (the for(;;) around unreachable() makes a false x undefined behavior).
// #define DEBUG_BUILD
#if defined DEBUG_BUILD
#include <stdio.h>
#define surely(x) (likely(x) ? (void)0 : \
	(fprintf(stderr, "FATAL: %s:%d: surely(%s) has failed!\n", __FILE__, __LINE__, #x), \
	fflush(NULL), \
	__builtin_trap()))
#define unreachable() surely(0 && "unreachable")
#else
#if __has_builtin(__builtin_unreachable)
#define unreachable() (__builtin_unreachable())
#else
#define unreachable() (*(volatile int *)0 = 0)
#endif
#define surely(x) do { if (!(x)) { for (;;) { unreachable(); } } } while (0)
#endif
|
|
|
// swap(x, y): byte-wise swap of two equally-sized lvalues of any type
// (size equality is checked at compile time). The middle copy uses memmove,
// so the two objects are allowed to overlap in memory.
#define swap(x, y) do { \
	void *dub(xp) = &(x), *dub(yp) = &(y); \
	apriori(sizeof(x) == sizeof(y)); \
	unsigned char dub(tmp)[sizeof(x)]; \
	memcpy(dub(tmp), dub(xp), sizeof(dub(tmp))); \
	memmove(dub(xp), dub(yp), sizeof(dub(tmp))); \
	memcpy(dub(yp), dub(tmp), sizeof(dub(tmp))); \
} while (0)
|
|
|
// One step of the splitmix64 generator: advance the state *s by the
// golden-ratio increment, then return an avalanche-mixed 64-bit value.
static inline uint64_t
splitmix64_next(uint64_t *s) {
	uint64_t z = (*s += 0x9E3779B97F4A7C15u);
	z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9u;
	z = (z ^ (z >> 27)) * 0x94D049BB133111EBu;
	z ^= z >> 31;
	return z;
}
|
|
|
struct mcg64 { uint128_t s; }; |
|
static inline struct mcg64 |
|
mcg64(uint64_t seed) { |
|
uint64_t a = splitmix64_next(&seed); |
|
uint64_t b = splitmix64_next(&seed); |
|
return (struct mcg64){((uint128_t)a << 64) + b}; |
|
} |
|
static must_inline uint64_t |
|
mcg64_next(struct mcg64 *r) { |
|
return (r->s *= 0xDA942042E4DD58B5u) >> 64; |
|
} |
|
|
|
// Map a uniformly random 32-bit x onto [beg,end[ with the multiply-shift
// trick (no division). Fast, and only slightly biased when (end-beg)
// approaches the u32 maximum.
#ifndef must_inline // standalone-build fallback; a no-op within this file
#define must_inline inline __attribute__((always_inline))
#endif
static must_inline uint32_t
onto_range_u32(uint32_t x, uint32_t beg, uint32_t end) {
	uint64_t scaled = (uint64_t)x * (end - beg);
	return beg + (uint32_t)(scaled >> 32);
}
|
// Map a uniformly random 64-bit x onto [beg,end[ with the multiply-shift
// trick, using a 128-bit intermediate product. Only slightly biased when
// (end-beg) approaches the u64 maximum.
#ifndef must_inline // standalone-build fallback; a no-op within this file
#define must_inline inline __attribute__((always_inline))
#endif
static must_inline uint64_t
onto_range_u64(uint64_t x, uint64_t beg, uint64_t end) {
	__uint128_t scaled = (__uint128_t)x * (end - beg);
	return beg + (uint64_t)(scaled >> 64);
}
|
|
|
// Read up to sizeof(uint64_t) bytes from p into the low-address bytes of a
// u64 (native byte order); remaining bytes are zero. A `size` larger than
// 8 is clamped, so callers may simply pass the number of bytes available.
// Fix: p is only ever read through, so it is now const-qualified
// (backward-compatible — non-const pointers convert implicitly).
#ifndef must_inline // standalone-build fallback; a no-op within this file
#define must_inline inline __attribute__((always_inline))
#endif
static must_inline uint64_t
load_u64(const void *p, size_t size) {
	union { unsigned char b[sizeof(uint64_t)]; uint64_t u; } x = {.u = 0};
	memcpy(x.b, p, size <= sizeof(uint64_t) ? size : sizeof(uint64_t));
	return x.u;
}
|
|
|
|
|
//-----------------------------------------------------------------------------* |
|
// ARRAY TYPE PARAMETERS |
|
//-----------------------------------------------------------------------------* |
|
|
|
// The element type being sorted, and its strict-weak-order comparator.
typedef int item_t;
#define item_less(x, y) ((x) < (y))

// Index type. Both signed and unsigned types are supported:
typedef uint32_t idx_t;

// RNG used for pivot/index sampling; flip to 1 to use PCG32 instead of MCG.
// NOTE(review): struct pcg32 is defined later in the file (benchmark
// section), so enabling this path as-is would need a forward declaration
// above this point — verify before switching it on.
#if 0
#define IDX_RNG_USING_PCG
#endif
#if defined IDX_RNG_USING_PCG
typedef struct pcg32 idx_rng_t;
#define idx_rng(seed) (pcg32(seed))
// idx_rand(rng, len): near-uniform random index in [0,len[.
#define idx_rand(rng, len) (onto_range_u32(pcg32_next(rng), 0, (len)))
#else
// MCG is faster than LCG or PCG, and is usually good enough for quicksort:
typedef struct mcg64 idx_rng_t;
#define idx_rng(seed) (mcg64(seed))
#define idx_rand(rng, len) (onto_range_u64(mcg64_next(rng), 0, (len)))
#endif
|
|
|
|
|
//-----------------------------------------------------------------------------* |
|
// OPTION 1: One-off partial quicksort (no state shared between calls) |
|
//-----------------------------------------------------------------------------* |
|
|
|
// Maximum of two values; evaluates its arguments twice, so pass only
// side-effect-free expressions.
#define max(x, y) ((x) > (y) ? (x) : (y))
// Interval lengths at or below which quicksort falls back to insertion sort:
enum {
	// Always stop partitioning at this length or shorter.
	QSORT_CUTOFF_HARD = 4,
	// Stop earlier when the whole interval must end up sorted anyway
	// (scaled by item size — presumably targeting ~64 bytes of items).
	QSORT_CUTOFF_SOFT = max(8, 64/sizeof(item_t)),
};
apriori(QSORT_CUTOFF_HARD >= 3); // We use the median-of-three pivoting.
|
|
|
// Insertion sort. |
|
static inline void |
|
qsort_cutoff_sort(item_t *arr, idx_t len) { |
|
surely(len >= 0); |
|
for (idx_t i = (len != 0); i != len; i++) { |
|
if fickle(!item_less(arr[i], arr[i-1])) { continue; } |
|
item_t x = arr[i]; |
|
idx_t j = i; |
|
do { arr[j] = arr[j-1]; } while (--j != 0 && fickle(item_less(x, arr[j-1]))); |
|
arr[j] = x; |
|
} |
|
} |
|
// Hoare-partition elements in [0,len[ using the item at piv_idx for pivot.
// On return, the pivot value sits at the returned index j, items before j
// do not exceed it, and items after j are not less than it.
static must_inline idx_t
partition(item_t *arr, idx_t len, idx_t piv_idx) {
	surely(len >= 2); surely(0 <= piv_idx); surely(piv_idx < len);

	// Park the pivot value at one end of the interval: at len-1 when
	// arr[len-1] is less than the pivot, at 0 otherwise. Either way both
	// ends of the interval then bound the scans below, so neither inner
	// do-while can run off the interval (the ends act as sentinels).
	item_t piv = arr[piv_idx];
	arr[piv_idx] = arr[0];
	piv_idx = item_less(arr[len-1], piv) ? len-1 : 0;
	arr[0] = arr[piv_idx];
	arr[piv_idx] = piv;

	// Classic Hoare scans: i walks right over items < piv, j walks left
	// over items > piv; out-of-order pairs are swapped until the scans cross.
	idx_t i = 0, j = len-1;
	for (;;) {
		do { i++; } while (item_less(arr[i], piv));
		do { j--; } while (item_less(piv, arr[j]));
		if (i >= j) { break; }
		swap(arr[i], arr[j]);
	}
	// Final pivot position: j itself when the pivot was parked at index 0,
	// one past j when it was parked at index len-1.
	j += (piv_idx != 0);
	swap(arr[j], arr[piv_idx]);
	return j;
}
|
// Choose three distinct random indices in [0,len[ and return the index of
// the item holding the median of the three sampled values.
static must_inline idx_t
median_rand_3(idx_rng_t *rng, item_t *arr, idx_t len) {
	surely(len >= 3);

	// Sample without replacement: draw from ranges of size len, len-1, and
	// len-2, then shift the later draws past the positions already taken.
	idx_t i[3];
	i[0] = idx_rand(rng, len-0);
	i[1] = idx_rand(rng, len-1);
	i[2] = idx_rand(rng, len-2);
	unsigned i1_past_i0 = (i[1] >= i[0]);
	i[1] += i1_past_i0; // Count i[1]-th item, jumping over i[0]-th.
	i[2] += (i[2] >= i[!i1_past_i0]); // Count i[2]-th, skipping min(i[0],i[1])-th...
	i[2] += (i[2] >= i[i1_past_i0]); // ...and then max(i[0],i[1])-th.

	// Branchless median-of-3: the two xor-ed comparison sums always form an
	// index in {0,1,2} that selects the middle one of the three values.
	unsigned gt01 = !!item_less(arr[i[1]], arr[i[0]]);
	unsigned lt02 = !!item_less(arr[i[0]], arr[i[2]]);
	unsigned lt12 = !!item_less(arr[i[1]], arr[i[2]]);
	return i[(gt01^lt02) + (lt02^lt12)];
}
|
// Partially quick-sort elements from [l,r[ so that those whose ranks in
// the sorted sequence belong to [lo,hi[ take their correct positions.
// Stack depth stays logarithmic: the smaller half is recursed into and the
// larger half continues in the enclosing loop (manual tail call).
static void
part_qsort_rand_rec(idx_rng_t *rng, item_t *arr, idx_t lo, idx_t hi, idx_t l, idx_t r) {
	for (;;) {
		// Small intervals: insertion-sort and stop. The soft cutoff applies
		// only when the whole interval lies inside the requested [lo,hi[.
		if unlikely(r-l <= QSORT_CUTOFF_HARD ||
				(r-l <= QSORT_CUTOFF_SOFT && lo <= l && r <= hi)) {
			qsort_cutoff_sort(arr+l, r-l);
			return;
		}
		idx_t r1 = l + partition(arr+l, r-l, median_rand_3(rng, arr+l, r-l));
		idx_t l1 = r1 + 1;
		// Descend into a side only if it intersects [lo,hi[ and holds more
		// than one element; [l,r1[ is the left side, [l1,r[ the right one.
		unsigned go_left = (lo < r1 && r1-l > 1);
		unsigned go_right = (l1 < hi && r-l1 > 1);

		switch fickle((go_left<<1) | go_right) {
		case 0: { return; }
		case 1: right: { l = l1; break; } // loop on the right side
		case 2: left: { r = r1; break; }  // loop on the left side
		case 3: {
			// Both sides needed: recurse into the smaller, loop on the larger.
			if (r1-l <= r-l1) {
				part_qsort_rand_rec(rng, arr, lo, hi, l, r1);
				goto right;
			} else {
				part_qsort_rand_rec(rng, arr, lo, hi, l1, r);
				goto left;
			}
		}
		default: { unreachable(); }
		}
	}
}
|
static inline void |
|
part_qsort_rand(item_t *arr, idx_t len, idx_t lo, idx_t hi) { |
|
surely(len >= 0); surely(0 <= lo); surely(lo <= hi); surely(hi <= len); |
|
if (len <= 1 || lo >= hi) { return; } |
|
|
|
idx_rng_t rng = idx_rng((uintptr_t)arr); |
|
idx_t i = idx_rand(&rng, len); |
|
rng = idx_rng((uintptr_t)arr + load_u64(arr+i, (len-i)*sizeof(arr[0]))); |
|
|
|
part_qsort_rand_rec(&rng, arr, lo, hi, 0, len); |
|
} |
|
|
|
|
|
//-----------------------------------------------------------------------------* |
|
// OPTION 2: Incremental quicksort (pivot indices are shared between calls) |
|
//-----------------------------------------------------------------------------* |
|
|
|
// Partially quick-sort elements from [l,r[ so that those whose ranks in the |
|
// sorted sequence belong to [lo,hi[ end up in their correct positions, taking |
|
// into account the history of partitions from previous invocations. |
|
// |
|
// The pivot indices of past partitions are stored in the piv_idx array in the |
|
// order of preorder traversal of the quicksort tree. Special index values |
|
// SORTED and CHAOS signify that an interval is, respectively, fully sorted and |
|
// has not been partitioned yet (all piv_idx must initially be set to CHAOS). |
|
#define INC_QSORT_CHAOS ((idx_t)-1) |
|
#define INC_QSORT_SORTED ((idx_t)-2) |
|
// Recursive worker for the incremental quicksort; see the comment above for
// the [lo,hi[ contract and the piv_idx encoding. The smaller half is
// recursed into, the larger half continues in the enclosing loop.
static void
inc_qsort_rand_rec(idx_rng_t *rng, item_t *arr, idx_t lo, idx_t hi,
		idx_t l, idx_t r, idx_t *piv_idx) {
	for (;;) {
		// If [l,r[ lies entirely inside [lo,hi[, finishing it lets us mark
		// its slot SORTED so that future invocations skip it wholesale.
		int will_be_sorted = (lo <= l && r <= hi);
		if unlikely(r-l <= QSORT_CUTOFF_HARD ||
				(r-l <= QSORT_CUTOFF_SOFT && will_be_sorted)) {
			qsort_cutoff_sort(arr+l, r-l);
			*piv_idx = INC_QSORT_SORTED;
			return;
		}
		// Reuse the pivot recorded by a previous invocation if any;
		// otherwise partition now and record the result.
		idx_t r1 = *piv_idx;
		if likely(r1 == INC_QSORT_CHAOS) {
			r1 = l + partition(arr+l, r-l, median_rand_3(rng, arr+l, r-l));
		}
		*piv_idx = will_be_sorted ? INC_QSORT_SORTED : r1;
		idx_t l1 = r1 + 1;

		// Preorder slot layout: interval [l,r[ owns r-l slots starting at
		// piv_idx; the left child's slots begin right after the node's own
		// slot, the right child's begin l1-l slots in.
		idx_t *piv_idx_left = piv_idx + 1;
		idx_t *piv_idx_right = piv_idx + (l1 - l);
		unsigned go_left = (lo < r1 && r1-l > 1 && *piv_idx_left != INC_QSORT_SORTED);
		unsigned go_right = (l1 < hi && r-l1 > 1 && *piv_idx_right != INC_QSORT_SORTED);

		switch fickle((go_left<<1) | go_right) {
		case 0: { return; }
		case 1: right: { l = l1, piv_idx = piv_idx_right; break; }
		case 2: left: { r = r1, piv_idx = piv_idx_left; break; }
		case 3: {
			// Both sides needed: recurse into the smaller, loop on the larger.
			if (r1-l <= r-l1) {
				inc_qsort_rand_rec(rng, arr, lo, hi, l, r1, piv_idx_left);
				goto right;
			} else {
				inc_qsort_rand_rec(rng, arr, lo, hi, l1, r, piv_idx_right);
				goto left;
			}
		}
		default: { unreachable(); }
		}
	}
}
|
static inline void |
|
inc_qsort_rand(item_t *arr, idx_t *piv_idxs, idx_t len, idx_t lo, idx_t hi) { |
|
surely(len >= 0); surely(0 <= lo); surely(lo <= hi); surely(hi <= len); |
|
if (len <= 1 || lo >= hi || *piv_idxs == INC_QSORT_SORTED) { return; } |
|
|
|
idx_rng_t rng = idx_rng((uintptr_t)arr); |
|
idx_t i = idx_rand(&rng, len); |
|
rng = idx_rng((uintptr_t)arr + load_u64(arr+i, (len-i)*sizeof(arr[0]))); |
|
|
|
inc_qsort_rand_rec(&rng, arr, lo, hi, 0, len, piv_idxs); |
|
} |
|
|
|
|
|
//-----------------------------------------------------------------------------* |
|
// OPTION 3: Floyd-Rivest partitioning + N largest scan using binary heap |
|
//-----------------------------------------------------------------------------* |
|
|
|
// Binary heap implementation. Supports both min- and max-heaps, as well as |
|
// both left-to-right and right-to-left storage layout in memory. |
|
// heap_superior(x, y): does x belong strictly above y in the heap order?
#define heap_superior(x, y) ((min_heap) ? item_less((x), (y)) : item_less((y), (x)))
// heap_at(i): the i-th heap node; the stride is negated for the
// right-to-left storage layout.
#define heap_at(i) (*((stored_backwards) ? (root)-(i) : (root)+(i)))
// Sift the value at `node` down until no child is superior to it.
static inline void
heap_fix_down(int min_heap, int stored_backwards, item_t *root, idx_t node, idx_t len) {
	surely(len > 0); surely(0 <= node); surely(node < len);
	item_t x = heap_at(node);
	for (idx_t child; child = (node<<1)|1, child < len; node = child) {
		// Compare against the superior of the two children (when the
		// right child exists).
		if likely(child+1 != len) {
			child += !!heap_superior(heap_at(child+1), heap_at(child));
		}
		if fickle(!heap_superior(heap_at(child), x)) { break; }
		// Pull the child up; x's final slot is written once, after the loop.
		heap_at(node) = heap_at(child);
	}
	heap_at(node) = x;
}
#undef heap_superior
#undef heap_at
|
static inline_within void |
|
heap_build(int min_heap, int stored_backwards, item_t *root, idx_t len) { |
|
surely(len >= 0); |
|
for (idx_t i = len/2; i != 0; i--) { |
|
heap_fix_down(min_heap, stored_backwards, root, i-1, len); |
|
} |
|
} |
|
#define lr_max_heap_fix_down(root, node, len) (heap_fix_down(0,0, (root), (node), (len))) |
|
#define rl_min_heap_fix_down(root, node, len) (heap_fix_down(1,1, (root), (node), (len))) |
|
#define lr_max_heap_build(root, len) (heap_build(0,0, (root), (len))) |
|
#define rl_min_heap_build(root, len) (heap_build(1,1, (root), (len))) |
|
|
|
// Use Floyd-Rivest Select algorithm to partition elements in [beg,end[ using |
|
// the piv_rank-th largest element in that interval (counting from zero) for |
|
// the pivot, which hence ends up at exactly the piv_rank index in the array |
|
// when partitioning completes. |
|
// |
|
// - Floyd, R. W., Rivest, R. L. (1975). Expected time bounds for selection. |
|
// Communications of the ACM, 18(3), 165-172. |
|
// - Kiwiel, K. C. (2005). On Floyd and Rivest's SELECT algorithm. |
|
// Theoretical Computer Science, 347(1-2), 214-238. |
|
static void
partition_floyd_rivest(item_t *arr, idx_t beg, idx_t end, idx_t piv_rank) {
	enum { CUTOFF_LEN = 600 }; // At most 4 levels of recursion for 32-bit ranges.
	surely(beg <= end); surely(beg <= piv_rank); surely(piv_rank < end);
	// Work in signed ptrdiff_t so the sampling arithmetic below can go
	// transiently negative without wrapping.
	ptrdiff_t l = beg, r = end-1, k = piv_rank;
	while (l < r) {
		ptrdiff_t n = r - l + 1; // current interval length
		ptrdiff_t m = k - l + 1; // target rank within the interval, 1-based
		if (n > CUTOFF_LEN) {
			// First select recursively within a small subinterval around the
			// expected pivot position so the partition below is near-balanced:
			// s is the sample size (~n^(2/3)); g shifts the sample bounds by
			// a confidence term whose sign leans toward the side holding k
			// (per Floyd-Rivest; see the references above).
			double log_n = log(n);
			ptrdiff_t s = (ptrdiff_t)exp(log_n*(2/3.0)) / 2;
			ptrdiff_t g = (1 - 2*(m < n/2)) * (ptrdiff_t)sqrt(log_n*(n-s)*s/n) / 2;
			ptrdiff_t l1 = k + g - m*s/n;
			ptrdiff_t r1 = k + g + (n-m)*s/n;
			if unlikely(l1 < l) { l1 = l; }
			if unlikely(r1 > r) { r1 = r; }
			partition_floyd_rivest(arr, l1, r1+1, k);
		}
		// Partition [l,r] around the element currently at index k (relative
		// index m-1), then shrink to whichever side still contains rank k;
		// when p == k both bounds move and the loop terminates.
		ptrdiff_t p = l + partition(arr+l, n, m-1);
		if fickle(k <= p) { r = p-1; }
		if fickle(p <= k) { l = p+1; }
	}
}
|
// Partially heap-sort elements from [0,len[ so that those whose ranks in |
|
// the sorted sequence belong to [lo,hi[ take their correct positions. |
|
// |
|
// If the requested sorting interval is closer to the beginning of the array: |
|
// |
|
// (A1) Partition the array around the (hi-1)-st largest element as a pivot. |
|
// (A2) Build a right-to-left min-heap out of elements in [lo,hi[ so that |
|
// the root node holding the minimum value is stored at index hi-1. |
|
// (A3) For each element in [0,lo[, if it is exceeding the current heap |
|
// minimum, exchange the former with the latter, fixing the heap. |
|
// (A4) Fill [lo,hi[ going left to right with successively removed heap |
|
// minimums, simultaneously shortening (and maintaining) the heap. |
|
// |
|
// Otherwise, if the interval is closer to the end of the array: |
|
// |
|
// (B1) Partition the array around the lo-th largest element as a pivot. |
|
// (B2) Build a left-to-right max-heap out of elements in [lo,hi[ so that |
|
// the root node holding the maximum value is stored at index lo. |
|
// (B3) For each element in [hi,len[, if it is inferior to the current heap |
|
// maximum, exchange the former with the latter, fixing the heap. |
|
// (B4) Fill [lo,hi[ going right to left with successively removed heap |
|
// maximums, simultaneously shortening (and maintaining) the heap. |
|
static inline void |
|
part_hsort_frsel(item_t *arr, idx_t len, idx_t lo, idx_t hi) { |
|
surely(len >= 0); surely(0 <= lo); surely(lo < hi); surely(hi <= len); |
|
if (len <= 1 || lo >= hi) { return; } |
|
|
|
if (lo < len-hi) { |
|
partition_floyd_rivest(arr, 0, len, hi-1); |
|
|
|
idx_t heap_len = hi - lo; |
|
item_t *heap_root = arr + hi - 1; |
|
rl_min_heap_build(heap_root, heap_len); |
|
for (idx_t i = 0; i != lo; i++) { |
|
if (!item_less(*heap_root, arr[i])) { continue; } |
|
swap(arr[i], *heap_root); |
|
rl_min_heap_fix_down(heap_root, 0, heap_len); |
|
} |
|
for (idx_t i = lo; i != hi; i++) { |
|
swap(arr[i], *heap_root); |
|
rl_min_heap_fix_down(heap_root, 0, --heap_len); |
|
} |
|
} else { |
|
if likely(lo != 0) { partition_floyd_rivest(arr, 0, len, lo); } |
|
|
|
idx_t heap_len = hi - lo; |
|
item_t *heap_root = arr + lo; |
|
lr_max_heap_build(heap_root, heap_len); |
|
for (idx_t i = hi; i != len; i++) { |
|
if (!item_less(arr[i], *heap_root)) { continue; } |
|
swap(arr[i], *heap_root); |
|
lr_max_heap_fix_down(heap_root, 0, heap_len); |
|
} |
|
for (idx_t i = hi-1; i != lo; i--) { |
|
swap(arr[i], *heap_root); |
|
lr_max_heap_fix_down(heap_root, 0, --heap_len); |
|
} |
|
} |
|
} |
|
|
|
|
|
//-----------------------------------------------------------------------------* |
|
// BENCHMARKS |
|
//-----------------------------------------------------------------------------* |
|
|
|
#include <stdio.h> // printf() |
|
#include <stdlib.h> // calloc(), free() |
|
#include <time.h> // clock_gettime() |
|
|
|
static inline uint64_t |
|
nsec(void) { |
|
struct timespec t; |
|
clock_gettime(CLOCK_MONOTONIC_RAW, &t); |
|
return (uint64_t)t.tv_sec*1000*1000*1000 + (uint64_t)t.tv_nsec; |
|
} |
|
|
|
// PCG32 generator state: s is the underlying LCG state, t the (odd) stream
// increment.
struct pcg32 { uint64_t s, t; };
// Seed a pcg32 by stretching `seed` with two splitmix64 draws; the stream
// increment is forced odd, as the underlying LCG requires.
static inline struct pcg32
pcg32(uint64_t seed) {
	uint64_t state = splitmix64_next(&seed);
	uint64_t inc = splitmix64_next(&seed);
	return (struct pcg32){.s = state, .t = inc|1};
}
|
static inline uint32_t |
|
pcg32_next(struct pcg32 *r) { |
|
uint32_t k = r->s >> 59; |
|
uint32_t x = (r->s ^ (r->s>>18)) >> 27; |
|
r->s = r->s*0x5851F42D4C957F2Du + r->t; |
|
return (x >> k) | (x << (-k & 31)); |
|
} |
|
static inline void |
|
shuffle(struct pcg32 *r, item_t *arr, uint32_t n) { |
|
for (uint32_t i = 0; i < n; i++) { |
|
swap(arr[i], arr[onto_range_u32(pcg32_next(r), i, n)]); |
|
} |
|
} |
|
|
|
static inline void |
|
call_part_qsort_rand(item_t *arr, idx_t *piv_idxs, idx_t len, idx_t lo, idx_t hi) { |
|
part_qsort_rand(arr, len, lo, hi); (void)piv_idxs; |
|
} |
|
static inline void |
|
call_part_hsort_frsel(item_t *arr, idx_t *piv_idxs, idx_t len, idx_t lo, idx_t hi) { |
|
part_hsort_frsel(arr, len, lo, hi); (void)piv_idxs; |
|
} |
|
|
|
// Benchmark driver: for each partial-sort implementation, run every test
// configuration on random permutations, verify that each requested rank
// interval [lo,hi[ ends up correctly sorted, and report timings.
extern int
main(void) {
	enum {
		RANDOMIZE = 0, // nonzero: seed from the clock instead of a constant
		VERBOSE = 0,   // nonzero: print per-run and per-sort details
	};
	// All tested functions share one signature; stateless ones ignore piv_idxs.
	typedef void partial_sort_test_fn(item_t*, idx_t*, idx_t, idx_t, idx_t);
	static const struct test_func {
		partial_sort_test_fn *func;
		const char *name;
	} funcs[] = {
	#define X(p,f) {glue(p,f), #f}
		X(,inc_qsort_rand),
		X(call_,part_qsort_rand),
		X(call_,part_hsort_frsel),
	#undef X
	}; enum { N_FUNCS = sizeof(funcs)/sizeof(funcs[0]) };
	// Each test does n_runs passes over a fresh random array of arr_len
	// items, with n_sorts random intervals of span in [min,max_sort_len]
	// sorted per pass.
	static const struct test_params {
		int n_runs;
		int n_sorts;
		idx_t arr_len;
		idx_t min_sort_len;
		idx_t max_sort_len;
	} tests[] = {
		{100, 5, 100, 5, 20},
		{7000, 1, 100*1000, 100, 1000},
		{500, 100, 100*1000, 100, 1000},
		{30, 500, 1000*1000, 100, 5000},
		{8, 2000, 1000*1000, 100, 5000},
		{4, 500, 10*1000*1000, 100, 10*1000},
		{4, 25, 100*1000*1000, 100, 10*1000},
	}; enum { N_TESTS = sizeof(tests)/sizeof(tests[0]) };

	// Allocate the work arrays once, sized for the largest test.
	idx_t max_arr_len = 0;
	for (int i = 0; i < N_TESTS; i++) {
		if (max_arr_len < tests[i].arr_len) {
			max_arr_len = tests[i].arr_len;
		}
	}
	item_t *arr = calloc(max_arr_len, sizeof(arr[0]));
	idx_t *piv_idxs = calloc(max_arr_len, sizeof(piv_idxs[0]));
	surely(arr); surely(piv_idxs);

	uint64_t rng_seed = RANDOMIZE ? nsec() : 1889;

	for (int i_func = 0; i_func < N_FUNCS; i_func++) {
		partial_sort_test_fn *const partial_sort = funcs[i_func].func;
		printf("\nFunction %s\n", funcs[i_func].name);

		// Reseed per function so every function sees identical inputs.
		struct pcg32 rng = pcg32(rng_seed);

		for (int i_test = 0; i_test < N_TESTS; i_test++) {
			const struct test_params T = tests[i_test];
			if (VERBOSE) {
				printf("Test %3d. Parameters: %d x %d, %td, %td..%td\n",
						i_test+1, T.n_runs, T.n_sorts, (ptrdiff_t)T.arr_len,
						(ptrdiff_t)T.min_sort_len, (ptrdiff_t)T.max_sort_len);
			}

			uint64_t test_dt = 0;
			for (int i_run = 0; i_run < T.n_runs; i_run++) {
				// Initialize the array to a random permutation (of its indices).
				for (idx_t i = 0; i < T.arr_len; i++) { arr[i] = (item_t)i; }
				shuffle(&rng, arr, T.arr_len);
				// Set pivot indices to INC_QSORT_CHAOS for inc_qsort_rand():
				apriori(INC_QSORT_CHAOS == (idx_t)-1);
				memset(piv_idxs, 0xFF, T.arr_len*sizeof(piv_idxs[0]));

				uint64_t run_dt = 0;
				for (int i_sort = 0; i_sort < T.n_sorts; i_sort++) {
					// Pick a random in-bounds interval [lo,hi[ of random span.
					idx_t span = onto_range_u32(pcg32_next(&rng), T.min_sort_len, T.max_sort_len+1);
					idx_t lo = onto_range_u32(pcg32_next(&rng), 0, T.arr_len-span+1);
					idx_t hi = lo + span;

					uint64_t t0 = nsec();
					(*partial_sort)(arr, piv_idxs, T.arr_len, lo, hi);
					uint64_t dt = nsec() - t0;
					run_dt += dt;

					// The array is a permutation of its own indices, so the
					// sorted interval must hold exactly arr[i] == i.
					int ok = 1; idx_t first_diff = 0;
					for (idx_t i = lo; i < hi; i++) {
						if (arr[i] != (item_t)i) {
							ok = 0, first_diff = i;
							break;
						}
					}
					if (VERBOSE || !ok) {
						printf("Test %3d, run %3d, sort %3d. [%12td %12td] (%6td): %4s %.9f\n",
								i_test+1, i_run+1, i_sort+1, (ptrdiff_t)lo, (ptrdiff_t)hi-1, (ptrdiff_t)span,
								ok ? "OK" : "FAIL", dt*1e-9/span);
					}
					if (!ok) {
						// On failure, dump a small window around the first mismatch.
						enum { BEFORE = 5, AFTER = 10 };
						idx_t l = first_diff-lo >= BEFORE ? first_diff-BEFORE : lo;
						idx_t r = hi-first_diff >= AFTER+1 ? first_diff+AFTER+1 : hi;
						printf(" [%td..%td]:", (ptrdiff_t)l, (ptrdiff_t)r-1);
						for (idx_t i = l; i < r; i++) {
							printf(" %llu", (unsigned long long)arr[i]);
						}
						printf("\n");
					}
				}
				test_dt += run_dt;

				if (VERBOSE) {
					printf("Test %3d, run %3d. Elapsed: %9.6f\n",
							i_test+1, i_run+1, run_dt*1e-9);
				}
			}

			printf("Test %3d. Elapsed: %7.3f total, %9.6f per run\n",
					i_test+1, test_dt*1e-9, test_dt*1e-9/T.n_runs);
			fflush(stdout);
		}
	}

	free(arr), arr = 0;
	free(piv_idxs), piv_idxs = 0;

	return 0;
}