|
/* Copyright 2024 Igor Burago. Distributed under the ISC License terms. */ |
|
|
|
#include <math.h> /* exp(), log(), sqrt() for partition_floyd_rivest() */
|
#include <stddef.h> /* ptrdiff_t */ |
|
#include <stdint.h> |
|
#include <string.h> /* memcpy(), memset() */ |
|
|
|
/* Built-in u128 is not strictly required, but it makes mcg64 simpler and faster. */ |
|
typedef __uint128_t uint128_t; |
|
|
|
#define mustinline inline __attribute__((always_inline)) |
|
|
|
#define likely(x) (__builtin_expect(!!(x), 1)) |
|
#define unlikely(x) (__builtin_expect(!!(x), 0)) |
|
|
|
#define glueraw(x, y) x##y |
|
#define glue(x, y) glueraw(x, y) |
|
/* Mint an identifier unique to the expansion line (for hygienic macro locals). */
#define dub(name) glue(name, glue(__L, __LINE__))
|
|
|
/* Compile-time assertion: declares an array parameter of negative size when x is false. */
#define tacitly(x) extern void dub(dummy)(int dub(compile_time_check)[(x) ? 1 : -1])
|
|
|
//#define DEBUG_BUILD |
|
#ifdef DEBUG_BUILD |
|
#include <stdio.h> |
|
#define surely(x) do { \ |
|
if likely(x) {break;} \ |
|
fprintf(stderr, "FATAL: %s:%d: %s does not hold!\n", __FILE__, __LINE__, #x); \ |
|
__builtin_trap(); \ |
|
} while (0) |
|
#else |
|
#define surely(x) do {while (!(x)) {__builtin_unreachable();}} while (0) |
|
#endif |
|
|
|
/* Byte-wise swap of two equally sized lvalues; mismatched sizes fail to compile. */
#define swap(x, y) do { \
|
void *dub(xp) = &(x), *dub(yp) = &(y); \ |
|
char dub(swap)[sizeof(x) == sizeof(y) ? (int)sizeof(x) : -1]; \ |
|
memcpy(dub(swap), dub(xp), sizeof(dub(swap))); \ |
|
memcpy(dub(xp), dub(yp), sizeof(dub(swap))); \ |
|
memcpy(dub(yp), dub(swap), sizeof(dub(swap))); \ |
|
} while (0) |
|
|
|
static inline uint64_t |
|
splitmix64_next(uint64_t *s) { |
|
uint64_t x = *s += 0x9E3779B97F4A7C15u; |
|
x = (x^(x>>30)) * 0xBF58476D1CE4E5B9u; |
|
x = (x^(x>>27)) * 0x94D049BB133111EBu; |
|
return x^(x>>31); |
|
} |
|
|
|
struct mcg64 {uint128_t s;}; |
|
static inline struct mcg64 |
|
mcg64(uint64_t seed) { |
|
uint64_t a = splitmix64_next(&seed); |
|
uint64_t b = splitmix64_next(&seed); |
|
    /* Force the state to be odd: an MCG modulo a power of two needs an odd state for full period. */
    return (struct mcg64){(((uint128_t)a << 64) + b) | 1u};
|
} |
|
static mustinline uint64_t |
|
mcg64_next(struct mcg64 *r) { |
|
return (r->s *= 0xDA942042E4DD58B5u) >> 64; |
|
} |
|
|
|
/* Map a random word onto [beg,end) by fixed-point multiplication. Fast, and the
 * bias is negligible unless (end-beg) approaches the full u32/u64 range. */
|
static mustinline uint32_t |
|
onto_range_u32(uint32_t x, uint32_t beg, uint32_t end) { |
|
x = ((uint64_t)x * (end - beg)) >> 32; |
|
return beg + x; |
|
} |
|
static mustinline uint64_t |
|
onto_range_u64(uint64_t x, uint64_t beg, uint64_t end) { |
|
x = ((uint128_t)x * (end - beg)) >> 64; |
|
return beg + x; |
|
} |
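/*
 * For example, onto_range_u32(x, 0, 10) computes (x*10) >> 32: it treats x as
 * the fixed-point fraction x/2^32 and scales it by the interval length, so
 * x = 0 maps to 0 and x = 0xFFFFFFFF maps to 9.
 */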
|
|
|
/* Read up to sizeof(uint64_t) bytes starting at p into a zero-initialized u64. */
static mustinline uint64_t
load_u64(const void *p, size_t size) {
    if likely(size > sizeof(uint64_t)) {size = sizeof(uint64_t);}
    uint64_t x = 0;
    memcpy(&x, p, size);
    return x;
}
|
|
|
|
|
/*----------------------------------------------------------------------------* |
|
* ARRAY TYPE PARAMETERS |
|
*----------------------------------------------------------------------------*/ |
|
|
|
typedef int item_t; |
|
#define item_less(x, y) ((x) < (y)) |
|
|
|
/* Both signed and unsigned types are supported: */ |
|
typedef uint32_t idx_t; |
|
/* MCG is faster than LCG or PCG, and is good enough for quicksort: */ |
|
typedef struct mcg64 idx_rng_t; |
|
#define idx_rng(seed) (mcg64(seed)) |
|
#define idx_rand(rng, beg, end) (onto_range_u32(mcg64_next(rng), (beg), (end))) |
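
/*
 * A hypothetical alternative configuration (illustrative only, kept commented
 * out): sort an array of doubles with 64-bit indices, reusing the same
 * MCG-based index generator through onto_range_u64():
 *
 *     typedef double item_t;
 *     #define item_less(x, y) ((x) < (y))
 *     typedef uint64_t idx_t;
 *     typedef struct mcg64 idx_rng_t;
 *     #define idx_rng(seed) (mcg64(seed))
 *     #define idx_rand(rng, beg, end) (onto_range_u64(mcg64_next(rng), (beg), (end)))
 */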
|
|
|
|
|
/*----------------------------------------------------------------------------* |
|
* OPTION 1: One-off partial quicksort (no state shared between calls) |
|
*----------------------------------------------------------------------------*/ |
|
|
|
#define max(x, y) ((x) > (y) ? (x) : (y)) |
|
enum { |
|
QSORT_CUTOFF_HARD = 4, |
|
QSORT_CUTOFF_SOFT = max(8, 64/sizeof(item_t)), |
|
}; |
|
tacitly(QSORT_CUTOFF_HARD >= 3); /* We use median-of-three pivoting. */
|
|
|
/* Insertion sort. */ |
|
static mustinline void |
|
qsort_cutoff_sort(item_t *arr, idx_t len) { |
|
    /* Start at i = 1 unless the array is empty (likely() yields !!(len != 0)). */
    for (idx_t j, i = likely(len != 0); i != len; i++) {
|
item_t x = arr[i]; |
|
for (j = i; j != 0 && item_less(x, arr[j-1]); j--) { |
|
arr[j] = arr[j-1]; |
|
} |
|
arr[j] = x; |
|
} |
|
} |
|
/* Hoare-partition elements in [0,len) using the item at piv_idx for pivot. */ |
|
static mustinline idx_t |
|
partition(item_t *arr, idx_t len, idx_t piv_idx) { |
|
surely(len >= 2); surely(0 <= piv_idx); surely(piv_idx < len); |
|
|
|
item_t piv = arr[piv_idx]; |
|
arr[piv_idx] = arr[0]; |
|
piv_idx = item_less(arr[len-1], piv) ? len-1 : 0; |
|
arr[0] = arr[piv_idx]; |
|
arr[piv_idx] = piv; |
|
|
|
idx_t i = 0, j = len-1; |
|
for (;;) { |
|
do {i++;} while (item_less(arr[i], piv)); |
|
do {j--;} while (item_less(piv, arr[j])); |
|
if (i >= j) {break;} |
|
swap(arr[i], arr[j]); |
|
} |
|
j += (piv_idx != 0); |
|
swap(arr[j], arr[piv_idx]); |
|
return j; |
|
} |
|
static mustinline idx_t |
|
median_rand_3(idx_rng_t *rng, item_t *arr, idx_t len) { |
|
surely(len >= 3); |
|
|
|
idx_t i[3]; |
|
i[0] = idx_rand(rng, 0, len-0); |
|
i[1] = idx_rand(rng, 0, len-1); |
|
i[2] = idx_rand(rng, 0, len-2); |
|
int i1_after_i0 = (i[1] >= i[0]); |
|
i[1] += i1_after_i0; /* Count i[1]-th item, jumping over i[0]-th. */ |
|
i[2] += (i[2] >= i[!i1_after_i0]); /* Count i[2]-th, jumping over min(i[0],i[1])-th... */ |
|
i[2] += (i[2] >= i[i1_after_i0]); /* ...and then over max(i[0],i[1])-th. */ |
|
|
|
    /* Branchless median of three: the index arithmetic below maps the comparison
     * outcomes onto whichever of i[0..2] refers to the median value. */
    int lt01 = !!item_less(arr[i[0]], arr[i[1]]);
|
int lt02 = !!item_less(arr[i[0]], arr[i[2]]); |
|
int lt12 = !!item_less(arr[i[1]], arr[i[2]]); |
|
return i[(1 ^ lt01 ^ lt02) + (lt02 ^ lt12)]; |
|
} |
|
/* |
|
* Partially quick-sort elements from [l,r) so that those whose ranks in |
|
* the sorted sequence belong to [lo,hi) take their correct positions. |
|
*/ |
|
static void |
|
part_qsort_rand_rec(idx_rng_t *rng, item_t *arr, idx_t lo, idx_t hi, idx_t l, idx_t r) { |
|
for (;;) { |
|
if unlikely(r-l <= QSORT_CUTOFF_HARD || |
|
(r-l <= QSORT_CUTOFF_SOFT && lo <= l && r <= hi)) { |
|
qsort_cutoff_sort(arr+l, r-l); |
|
return; |
|
} |
|
idx_t r1 = l + partition(arr+l, r-l, median_rand_3(rng, arr+l, r-l)); |
|
idx_t l1 = r1 + 1; |
|
int go_left = (lo < r1 && r1-l > 1); |
|
int go_right = (l1 < hi && r-l1 > 1); |
|
|
|
switch (2*go_left + go_right) { |
|
case 0: return; |
|
case 1: l = l1; break; |
|
case 2: r = r1; break; |
|
case 3: |
|
if (r1-l <= r-l1) { |
|
part_qsort_rand_rec(rng, arr, lo, hi, l, r1); |
|
l = l1; |
|
} else { |
|
part_qsort_rand_rec(rng, arr, lo, hi, l1, r); |
|
r = r1; |
|
} |
|
break; |
|
} |
|
} |
|
} |
|
static inline void |
|
part_qsort_rand(item_t *arr, idx_t len, idx_t lo, idx_t hi) { |
|
surely(len >= 0); surely(0 <= lo); surely(lo <= hi); surely(hi <= len); |
|
if (len <= 1 || lo >= hi) {return;} |
|
|
|
idx_rng_t rng = idx_rng((uintptr_t)arr); |
|
    /* Mix a few bytes of the array contents into the seed, so that pivot
     * choices do not depend on the array address alone. */
    idx_t i = idx_rand(&rng, 0, len);
|
rng = idx_rng((uintptr_t)arr + load_u64(arr+i, (len-i)*sizeof(arr[0]))); |
|
|
|
part_qsort_rand_rec(&rng, arr, lo, hi, 0, len); |
|
} |
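
/*
 * Usage sketch (hypothetical helper, not used by the benchmarks below): put the
 * k smallest elements of the array into sorted order at its front, leaving the
 * remaining elements only partially ordered.
 */
static inline void
part_qsort_first_k(item_t *arr, idx_t len, idx_t k) {
    if (k > len) {k = len;}
    part_qsort_rand(arr, len, 0, k);
}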
|
|
|
|
|
/*----------------------------------------------------------------------------* |
|
* OPTION 2: Incremental quicksort (pivot indices are shared between calls) |
|
*----------------------------------------------------------------------------*/ |
|
|
|
/* |
|
* Partially quick-sort elements from [l,r) so that those whose ranks in the |
|
* sorted sequence belong to [lo,hi) end up in their correct positions, taking |
|
* into account the history of partitions from previous invocations. |
|
* |
|
* The pivot indices of past partitions are stored in the piv_idx array in the |
|
* order of preorder traversal of the quicksort tree. Special index values |
|
* SORTED and CHAOS signify that an interval is, respectively, fully sorted and |
|
* has not been partitioned yet (all piv_idx must initially be set to CHAOS). |
|
*/ |
|
#define INC_QSORT_CHAOS ((idx_t)-1) |
|
#define INC_QSORT_SORTED ((idx_t)-2) |
|
static void |
|
inc_qsort_rand_rec(idx_rng_t *rng, item_t *arr, idx_t lo, idx_t hi, |
|
idx_t l, idx_t r, idx_t *piv_idx) { |
|
for (;;) { |
|
int will_be_sorted = (lo <= l && r <= hi); |
|
if unlikely(r-l <= QSORT_CUTOFF_HARD || |
|
(r-l <= QSORT_CUTOFF_SOFT && will_be_sorted)) { |
|
qsort_cutoff_sort(arr+l, r-l); |
|
*piv_idx = INC_QSORT_SORTED; |
|
return; |
|
} |
|
idx_t r1 = *piv_idx; |
|
if likely(r1 == INC_QSORT_CHAOS) { |
|
r1 = l + partition(arr+l, r-l, median_rand_3(rng, arr+l, r-l)); |
|
} |
|
*piv_idx = will_be_sorted ? INC_QSORT_SORTED : r1; |
|
idx_t l1 = r1 + 1; |
|
|
|
idx_t *piv_idx_left = piv_idx + 1; |
|
idx_t *piv_idx_right = piv_idx + (l1 - l); |
|
int go_left = (lo < r1 && r1-l > 1 && *piv_idx_left != INC_QSORT_SORTED); |
|
int go_right = (l1 < hi && r-l1 > 1 && *piv_idx_right != INC_QSORT_SORTED); |
|
|
|
switch (2*go_left + go_right) { |
|
case 0: return; |
|
case 1: l = l1, piv_idx = piv_idx_right; break; |
|
case 2: r = r1, piv_idx = piv_idx_left; break; |
|
case 3: |
|
if (r1-l <= r-l1) { |
|
inc_qsort_rand_rec(rng, arr, lo, hi, l, r1, piv_idx_left); |
|
l = l1, piv_idx = piv_idx_right; |
|
} else { |
|
inc_qsort_rand_rec(rng, arr, lo, hi, l1, r, piv_idx_right); |
|
r = r1, piv_idx = piv_idx_left; |
|
} |
|
break; |
|
} |
|
} |
|
} |
|
static inline void |
|
inc_qsort_rand(item_t *arr, idx_t *piv_idxs, idx_t len, idx_t lo, idx_t hi) { |
|
surely(len >= 0); surely(0 <= lo); surely(lo <= hi); surely(hi <= len); |
|
if (len <= 1 || lo >= hi || *piv_idxs == INC_QSORT_SORTED) {return;} |
|
|
|
idx_rng_t rng = idx_rng((uintptr_t)arr); |
|
    /* Mix a few bytes of the array contents into the seed, so that pivot
     * choices do not depend on the array address alone. */
    idx_t i = idx_rand(&rng, 0, len);
|
rng = idx_rng((uintptr_t)arr + load_u64(arr+i, (len-i)*sizeof(arr[0]))); |
|
|
|
inc_qsort_rand_rec(&rng, arr, lo, hi, 0, len, piv_idxs); |
|
} |
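
/*
 * Usage sketch (hypothetical, not used elsewhere in this file): sort
 * geometrically growing prefixes of the array, reusing the pivot history so
 * that each call only performs the partitions that previous calls have not
 * already done. Every piv_idxs slot must start out as INC_QSORT_CHAOS.
 */
static inline void
inc_qsort_prefixes_example(item_t *arr, idx_t *piv_idxs, idx_t len) {
    for (idx_t i = 0; i != len; i++) {piv_idxs[i] = INC_QSORT_CHAOS;}
    for (idx_t hi = 1; hi < len; hi *= 2) {
        inc_qsort_rand(arr, piv_idxs, len, 0, hi);
    }
    inc_qsort_rand(arr, piv_idxs, len, 0, len);
}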
|
|
|
|
|
/*----------------------------------------------------------------------------* |
|
* OPTION 3: Floyd-Rivest partitioning + N largest scan using binary heap |
|
*----------------------------------------------------------------------------*/ |
|
|
|
/* |
|
* Binary heap implementation. Supports both min- and max-heaps, as well as |
|
 * both left-to-right and right-to-left storage layouts in memory.
|
*/ |
|
#define heap_less(x, y) ((min_heap) ? item_less((x), (y)) : item_less((y), (x))) |
|
#define heap_at(i) ((root)[(store_backwards) ? -(ptrdiff_t)(i) : +(ptrdiff_t)(i)]) |
|
static inline void |
|
heap_fix_down(int min_heap, int store_backwards, item_t *root, ptrdiff_t node, ptrdiff_t len) { |
|
surely(len >= 0); |
|
item_t x = heap_at(node); |
|
for (ptrdiff_t child, first_leaf = len/2; node < first_leaf; node = child) { |
|
child = (node << 1) + 1; |
|
child += (likely(child+1 != len) && heap_less(heap_at(child+1), heap_at(child))); |
|
if (!heap_less(heap_at(child), x)) {break;} |
|
heap_at(node) = heap_at(child); |
|
} |
|
heap_at(node) = x; |
|
} |
|
#undef heap_less |
|
#undef heap_at |
|
static inline void |
|
heap_build(int min_heap, int store_backwards, item_t *root, ptrdiff_t len) { |
|
surely(len >= 0); |
|
/* Unlike repeated insertion, this only takes linear time. */ |
|
for (ptrdiff_t i = len/2; i != 0; i--) { |
|
heap_fix_down(min_heap, store_backwards, root, i-1, len); |
|
} |
|
} |
|
#define lr_max_heap_fix_down(root, node, len) (heap_fix_down(0,0, (root), (node), (len))) |
|
#define rl_min_heap_fix_down(root, node, len) (heap_fix_down(1,1, (root), (node), (len))) |
|
#define lr_max_heap_build(root, len) (heap_build(0,0, (root), (len))) |
|
#define rl_min_heap_build(root, len) (heap_build(1,1, (root), (len))) |
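
/*
 * In the right-to-left layout, heap slot i lives at root[-i], so a heap of
 * length n occupies root[-(n-1)..0] with its root at the highest address;
 * the left-to-right layout is the usual root[0..n-1].
 */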
|
|
|
/* |
|
 * Use the Floyd-Rivest SELECT algorithm to partition the elements of [beg,end)
 * around the element that would occupy index piv_rank (beg <= piv_rank < end)
 * if that interval were sorted, so that this pivot ends up at exactly index
 * piv_rank in the array when partitioning completes.
|
* |
|
* - Floyd, R. W., Rivest, R. L. (1975). Expected time bounds for selection. |
|
* Communications of the ACM, 18(3), 165-172. |
|
* - Kiwiel, K. C. (2005). On Floyd and Rivest's SELECT algorithm. |
|
* Theoretical Computer Science, 347(1-2), 214-238. |
|
*/ |
|
static void |
|
partition_floyd_rivest(item_t *arr, idx_t beg, idx_t end, idx_t piv_rank) { |
|
enum {CUTOFF_LEN = 600}; /* At most 4 levels of recursion for 32-bit ranges. */ |
|
surely(beg <= end); surely(beg <= piv_rank); surely(piv_rank < end); |
|
ptrdiff_t l = beg, r = end-1, k = piv_rank; |
|
while (l < r) { |
|
ptrdiff_t n = r - l + 1; |
|
ptrdiff_t m = k - l + 1; |
|
if likely(n > CUTOFF_LEN) { |
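            /*
             * Floyd-Rivest sampling step: recursively select the k-th element
             * within a small subinterval [l1,r1] of roughly n^(2/3)/2 elements
             * centered on the expected position of the k-th element, offset by
             * the guard term g so that the k-th element stays inside the window
             * with high probability. The partition below then uses a near-ideal
             * pivot.
             */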
|
double log_n = log(n); |
|
ptrdiff_t s = (ptrdiff_t)exp(log_n*(2/3.0)) / 2; |
|
ptrdiff_t g = (1 - 2*(m < n/2)) * (ptrdiff_t)sqrt(log_n*(n-s)*s/n) / 2; |
|
ptrdiff_t l1 = k + g - m*s/n; |
|
ptrdiff_t r1 = k + g + (n-m)*s/n; |
|
if unlikely(l1 < l) {l1 = l;} |
|
if unlikely(r1 > r) {r1 = r;} |
|
partition_floyd_rivest(arr, l1, r1+1, k); |
|
} |
|
ptrdiff_t p = l + partition(arr+l, n, m-1); |
|
if (k <= p) {r = p-1;} |
|
if (p <= k) {l = p+1;} |
|
} |
|
} |
|
/* |
|
* Partially heap-sort elements from [0,len) so that those whose ranks in |
|
* the sorted sequence belong to [lo,hi) take their correct positions. |
|
* |
|
* If the requested sorting interval is closer to the beginning of the array: |
|
* |
|
 * (A1) Partition the array around its (hi-1)-th smallest element (counting
 *      from zero) as the pivot.
|
* (A2) Build a right-to-left min-heap out of elements in [lo,hi) so that |
|
* the root node holding the minimum value is stored at index hi-1. |
|
 * (A3) For each element in [0,lo), if it exceeds the current heap minimum,
 *      exchange the two, fixing up the heap.
|
* (A4) Fill [lo,hi) going left to right with successively removed heap |
|
* minimums, simultaneously shortening (and maintaining) the heap. |
|
* |
|
* Otherwise, if the interval is closer to the end of the array: |
|
* |
|
 * (B1) Partition the array around its lo-th smallest element (counting from
 *      zero) as the pivot.
|
* (B2) Build a left-to-right max-heap out of elements in [lo,hi) so that |
|
* the root node holding the maximum value is stored at index lo. |
|
 * (B3) For each element in [hi,len), if it is less than the current heap
 *      maximum, exchange the two, fixing up the heap.
|
* (B4) Fill [lo,hi) going right to left with successively removed heap |
|
* maximums, simultaneously shortening (and maintaining) the heap. |
|
*/ |
|
static inline void |
|
part_hsort_frsel(item_t *arr, idx_t len, idx_t lo, idx_t hi) { |
|
    surely(len >= 0); surely(0 <= lo); surely(lo <= hi); surely(hi <= len);
|
if (len <= 1 || lo >= hi) {return;} |
|
if (lo < len-hi) { |
|
partition_floyd_rivest(arr, 0, len, hi-1); |
|
|
|
idx_t heap_len = hi - lo; |
|
item_t *heap_root = arr + hi - 1; |
|
rl_min_heap_build(heap_root, heap_len); |
|
for (idx_t i = 0; i != lo; i++) { |
|
if (!item_less(*heap_root, arr[i])) {continue;} |
|
swap(arr[i], *heap_root); |
|
rl_min_heap_fix_down(heap_root, 0, heap_len); |
|
} |
|
for (idx_t i = lo; i != hi; i++) { |
|
swap(arr[i], *heap_root); |
|
rl_min_heap_fix_down(heap_root, 0, --heap_len); |
|
} |
|
} else { |
|
partition_floyd_rivest(arr, 0, len, lo); |
|
|
|
idx_t heap_len = hi - lo; |
|
item_t *heap_root = arr + lo; |
|
lr_max_heap_build(heap_root, heap_len); |
|
for (idx_t i = hi; i != len; i++) { |
|
if (!item_less(arr[i], *heap_root)) {continue;} |
|
swap(arr[i], *heap_root); |
|
lr_max_heap_fix_down(heap_root, 0, heap_len); |
|
} |
|
for (idx_t i = hi-1; i != lo; i--) { |
|
swap(arr[i], *heap_root); |
|
lr_max_heap_fix_down(heap_root, 0, --heap_len); |
|
} |
|
} |
|
} |
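
/*
 * Usage sketch (hypothetical helper, not used by the benchmarks below): place
 * the elements of ranks [k, k+m) into sorted order at indices [k, k+m) without
 * fully sorting the rest of the array.
 */
static inline void
sorted_window_example(item_t *arr, idx_t len, idx_t k, idx_t m) {
    if (k >= len) {return;}
    if (m > len - k) {m = len - k;}
    if (m == 0) {return;}
    part_hsort_frsel(arr, len, k, k + m);
}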
|
|
|
|
|
/*----------------------------------------------------------------------------* |
|
* BENCHMARKS |
|
*----------------------------------------------------------------------------*/ |
|
|
|
#include <stdio.h> /* printf() */ |
|
#include <stdlib.h> /* calloc(), free() */ |
|
#include <time.h> /* clock_gettime() */ |
|
|
|
static inline uint64_t |
|
nsec(void) { |
|
struct timespec t; |
|
clock_gettime(CLOCK_MONOTONIC_RAW, &t); |
|
return (uint64_t)t.tv_sec*1000*1000*1000 + (uint64_t)t.tv_nsec; |
|
} |
|
|
|
struct pcg32 {uint64_t s, t;}; |
|
static inline struct pcg32 |
|
pcg32(uint64_t seed) { |
|
uint64_t a = splitmix64_next(&seed); |
|
uint64_t b = splitmix64_next(&seed); |
|
return (struct pcg32){.s = a, .t = b|1}; |
|
} |
|
static inline uint32_t |
|
pcg32_next(struct pcg32 *r) { |
|
uint32_t k = r->s >> 59; |
|
uint32_t x = (r->s ^ (r->s>>18)) >> 27; |
|
r->s = r->s*0x5851F42D4C957F2Du + r->t; |
|
return (x >> k) | (x << (-k & 31)); |
|
} |
|
static inline void |
|
shuffle(struct pcg32 *r, item_t *arr, uint32_t n) { |
|
for (uint32_t i = 0; i < n; i++) { |
|
swap(arr[i], arr[onto_range_u32(pcg32_next(r), i, n)]); |
|
} |
|
} |
|
|
|
static inline void |
|
call_part_qsort_rand(item_t *arr, idx_t *piv_idxs, idx_t len, idx_t lo, idx_t hi) { |
|
part_qsort_rand(arr, len, lo, hi); (void)piv_idxs; |
|
} |
|
static inline void |
|
call_part_hsort_frsel(item_t *arr, idx_t *piv_idxs, idx_t len, idx_t lo, idx_t hi) { |
|
part_hsort_frsel(arr, len, lo, hi); (void)piv_idxs; |
|
} |
|
|
|
extern int |
|
main(void) { |
|
enum { |
|
RANDOMIZE = 0, |
|
VERBOSE = 0, |
|
}; |
|
typedef void partial_sort_test_fn(item_t*, idx_t*, idx_t, idx_t, idx_t); |
|
static const struct test_func { |
|
partial_sort_test_fn *func; |
|
const char *name; |
|
} funcs[] = { |
|
#define X(p,f) {glue(p,f), #f} |
|
X(,inc_qsort_rand), |
|
X(call_,part_qsort_rand), |
|
X(call_,part_hsort_frsel), |
|
#undef X |
|
}; enum {N_FUNCS = sizeof(funcs)/sizeof(funcs[0])}; |
|
static const struct test_params { |
|
int n_runs; |
|
int n_sorts; |
|
idx_t arr_len; |
|
idx_t min_sort_len; |
|
idx_t max_sort_len; |
|
} tests[] = { |
|
{100, 5, 100, 5, 20}, |
|
{7000, 1, 100*1000, 100, 1000}, |
|
{500, 100, 100*1000, 100, 1000}, |
|
{30, 500, 1000*1000, 100, 5000}, |
|
{8, 2000, 1000*1000, 100, 5000}, |
|
{4, 500, 10*1000*1000, 100, 10*1000}, |
|
{4, 25, 100*1000*1000, 100, 10*1000}, |
|
}; enum {N_TESTS = sizeof(tests)/sizeof(tests[0])}; |
|
|
|
idx_t max_arr_len = 0; |
|
for (int i = 0; i < N_TESTS; i++) { |
|
if (max_arr_len < tests[i].arr_len) { |
|
max_arr_len = tests[i].arr_len; |
|
} |
|
} |
|
item_t *arr = calloc(max_arr_len, sizeof(arr[0])); |
|
idx_t *piv_idxs = calloc(max_arr_len, sizeof(piv_idxs[0])); |
|
surely(arr); surely(piv_idxs); |
|
|
|
uint64_t rng_seed = RANDOMIZE ? nsec() : 1889; |
|
|
|
for (int i_func = 0; i_func < N_FUNCS; i_func++) { |
|
partial_sort_test_fn *const partial_sort = funcs[i_func].func; |
|
printf("\nFunction %s\n", funcs[i_func].name); |
|
|
|
struct pcg32 rng = pcg32(rng_seed); |
|
|
|
for (int i_test = 0; i_test < N_TESTS; i_test++) { |
|
const struct test_params T = tests[i_test]; |
|
if (VERBOSE) { |
|
printf("Test %3d. Parameters: %d x %d, %td, %td..%td\n", |
|
i_test+1, T.n_runs, T.n_sorts, (ptrdiff_t)T.arr_len, |
|
(ptrdiff_t)T.min_sort_len, (ptrdiff_t)T.max_sort_len); |
|
} |
|
|
|
uint64_t test_dt = 0; |
|
for (int i_run = 0; i_run < T.n_runs; i_run++) { |
|
/* Initialize the array to a random permutation (of its indices). */ |
|
for (idx_t i = 0; i < T.arr_len; i++) {arr[i] = (item_t)i;} |
|
shuffle(&rng, arr, T.arr_len); |
|
/* Set pivot indices to INC_QSORT_CHAOS for inc_qsort_rand(): */ |
|
tacitly(INC_QSORT_CHAOS == (idx_t)-1); |
|
memset(piv_idxs, 0xFF, T.arr_len*sizeof(piv_idxs[0])); |
|
|
|
uint64_t run_dt = 0; |
|
for (int i_sort = 0; i_sort < T.n_sorts; i_sort++) { |
|
idx_t span = onto_range_u32(pcg32_next(&rng), T.min_sort_len, T.max_sort_len+1); |
|
idx_t lo = onto_range_u32(pcg32_next(&rng), 0, T.arr_len-span+1); |
|
idx_t hi = lo + span; |
|
|
|
uint64_t t0 = nsec(); |
|
(*partial_sort)(arr, piv_idxs, T.arr_len, lo, hi); |
|
uint64_t dt = nsec() - t0; |
|
run_dt += dt; |
|
|
|
int ok = 1; idx_t first_diff = 0; |
|
for (idx_t i = lo; i < hi; i++) { |
|
if (arr[i] != (item_t)i) { |
|
ok = 0, first_diff = i; |
|
break; |
|
} |
|
} |
|
if (VERBOSE || !ok) { |
|
printf("Test %3d, run %3d, sort %3d. [%12td %12td] (%6td): %4s %.9f\n", |
|
i_test+1, i_run+1, i_sort+1, (ptrdiff_t)lo, (ptrdiff_t)hi-1, (ptrdiff_t)span, |
|
ok ? "OK" : "FAIL", dt*1e-9/span); |
|
} |
|
if (!ok) { |
|
enum {BEFORE = 5, AFTER = 10}; |
|
idx_t l = first_diff-lo >= BEFORE ? first_diff-BEFORE : lo; |
|
idx_t r = hi-first_diff >= AFTER+1 ? first_diff+AFTER+1 : hi; |
|
printf(" [%td..%td]:", (ptrdiff_t)l, (ptrdiff_t)r-1); |
|
for (idx_t i = l; i < r; i++) { |
|
printf(" %llu", (unsigned long long)arr[i]); |
|
} |
|
printf("\n"); |
|
} |
|
} |
|
test_dt += run_dt; |
|
|
|
if (VERBOSE) { |
|
printf("Test %3d, run %3d. Elapsed: %9.6f\n", |
|
i_test+1, i_run+1, run_dt*1e-9); |
|
} |
|
} |
|
|
|
printf("Test %3d. Elapsed: %7.3f total, %9.6f per run\n", |
|
i_test+1, test_dt*1e-9, test_dt*1e-9/T.n_runs); |
|
} |
|
} |
|
|
|
free(arr), arr = 0; |
|
free(piv_idxs), piv_idxs = 0; |
|
|
|
return 0; |
|
} |