Skip to content

Instantly share code, notes, and snippets.

@igorburago
Last active July 3, 2025 16:14
Show Gist options
  • Save igorburago/fa340429e75007a41335fb660d419e52 to your computer and use it in GitHub Desktop.
Fast partial sorting of an array on a rank subinterval
// Copyright 2024 Igor Burago
// SPDX-License-Identifier: ISC
#include <math.h> // for rank_partition()
#include <stddef.h> // ptrdiff_t
#include <stdint.h>
#include <string.h> // memcpy(), memmove(), memset()
// Built-in u128 is not strictly required, but it makes mcg64 simpler and faster.
typedef __uint128_t uint128_t;
// Inlining-control shorthands for the corresponding GCC/Clang attributes.
#define must_inline inline __attribute__((always_inline))
#define must_not_inline __attribute__((noinline))
#define inline_within __attribute__((flatten))
// fickle(x): hint that a branch on x is unpredictable (prefer branchless code).
#if __has_builtin(__builtin_unpredictable)
#define fickle(x) (__builtin_unpredictable((x)))
#else
#define fickle(x) (x)
#endif
// usually(x, c): hint that x most often evaluates to c.
#if __has_builtin(__builtin_expect)
#define usually(x, c) (__builtin_expect((x), (c)))
#else
#define usually(x, c) (x)
#endif
#define likely(x) (usually(!!(x), 1))
#define unlikely(x) (usually(!!(x), 0))
// Token pasting; dub(name) yields an identifier unique per source line, used
// for hygienic local variables in multi-statement macros like swap() below.
#define glue_raw(x, y) x##y
#define glue(x, y) glue_raw(x, y)
#define dub(name) glue(name, glue(__dub_L, __LINE__))
// apriori(x): compile-time assertion. A false x produces a negative bit-field
// width, which makes the declaration ill-formed; the extern declaration form
// is usable at both file scope and block scope (see swap() and main()).
#define apriori(x) extern void *(*compile_time_check_dummy(void))[ \
sizeof(struct { unsigned compile_time_check : (x) ? 1 : -1; })]
// #define DEBUG_BUILD
#if defined(DEBUG_BUILD)
#include <stdio.h>
// surely(x): runtime assertion. Debug builds report the failure and trap.
#define surely(x) (likely(x) ? (void)0 : \
(fprintf(stderr, "FATAL: %s:%d: surely(%s) does not hold!\n", __FILE__, __LINE__, #x), \
fflush(NULL), \
__builtin_trap()))
#define unreachable() surely(0 && "This code path must never be reached.")
#else
#if __has_builtin(__builtin_unreachable)
#define unreachable() (__builtin_unreachable())
#else
#define unreachable() ((void)(*(volatile int *)0 = 0))
#endif
// In release builds, a failed surely() is an optimizer promise ("cannot
// happen"), not a checked assertion: reaching it is undefined behavior.
static must_inline void surely_failure_deadend(void) { for (;;) { unreachable(); } }
#define surely(x) (likely(x) ? (void)0 : surely_failure_deadend())
#endif
// swap(x, y): byte-wise swap of two same-sized lvalues. memmove (not memcpy)
// on the second step keeps swap(x, x) well-defined.
#define swap(x, y) do { \
void *dub(xp) = &(x), *dub(yp) = &(y); \
apriori(sizeof(x) == sizeof(y)); \
unsigned char dub(tmp)[sizeof(x)]; \
memcpy(dub(tmp), dub(xp), sizeof(dub(tmp))); \
memmove(dub(xp), dub(yp), sizeof(dub(tmp))); \
memcpy(dub(yp), dub(tmp), sizeof(dub(tmp))); \
} while (0)
// SplitMix64: step the state by the golden-ratio increment, then scramble
// it with two xor-shift-multiply rounds. Used only to expand seeds.
static inline uint64_t
splitmix64_next(uint64_t *s) {
	uint64_t z = (*s += 0x9E3779B97F4A7C15u);
	z ^= z >> 30;
	z *= 0xBF58476D1CE4E5B9u;
	z ^= z >> 27;
	z *= 0x94D049BB133111EBu;
	return z ^ (z >> 31);
}
// PCG32 state: s is the 64-bit LCG state, t the stream increment (must be odd).
struct pcg32 { uint64_t s, t; };
// Seed a pcg32 stream by expanding one 64-bit seed with two SplitMix64 draws.
static inline struct pcg32
pcg32(uint64_t seed) {
	uint64_t state = splitmix64_next(&seed);
	uint64_t stream = splitmix64_next(&seed);
	struct pcg32 r = {.s = state, .t = stream | 1}; // force the increment odd
	return r;
}
// PCG-XSH-RR output step: compute the xorshifted-high output and rotation
// from the current state, then advance the state by one LCG step.
static inline uint32_t
pcg32_next(struct pcg32 *r) {
	uint64_t old = r->s;
	uint32_t rot = old >> 59;
	uint32_t xsh = (old ^ (old >> 18)) >> 27;
	r->s = old*0x5851F42D4C957F2Du + r->t;
	return (xsh >> rot) | (xsh << (-rot & 31));
}
struct mcg64 { uint128_t s; };
static inline struct mcg64
mcg64(uint64_t seed) {
uint64_t a = splitmix64_next(&seed);
uint64_t b = splitmix64_next(&seed);
return (struct mcg64){((uint128_t)a << 64) + b};
}
// Advance the MCG by one multiplication and return the high 64 state bits.
static must_inline uint64_t
mcg64_next(struct mcg64 *r) {
	r->s *= 0xDA942042E4DD58B5u;
	return (uint64_t)(r->s >> 64);
}
// Fast and only slightly biased for large (end-beg) approaching u32/u64 max.
// Multiply-shift range reduction (Lemire's method without the rejection
// step): maps a uniform 32-bit x onto [beg,end[.
// (Attribute spelled out here; identical to the must_inline shorthand.)
static inline __attribute__((always_inline)) uint32_t
onto_range_u32(uint32_t x, uint32_t beg, uint32_t end) {
	uint32_t span = end - beg;
	uint64_t scaled = (uint64_t)x * span;
	return beg + (uint32_t)(scaled >> 32);
}
// Same multiply-shift range reduction for 64-bit values, via a 128-bit
// product. (__uint128_t is the type behind the file's uint128_t typedef;
// the attribute spelled out here is identical to the must_inline shorthand.)
static inline __attribute__((always_inline)) uint64_t
onto_range_u64(uint64_t x, uint64_t beg, uint64_t end) {
	__uint128_t scaled = (__uint128_t)x * (end - beg);
	return beg + (uint64_t)(scaled >> 64);
}
// Load up to 8 bytes from p into a u64 in native byte order, zero-filling
// the rest. Reads exactly min(size, 8) bytes, so it is safe to pass a larger
// size when at least min(size, 8) bytes are accessible at p.
// (Attribute spelled out here; identical to the must_inline shorthand.)
static inline __attribute__((always_inline)) uint64_t
load_u64(void *p, size_t size) {
	uint64_t u = 0;
	size_t n = size < sizeof(u) ? size : sizeof(u);
	memcpy(&u, p, n);
	return u;
}
//-----------------------------------------------------------------------------*
// ARRAY TYPE PARAMETERS
//-----------------------------------------------------------------------------*
// Element type and its strict "less than" comparison predicate.
typedef int item_t;
#define item_less(x, y) ((x) < (y))
// Both signed and unsigned types are supported:
typedef uint32_t index_t;
#if 1
#define INDEX_RNG_USING_PCG
#endif
// RNG used for random pivot selection; index_rand(rng, len) yields an index
// in [0,len[.
#if defined(INDEX_RNG_USING_PCG)
typedef struct pcg32 index_rng_t;
#define index_rng(seed) (pcg32(seed))
#define index_rand(rng, len) (onto_range_u32(pcg32_next(rng), 0, (len)))
#else
// MCG is faster than LCG or PCG, and is usually good enough for quicksort:
typedef struct mcg64 index_rng_t;
#define index_rng(seed) (mcg64(seed))
#define index_rand(rng, len) (onto_range_u64(mcg64_next(rng), 0, (len)))
#endif
//-----------------------------------------------------------------------------*
// OPTION 1: One-off partial quicksort (no state shared between calls)
//-----------------------------------------------------------------------------*
#define max(x, y) ((x) > (y) ? (x) : (y))
// Below the hard cutoff, quicksort always falls back to insertion sort;
// below the soft cutoff, it does so only when the whole interval lies
// inside the requested rank range and thus needs full sorting anyway.
enum {
QUICKSORT_CUTOFF_HARD = 4,
QUICKSORT_CUTOFF_SOFT = max(8, 64/sizeof(item_t)),
};
apriori(QUICKSORT_CUTOFF_HARD >= 3); // We use the median-of-three pivoting.
// Insertion sort, used for intervals below the quicksort cutoffs.
static inline void
quicksort_cutoff_sort(item_t *arr, index_t len) {
	surely(len >= 0);
	if (len == 0) { return; }
	for (index_t i = 1; i != len; i++) {
		// Skip the shift entirely when arr[i] is already in order.
		if fickle(!item_less(arr[i], arr[i-1])) { continue; }
		item_t x = arr[i];
		index_t j = i;
		do {
			arr[j] = arr[j-1];
			j--;
		} while (j != 0 && fickle(item_less(x, arr[j-1])));
		arr[j] = x;
	}
}
// Hoare-partition elements in [0,len[ using the item at pivot_idx for pivot.
// Returns the pivot's final index p: items before p compare <= pivot and
// items after p compare >= pivot. The inner scan loops run with no bounds
// checks; the setup below parks sentinels at both ends to stop them.
static must_inline index_t
partition(item_t *arr, index_t len, index_t pivot_idx) {
surely(len >= 2), surely(0 <= pivot_idx), surely(pivot_idx < len);
item_t piv = arr[pivot_idx];
// Fill the pivot's old slot with arr[0]; slot 0 is now free to overwrite.
arr[pivot_idx] = arr[0];
// Park the pivot at one end so that both scans are bounded:
// - if arr[len-1] < piv, move that item to slot 0 (it stops the j-scan,
//   being < piv) and park the pivot at len-1 (it stops the i-scan);
// - otherwise park the pivot at slot 0 (it stops the j-scan), while the
//   old arr[len-1], being >= piv, already stops the i-scan.
pivot_idx = item_less(arr[len-1], piv) ? len-1 : 0;
arr[0] = arr[pivot_idx];
arr[pivot_idx] = piv;
// Classic Hoare scans; both indices are pre-stepped, so the sentinels at
// positions 0 and len-1 are never swapped away.
index_t i = 0, j = len-1;
for (;;) {
do { i++; } while (item_less(arr[i], piv));
do { j--; } while (item_less(piv, arr[j]));
if (i >= j) { break; }
swap(arr[i], arr[j]);
}
// Move the parked pivot into its final slot: j+1 if it was parked on the
// right end, j if it was parked at slot 0.
j += (pivot_idx != 0);
swap(arr[j], arr[pivot_idx]);
return j;
}
// Return the index of the median of three items picked at distinct random
// positions of arr[0..len[.
static must_inline index_t
median_rand_3(index_rng_t *rng, item_t *arr, index_t len) {
surely(len >= 3);
index_t i[3];
// Sample three indices without replacement: each draw comes from a range
// shrunk by the number of indices already taken, and is then shifted past
// them (in increasing order) to land on a still-unused position.
i[0] = index_rand(rng, len-0);
i[1] = index_rand(rng, len-1);
i[2] = index_rand(rng, len-2);
unsigned i1_past_i0 = (i[1] >= i[0]);
i[1] += i1_past_i0; // Count i[1]-th item, jumping over i[0]-th.
i[2] += (i[2] >= i[!i1_past_i0]); // Count i[2]-th, skipping min(i[0],i[1])-th...
i[2] += (i[2] >= i[i1_past_i0]); // ...and then max(i[0],i[1])-th.
// Branchless median-of-3: (gt01^lt02) + (lt02^lt12) is 0, 1, or 2 and
// always selects the median of the three items (check all six orderings).
unsigned gt01 = !!item_less(arr[i[1]], arr[i[0]]);
unsigned lt02 = !!item_less(arr[i[0]], arr[i[2]]);
unsigned lt12 = !!item_less(arr[i[1]], arr[i[2]]);
return i[(gt01^lt02) + (lt02^lt12)];
}
// Partially quick-sort elements from [l,r[ so that those whose ranks in
// the sorted sequence belong to [lo,hi[ take their correct positions.
//
// When both partition halves need work, only the smaller one is handled by
// a recursive call and the larger one continues in the loop, which keeps
// the recursion depth logarithmic in r-l.
static void
partial_quicksort_rand_rec(index_rng_t *rng, item_t *arr,
index_t lo, index_t hi, index_t l, index_t r) {
for (;;) {
// Small intervals go to insertion sort. The soft cutoff applies only when
// [l,r[ lies entirely inside [lo,hi[ (i.e. needs full sorting anyway).
if unlikely(r-l <= QUICKSORT_CUTOFF_HARD ||
(r-l <= QUICKSORT_CUTOFF_SOFT && lo <= l && r <= hi)) {
quicksort_cutoff_sort(arr+l, r-l);
return;
}
index_t pivot_idx = median_rand_3(rng, arr+l, r-l);
index_t r_left = l + partition(arr+l, r-l, pivot_idx);
index_t l_right = r_left + 1;
// Descend into a half only if it intersects [lo,hi[ and has > 1 item.
unsigned go_left = (lo < r_left && r_left-l > 1);
unsigned go_right = (l_right < hi && r-l_right > 1);
switch fickle((go_left<<1) | go_right) {
case 0: { return; }
case 1: right: { l = l_right; break; }
case 2: left: { r = r_left; break; }
case 3: {
// Both halves needed: recurse into the smaller, iterate on the larger.
if (r_left-l <= r-l_right) {
partial_quicksort_rand_rec(rng, arr, lo, hi, l, r_left);
goto right;
} else {
partial_quicksort_rand_rec(rng, arr, lo, hi, l_right, r);
goto left;
}
}
default: { unreachable(); }
}
}
}
// One-off entry point: sort ranks [lo,hi[ of arr[0..len[ into place.
static inline void
partial_quicksort_rand(item_t *arr, index_t len, index_t lo, index_t hi) {
	surely(len >= 0), surely(0 <= lo), surely(lo <= hi), surely(hi <= len);
	if (len <= 1 || lo >= hi) { return; }
	// Seed deterministically from the array address, then re-seed with the
	// bytes of a randomly probed element, so that the pivot sequence also
	// depends on the array contents, not just on its location.
	index_rng_t rng = index_rng((uintptr_t)arr);
	index_t probe = index_rand(&rng, len);
	uint64_t salt = load_u64(arr+probe, (len-probe)*sizeof(arr[0]));
	rng = index_rng((uintptr_t)arr + salt);
	partial_quicksort_rand_rec(&rng, arr, lo, hi, 0, len);
}
//-----------------------------------------------------------------------------*
// OPTION 2: Incremental quicksort (pivot indices are shared between calls)
//-----------------------------------------------------------------------------*
// Partially quick-sort elements from [l,r[ so that those whose ranks in the
// sorted sequence belong to [lo,hi[ end up in their correct positions, taking
// into account the history of partitions from previous invocations.
//
// The pivot indices of past partitions are stored in the part_idx array in
// the order of preorder traversal of the quicksort tree. Special index values
// SORTED and CHAOS signify that an interval is, respectively, fully sorted and
// has not been partitioned yet (all part_idx must initially be set to CHAOS).
#define INC_QUICKSORT_CHAOS ((index_t)-1)
#define INC_QUICKSORT_SORTED ((index_t)-2)
// Same traversal as partial_quicksort_rand_rec(), except each interval's
// pivot position is cached in part_idx (one record per quicksort-tree node,
// laid out in preorder), so later calls reuse earlier partitioning work.
static void
inc_quicksort_rand_rec(index_rng_t *rng, item_t *arr, index_t lo, index_t hi,
index_t l, index_t r, index_t *part_idx) {
for (;;) {
int will_be_sorted = (lo <= l && r <= hi);
if unlikely(r-l <= QUICKSORT_CUTOFF_HARD ||
(r-l <= QUICKSORT_CUTOFF_SOFT && will_be_sorted)) {
quicksort_cutoff_sort(arr+l, r-l);
*part_idx = INC_QUICKSORT_SORTED;
return;
}
// Reuse the cached partition of [l,r[ if one exists; otherwise create it.
index_t r_left = *part_idx;
if likely(r_left == INC_QUICKSORT_CHAOS) {
index_t pivot_idx = median_rand_3(rng, arr+l, r-l);
r_left = l + partition(arr+l, r-l, pivot_idx);
}
// Once [l,r[ is fully inside [lo,hi[, this pass sorts it completely.
*part_idx = will_be_sorted ? INC_QUICKSORT_SORTED : r_left;
index_t l_right = r_left + 1;
unsigned go_left = (lo < r_left && r_left-l > 1);
unsigned go_right = (l_right < hi && r-l_right > 1);
// Preorder layout: the left child's record is adjacent; the right child's
// record comes after all records reserved for the left subtree.
index_t *part_idx_left = part_idx + 1;
index_t *part_idx_right = part_idx + (l_right - l);
// Skip halves already fully sorted by previous invocations.
go_left = (go_left && *part_idx_left != INC_QUICKSORT_SORTED);
go_right = (go_right && *part_idx_right != INC_QUICKSORT_SORTED);
switch fickle((go_left<<1) | go_right) {
case 0: { return; }
case 1: right: { l = l_right, part_idx = part_idx_right; break; }
case 2: left: { r = r_left, part_idx = part_idx_left; break; }
case 3: {
if (r_left-l <= r-l_right) {
inc_quicksort_rand_rec(rng, arr, lo, hi, l, r_left, part_idx_left);
goto right;
} else {
inc_quicksort_rand_rec(rng, arr, lo, hi, l_right, r, part_idx_right);
goto left;
}
}
default: { unreachable(); }
}
}
}
// Entry point for the incremental quicksort. part_idxs caches pivot positions
// across calls; all entries must initially be INC_QUICKSORT_CHAOS.
static inline void
inc_quicksort_rand(item_t *arr, index_t *part_idxs, index_t len, index_t lo, index_t hi) {
	surely(len >= 0), surely(0 <= lo), surely(lo <= hi), surely(hi <= len);
	if (len <= 1 || lo >= hi) { return; }
	if (*part_idxs == INC_QUICKSORT_SORTED) { return; } // whole array is done
	// Same content-dependent seeding as partial_quicksort_rand().
	index_rng_t rng = index_rng((uintptr_t)arr);
	index_t probe = index_rand(&rng, len);
	uint64_t salt = load_u64(arr+probe, (len-probe)*sizeof(arr[0]));
	rng = index_rng((uintptr_t)arr + salt);
	inc_quicksort_rand_rec(&rng, arr, lo, hi, 0, len, part_idxs);
}
//-----------------------------------------------------------------------------*
// OPTION 3: Floyd-Rivest partitioning + N largest scan using binary heap
//-----------------------------------------------------------------------------*
// Binary heap implementation. Supports both min- and max-heaps, as well as
// both left-to-right and right-to-left storage layout in memory.
//
// heap_superior(x, y): should x be closer to the root than y?
#define heap_superior(x, y) ((min_heap) ? item_less((x), (y)) : item_less((y), (x)))
// heap_at(i): node i; with backwards storage the heap grows downwards in
// memory, so node i lives at root-i instead of root+i.
#define heap_at(i) (*((stored_backwards) ? (root)-(i) : (root)+(i)))
// Sift the item at node down the heap (children of node n are 2n+1, 2n+2)
// until neither child is superior to it.
static inline void
heap_fix_down(int min_heap, int stored_backwards, item_t *root, index_t node, index_t len) {
surely(len > 0), surely(0 <= node), surely(node < len);
item_t x = heap_at(node);
for (index_t child; child = (node<<1)|1, child < len; node = child) {
// Pick the superior of the two children, if the right one exists.
if likely(child+1 != len) {
child += !!heap_superior(heap_at(child+1), heap_at(child));
}
if fickle(!heap_superior(heap_at(child), x)) { break; }
heap_at(node) = heap_at(child);
}
heap_at(node) = x;
}
#undef heap_superior
#undef heap_at
// Bottom-up heap construction: sift down every internal node, starting
// from the last one (nodes len/2..len-1 are leaves and need no work).
static inline_within void
heap_build(int min_heap, int stored_backwards, item_t *root, index_t len) {
	surely(len >= 0);
	index_t node = len/2;
	while (node != 0) {
		node--;
		heap_fix_down(min_heap, stored_backwards, root, node, len);
	}
}
#define lr_max_heap_fix_down(root, node, len) (heap_fix_down(0,0, (root), (node), (len)))
#define rl_min_heap_fix_down(root, node, len) (heap_fix_down(1,1, (root), (node), (len)))
#define lr_max_heap_build(root, len) (heap_build(0,0, (root), (len)))
#define rl_min_heap_build(root, len) (heap_build(1,1, (root), (len)))
// Use Floyd-Rivest Select algorithm to partition elements in [beg,end[ using
// the pivot_rank-th largest element in that interval (counting from zero) for
// the pivot, which hence ends up at exactly the pivot_rank index in the array
// when partitioning completes.
//
// - Floyd, R. W., Rivest, R. L. (1975). Expected time bounds for selection.
// Communications of the ACM, 18(3), 165-172.
// - Kiwiel, K. C. (2005). On Floyd and Rivest's SELECT algorithm.
// Theoretical Computer Science, 347(1-2), 214-238.
static void
rank_partition(item_t *arr, index_t beg, index_t end, index_t pivot_rank) {
enum { CUTOFF_LEN = 600 }; // At most 4 levels of recursion for 32-bit ranges.
surely(beg <= end), surely(beg <= pivot_rank), surely(pivot_rank < end);
// Work in signed ptrdiff_t: the sampling arithmetic below can go negative.
ptrdiff_t l = beg, r = end-1, k = pivot_rank;
while (l < r) {
ptrdiff_t n = r - l + 1; // current interval length
ptrdiff_t m = k - l + 1; // target rank, 1-based within [l,r]
if (n > CUTOFF_LEN) {
// First select recursively within a small sample [l_sample,r_sample]
// centered near the expected position of the k-th element, so that the
// subsequent partition of the whole interval uses a near-optimal pivot.
// s is the sample size ~ n^(2/3)/2 and g a bias term away from the
// denser side (see the papers above for the constants).
double log_n = log(n);
ptrdiff_t s = (ptrdiff_t)exp(log_n*(2/3.0)) / 2;
ptrdiff_t g = (1 - 2*(m < n/2)) * (ptrdiff_t)sqrt(log_n*(n-s)*s/n) / 2;
ptrdiff_t l_sample = k + g - m*s/n;
ptrdiff_t r_sample = k + g + (n-m)*s/n;
if unlikely(l_sample < l) { l_sample = l; }
if unlikely(r_sample > r) { r_sample = r; }
rank_partition(arr, l_sample, r_sample+1, k);
}
// Partition around the estimated pivot and keep only the side that still
// contains rank k; when p == k both bounds collapse and the loop exits.
ptrdiff_t p = l + partition(arr+l, n, m-1);
if fickle(k <= p) { r = p-1; }
if fickle(p <= k) { l = p+1; }
}
}
// Partially heap-sort elements from [0,len[ so that those whose ranks in
// the sorted sequence belong to [lo,hi[ take their correct positions.
//
// If the requested sorting interval is closer to the beginning of the array:
//
// (A1) Partition the array around the (hi-1)-st largest element as a pivot.
// (A2) Build a right-to-left min-heap out of elements in [lo,hi[ so that
// the root node holding the minimum value is stored at index hi-1.
// (A3) For each element in [0,lo[, if it is exceeding the current heap
// minimum, exchange the former with the latter, fixing the heap.
// (A4) Fill [lo,hi[ going left to right with successively removed heap
// minimums, simultaneously shortening (and maintaining) the heap.
//
// Otherwise, if the interval is closer to the end of the array:
//
// (B1) Partition the array around the lo-th largest element as a pivot.
// (B2) Build a left-to-right max-heap out of elements in [lo,hi[ so that
// the root node holding the maximum value is stored at index lo.
// (B3) For each element in [hi,len[, if it is inferior to the current heap
// maximum, exchange the former with the latter, fixing the heap.
// (B4) Fill [lo,hi[ going right to left with successively removed heap
// maximums, simultaneously shortening (and maintaining) the heap.
static inline void
rank_partitioned_heapsort(item_t *arr, index_t len, index_t lo, index_t hi) {
// NOTE: lo == hi (an empty rank interval) is allowed, consistent with the
// quicksort variants above. The previous assertion surely(lo < hi)
// contradicted the early return on the next line and made an empty
// interval undefined behavior in release builds.
surely(len >= 0), surely(0 <= lo), surely(lo <= hi), surely(hi <= len);
if (len <= 1 || lo >= hi) { return; }
if (lo < len-hi) {
// (A1) After this, [0,hi[ holds the hi smallest items.
rank_partition(arr, 0, len, hi-1);
index_t heap_len = hi - lo;
// (A2) Right-to-left min-heap over [lo,hi[, root stored at index hi-1.
item_t *heap_root = arr + hi - 1;
rl_min_heap_build(heap_root, heap_len);
// (A3) Swap any item of [0,lo[ exceeding the heap minimum into the heap.
for (index_t i = 0; i != lo; i++) {
if (!item_less(*heap_root, arr[i])) { continue; }
swap(arr[i], *heap_root);
rl_min_heap_fix_down(heap_root, 0, heap_len);
}
// (A4) Extract successive minimums into [lo,hi[, left to right.
for (index_t i = lo; i != hi-1; i++) {
swap(arr[i], *heap_root);
rl_min_heap_fix_down(heap_root, 0, --heap_len);
}
} else {
// (B1) For lo == 0 the partition is unnecessary: the whole array is
// already "everything at or above rank lo".
if likely(lo != 0) { rank_partition(arr, 0, len, lo); }
index_t heap_len = hi - lo;
// (B2) Left-to-right max-heap over [lo,hi[, root stored at index lo.
item_t *heap_root = arr + lo;
lr_max_heap_build(heap_root, heap_len);
// (B3) Swap any item of [hi,len[ below the heap maximum into the heap.
for (index_t i = hi; i != len; i++) {
if (!item_less(arr[i], *heap_root)) { continue; }
swap(arr[i], *heap_root);
lr_max_heap_fix_down(heap_root, 0, heap_len);
}
// (B4) Extract successive maximums into [lo,hi[, right to left.
for (index_t i = hi-1; i != lo; i--) {
swap(arr[i], *heap_root);
lr_max_heap_fix_down(heap_root, 0, --heap_len);
}
}
}
//-----------------------------------------------------------------------------*
// BENCHMARKS
//-----------------------------------------------------------------------------*
#include <stdio.h> // printf()
#include <stdlib.h> // calloc(), free()
#include <time.h> // clock_gettime()
// Monotonic timestamp in nanoseconds; only differences between two calls
// are meaningful.
static inline uint64_t
nsec(void) {
	struct timespec ts;
	clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
	uint64_t sec_ns = (uint64_t)ts.tv_sec * 1000000000u;
	return sec_ns + (uint64_t)ts.tv_nsec;
}
// Fisher-Yates shuffle of arr[0..n[ driven by the pcg32 generator.
static inline void
shuffle(struct pcg32 *r, item_t *arr, uint32_t n) {
	for (uint32_t i = 0; i < n; i++) {
		uint32_t j = onto_range_u32(pcg32_next(r), i, n);
		swap(arr[i], arr[j]);
	}
}
// Adapter to the benchmark's common signature; this variant keeps no state
// between calls, so the part_idxs argument is ignored.
static inline void
call_partial_quicksort_rand(item_t *arr, index_t *part_idxs, index_t len, index_t lo, index_t hi) {
	(void)part_idxs; // unused by the one-off quicksort
	partial_quicksort_rand(arr, len, lo, hi);
}
// Adapter to the benchmark's common signature; this variant keeps no state
// between calls, so the part_idxs argument is ignored.
static inline void
call_rank_partitioned_heapsort(item_t *arr, index_t *part_idxs, index_t len, index_t lo, index_t hi) {
	(void)part_idxs; // unused by the heapsort variant
	rank_partitioned_heapsort(arr, len, lo, hi);
}
extern int
main(void) {
// Benchmark driver: times each partial-sorting variant on random
// permutations and verifies that ranks [lo,hi[ land in their places.
enum {
RANDOMIZE = 0, // 1: seed from the clock; 0: fixed seed for reproducibility
VERBOSE = 0, // 1: print per-run and per-sort details
};
// Functions under test, adapted to one common signature.
typedef void partial_sort_test_func(item_t *, index_t *, index_t, index_t, index_t);
static struct test_func {
partial_sort_test_func *func;
char const *name;
} const funcs[] = {
#define X(p,f) {glue(p,f), #f}
X(call_,rank_partitioned_heapsort),
X(call_,partial_quicksort_rand),
X(,inc_quicksort_rand),
#undef X
}; enum { N_FUNCS = sizeof(funcs)/sizeof(funcs[0]) };
// {n_runs, n_sorts, arr_len, min_sort_len, max_sort_len}: each test does
// n_runs runs of n_sorts partial sorts on a fresh random permutation.
static struct test_params {
int n_runs;
int n_sorts;
index_t arr_len;
index_t min_sort_len;
index_t max_sort_len;
} const tests[] = {
{100, 5, 100, 5, 20},
{7000, 1, 100*1000, 100, 1000},
{500, 100, 100*1000, 100, 1000},
{30, 500, 1000*1000, 100, 5000},
{8, 2000, 1000*1000, 100, 5000},
{4, 500, 10*1000*1000, 100, 10*1000},
{4, 25, 100*1000*1000, 100, 10*1000},
}; enum { N_TESTS = sizeof(tests)/sizeof(tests[0]) };
// Allocate once, big enough for the largest test.
index_t max_arr_len = 0;
for (int i = 0; i < N_TESTS; i++) {
if (max_arr_len < tests[i].arr_len) {
max_arr_len = tests[i].arr_len;
}
}
item_t *arr = calloc(max_arr_len, sizeof(arr[0]));
index_t *part_idxs = calloc(max_arr_len, sizeof(part_idxs[0]));
surely(arr != NULL), surely(part_idxs != NULL);
if (!arr || !part_idxs) { return 1; }
uint64_t const rng_seed = RANDOMIZE ? nsec() : 809;
for (int i_func = 0; i_func < N_FUNCS; i_func++) {
partial_sort_test_func *const partial_sort = funcs[i_func].func;
printf("\nFunction %s\n", funcs[i_func].name);
// Re-seed per function so all functions see identical inputs.
struct pcg32 rng = pcg32(rng_seed);
for (int i_test = 0; i_test < N_TESTS; i_test++) {
struct test_params const T = tests[i_test];
if (VERBOSE) {
printf("Test %3d. Parameters: %d x %d, %td, %td..%td\n",
i_test+1, T.n_runs, T.n_sorts, (ptrdiff_t)T.arr_len,
(ptrdiff_t)T.min_sort_len, (ptrdiff_t)T.max_sort_len);
}
uint64_t test_dt = 0;
for (int i_run = 0; i_run < T.n_runs; i_run++) {
// Initialize the array to a random permutation (of its indices).
for (index_t i = 0; i < T.arr_len; i++) { arr[i] = (item_t)i; }
shuffle(&rng, arr, T.arr_len);
// Set pivot indices to INC_QUICKSORT_CHAOS for inc_quicksort_rand():
apriori(INC_QUICKSORT_CHAOS == (index_t)-1);
memset(part_idxs, 0xFF, T.arr_len*sizeof(part_idxs[0]));
uint64_t run_dt = 0;
for (int i_sort = 0; i_sort < T.n_sorts; i_sort++) {
// Pick a random rank interval [lo,hi[ of length span.
index_t span = onto_range_u32(pcg32_next(&rng), T.min_sort_len, T.max_sort_len+1);
index_t lo = onto_range_u32(pcg32_next(&rng), 0, T.arr_len-span+1);
index_t hi = lo + span;
uint64_t t0 = nsec();
(*partial_sort)(arr, part_idxs, T.arr_len, lo, hi);
uint64_t dt = nsec() - t0;
run_dt += dt;
// Since arr is a permutation of its indices, a correct partial sort
// must leave arr[i] == i for every i in [lo,hi[.
int ok = 1; index_t first_diff = 0;
for (index_t i = lo; i < hi; i++) {
if (arr[i] != (item_t)i) {
ok = 0, first_diff = i;
break;
}
}
if (VERBOSE || !ok) {
printf("Test %3d, run %3d, sort %3d. [%12td %12td] (%6td): %4s %.9f\n",
i_test+1, i_run+1, i_sort+1, (ptrdiff_t)lo, (ptrdiff_t)hi-1, (ptrdiff_t)span,
ok ? "OK" : "FAIL", dt*1e-9/span);
}
if (!ok) {
// Dump a small window of the array around the first mismatch.
enum { BEFORE = 5, AFTER = 10 };
index_t l = first_diff-lo >= BEFORE ? first_diff-BEFORE : lo;
index_t r = hi-first_diff >= AFTER+1 ? first_diff+AFTER+1 : hi;
printf(" [%td..%td]:", (ptrdiff_t)l, (ptrdiff_t)r-1);
for (index_t i = l; i < r; i++) {
printf(" %llu", (unsigned long long)arr[i]);
}
printf("\n");
}
}
test_dt += run_dt;
if (VERBOSE) {
printf("Test %3d, run %3d. Elapsed: %9.6f\n",
i_test+1, i_run+1, run_dt*1e-9);
}
}
printf("Test %3d. Elapsed: %7.3f total, %9.6f per run\n",
i_test+1, test_dt*1e-9, test_dt*1e-9/T.n_runs);
fflush(stdout);
}
}
free(arr), arr = NULL;
free(part_idxs), part_idxs = NULL;
return 0;
}

Machine: MacBook Air (M1, 2020). All times are in seconds.

$ clang -std=c99 -Wall -Wextra -Wno-unused-function -O3 -lm -o partsort partsort.c && ./partsort

Function rank_partitioned_heapsort
Test   1. Elapsed:   0.000 total,  0.000003 per run
Test   2. Elapsed:   2.441 total,  0.000349 per run
Test   3. Elapsed:   7.807 total,  0.015613 per run
Test   4. Elapsed:  18.609 total,  0.620295 per run
Test   5. Elapsed:  11.069 total,  1.383626 per run
Test   6. Elapsed:  58.315 total, 14.578769 per run
Test   7. Elapsed: 118.626 total, 29.656508 per run

Function partial_quicksort_rand
Test   1. Elapsed:   0.000 total,  0.000003 per run
Test   2. Elapsed:   4.218 total,  0.000603 per run
Test   3. Elapsed:   5.663 total,  0.011326 per run
Test   4. Elapsed:  13.308 total,  0.443609 per run
Test   5. Elapsed:  13.314 total,  1.664282 per run
Test   6. Elapsed:  17.212 total,  4.303028 per run
Test   7. Elapsed:  14.652 total,  3.662908 per run

Function inc_quicksort_rand
Test   1. Elapsed:   0.001 total,  0.000005 per run
Test   2. Elapsed:   4.212 total,  0.000602 per run
Test   3. Elapsed:   1.655 total,  0.003310 per run
Test   4. Elapsed:   1.449 total,  0.048305 per run
Test   5. Elapsed:   0.447 total,  0.055869 per run
Test   6. Elapsed:   1.444 total,  0.360975 per run
Test   7. Elapsed:   6.325 total,  1.581167 per run
ISC License
Copyright 2024 Igor Burago
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment