Skip to content

Instantly share code, notes, and snippets.

@igorburago
Last active March 22, 2025 16:19
Show Gist options
  • Save igorburago/fa340429e75007a41335fb660d419e52 to your computer and use it in GitHub Desktop.
Save igorburago/fa340429e75007a41335fb660d419e52 to your computer and use it in GitHub Desktop.
Fast partial sorting of an array on a subinterval
// Copyright 2024 Igor Burago
// SPDX-License-Identifier: ISC
#include <math.h> // for partition_floyd_rivest()
#include <stddef.h> // ptrdiff_t
#include <stdint.h>
#include <string.h> // memcpy(), memmove(), memset()
// Built-in u128 is not strictly required, but it makes mcg64 simpler and faster.
typedef __uint128_t uint128_t;
#define must_inline inline __attribute__((always_inline))
#define must_not_inline __attribute__((noinline))
#define inline_within __attribute__((flatten))
#if __has_builtin(__builtin_unpredictable)
#define fickle(x) (__builtin_unpredictable((x)))
#else
#define fickle(x) (x)
#endif
#if __has_builtin(__builtin_expect)
#define usually(x, c) (__builtin_expect((x), (c)))
#else
#define usually(x, c) (x)
#endif
#define likely(x) (usually(!!(x), 1))
#define unlikely(x) (usually(!!(x), 0))
#define glue_raw(x, y) x##y
#define glue(x, y) glue_raw(x, y)
#define dub(name) glue(name, glue(__L, __LINE__))
#define apriori(x) extern void *(*compile_time_check_dummy(void))[ \
sizeof(struct { unsigned compile_time_check : (x) ? 1 : -1; })]
// #define DEBUG_BUILD
#if defined DEBUG_BUILD
#include <stdio.h>
#define surely(x) (likely(x) ? (void)0 : \
(fprintf(stderr, "FATAL: %s:%d: surely(%s) has failed!\n", __FILE__, __LINE__, #x), \
fflush(NULL), \
__builtin_trap()))
#define unreachable() surely(0 && "unreachable")
#else
#if __has_builtin(__builtin_unreachable)
#define unreachable() (__builtin_unreachable())
#else
#define unreachable() (*(volatile int *)0 = 0)
#endif
#define surely(x) do { if (!(x)) { for (;;) { unreachable(); } } } while (0)
#endif
#define swap(x, y) do { \
void *dub(xp) = &(x), *dub(yp) = &(y); \
apriori(sizeof(x) == sizeof(y)); \
unsigned char dub(tmp)[sizeof(x)]; \
memcpy(dub(tmp), dub(xp), sizeof(dub(tmp))); \
memmove(dub(xp), dub(yp), sizeof(dub(tmp))); \
memcpy(dub(yp), dub(tmp), sizeof(dub(tmp))); \
} while (0)
static inline uint64_t
splitmix64_next(uint64_t *s) {
uint64_t x = *s += 0x9E3779B97F4A7C15u;
x = (x^(x>>30)) * 0xBF58476D1CE4E5B9u;
x = (x^(x>>27)) * 0x94D049BB133111EBu;
return x^(x>>31);
}
struct mcg64 { uint128_t s; };
static inline struct mcg64
mcg64(uint64_t seed) {
uint64_t a = splitmix64_next(&seed);
uint64_t b = splitmix64_next(&seed);
return (struct mcg64){((uint128_t)a << 64) + b};
}
static must_inline uint64_t
mcg64_next(struct mcg64 *r) {
return (r->s *= 0xDA942042E4DD58B5u) >> 64;
}
// Fast and only slightly biased for large (end-beg) approaching u32/u64 max.
static must_inline uint32_t
onto_range_u32(uint32_t x, uint32_t beg, uint32_t end) {
x = ((uint64_t)x * (end - beg)) >> 32;
return beg + x;
}
static must_inline uint64_t
onto_range_u64(uint64_t x, uint64_t beg, uint64_t end) {
x = ((uint128_t)x * (end - beg)) >> 64;
return beg + x;
}
static must_inline uint64_t
load_u64(void *p, size_t size) {
union { unsigned char b[sizeof(uint64_t)]; uint64_t u; } x = {.u = 0};
memcpy(x.b, p, size <= sizeof(uint64_t) ? size : sizeof(uint64_t));
return x.u;
}
//-----------------------------------------------------------------------------*
// ARRAY TYPE PARAMETERS
//-----------------------------------------------------------------------------*
typedef int item_t;
#define item_less(x, y) ((x) < (y))
// Both signed and unsigned types are supported:
typedef uint32_t idx_t;
#if 0
#define IDX_RNG_USING_PCG
#endif
#if defined IDX_RNG_USING_PCG
typedef struct pcg32 idx_rng_t;
#define idx_rng(seed) (pcg32(seed))
#define idx_rand(rng, len) (onto_range_u32(pcg32_next(rng), 0, (len)))
#else
// MCG is faster than LCG or PCG, and is usually good enough for quicksort:
typedef struct mcg64 idx_rng_t;
#define idx_rng(seed) (mcg64(seed))
#define idx_rand(rng, len) (onto_range_u64(mcg64_next(rng), 0, (len)))
#endif
//-----------------------------------------------------------------------------*
// OPTION 1: One-off partial quicksort (no state shared between calls)
//-----------------------------------------------------------------------------*
#define max(x, y) ((x) > (y) ? (x) : (y))
enum {
QSORT_CUTOFF_HARD = 4,
QSORT_CUTOFF_SOFT = max(8, 64/sizeof(item_t)),
};
apriori(QSORT_CUTOFF_HARD >= 3); // We use the median-of-three pivoting.
// Insertion sort.
static inline void
qsort_cutoff_sort(item_t *arr, idx_t len) {
surely(len >= 0);
for (idx_t i = (len != 0); i != len; i++) {
if fickle(!item_less(arr[i], arr[i-1])) { continue; }
item_t x = arr[i];
idx_t j = i;
do { arr[j] = arr[j-1]; } while (--j != 0 && fickle(item_less(x, arr[j-1])));
arr[j] = x;
}
}
// Hoare-partition elements in [0,len[ using the item at piv_idx for pivot.
static must_inline idx_t
partition(item_t *arr, idx_t len, idx_t piv_idx) {
surely(len >= 2); surely(0 <= piv_idx); surely(piv_idx < len);
item_t piv = arr[piv_idx];
arr[piv_idx] = arr[0];
piv_idx = item_less(arr[len-1], piv) ? len-1 : 0;
arr[0] = arr[piv_idx];
arr[piv_idx] = piv;
idx_t i = 0, j = len-1;
for (;;) {
do { i++; } while (item_less(arr[i], piv));
do { j--; } while (item_less(piv, arr[j]));
if (i >= j) { break; }
swap(arr[i], arr[j]);
}
j += (piv_idx != 0);
swap(arr[j], arr[piv_idx]);
return j;
}
static must_inline idx_t
median_rand_3(idx_rng_t *rng, item_t *arr, idx_t len) {
surely(len >= 3);
idx_t i[3];
i[0] = idx_rand(rng, len-0);
i[1] = idx_rand(rng, len-1);
i[2] = idx_rand(rng, len-2);
unsigned i1_past_i0 = (i[1] >= i[0]);
i[1] += i1_past_i0; // Count i[1]-th item, jumping over i[0]-th.
i[2] += (i[2] >= i[!i1_past_i0]); // Count i[2]-th, skipping min(i[0],i[1])-th...
i[2] += (i[2] >= i[i1_past_i0]); // ...and then max(i[0],i[1])-th.
unsigned gt01 = !!item_less(arr[i[1]], arr[i[0]]);
unsigned lt02 = !!item_less(arr[i[0]], arr[i[2]]);
unsigned lt12 = !!item_less(arr[i[1]], arr[i[2]]);
return i[(gt01^lt02) + (lt02^lt12)];
}
// Partially quick-sort elements from [l,r[ so that those whose ranks in
// the sorted sequence belong to [lo,hi[ take their correct positions.
static void
part_qsort_rand_rec(idx_rng_t *rng, item_t *arr, idx_t lo, idx_t hi, idx_t l, idx_t r) {
for (;;) {
if unlikely(r-l <= QSORT_CUTOFF_HARD ||
(r-l <= QSORT_CUTOFF_SOFT && lo <= l && r <= hi)) {
qsort_cutoff_sort(arr+l, r-l);
return;
}
idx_t r1 = l + partition(arr+l, r-l, median_rand_3(rng, arr+l, r-l));
idx_t l1 = r1 + 1;
unsigned go_left = (lo < r1 && r1-l > 1);
unsigned go_right = (l1 < hi && r-l1 > 1);
switch fickle((go_left<<1) | go_right) {
case 0: { return; }
case 1: right: { l = l1; break; }
case 2: left: { r = r1; break; }
case 3: {
if (r1-l <= r-l1) {
part_qsort_rand_rec(rng, arr, lo, hi, l, r1);
goto right;
} else {
part_qsort_rand_rec(rng, arr, lo, hi, l1, r);
goto left;
}
}
default: { unreachable(); }
}
}
}
static inline void
part_qsort_rand(item_t *arr, idx_t len, idx_t lo, idx_t hi) {
surely(len >= 0); surely(0 <= lo); surely(lo <= hi); surely(hi <= len);
if (len <= 1 || lo >= hi) { return; }
idx_rng_t rng = idx_rng((uintptr_t)arr);
idx_t i = idx_rand(&rng, len);
rng = idx_rng((uintptr_t)arr + load_u64(arr+i, (len-i)*sizeof(arr[0])));
part_qsort_rand_rec(&rng, arr, lo, hi, 0, len);
}
//-----------------------------------------------------------------------------*
// OPTION 2: Incremental quicksort (pivot indices are shared between calls)
//-----------------------------------------------------------------------------*
// Partially quick-sort elements from [l,r[ so that those whose ranks in the
// sorted sequence belong to [lo,hi[ end up in their correct positions, taking
// into account the history of partitions from previous invocations.
//
// The pivot indices of past partitions are stored in the piv_idx array in the
// order of preorder traversal of the quicksort tree. Special index values
// SORTED and CHAOS signify that an interval is, respectively, fully sorted and
// has not been partitioned yet (all piv_idx must initially be set to CHAOS).
#define INC_QSORT_CHAOS ((idx_t)-1)
#define INC_QSORT_SORTED ((idx_t)-2)
static void
inc_qsort_rand_rec(idx_rng_t *rng, item_t *arr, idx_t lo, idx_t hi,
idx_t l, idx_t r, idx_t *piv_idx) {
for (;;) {
int will_be_sorted = (lo <= l && r <= hi);
if unlikely(r-l <= QSORT_CUTOFF_HARD ||
(r-l <= QSORT_CUTOFF_SOFT && will_be_sorted)) {
qsort_cutoff_sort(arr+l, r-l);
*piv_idx = INC_QSORT_SORTED;
return;
}
idx_t r1 = *piv_idx;
if likely(r1 == INC_QSORT_CHAOS) {
r1 = l + partition(arr+l, r-l, median_rand_3(rng, arr+l, r-l));
}
*piv_idx = will_be_sorted ? INC_QSORT_SORTED : r1;
idx_t l1 = r1 + 1;
idx_t *piv_idx_left = piv_idx + 1;
idx_t *piv_idx_right = piv_idx + (l1 - l);
unsigned go_left = (lo < r1 && r1-l > 1 && *piv_idx_left != INC_QSORT_SORTED);
unsigned go_right = (l1 < hi && r-l1 > 1 && *piv_idx_right != INC_QSORT_SORTED);
switch fickle((go_left<<1) | go_right) {
case 0: { return; }
case 1: right: { l = l1, piv_idx = piv_idx_right; break; }
case 2: left: { r = r1, piv_idx = piv_idx_left; break; }
case 3: {
if (r1-l <= r-l1) {
inc_qsort_rand_rec(rng, arr, lo, hi, l, r1, piv_idx_left);
goto right;
} else {
inc_qsort_rand_rec(rng, arr, lo, hi, l1, r, piv_idx_right);
goto left;
}
}
default: { unreachable(); }
}
}
}
static inline void
inc_qsort_rand(item_t *arr, idx_t *piv_idxs, idx_t len, idx_t lo, idx_t hi) {
surely(len >= 0); surely(0 <= lo); surely(lo <= hi); surely(hi <= len);
if (len <= 1 || lo >= hi || *piv_idxs == INC_QSORT_SORTED) { return; }
idx_rng_t rng = idx_rng((uintptr_t)arr);
idx_t i = idx_rand(&rng, len);
rng = idx_rng((uintptr_t)arr + load_u64(arr+i, (len-i)*sizeof(arr[0])));
inc_qsort_rand_rec(&rng, arr, lo, hi, 0, len, piv_idxs);
}
//-----------------------------------------------------------------------------*
// OPTION 3: Floyd-Rivest partitioning + N largest scan using binary heap
//-----------------------------------------------------------------------------*
// Binary heap implementation. Supports both min- and max-heaps, as well as
// both left-to-right and right-to-left storage layout in memory.
#define heap_superior(x, y) ((min_heap) ? item_less((x), (y)) : item_less((y), (x)))
#define heap_at(i) (*((stored_backwards) ? (root)-(i) : (root)+(i)))
static inline void
heap_fix_down(int min_heap, int stored_backwards, item_t *root, idx_t node, idx_t len) {
surely(len > 0); surely(0 <= node); surely(node < len);
item_t x = heap_at(node);
for (idx_t child; child = (node<<1)|1, child < len; node = child) {
if likely(child+1 != len) {
child += !!heap_superior(heap_at(child+1), heap_at(child));
}
if fickle(!heap_superior(heap_at(child), x)) { break; }
heap_at(node) = heap_at(child);
}
heap_at(node) = x;
}
#undef heap_superior
#undef heap_at
static inline_within void
heap_build(int min_heap, int stored_backwards, item_t *root, idx_t len) {
surely(len >= 0);
for (idx_t i = len/2; i != 0; i--) {
heap_fix_down(min_heap, stored_backwards, root, i-1, len);
}
}
#define lr_max_heap_fix_down(root, node, len) (heap_fix_down(0,0, (root), (node), (len)))
#define rl_min_heap_fix_down(root, node, len) (heap_fix_down(1,1, (root), (node), (len)))
#define lr_max_heap_build(root, len) (heap_build(0,0, (root), (len)))
#define rl_min_heap_build(root, len) (heap_build(1,1, (root), (len)))
// Use Floyd-Rivest Select algorithm to partition elements in [beg,end[ using
// the piv_rank-th largest element in that interval (counting from zero) for
// the pivot, which hence ends up at exactly the piv_rank index in the array
// when partitioning completes.
//
// - Floyd, R. W., Rivest, R. L. (1975). Expected time bounds for selection.
// Communications of the ACM, 18(3), 165-172.
// - Kiwiel, K. C. (2005). On Floyd and Rivest's SELECT algorithm.
// Theoretical Computer Science, 347(1-2), 214-238.
static void
partition_floyd_rivest(item_t *arr, idx_t beg, idx_t end, idx_t piv_rank) {
enum { CUTOFF_LEN = 600 }; // At most 4 levels of recursion for 32-bit ranges.
surely(beg <= end); surely(beg <= piv_rank); surely(piv_rank < end);
ptrdiff_t l = beg, r = end-1, k = piv_rank;
while (l < r) {
ptrdiff_t n = r - l + 1;
ptrdiff_t m = k - l + 1;
if (n > CUTOFF_LEN) {
double log_n = log(n);
ptrdiff_t s = (ptrdiff_t)exp(log_n*(2/3.0)) / 2;
ptrdiff_t g = (1 - 2*(m < n/2)) * (ptrdiff_t)sqrt(log_n*(n-s)*s/n) / 2;
ptrdiff_t l1 = k + g - m*s/n;
ptrdiff_t r1 = k + g + (n-m)*s/n;
if unlikely(l1 < l) { l1 = l; }
if unlikely(r1 > r) { r1 = r; }
partition_floyd_rivest(arr, l1, r1+1, k);
}
ptrdiff_t p = l + partition(arr+l, n, m-1);
if fickle(k <= p) { r = p-1; }
if fickle(p <= k) { l = p+1; }
}
}
// Partially heap-sort elements from [0,len[ so that those whose ranks in
// the sorted sequence belong to [lo,hi[ take their correct positions.
//
// If the requested sorting interval is closer to the beginning of the array:
//
// (A1) Partition the array around the (hi-1)-st largest element as a pivot.
// (A2) Build a right-to-left min-heap out of elements in [lo,hi[ so that
// the root node holding the minimum value is stored at index hi-1.
// (A3) For each element in [0,lo[, if it is exceeding the current heap
// minimum, exchange the former with the latter, fixing the heap.
// (A4) Fill [lo,hi[ going left to right with successively removed heap
// minimums, simultaneously shortening (and maintaining) the heap.
//
// Otherwise, if the interval is closer to the end of the array:
//
// (B1) Partition the array around the lo-th largest element as a pivot.
// (B2) Build a left-to-right max-heap out of elements in [lo,hi[ so that
// the root node holding the maximum value is stored at index lo.
// (B3) For each element in [hi,len[, if it is inferior to the current heap
// maximum, exchange the former with the latter, fixing the heap.
// (B4) Fill [lo,hi[ going right to left with successively removed heap
// maximums, simultaneously shortening (and maintaining) the heap.
static inline void
part_hsort_frsel(item_t *arr, idx_t len, idx_t lo, idx_t hi) {
surely(len >= 0); surely(0 <= lo); surely(lo < hi); surely(hi <= len);
if (len <= 1 || lo >= hi) { return; }
if (lo < len-hi) {
partition_floyd_rivest(arr, 0, len, hi-1);
idx_t heap_len = hi - lo;
item_t *heap_root = arr + hi - 1;
rl_min_heap_build(heap_root, heap_len);
for (idx_t i = 0; i != lo; i++) {
if (!item_less(*heap_root, arr[i])) { continue; }
swap(arr[i], *heap_root);
rl_min_heap_fix_down(heap_root, 0, heap_len);
}
for (idx_t i = lo; i != hi; i++) {
swap(arr[i], *heap_root);
rl_min_heap_fix_down(heap_root, 0, --heap_len);
}
} else {
if likely(lo != 0) { partition_floyd_rivest(arr, 0, len, lo); }
idx_t heap_len = hi - lo;
item_t *heap_root = arr + lo;
lr_max_heap_build(heap_root, heap_len);
for (idx_t i = hi; i != len; i++) {
if (!item_less(arr[i], *heap_root)) { continue; }
swap(arr[i], *heap_root);
lr_max_heap_fix_down(heap_root, 0, heap_len);
}
for (idx_t i = hi-1; i != lo; i--) {
swap(arr[i], *heap_root);
lr_max_heap_fix_down(heap_root, 0, --heap_len);
}
}
}
//-----------------------------------------------------------------------------*
// BENCHMARKS
//-----------------------------------------------------------------------------*
#include <stdio.h> // printf()
#include <stdlib.h> // calloc(), free()
#include <time.h> // clock_gettime()
static inline uint64_t
nsec(void) {
struct timespec t;
clock_gettime(CLOCK_MONOTONIC_RAW, &t);
return (uint64_t)t.tv_sec*1000*1000*1000 + (uint64_t)t.tv_nsec;
}
struct pcg32 { uint64_t s, t; };
static inline struct pcg32
pcg32(uint64_t seed) {
uint64_t a = splitmix64_next(&seed);
uint64_t b = splitmix64_next(&seed);
return (struct pcg32){.s = a, .t = b|1};
}
static inline uint32_t
pcg32_next(struct pcg32 *r) {
uint32_t k = r->s >> 59;
uint32_t x = (r->s ^ (r->s>>18)) >> 27;
r->s = r->s*0x5851F42D4C957F2Du + r->t;
return (x >> k) | (x << (-k & 31));
}
static inline void
shuffle(struct pcg32 *r, item_t *arr, uint32_t n) {
for (uint32_t i = 0; i < n; i++) {
swap(arr[i], arr[onto_range_u32(pcg32_next(r), i, n)]);
}
}
static inline void
call_part_qsort_rand(item_t *arr, idx_t *piv_idxs, idx_t len, idx_t lo, idx_t hi) {
part_qsort_rand(arr, len, lo, hi); (void)piv_idxs;
}
static inline void
call_part_hsort_frsel(item_t *arr, idx_t *piv_idxs, idx_t len, idx_t lo, idx_t hi) {
part_hsort_frsel(arr, len, lo, hi); (void)piv_idxs;
}
extern int
main(void) {
enum {
RANDOMIZE = 0,
VERBOSE = 0,
};
typedef void partial_sort_test_fn(item_t*, idx_t*, idx_t, idx_t, idx_t);
static const struct test_func {
partial_sort_test_fn *func;
const char *name;
} funcs[] = {
#define X(p,f) {glue(p,f), #f}
X(,inc_qsort_rand),
X(call_,part_qsort_rand),
X(call_,part_hsort_frsel),
#undef X
}; enum { N_FUNCS = sizeof(funcs)/sizeof(funcs[0]) };
static const struct test_params {
int n_runs;
int n_sorts;
idx_t arr_len;
idx_t min_sort_len;
idx_t max_sort_len;
} tests[] = {
{100, 5, 100, 5, 20},
{7000, 1, 100*1000, 100, 1000},
{500, 100, 100*1000, 100, 1000},
{30, 500, 1000*1000, 100, 5000},
{8, 2000, 1000*1000, 100, 5000},
{4, 500, 10*1000*1000, 100, 10*1000},
{4, 25, 100*1000*1000, 100, 10*1000},
}; enum { N_TESTS = sizeof(tests)/sizeof(tests[0]) };
idx_t max_arr_len = 0;
for (int i = 0; i < N_TESTS; i++) {
if (max_arr_len < tests[i].arr_len) {
max_arr_len = tests[i].arr_len;
}
}
item_t *arr = calloc(max_arr_len, sizeof(arr[0]));
idx_t *piv_idxs = calloc(max_arr_len, sizeof(piv_idxs[0]));
surely(arr); surely(piv_idxs);
uint64_t rng_seed = RANDOMIZE ? nsec() : 1889;
for (int i_func = 0; i_func < N_FUNCS; i_func++) {
partial_sort_test_fn *const partial_sort = funcs[i_func].func;
printf("\nFunction %s\n", funcs[i_func].name);
struct pcg32 rng = pcg32(rng_seed);
for (int i_test = 0; i_test < N_TESTS; i_test++) {
const struct test_params T = tests[i_test];
if (VERBOSE) {
printf("Test %3d. Parameters: %d x %d, %td, %td..%td\n",
i_test+1, T.n_runs, T.n_sorts, (ptrdiff_t)T.arr_len,
(ptrdiff_t)T.min_sort_len, (ptrdiff_t)T.max_sort_len);
}
uint64_t test_dt = 0;
for (int i_run = 0; i_run < T.n_runs; i_run++) {
// Initialize the array to a random permutation (of its indices).
for (idx_t i = 0; i < T.arr_len; i++) { arr[i] = (item_t)i; }
shuffle(&rng, arr, T.arr_len);
// Set pivot indices to INC_QSORT_CHAOS for inc_qsort_rand():
apriori(INC_QSORT_CHAOS == (idx_t)-1);
memset(piv_idxs, 0xFF, T.arr_len*sizeof(piv_idxs[0]));
uint64_t run_dt = 0;
for (int i_sort = 0; i_sort < T.n_sorts; i_sort++) {
idx_t span = onto_range_u32(pcg32_next(&rng), T.min_sort_len, T.max_sort_len+1);
idx_t lo = onto_range_u32(pcg32_next(&rng), 0, T.arr_len-span+1);
idx_t hi = lo + span;
uint64_t t0 = nsec();
(*partial_sort)(arr, piv_idxs, T.arr_len, lo, hi);
uint64_t dt = nsec() - t0;
run_dt += dt;
int ok = 1; idx_t first_diff = 0;
for (idx_t i = lo; i < hi; i++) {
if (arr[i] != (item_t)i) {
ok = 0, first_diff = i;
break;
}
}
if (VERBOSE || !ok) {
printf("Test %3d, run %3d, sort %3d. [%12td %12td] (%6td): %4s %.9f\n",
i_test+1, i_run+1, i_sort+1, (ptrdiff_t)lo, (ptrdiff_t)hi-1, (ptrdiff_t)span,
ok ? "OK" : "FAIL", dt*1e-9/span);
}
if (!ok) {
enum { BEFORE = 5, AFTER = 10 };
idx_t l = first_diff-lo >= BEFORE ? first_diff-BEFORE : lo;
idx_t r = hi-first_diff >= AFTER+1 ? first_diff+AFTER+1 : hi;
printf(" [%td..%td]:", (ptrdiff_t)l, (ptrdiff_t)r-1);
for (idx_t i = l; i < r; i++) {
printf(" %llu", (unsigned long long)arr[i]);
}
printf("\n");
}
}
test_dt += run_dt;
if (VERBOSE) {
printf("Test %3d, run %3d. Elapsed: %9.6f\n",
i_test+1, i_run+1, run_dt*1e-9);
}
}
printf("Test %3d. Elapsed: %7.3f total, %9.6f per run\n",
i_test+1, test_dt*1e-9, test_dt*1e-9/T.n_runs);
fflush(stdout);
}
}
free(arr), arr = 0;
free(piv_idxs), piv_idxs = 0;
return 0;
}

Machine: MacBook Air (M1, 2020).
All times are in seconds.

$ clang -std=c99 -Wall -Wextra -Wno-unused-function -O3 -lm -o partsort partsort.c && ./partsort

Function inc_qsort_rand
Test   1. Elapsed:   0.001 total,  0.000005 per run
Test   2. Elapsed:   4.212 total,  0.000602 per run
Test   3. Elapsed:   1.655 total,  0.003310 per run
Test   4. Elapsed:   1.449 total,  0.048305 per run
Test   5. Elapsed:   0.447 total,  0.055869 per run
Test   6. Elapsed:   1.444 total,  0.360975 per run
Test   7. Elapsed:   6.325 total,  1.581167 per run

Function part_qsort_rand
Test   1. Elapsed:   0.000 total,  0.000003 per run
Test   2. Elapsed:   4.218 total,  0.000603 per run
Test   3. Elapsed:   5.663 total,  0.011326 per run
Test   4. Elapsed:  13.308 total,  0.443609 per run
Test   5. Elapsed:  13.314 total,  1.664282 per run
Test   6. Elapsed:  17.212 total,  4.303028 per run
Test   7. Elapsed:  14.652 total,  3.662908 per run

Function part_hsort_frsel
Test   1. Elapsed:   0.000 total,  0.000003 per run
Test   2. Elapsed:   2.441 total,  0.000349 per run
Test   3. Elapsed:   7.807 total,  0.015613 per run
Test   4. Elapsed:  18.609 total,  0.620295 per run
Test   5. Elapsed:  11.069 total,  1.383626 per run
Test   6. Elapsed:  58.315 total, 14.578769 per run
Test   7. Elapsed: 118.626 total, 29.656508 per run
ISC License
Copyright 2024 Igor Burago
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment