Skip to content

Instantly share code, notes, and snippets.

@jarulsamy
Last active February 2, 2023 19:12
Show Gist options
  • Save jarulsamy/88abfa03ce7abf1e9e35968f3f1c69bc to your computer and use it in GitHub Desktop.
Save jarulsamy/88abfa03ce7abf1e9e35968f3f1c69bc to your computer and use it in GitHub Desktop.
/* An alternative to qsort, with an identical interface.
This file is part of the GNU C Library.
Copyright (C) 1992-2023 Free Software Foundation, Inc.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <alloca.h>
#include <atomic.h>
#include <errno.h>
#include <memcopy.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#define UINT32 0
#define UINT64 1
#define ULONG 2
#define PTR 3
#define DEFAULT 4
#define THRESHOLD 16
#define min_cmp(a, b, cmp, arg) (((*cmp))((a), (b), (arg)) < 0 ? (a) : (b))
#define max_cmp(a, b, cmp, arg) (((*cmp))((a), (b), (arg)) >= 0 ? (a) : (b))
struct msort_param
{
size_t s;
unsigned var;
__compar_d_fn_t cmp;
void *arg;
char *t;
};
inline static void sort2(const struct msort_param *p, void *a, void *b)
{
const __compar_d_fn_t cmp = p->cmp;
const size_t s = p->s;
void *arg = p->arg;
char *tmp = __alloca(s);
switch (p->var)
{
case UINT32:
// 32-bit
*(uint32_t *)tmp = *(uint32_t *)min_cmp(a, b, cmp, arg);
*(uint32_t *)b = *(uint32_t *)max_cmp(a, b, cmp, arg);
*(uint32_t *)a = *(uint32_t *)tmp;
return;
case UINT64:
*(uint64_t *)tmp = *(uint64_t *)min_cmp(a, b, cmp, arg);
*(uint64_t *)b = *(uint64_t *)max_cmp(a, b, cmp, arg);
*(uint64_t *)a = *(uint64_t *)tmp;
return;
case PTR:
// clang-format off
*(const void **)tmp = min_cmp((*(const void **)a), (*(const void **)b), cmp, arg);
*(const void **)b = max_cmp((*(const void **)a), (*(const void **)b), cmp, arg);
*(const void **)a = *(void **)tmp;
// clang-format on
return;
default:
memcpy(tmp, min_cmp(a, b, cmp, arg), s);
memcpy(b, max_cmp(a, b, cmp, arg), s);
memcpy(a, tmp, s);
return;
}
}
inline static void sort3(const struct msort_param *p, void *p0, void *p1,
void *p2)
{
sort2(p, p0, p1);
sort2(p, p1, p2);
sort2(p, p0, p1);
}
inline static void sort4(const struct msort_param *p, void *p0, void *p1,
void *p2, void *p3)
{
sort2(p, p0, p1);
sort2(p, p2, p3);
sort2(p, p0, p2);
sort2(p, p1, p3);
sort2(p, p1, p2);
}
inline static void ins_sort(const struct msort_param *const p, void *b,
size_t n)
{
const __compar_d_fn_t cmp = p->cmp;
const size_t s = p->s;
const unsigned var = p->var;
char *tmp = p->t;
void *arg = p->arg;
char *base = (char *)b;
switch (n)
{
case 2:
sort2(p, base, base + s);
return;
case 3:
sort3(p, base, base + s, base + (2 * s));
return;
case 4:
sort4(p, base, base + s, base + (2 * s), base + (3 * s));
return;
}
unsigned c;
switch (var)
{
case UINT32:
for (size_t i = 1; i < n; ++i)
{
c = 1;
size_t j = i - 1;
if ((*cmp)(base + (j * s), base + (i * s), arg) > 0)
{
*(uint32_t *)tmp = ((uint32_t *)base)[i];
do
{
((uint32_t *)base)[j + 1] = ((uint32_t *)base)[j];
if (j == 0)
{
c = 0;
goto outer0;
}
j--;
} while ((*cmp)(base + (j * s), tmp, arg) > 0);
outer0:
((uint32_t *)base)[j + c] = *(uint32_t *)tmp;
}
}
return;
case UINT64:
for (size_t i = 1; i < n; ++i)
{
c = 1;
size_t j = i - 1;
if ((*cmp)(base + (j * s), base + (i * s), arg) > 0)
{
*(uint64_t *)tmp = ((uint64_t *)base)[i];
do
{
((uint64_t *)base)[j + 1] = ((uint64_t *)base)[j];
if (j == 0)
{
c = 0;
goto outer1;
}
j--;
} while ((*cmp)(base + (j * s), tmp, arg) > 0);
outer1:
((uint64_t *)base)[j + c] = *(uint64_t *)tmp;
}
}
return;
case PTR:
for (size_t i = 1; i < n; ++i)
{
c = 1;
size_t j = i - 1;
if ((*cmp)(((const void **)base)[j], ((const void **)base)[i], arg) > 0)
{
*(void **)tmp = ((void **)base)[i];
do
{
((void **)base)[j + 1] = ((void **)base)[j];
if (j == 0)
{
c = 0;
goto outer3;
}
--j;
} while ((*cmp)(((const void **)base)[j], *(const void **)tmp, arg) >
0);
outer3:
((void **)base)[j + c] = *(void **)tmp;
}
}
return;
default:
for (size_t i = 1; i < n; ++i)
{
c = 1;
size_t j = i - 1;
if ((*cmp)(base + (j * s), base + (i * s), arg) > 0)
{
memcpy(tmp, base + (i * s), s);
do
{
/* base[j + 1] = base[j]; */
memcpy(base + ((j + 1) * s), base + (j * s), s);
if (j == 0)
{
c = 0;
goto outer4;
}
j--;
} while ((*cmp)(base + (j * s), tmp, arg) > 0);
outer4:
/* base[j + c] = v; */
memcpy(base + ((j + c) * s), tmp, s);
}
}
return;
}
}
static void msort_with_tmp(const struct msort_param *const p, void *b,
const size_t n)
{
char *b1, *b2;
size_t n1, n2;
char *tmp = p->t;
const size_t s = p->s;
const __compar_d_fn_t cmp = p->cmp;
void *arg = p->arg;
if (n <= 1) return;
if (n < THRESHOLD)
{
ins_sort(p, b, n);
return;
}
n1 = n / 2;
n2 = n - n1;
b1 = b;
b2 = (char *)b + (n1 * p->s);
msort_with_tmp(p, b1, n1);
msort_with_tmp(p, b2, n2);
switch (p->var)
{
case UINT32:
while (n1 > 0 && n2 > 0)
{
if ((*cmp)(b1, b2, arg) <= 0)
{
*(uint32_t *)tmp = *(uint32_t *)b1;
b1 += sizeof(uint32_t);
--n1;
}
else
{
*(uint32_t *)tmp = *(uint32_t *)b2;
b2 += sizeof(uint32_t);
--n2;
}
tmp += sizeof(uint32_t);
}
break;
case UINT64:
while (n1 > 0 && n2 > 0)
{
if ((*cmp)(b1, b2, arg) <= 0)
{
*(uint64_t *)tmp = *(uint64_t *)b1;
b1 += sizeof(uint64_t);
--n1;
}
else
{
*(uint64_t *)tmp = *(uint64_t *)b2;
b2 += sizeof(uint64_t);
--n2;
}
tmp += sizeof(uint64_t);
}
break;
case ULONG:
while (n1 > 0 && n2 > 0)
{
unsigned long *tmpl = (unsigned long *)tmp;
unsigned long *bl;
tmp += s;
if ((*cmp)(b1, b2, arg) <= 0)
{
bl = (unsigned long *)b1;
b1 += s;
--n1;
}
else
{
bl = (unsigned long *)b2;
b2 += s;
--n2;
}
while (tmpl < (unsigned long *)tmp) *tmpl++ = *bl++;
}
break;
case PTR:
while (n1 > 0 && n2 > 0)
{
if ((*cmp)(*(const void **)b1, *(const void **)b2, arg) <= 0)
{
*(void **)tmp = *(void **)b1;
b1 += sizeof(void *);
--n1;
}
else
{
*(void **)tmp = *(void **)b2;
b2 += sizeof(void *);
--n2;
}
tmp += sizeof(void *);
}
break;
default:
while (n1 > 0 && n2 > 0)
{
if ((*cmp)(b1, b2, arg) <= 0)
{
tmp = (char *)mempcpy(tmp, b1, s);
b1 += s;
--n1;
}
else
{
tmp = (char *)mempcpy(tmp, b2, s);
b2 += s;
--n2;
}
}
break;
}
if (n1 > 0) mempcpy(tmp, b1, n1 * s);
mempcpy(b, p->t, (n - n2) * s);
}
void __qsort_r(void *b, size_t n, size_t s, __compar_d_fn_t cmp, void *arg)
{
size_t size = n * s;
char *tmp = NULL;
struct msort_param p;
/* For large object sizes use indirect sorting. */
if (s > 32) size = 2 * n * sizeof(void *) + s;
if (size < 1024) /* The temporary array is small, so put it on the stack. */
p.t = __alloca(size);
else
{
/* We should avoid allocating too much memory since this might
have to be backed up by swap space. */
static long int phys_pages;
static int pagesize;
if (pagesize == 0)
{
phys_pages = __sysconf(_SC_PHYS_PAGES);
if (phys_pages == -1)
/* Error while determining the memory size. So let's
assume there is enough memory. Otherwise the
implementer should provide a complete implementation of
the `sysconf' function. */
phys_pages = (long int)(~0ul >> 1);
/* The following determines that we will never use more than
a quarter of the physical memory. */
phys_pages /= 4;
/* Make sure phys_pages is written to memory. */
atomic_write_barrier();
pagesize = __sysconf(_SC_PAGESIZE);
}
/* Just a comment here. We cannot compute
phys_pages * pagesize
and compare the needed amount of memory against this value.
The problem is that some systems might have more physical
memory then can be represented with a `size_t' value (when
measured in bytes. */
/* If the memory requirements are too high don't allocate memory. */
if (size / pagesize > (size_t)phys_pages)
{
_quicksort(b, n, s, cmp, arg);
return;
}
/* It's somewhat large, so malloc it. */
int save = errno;
tmp = malloc(size);
__set_errno(save);
if (tmp == NULL)
{
/* Couldn't get space, so use the slower algorithm
that doesn't need a temporary array. */
_quicksort(b, n, s, cmp, arg);
return;
}
p.t = tmp;
}
p.s = s;
p.var = DEFAULT;
p.cmp = cmp;
p.arg = arg;
if (s > 32)
{
/* Indirect sorting. */
char *ip = (char *)b;
void **tp = (void **)(p.t + n * sizeof(void *));
void **t = tp;
void *tmp_storage = (void *)(tp + n);
while ((void *)t < tmp_storage)
{
*t++ = ip;
ip += s;
}
p.s = sizeof(void *);
p.var = PTR;
msort_with_tmp(&p, p.t + n * sizeof(void *), n);
/* tp[0] .. tp[n - 1] is now sorted, copy around entries of
the original array. Knuth vol. 3 (2nd ed.) exercise 5.2-10. */
char *kp;
size_t i;
for (i = 0, ip = (char *)b; i < n; i++, ip += s)
if ((kp = tp[i]) != ip)
{
size_t j = i;
char *jp = ip;
memcpy(tmp_storage, ip, s);
do
{
size_t k = (kp - (char *)b) / s;
tp[j] = jp;
memcpy(jp, kp, s);
j = k;
jp = kp;
kp = tp[k];
} while (kp != ip);
tp[j] = jp;
memcpy(jp, tmp_storage, s);
}
}
else
{
if ((s & (sizeof(uint32_t) - 1)) == 0 &&
((char *)b - (char *)0) % __alignof__(uint32_t) == 0)
{
if (s == sizeof(uint32_t))
p.var = UINT32;
else if (s == sizeof(uint64_t) &&
((char *)b - (char *)0) % __alignof__(uint64_t) == 0)
p.var = UINT64;
else if ((s & (sizeof(unsigned long) - 1)) == 0 &&
((char *)b - (char *)0) % __alignof__(unsigned long) == 0)
p.var = ULONG;
}
msort_with_tmp(&p, b, n);
}
free(tmp);
}
libc_hidden_def(__qsort_r) weak_alias(__qsort_r, qsort_r)
void qsort(void *b, size_t n, size_t s, __compar_fn_t cmp)
{
return __qsort_r(b, n, s, (__compar_d_fn_t)cmp, NULL);
}
libc_hidden_def(qsort)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment