Skip to content

Instantly share code, notes, and snippets.

@CmdQ
Last active September 5, 2023 06:22
Show Gist options
  • Save CmdQ/1d80c043c99d29eb2bb6 to your computer and use it in GitHub Desktop.
Save CmdQ/1d80c043c99d29eb2bb6 to your computer and use it in GitHub Desktop.
Radix sort for (un-)signed integer types, float & double.
/* Copyright (c) 2015 Tobias Becker
**
**
**
** Permission is hereby granted, free of charge, to any person obtaining a copy
** of this software and associated documentation files (the "Software"), to deal
** in the Software without restriction, including without limitation the rights
** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
** copies of the Software, and to permit persons to whom the Software is
** furnished to do so, subject to the following conditions:
**
**
**
** The above copyright notice and this permission notice shall be included in
** all copies or substantial portions of the Software.
**
**
**
** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
** THE SOFTWARE.
*/
#pragma once
#include <vector>
#include <array>
#include <limits>
#include <iterator>
#include <algorithm>
#include <numeric>
#include <type_traits>
#include <utility>
#include <cassert>
/* PROPERTIES:
** - Inplace
** - O(k * n)
** - O(n) temporary memory needed.
** - Stable sort due to the fact that it is a LSB radix sort.
** - Works with forward iterators but takes advantage of random access iterators
** with contiguous memory to save unnecessary copies of the temporary array.
** - Contiguous memory mode is deduced from the iterator type being random access.
** If your data struct does not fit in, specify manually! Otherwise, at least in
** debug mode an assertion should be triggered.
** - All byte-sized histograms are generated in scan over the input.
** - A fully sorted input is detected in this first pass and sorting is done.
** - During histogram generation, pathological rounds are remembered to be skipped
** later because they wouldn't change any ordering anyway. For instance sorting n
** unsigned integers that are all < 256 would only take O(2 * n) instead of O(5 * n).
** - All three cases (unsigned integer, signed integer and (signed) floating point)
** are handled in one block without runtime overhead that stems from unused
** cases. The compiler should optimize unneeded checks away because they're known
** at compile time.
** - Floating point bit manipulation is compliant with C++'s strict alias rules.
** - Can switch to insertion sort for small n.
*/
//#define INSERTION_SORT_FOR_SMALL_N
namespace RadixSortDetails
{
using byte = unsigned char;
template<class T>
using lim = typename std::numeric_limits<T>;
template<class Iter>
using iterator_value_type = typename std::iterator_traits<Iter>::value_type;
enum
{
HISTOGRAM_SIZE = lim<byte>::max() + 1,
HISTOGRAM_HALF = HISTOGRAM_SIZE / 2
};
template<class T>
/// Extract byte from any integer (shift = 0 for LSB).
inline byte const * ubyte(T const & num)
{
static_assert(std::is_arithmetic<T>::value, "Only for fundamental number types that are allowed.");
static_assert(lim<T>::is_integer || lim<T>::is_iec559,
"Only know how to handle ints and (32-bit and 64-bit) IEEE-754 floating point values.");
/*
* This is in accordance with the standard, which allowes casting any pointer to an unsigned
* char pointer for inspection (i.e. reading). Using memcpy would have been the other option,
* but as we only need one byte anyway and this is easily available though indexing this seemed
* like the better choice.
*/
return reinterpret_cast<byte const *>(&num);
}
template<class Iter>
bool generate_histograms(Iter const begin, Iter const end, typename std::iterator_traits<Iter>::difference_type const n,
std::array<std::array<int, HISTOGRAM_SIZE>, sizeof(iterator_value_type<Iter>)> & histograms)
{
assert(histograms.data());
bool sorted = true;
int max_counts[sizeof(iterator_value_type<Iter>)]{};
auto previous = begin;
// One-time pass over input to build all histograms at once. Also check if sorting is necessary.
for (auto iter = begin; iter != end; ++iter)
{
if (*previous > *iter)
{
sorted = false;
}
previous = iter;
auto const positions = ubyte(*iter);
int shift = 0;
for (auto & hist : histograms)
{
if (++hist[positions[shift]] > max_counts[shift])
{
max_counts[shift] = hist[positions[shift]];
}
++shift;
}
}
for (auto const & h : histograms)
{
assert(std::accumulate(h.begin(), h.end(), 0) == n);
}
for (int i = 0; i < sizeof(iterator_value_type<Iter>); ++i)
{
if (max_counts[i] == n)
{
histograms[i][0] = -1;
}
}
return sorted;
}
template<class Iter, class Iter2>
/// For non-contiguous memory (last parameter), we always have to copy in between rounds.
inline void post_round_copy(Iter & begin, Iter,
Iter2 & begin2, Iter2 & end2,
bool, std::false_type)
{
std::copy(begin2, end2, begin);
}
template<class Iter>
/// For contiguous memory (last parameter), we can replace in-between copying by swapping iterators.
inline void post_round_copy(Iter & begin, Iter & end,
Iter & begin2, Iter & end2,
bool & swapped, std::true_type)
{
using namespace std;
/*
* We can only do this check here, because (forward_)list will never get here and we *know* that one
* of the ranges comes from a vector, which is contiguous.
*
* So this can only fail for some random access data structure that does not use contiguous memory.
* But then again the template parameter CONTIGUOUS should have been manually set to false and we
* would not have come here.
*/
assert(distance(begin, end) > 1
&& distance(begin, end) == &end[-1] - &*begin + 1
&& &end[-1] - &*begin == &end2[-1] - &*begin2);
swap(begin, begin2);
swap(end, end2);
swapped = !swapped;
}
#ifdef INSERTION_SORT_FOR_SMALL_N
template<class Iter>
void insertionsort(Iter const begin, Iter const end)
{
using namespace std;
if (begin == end)
{
return;
}
for (auto unsorted = next(begin); unsorted != end; ++unsorted)
{
auto const current = *unsorted;
auto shift = unsorted;
do
{
auto before = prev(shift);
if (*before > current)
{
*shift = *before;
shift = move(before);
}
else
{
break;
}
} while (shift != begin);
*shift = current;
}
}
#endif
}
/// Radix sort for all (un-)signed integer types and 32-bit float and 64-bit double (as long as IEEE-754 compatible).
template<class Iter,
bool CONTIGUOUS = std::is_same<typename std::iterator_traits<Iter>::iterator_category,
std::random_access_iterator_tag>::value>
void radixsort(Iter begin, Iter end)
{
using namespace std;
using namespace RadixSortDetails;
using T = typename iterator_traits<Iter>::value_type;
using limT = lim<T>;
static_assert(sizeof(T) <= 8, "Not made for an extended long double.");
auto const n = distance(begin, end);
#ifdef INSERTION_SORT_FOR_SMALL_N
if (n < 48 * sizeof(T))
{
insertionsort(begin, end);
return;
}
#else
if (n < 2)
{
return;
}
#endif
// Holds a handfull of histograms.
array<array<int, HISTOGRAM_SIZE>, sizeof(T)> histograms{};
// This returns true, if input is already sorted. Then we don't have to do anything.
if (!generate_histograms(begin, end, n, histograms))
{
// Count the number of negative HSBs.
auto const & last = histograms.back();
auto const negatives = limT::is_signed
? accumulate(last.cbegin() + HISTOGRAM_HALF, last.cend(), 0)
: 0;
int offsets[HISTOGRAM_SIZE];
vector<T> tmp(n);
auto begin2 = tmp.begin();
// These two are only needed for CONTIGUOUS
auto end2 = tmp.end();
bool swapped = false;
int shift_bytes = 0;
for (auto const & hist : histograms)
{
if (hist.front() >= 0)
{
// If we need special care, it will only be in the last round...
bool const last_round = &hist[0] == &last[0];
// ... and only if we have a signed type.
bool const special = last_round && limT::is_signed;
// Negative values need come first. We do this by shifting the first positive offset.
*offsets = special ? negatives : 0;
// The first (positive half is the same for everyone.
int i;
for (i = 1; i < HISTOGRAM_HALF; ++i)
{
offsets[i] = offsets[i - 1] + hist[i - 1];
}
if (special && limT::is_iec559)
{
/* This is for floating point values. Without special care, they'd come out
* sorted by absolute value after the positive ones, like this:
*
* 1, 3, 5, -2, -4, -6
*
* So the 0 offset goes to the end and we start summing backwards (but only for
* the second (negative) half).
*/
// Downward upper half...
offsets[HISTOGRAM_SIZE - 1] = 0;
for (i = HISTOGRAM_SIZE - 2; i >= HISTOGRAM_HALF; --i)
{
offsets[i] = offsets[i + 1] + hist[i + 1];
}
// ... and upward lower half.
for (i = HISTOGRAM_HALF; i < HISTOGRAM_SIZE; ++i)
{
offsets[i] += hist[i];
}
}
else
{
// For signed integers, the offsets can be build in increasing order as for unsigned ones.
if (special)
{
// Except the signed integers need the 09 offset at the beginning of the second (negative) half.
offsets[i++] = 0;
}
// After that they're the same again.
for (; i < HISTOGRAM_SIZE; ++i)
{
offsets[i] = offsets[i - 1] + hist[i - 1];
}
}
// Now values are written to the offsets previously computed.
// This establishes the order, byte by byte.
for (auto iter = begin; iter != end; ++iter)
{
auto const radix = ubyte(*iter)[shift_bytes];
if (special && limT::is_iec559 && radix >= HISTOGRAM_HALF)
{
begin2[--offsets[radix]] = *iter;
}
else
{
begin2[offsets[radix]++] = *iter;
}
}
// Either swap iterator roles for CONTIGUOUS, or copy from tmp to input...
post_round_copy(begin, end, begin2, end2, swapped, integral_constant<bool, CONTIGUOUS>());
}
// ... and repeat with the next higher byte.
++shift_bytes;
}
// We might end up with swapped roles. Then we have to do an additional copy.
if (CONTIGUOUS && swapped)
{
// Temporary vector will die after this anyway.
move(begin, end, begin2);
}
}
}
template<class C>
/// Sort given container range with radix sort.
inline C & radixsort(C && cont)
{
using namespace std;
radixsort(begin(cont), end(cont));
return cont;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment