Last active
June 15, 2020 13:24
-
-
Save Eisenwave/a35d12019cf4635563ec05875830e0fa to your computer and use it in GitHub Desktop.
C++ Bit Manipulation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef UTIL_BITS_HPP | |
#define UTIL_BITS_HPP | |
#include "assert.hpp" | |
#include "math.hpp" | |
#include "vec.hpp" | |
#include <algorithm> | |
#include <cstddef> | |
#include <cstdint> | |
#include <limits> | |
namespace mve { | |
/** | |
* @brief templated variable which contains the number of bits for any given integer type. | |
* Example: bits_v<uint32_t> = 32 | |
*/ | |
template <typename Int, std::enable_if_t<std::is_integral_v<Int>, int> = 0> | |
constexpr size_t bits_v = 0; | |
template <> | |
constexpr size_t bits_v<char> = 8; | |
template <> | |
constexpr size_t bits_v<uint8_t> = 8; | |
template <> | |
constexpr size_t bits_v<uint16_t> = 16; | |
template <> | |
constexpr size_t bits_v<uint32_t> = 32; | |
template <> | |
constexpr size_t bits_v<uint64_t> = 64; | |
template <> | |
constexpr size_t bits_v<int8_t> = 8; | |
template <> | |
constexpr size_t bits_v<int16_t> = 16; | |
template <> | |
constexpr size_t bits_v<int32_t> = 32; | |
template <> | |
constexpr size_t bits_v<int64_t> = 64; | |
/** | |
* @brief Returns whether an unsigned integer is a power of 2 or zero. | |
* Note that this test is faster than having to test if val is a power of 2. | |
* @param val the parameter to test | |
* @return true if val is a power of 2 or if val is zero | |
*/ | |
template <typename Uint, std::enable_if_t<std::is_unsigned_v<Uint>, int> = 0> | |
constexpr bool is_pow2_or_zero(Uint val) | |
{ | |
return (val & (val - 1)) == 0; | |
} | |
/** | |
* @brief Returns whether an unsigned integer is a power of 2. | |
* @param val the parameter to test | |
* @return true if val is a power of 2 | |
* @see is_pow2_or_zero | |
*/ | |
template <typename Uint, std::enable_if_t<std::is_unsigned_v<Uint>, int> = 0> | |
constexpr bool is_pow2(Uint val) | |
{ | |
return val != 0 && is_pow2_or_zero(val); | |
} | |
/** | |
* @brief Naive implementation of log2 using repeated single-bit rightshifting. | |
*/ | |
template <typename Uint, std::enable_if_t<std::is_unsigned_v<Uint>, int> = 0> | |
constexpr Uint log2_floor_naive(Uint val) noexcept | |
{ | |
Uint result = 0; | |
while (val >>= 1) { | |
++result; | |
} | |
return result; | |
} | |
/** | |
* @brief Fast implementation of log2. | |
* See https://graphics.stanford.edu/~seander/bithacks.html#IntegerLog. | |
* | |
* Unrolled 32-bit version: | |
* unsigned shift = (v > 0xFFFF) << 4; | |
v >>= shift; | |
r |= shift; | |
shift = (v > 0xFF ) << 3; | |
v >>= shift; | |
r |= shift; | |
shift = (v > 0xF ) << 2; | |
v >>= shift; | |
r |= shift; | |
shift = (v > 0x3 ) << 1; | |
v >>= shift; | |
r |= shift; | |
shift = (v > 1) << 0; | |
r >>= shift; | |
r |= shift; | |
*/ | |
template <typename Uint, std::enable_if_t<std::is_unsigned_v<Uint>, int> = 0> | |
constexpr Uint log2_floor_fast(Uint v) noexcept | |
{ | |
constexpr size_t iterations = log2_floor_naive(bits_v<Uint>); | |
unsigned result = 0; | |
for (size_t i = iterations; i != 0; --i) { | |
unsigned compBits = 1 << (i - 1); | |
Uint compShift = Uint{1} << compBits; | |
unsigned shift = unsigned{v >= compShift} << (i - 1); | |
v >>= shift; | |
result |= shift; | |
} | |
return result; | |
} | |
/** | |
* @brief log2_floor implementation using De Bruijn multiplication. | |
* See https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn. | |
* @param val | |
*/ | |
constexpr uint32_t log2_floor_debruijn(uint32_t val) noexcept | |
{ | |
constexpr uint32_t MultiplyDeBruijnBitPosition[32] = {0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, | |
8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31}; | |
// first round down to one less than a power of 2 | |
// this step is not necessary if val is a power of 2 | |
val |= val >> 1; | |
val |= val >> 2; | |
val |= val >> 4; | |
val |= val >> 8; | |
val |= val >> 16; | |
return MultiplyDeBruijnBitPosition[(val * uint32_t{0x07C4ACDD}) >> 27]; | |
} | |
/** | |
* @brief Computes the floored binary logarithm of a given integer. | |
* Example: log2_floor(100) = 6 | |
* | |
* This templated function will choose the best available method depending on the type of the integer. | |
* It may fail if a negative signed integer is given. | |
* | |
* @param v the value | |
* @return the floored binary logarithm | |
*/ | |
template <typename Int, std::enable_if_t<std::is_integral_v<Int>, int> = 0> | |
constexpr Int log2_floor(Int v) noexcept | |
{ | |
if constexpr (std::is_signed_v<Int>) { | |
using Uint = std::make_unsigned_t<Int>; | |
DEBUG_ASSERT_GE(v, 0); | |
return log2_floor_fast<Uint>(static_cast<Uint>(v)); | |
} | |
else { | |
return log2_floor_fast<Int>(v); | |
} | |
} | |
template <> | |
constexpr uint32_t log2_floor<uint32_t>(uint32_t v) noexcept | |
{ | |
return log2_floor_debruijn(v); | |
} | |
/** | |
* @brief Computes the ceiled binary logarithm of a given integer. | |
* Example: log2_ceil(100) = 7 | |
* | |
* This templated function will choose the best available method depending on the type of the integer. | |
* It may fail if a negative signed integer is given. | |
* | |
* @param v the value | |
* @return the floored binary logarithm | |
*/ | |
template <typename Int, std::enable_if_t<std::is_integral_v<Int>, int> = 0> | |
constexpr Int log2_ceil(Int val) noexcept | |
{ | |
const Int result = log2_floor(val); | |
const bool inputIsntPow2 = val != (static_cast<Int>(1) << result); | |
return result + inputIsntPow2; | |
} | |
/** | |
* @brief Rounds up an unsigned integer to the next power of 2. | |
* Examples: 100 -> 128, 1 -> 1, 3 -> 4, 3000 -> 4096 | |
* @param v the value to round up | |
*/ | |
template <typename Uint, std::enable_if_t<std::is_unsigned_v<Uint>, int> = 0> | |
constexpr Uint ceil_pow2(Uint v) | |
{ | |
DEBUG_ASSERT_NE(v, 0); | |
constexpr size_t iterations = log2_floor_naive(bits_v<Uint>); | |
// decrement is necessary so that powers of 2 don't get increased | |
v--; | |
for (size_t i = 0; i < iterations; ++i) { | |
// after all iterations, all bits right to the msb will be filled with 1 | |
v |= v >> (1 << i); | |
} | |
// this step turns the number back to a power of two | |
return v + 1; | |
} | |
/** | |
* @brief Rounds up an unsigned integer to the next power of 2. | |
* Examples: 100 -> 64, 1 -> 1, 3 -> 2, 3000 -> 2048 | |
* @param v the value to round up | |
*/ | |
template <typename Uint, std::enable_if_t<std::is_unsigned_v<Uint>, int> = 0> | |
constexpr Uint floor_pow2(Uint v) | |
{ | |
DEBUG_ASSERT_NE(v, 0); | |
constexpr size_t iterations = log2_floor_naive(bits_v<Uint>); | |
// leftshift is necessary to floor instead of ceil | |
v >>= 1; | |
for (size_t i = 0; i < iterations; ++i) { | |
// after all iterations, all bits right to the msb will be filled with 1 | |
v |= v >> (1 << i); | |
} | |
// this step turns the number back to a power of two | |
return v + 1; | |
} | |
/** | |
* @brief Interleaves an input number with <bits> zero-bits per input bit. Example: 0b11 -> 0b0101 | |
* @param input the input number | |
* @param bits the amount of zero bits per input bit to interleave | |
* @return the input number interleaved with input-bits | |
*/ | |
constexpr uint64_t ileave_zeros_naive(uint32_t input, size_t bits) | |
{ | |
const auto lim = std::min<size_t>(32, math::div_ceil<size_t>(64, bits + 1)); | |
uint64_t result = 0; | |
for (size_t i = 0, b_out = 0; i < lim; ++i) { | |
result |= static_cast<uint64_t>(input & 1) << b_out; | |
input >>= 1; | |
b_out += bits + 1; | |
} | |
return result; | |
} | |
/** | |
* @brief Removes each interleaved <bits> bits. Example: 0b010101 --rem 1--> 0b111 | |
* @param input the input number | |
* @param bits input bits per output bit | |
* @return the the output with removed bits | |
*/ | |
constexpr uint64_t rem_ileaved_bits_naive(uint64_t input, size_t bits) noexcept | |
{ | |
// increment once to avoid modulo divisions by 0 | |
// this way our function is noexcept and safe for all inputs | |
++bits; | |
uint64_t result = 0; | |
for (size_t i = 0, b_out = 0; i < 64; ++i) { | |
if (i % bits == 0) { | |
result |= (input & 1) << b_out++; | |
} | |
input >>= 1; | |
} | |
return result; | |
} | |
/** | |
* @brief Duplicates each input bit <bits> times. Example: 0b101 -> 0b110011 | |
* @param input the input number | |
* @param bits the output bits per input bits | |
* @return the output number or 0 if the bits parameter was zero | |
*/ | |
constexpr uint64_t dupl_bits_naive(uint64_t input, size_t bits) | |
{ | |
if (bits == 0) { | |
return 0; | |
} | |
const auto lim = math::div_ceil<size_t>(64, bits); | |
uint64_t result = 0; | |
for (size_t i = 0, b_out = 0; i < lim; ++i) { | |
for (size_t j = 0; j < bits; ++j, ++b_out) { | |
result |= static_cast<uint64_t>((input >> i) & 1) << b_out; | |
} | |
} | |
return result; | |
} | |
/** | |
* @brief Interleaves <BITS> zero-bits inbetween each input bit. | |
* @param input the input number | |
* @tparam BITS the number of bits to be interleaved, must be > 0 | |
* @return the input interleaved with <BITS> bits | |
*/ | |
template <size_t BITS> | |
constexpr uint64_t ileave_zeros(uint32_t input) | |
{ | |
if constexpr (BITS == 0) { | |
return input; | |
} | |
else { | |
constexpr uint64_t MASKS[] = { | |
dupl_bits_naive(ileave_zeros_naive(uint32_t(-1), BITS), 1), | |
dupl_bits_naive(ileave_zeros_naive(uint32_t(-1), BITS), 2), | |
dupl_bits_naive(ileave_zeros_naive(uint32_t(-1), BITS), 4), | |
dupl_bits_naive(ileave_zeros_naive(uint32_t(-1), BITS), 8), | |
dupl_bits_naive(ileave_zeros_naive(uint32_t(-1), BITS), 16), | |
}; | |
uint64_t n = input; | |
for (int i = 4; i != -1; --i) { | |
const auto shift = BITS * (1 << i); | |
n |= n << shift; | |
n &= MASKS[i]; | |
} | |
return n; | |
} | |
} | |
/** | |
* @brief Removes each interleaved <bits> bits. Example: 0b010101 --rem 1--> 0b111 | |
* @param input the input number | |
* @param bits input bits per output bit | |
* @return the the output with removed bits | |
*/ | |
template <size_t BITS> | |
constexpr uint64_t rem_ileaved_bits(uint64_t input) noexcept | |
{ | |
if constexpr (BITS == 0) { | |
return input; | |
} | |
else { | |
constexpr uint64_t MASKS[] = { | |
dupl_bits_naive(ileave_zeros_naive(~uint32_t(0), BITS), 1), | |
dupl_bits_naive(ileave_zeros_naive(~uint32_t(0), BITS), 2), | |
dupl_bits_naive(ileave_zeros_naive(~uint32_t(0), BITS), 4), | |
dupl_bits_naive(ileave_zeros_naive(~uint32_t(0), BITS), 8), | |
dupl_bits_naive(ileave_zeros_naive(~uint32_t(0), BITS), 16), | |
dupl_bits_naive(ileave_zeros_naive(~uint32_t(0), BITS), 32), | |
}; | |
input &= MASKS[0]; | |
for (size_t i = 0; i < 5; ++i) { | |
unsigned rshift = (1 << i) * BITS; | |
input |= input >> rshift; | |
input &= MASKS[i + 1]; | |
} | |
return input; | |
} | |
} | |
/** | |
* @brief Interleaves 2 integers, where hi comprises the upper bits of each bit pair and lo the lower bits. | |
* Examle: ileave2(0b111, 0b000) = 0b101010 | |
* | |
* This is also referred to as a Morton Code in scientific literature. | |
* | |
* @param hi the high bits | |
* @param lo the low bits | |
* @return the interleaved bits | |
*/ | |
constexpr uint64_t ileave2(uint32_t hi, uint32_t lo) | |
{ | |
constexpr uint64_t MASKS[] = {0x5555'5555'5555'5555, | |
0x3333'3333'3333'3333, | |
0x0F0F'0F0F'0F0F'0F0F, | |
0x00FF'00FF'00FF'00FF, | |
0x0000'FFFF'0000'FFFF}; | |
uint64_t result = 0; | |
uint32_t *nums[] = {&hi, &lo}; | |
for (size_t i = 0; i < 2; ++i) { | |
uint64_t n = *nums[i]; | |
for (size_t i = 4; i != std::numeric_limits<size_t>::max(); --i) { | |
n |= n << (1 << i); | |
n &= MASKS[i]; | |
} | |
result |= n << (1 - i); | |
} | |
return result; | |
} | |
/** | |
* @brief Interleaves 3 integers, where x comprises the uppermost bits of each bit triple and z the lowermost bits. | |
* | |
* This is also referred to as a Morton Code in scientific literature. | |
* | |
* @param x the highest bits | |
* @param y the middle bits | |
* @param z the lowest bits | |
* @return the interleaved bits | |
*/ | |
constexpr uint64_t ileave3(uint32_t x, uint32_t y, uint32_t z) | |
{ | |
return (ileave_zeros<2>(x) << 2) | (ileave_zeros<2>(y) << 1) | ileave_zeros<2>(z); | |
} | |
constexpr uint64_t ileave3_naive(uint32_t x, uint32_t y, uint32_t z) | |
{ | |
return (ileave_zeros_naive(x, 2) << 2) | (ileave_zeros_naive(y, 2) << 1) | ileave_zeros_naive(z, 2); | |
} | |
/** | |
* @brief Deinterleaves 3 integers which are interleaved in a single number. | |
* Visualization: abcdefghi -> (adg, beh, cfi) | |
* | |
* This is also referred to as a Morton Code in scientific literature. | |
* | |
* @param n the number | |
* @return the interleaved bits | |
*/ | |
constexpr Vec3u32 dileave3(uint64_t n) | |
{ | |
uint32_t x = static_cast<uint32_t>(rem_ileaved_bits<2>(n >> 2)); | |
uint32_t y = static_cast<uint32_t>(rem_ileaved_bits<2>(n >> 1)); | |
uint32_t z = static_cast<uint32_t>(rem_ileaved_bits<2>(n >> 0)); | |
return {x, y, z}; | |
} | |
constexpr Vec3u32 dileave3_naive(uint64_t n) | |
{ | |
uint32_t x = static_cast<uint32_t>(rem_ileaved_bits_naive(n >> 2, 2)); | |
uint32_t y = static_cast<uint32_t>(rem_ileaved_bits_naive(n >> 1, 2)); | |
uint32_t z = static_cast<uint32_t>(rem_ileaved_bits_naive(n >> 0, 2)); | |
return {x, y, z}; | |
} | |
} // namespace mve | |
#endif // UTIL_BITS_HPP |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment