Last active
August 31, 2025 23:26
-
-
Save jweinst1/e64646920c463d6284c4fdd824aac923 to your computer and use it in GitHub Desktop.
Uses templates and constexpr for compile time hamming lookup
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <array> | |
#include <cstdint> | |
#include <cstddef> | |
// ---------- ConstexprBitset ---------- | |
template<std::size_t N> | |
struct ConstexprBitset { | |
static constexpr std::size_t numBlocks = (N + 63) / 64; | |
std::array<uint64_t, numBlocks> blocks{}; | |
constexpr void set(std::size_t idx) { | |
blocks[idx / 64] |= (1ULL << (idx & 63)); | |
} | |
template<typename T> | |
constexpr std::size_t getSetIndexes(T* out) const { | |
std::size_t count = 0; | |
for (std::size_t block = 0; block < numBlocks; block++) { | |
uint64_t bits = blocks[block]; | |
while (bits) { | |
int idx = __builtin_ctzll(bits); | |
out[count++] = static_cast<T>(block * 64 + idx); | |
bits &= bits - 1; | |
} | |
} | |
return count; | |
} | |
}; | |
// ---------- Product Metafunction ---------- | |
template<std::size_t... Ns> | |
struct product; | |
template<> | |
struct product<> { | |
static constexpr std::size_t value = 1; | |
}; | |
template<std::size_t N, std::size_t... Ns> | |
struct product<N, Ns...> { | |
static constexpr std::size_t value = N * product<Ns...>::value; | |
}; | |
// ---------- Generate Neighbors ---------- | |
template<std::size_t NROWS, std::size_t... COUNTS> | |
constexpr auto generate_neighbors( | |
const std::array<std::array<uint8_t, 256>, NROWS>& candidates, | |
const std::array<std::size_t, NROWS>& counts) | |
{ | |
constexpr std::size_t total_size = product<COUNTS...>::value; | |
std::array<std::array<uint8_t, NROWS>, total_size> result{}; | |
std::array<std::size_t, NROWS> idx{}; | |
std::size_t outIdx = 0; | |
bool done = false; | |
while (!done) { | |
// capture permutation | |
for (std::size_t j = 0; j < NROWS; j++) { | |
result[outIdx][j] = candidates[j][idx[j]]; | |
} | |
++outIdx; | |
// advance counters | |
for (std::size_t pos = NROWS; pos-- > 0;) { | |
if (++idx[pos] < counts[pos]) { | |
for (std::size_t j = pos + 1; j < NROWS; j++) idx[j] = 0; | |
goto continue_outer; | |
} | |
} | |
done = true; | |
continue_outer:; | |
} | |
return result; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <array> | |
#include <cstdint> | |
#include <cstddef> | |
#include <iostream> | |
// ========= ConstexprBitset ========= | |
template<std::size_t N> | |
struct ConstexprBitset { | |
static constexpr std::size_t numBlocks = (N + 63) / 64; | |
std::array<uint64_t, numBlocks> blocks{}; | |
constexpr void set(std::size_t idx) { | |
blocks[idx / 64] |= (1ULL << (idx & 63)); | |
} | |
constexpr bool empty() const { | |
for (auto b : blocks) if (b) return false; | |
return true; | |
} | |
template<typename T> | |
constexpr std::size_t getSetIndexes(T* out) const { | |
std::size_t count = 0; | |
for (std::size_t block = 0; block < numBlocks; block++) { | |
uint64_t bits = blocks[block]; | |
while (bits) { | |
int idx = __builtin_ctzll(bits); // constexpr_ctzll fallback if needed | |
out[count++] = static_cast<T>(block * 64 + idx); | |
bits &= bits - 1; | |
} | |
} | |
return count; | |
} | |
}; | |
// ========= PermutationIterator (compile-time sized) ========= | |
template<std::size_t NROWS> | |
struct PermutationIterator { | |
static constexpr std::size_t MAX_CANDIDATES = 256; | |
// rows[i][hamming-distance] = candidate bitset | |
const std::array<std::array<ConstexprBitset<256>, 8>, NROWS>& rows; | |
// storage | |
std::array<std::array<uint8_t, MAX_CANDIDATES>, NROWS> candidates{}; | |
std::array<std::size_t, NROWS> candCounts{}; | |
std::array<std::size_t, NROWS> indices{}; | |
std::array<uint8_t, NROWS> current{}; | |
bool done = false; | |
constexpr PermutationIterator(const std::array<std::array<ConstexprBitset<256>, 8>, NROWS>& r) | |
: rows(r) | |
{ | |
for (std::size_t i = 0; i < NROWS; i++) { | |
std::size_t total = 0; | |
for (auto& bs : rows[i]) { | |
total += bs.getSetIndexes(candidates[i].data() + total); | |
} | |
candCounts[i] = total; | |
if (total == 0) { done = true; return; } | |
current[i] = candidates[i][0]; | |
} | |
} | |
constexpr bool next() { | |
if (done) return false; | |
for (std::size_t pos = NROWS; pos-- > 0;) { | |
if (++indices[pos] < candCounts[pos]) { | |
current[pos] = candidates[pos][indices[pos]]; | |
for (std::size_t j = pos + 1; j < NROWS; j++) { | |
indices[j] = 0; | |
current[j] = candidates[j][0]; | |
} | |
return true; | |
} | |
} | |
done = true; | |
return false; | |
} | |
constexpr const std::array<uint8_t, NROWS>& value() const { return current; } | |
}; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <array> | |
#include <cstdint> | |
#include <cstddef> | |
#include <iostream> | |
#include <vector> | |
// ========= ConstexprBitset with array-style getSetIndexes ========= | |
template<std::size_t N> | |
struct ConstexprBitset { | |
static constexpr std::size_t numBlocks = (N + 63) / 64; | |
std::array<uint64_t, numBlocks> blocks{}; | |
constexpr void set(std::size_t idx) { | |
blocks[idx / 64] |= (1ULL << (idx & 63)); | |
} | |
constexpr bool empty() const { | |
for (auto b : blocks) if (b) return false; | |
return true; | |
} | |
template<typename T> | |
constexpr std::size_t getSetIndexes(T* out) const { | |
std::size_t count = 0; | |
for (std::size_t block = 0; block < numBlocks; block++) { | |
uint64_t bits = blocks[block]; | |
while (bits) { | |
int idx = __builtin_ctzll(bits); // portable constexpr_ctzll if needed | |
out[count++] = static_cast<T>(block * 64 + idx); | |
bits &= bits - 1; | |
} | |
} | |
return count; | |
} | |
}; | |
// ========= PermutationIterator with flat arrays ========= | |
struct PermutationIterator { | |
static constexpr std::size_t MAX_CANDIDATES = 256; | |
const std::vector<std::array<ConstexprBitset<256>, 8>>& rows; | |
std::vector<std::array<uint8_t, MAX_CANDIDATES>> candidates; // row -> array of candidates | |
std::vector<std::size_t> candCounts; // row -> how many valid | |
std::vector<std::size_t> indices; // row -> current position | |
std::vector<uint8_t> current; // assembled vector | |
bool done = false; | |
PermutationIterator(const std::vector<std::array<ConstexprBitset<256>, 8>>& r) | |
: rows(r), candidates(r.size()), candCounts(r.size()), indices(r.size(), 0), current(r.size()) | |
{ | |
// Fill flat arrays | |
for (std::size_t i = 0; i < rows.size(); i++) { | |
auto& row = rows[i]; | |
std::size_t total = 0; | |
for (auto& bs : row) { | |
total += bs.getSetIndexes(candidates[i].data() + total); | |
} | |
candCounts[i] = total; | |
if (total == 0) { done = true; return; } | |
current[i] = candidates[i][0]; | |
} | |
} | |
bool next() { | |
if (done) return false; | |
// Odometer increment | |
for (std::size_t pos = rows.size(); pos-- > 0;) { | |
if (++indices[pos] < candCounts[pos]) { | |
current[pos] = candidates[pos][indices[pos]]; | |
// reset lower positions | |
for (std::size_t j = pos + 1; j < rows.size(); j++) { | |
indices[j] = 0; | |
current[j] = candidates[j][0]; | |
} | |
return true; | |
} | |
} | |
done = true; | |
return false; | |
} | |
const std::vector<uint8_t>& value() const { return current; } | |
}; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <array> | |
#include <cstdint> | |
#include <cstddef> | |
#include <iostream> | |
#include <bitset> | |
#include <utility> | |
#include <chrono> | |
#include <cstdlib> | |
#include <random> | |
#include <limits> | |
static float randomFloatInRange(float min, float max) { | |
return min + static_cast<float>(rand()) / (static_cast<float>(RAND_MAX / (max - min))); | |
} | |
// Generate a random std::array<float, N> within [minVal, maxVal] | |
template <std::size_t N> | |
std::array<float, N> make_random_array(float minVal = 0.0f, float maxVal = 1.0f) { | |
static thread_local std::mt19937 rng(std::random_device{}()); | |
std::uniform_real_distribution<float> dist(minVal, maxVal); | |
std::array<float, N> arr{}; | |
for (auto &val : arr) { | |
val = dist(rng); | |
} | |
return arr; | |
} | |
constexpr int constexpr_ctzll(unsigned long long x) { | |
#if defined(__GNUC__) || defined(__clang__) | |
if (x == 0) return 64; // define ctz(0) = 64 (all bits zero) | |
return __builtin_ctzll(x); | |
#elif defined(_MSC_VER) | |
unsigned long idx = 0; | |
if (_BitScanForward64(&idx, x)) | |
return static_cast<int>(idx); | |
return 64; // if x == 0 | |
#else | |
// Portable fallback | |
if (x == 0) return 64; | |
int c = 0; | |
while ((x & 1ULL) == 0) { | |
x >>= 1; | |
++c; | |
} | |
return c; | |
#endif | |
} | |
template<std::size_t N> | |
struct ConstexprBitset { | |
static constexpr std::size_t numBlocks = (N + 63) / 64; | |
std::array<uint64_t, numBlocks> blocks{}; | |
// ------------------- | |
// Set & Test | |
// ------------------- | |
constexpr void set(std::size_t idx) { | |
blocks[idx / 64] |= (1ULL << (idx & 63)); | |
} | |
constexpr bool test(std::size_t idx) const { | |
return (blocks[idx / 64] >> (idx & 63)) & 1ULL; | |
} | |
// ------------------- | |
// Bitwise operators | |
// ------------------- | |
constexpr auto operator|(ConstexprBitset const& other) const { | |
ConstexprBitset res; | |
for (std::size_t i = 0; i < numBlocks; i++) | |
res.blocks[i] = blocks[i] | other.blocks[i]; | |
return res; | |
} | |
constexpr auto operator&(ConstexprBitset const& other) const { | |
ConstexprBitset res; | |
for (std::size_t i = 0; i < numBlocks; i++) | |
res.blocks[i] = blocks[i] & other.blocks[i]; | |
return res; | |
} | |
constexpr auto operator&=(ConstexprBitset const& other) { | |
for (std::size_t i = 0; i < numBlocks; i++) | |
blocks[i] &= other.blocks[i]; | |
} | |
constexpr auto operator^(ConstexprBitset const& other) const { | |
ConstexprBitset res; | |
for (std::size_t i = 0; i < numBlocks; i++) | |
res.blocks[i] = blocks[i] ^ other.blocks[i]; | |
return res; | |
} | |
constexpr explicit operator bool() const noexcept { | |
for (std::size_t i = 0; i < numBlocks; i++) { | |
if (blocks[i] != 0) | |
return true; | |
} | |
return false; | |
} | |
static constexpr ConstexprBitset<256> make_from_byte(uint8_t b) { | |
ConstexprBitset<256> bs; | |
bs.set(b); // set exactly that index | |
return bs; | |
} | |
constexpr bool isAllUnset() const noexcept { | |
return isAllUnsetImpl(std::make_index_sequence<numBlocks>{}); | |
} | |
template<typename T> | |
constexpr std::size_t getSetIndexes(T* out) const { | |
std::size_t count = 0; | |
for (std::size_t block = 0; block < numBlocks; block++) { | |
uint64_t bits = blocks[block]; | |
while (bits) { | |
int idx = constexpr_ctzll(bits); | |
out[count++] = static_cast<T>(block * 64 + idx); | |
bits &= bits - 1; // clear lowest set bit | |
} | |
} | |
return count; | |
} | |
template <size_t... Is> | |
constexpr bool isAllUnsetImpl(std::index_sequence<Is...>) const noexcept { | |
return ((blocks[Is] == 0ull) && ...); // fold expression | |
} | |
}; | |
template<uint8_t V> | |
constexpr ConstexprBitset<256> make_single_bitset() { | |
ConstexprBitset<256> bs{}; | |
bs.set(V); | |
return bs; | |
} | |
template<std::size_t... Is> | |
constexpr auto make_identity_table(std::index_sequence<Is...>) { | |
return std::array<ConstexprBitset<256>, 256>{ | |
make_single_bitset<static_cast<uint8_t>(Is)>()... | |
}; | |
} | |
// Public inline constexpr lookup table | |
inline constexpr auto identity_bitset_table = | |
make_identity_table(std::make_index_sequence<256>{}); | |
template<uint8_t X> | |
constexpr const auto& get_identity_bitset_cmpl() { | |
return identity_bitset_table[X]; | |
} | |
constexpr const ConstexprBitset<256>& get_identity_bitset(uint8_t x) { | |
return identity_bitset_table[x]; | |
} | |
template<size_t NROWS> | |
constexpr void byte_row_to_bit_sets(const std::array<uint8_t, NROWS>& input, | |
std::array<ConstexprBitset<256>, NROWS>& out) | |
{ | |
for (size_t i = 0; i < NROWS; ++i) | |
{ | |
auto const& bs = get_identity_bitset(input[i]); | |
out[i] = bs; | |
} | |
} | |
template<size_t entropyDepth, size_t NROWS> | |
struct EntropySet { | |
using innerSetType = std::array<std::array<ConstexprBitset<256>, NROWS>, 256>; | |
static_assert(entropyDepth > 0 && entropyDepth < 257, "Entropy must be within byte range"); | |
std::array<innerSetType, entropyDepth> entropySlots; | |
constexpr void insert(const std::array<uint8_t, NROWS>& vec) { | |
} | |
}; | |
// ========= PermutationIterator (compile-time sized) ========= | |
template<std::size_t NROWS> | |
struct PermutationIterator { | |
static constexpr std::size_t MAX_CANDIDATES = 256; | |
// rows[i][hamming-distance] = candidate bitset | |
const std::array<std::array<ConstexprBitset<256>, 8>, NROWS>& rows; | |
// storage | |
std::array<std::array<uint8_t, MAX_CANDIDATES>, NROWS> candidates{}; | |
std::array<std::size_t, NROWS> candCounts{}; | |
std::array<std::size_t, NROWS> indices{}; | |
std::array<uint8_t, NROWS> current{}; | |
bool done = false; | |
constexpr PermutationIterator(const std::array<std::array<ConstexprBitset<256>, 8>, NROWS>& r) | |
: rows(r) | |
{ | |
for (std::size_t i = 0; i < NROWS; i++) { | |
std::size_t total = 0; | |
for (auto& bs : rows[i]) { | |
total += bs.getSetIndexes(candidates[i].data() + total); | |
} | |
candCounts[i] = total; | |
if (total == 0) { done = true; return; } | |
current[i] = candidates[i][0]; | |
} | |
} | |
constexpr bool next() { | |
if (done) return false; | |
for (std::size_t pos = NROWS; pos-- > 0;) { | |
if (++indices[pos] < candCounts[pos]) { | |
current[pos] = candidates[pos][indices[pos]]; | |
for (std::size_t j = pos + 1; j < NROWS; j++) { | |
indices[j] = 0; | |
current[j] = candidates[j][0]; | |
} | |
return true; | |
} | |
} | |
done = true; | |
return false; | |
} | |
constexpr const std::array<uint8_t, NROWS>& value() const { return current; } | |
}; | |
// ---------- helpers (all constexpr) ---------- | |
constexpr std::size_t binomial(unsigned n, unsigned k) { | |
if (k > n) return 0; | |
std::size_t res = 1; | |
for (unsigned i = 1; i <= k; i++) { | |
res = res * (n - (k - i)) / i; | |
} | |
return res; | |
} | |
// quantizers | |
// Dequantize 32-bit unsigned int back to float in [0,1] | |
constexpr float dequantize_float_32(uint32_t q) { | |
constexpr float levels = 4294967295.0f; // 2^32 - 1 | |
// Use midpoint reconstruction | |
return (static_cast<float>(q) + 0.5f) / levels; | |
} | |
// Impl with loop unrolling | |
template <size_t N, size_t... Is> | |
constexpr auto dequantize_unrolled_impl_32(const std::array<uint32_t, N>& input, | |
std::index_sequence<Is...>) { | |
std::array<float, N> out{}; | |
((out[Is] = dequantize_float_32(input[Is])), ...); | |
return out; | |
} | |
// Public wrapper | |
template <size_t N> | |
constexpr auto dequantize_unrolled32(const std::array<uint32_t, N>& input) { | |
return dequantize_unrolled_impl_32<N>(input, std::make_index_sequence<N>{}); | |
} | |
// Quantize float to 32-bit unsigned integer | |
constexpr uint32_t quantize_float_32(float val) { | |
constexpr float levels = 4294967295.0f; // 2^32 - 1, but in float | |
uint64_t q = static_cast<uint64_t>(val * levels); | |
return static_cast<uint32_t>(q); | |
} | |
// Implementation using index_sequence for loop unrolling | |
template <size_t N, size_t... Is> | |
constexpr auto quantize_unrolled_impl_32(const std::array<float, N>& input, | |
std::index_sequence<Is...>) { | |
std::array<uint32_t, N> out{}; | |
((out[Is] = quantize_float_32(input[Is])), ...); | |
return out; | |
} | |
// Public wrapper | |
template <size_t N> | |
constexpr auto quantize_unrolled32(const std::array<float, N>& input) { | |
return quantize_unrolled_impl_32<N>(input, std::make_index_sequence<N>{}); | |
} | |
template<int B> | |
constexpr uint16_t quantize_float16(float val) { | |
constexpr int levels = 1 << B; | |
uint16_t q = static_cast<uint16_t>(val * levels); | |
if (q >= levels) q = levels - 1; | |
return q; | |
} | |
template <size_t N, int B, size_t... Is> | |
constexpr auto quantize_unrolled_impl16(const std::array<float, N>& input, | |
std::index_sequence<Is...>) | |
{ | |
std::array<uint16_t, N> out{}; | |
((out[Is] = quantize_float16<B>(input[Is])), ...); | |
return out; | |
} | |
template <size_t N, int B> | |
constexpr auto quantize_unrolled16(const std::array<float, N>& input) | |
{ | |
return quantize_unrolled_impl16<N, B>(input, std::make_index_sequence<N>{}); | |
} | |
template<int B> | |
constexpr float dequantize_float16(uint16_t q) { | |
constexpr int levels = 1 << B; | |
// Reconstruct float to midpoint of the quantization bin | |
return (static_cast<float>(q) + 0.5f) / static_cast<float>(levels); | |
} | |
template <size_t N, int B, size_t... Is> | |
constexpr auto dequantize_unrolled_impl16(const std::array<uint16_t, N>& input, | |
std::index_sequence<Is...>) | |
{ | |
std::array<float, N> out{}; | |
((out[Is] = dequantize_float16<B>(input[Is])), ...); | |
return out; | |
} | |
template <size_t N, int B> | |
constexpr auto dequantize_unrolled16(const std::array<uint16_t, N>& input) | |
{ | |
return dequantize_unrolled_impl16<N, B>(input, std::make_index_sequence<N>{}); | |
} | |
template<int B> | |
constexpr float dequantize_float(uint8_t q) { | |
constexpr int levels = 1 << B; | |
// Map quantized int back to float in [0,1) | |
return (static_cast<float>(q) + 0.5f) / static_cast<float>(levels); | |
} | |
template <size_t N, int B, size_t... Is> | |
constexpr auto dequantize_unrolled_impl(const std::array<uint8_t, (N * B + 7) / 8>& input, | |
std::index_sequence<Is...>) | |
{ | |
std::array<float, N> out{}; | |
// One expression per dimension | |
((out[Is] = dequantize_float<B>( | |
(input[(Is * B) >> 3] >> ((Is * B) & 7)) & ((1u << B) - 1) | |
)), ...); | |
return out; | |
} | |
template <size_t N, int B> | |
constexpr auto dequantize_unrolled(const std::array<uint8_t, (N * B + 7) / 8>& input) | |
{ | |
return dequantize_unrolled_impl<N, B>(input, std::make_index_sequence<N>{}); | |
} | |
template<int B> | |
constexpr uint8_t quantize_float(float val) { | |
constexpr int levels = 1 << B; | |
uint8_t q = static_cast<uint8_t>(val * levels); | |
if (q >= levels) q = levels - 1; | |
return q; | |
} | |
template <size_t N, int B, size_t... Is> | |
constexpr auto quantize_unrolled_impl(const std::array<float, N>& input, | |
std::index_sequence<Is...>) | |
{ | |
constexpr size_t bitsPerByte = 8; | |
constexpr size_t outSize = (N * B + 7) / bitsPerByte; | |
std::array<uint8_t, outSize> out{}; | |
// unrolled expression: one line per dimension | |
((out[(Is * B) >> 3] |= quantize_float<B>(input[Is]) << ((Is * B) & 7)), ...); | |
return out; | |
} | |
template <size_t N, int B> | |
constexpr auto quantize_unrolled(const std::array<float, N>& input) | |
{ | |
return quantize_unrolled_impl<N, B>(input, std::make_index_sequence<N>{}); | |
} | |
// simple constexpr popcount (no loop, uses builtin if available) | |
constexpr int popcount8(uint8_t x) { | |
#if defined(__clang__) || defined(__GNUC__) | |
return __builtin_popcount(x); | |
#else | |
int cnt = 0; | |
for (; x; x &= (x - 1)) ++cnt; | |
return cnt; | |
#endif | |
} | |
// Generate masks with popcount == N | |
template<int N> | |
constexpr auto generate_masks() { | |
constexpr std::size_t Count = binomial(8, N); | |
std::array<uint8_t, Count> masks{}; | |
std::size_t idx = 0; | |
for (int u = 0; u < 256; ++u) { | |
if (popcount8(static_cast<uint8_t>(u)) == N) { | |
masks[idx++] = static_cast<uint8_t>(u); | |
} | |
} | |
return masks; | |
} | |
// Compute neighbors for a fixed value | |
template<int N, uint8_t V> | |
constexpr auto hamming_neighbors() { | |
constexpr auto masks = generate_masks<N>(); | |
constexpr std::size_t Count = binomial(8, N); | |
std::array<uint8_t, Count> res{}; | |
for (std::size_t i = 0; i < Count; i++) { | |
res[i] = static_cast<uint8_t>(V ^ masks[i]); | |
} | |
return res; // returns exactly Count neighbors | |
} | |
template<int N, uint8_t V> | |
constexpr auto hamming_neighbor_set() { | |
constexpr auto masks = generate_masks<N>(); | |
constexpr std::size_t Count = binomial(8, N); | |
ConstexprBitset<256> res{}; | |
for (std::size_t i = 0; i < Count; i++) { | |
res.set(static_cast<uint8_t>(V ^ masks[i])); | |
} | |
return res; | |
} | |
template<int N, std::size_t... Is> | |
constexpr auto make_table_of_sets(std::index_sequence<Is...>) { | |
return std::array{ hamming_neighbor_set<N, static_cast<uint8_t>(Is)>()... }; | |
} | |
template<int N> | |
constexpr auto precomputed_neighbor_sets() { | |
return make_table_of_sets<N>(std::make_index_sequence<256>{}); | |
} | |
template<int N> | |
inline constexpr auto hamming_set = precomputed_neighbor_sets<N>(); | |
// Full table: 256 entries of arrays of size C(8,N) | |
template<int N, std::size_t... Is> | |
constexpr auto make_table(std::index_sequence<Is...>) { | |
return std::array{ hamming_neighbors<N, static_cast<uint8_t>(Is)>()... }; | |
} | |
template<int N> | |
constexpr auto precomputed_neighbors_table() { | |
return make_table<N>(std::make_index_sequence<256>{}); | |
} | |
template<int N> | |
inline constexpr auto hamming_table = precomputed_neighbors_table<N>(); | |
/** | |
* A lazy retrieval where only the bit sets are got, but the bit placements are not | |
* extracted yet. | |
* */ | |
constexpr void get_all_neighbor_sets(uint8_t byte, const ConstexprBitset<256>& set, | |
std::array<ConstexprBitset<256>, 8>& out) { | |
constexpr auto& table1 = hamming_set<1>; | |
constexpr auto& table2 = hamming_set<2>; | |
constexpr auto& table3 = hamming_set<3>; | |
constexpr auto& table4 = hamming_set<4>; | |
constexpr auto& table5 = hamming_set<5>; | |
constexpr auto& table6 = hamming_set<6>; | |
constexpr auto& table7 = hamming_set<7>; | |
out[0] = set & get_identity_bitset(byte); | |
out[1] = set & table1[byte]; | |
out[2] = set & table2[byte]; | |
out[3] = set & table3[byte]; | |
out[4] = set & table4[byte]; | |
out[5] = set & table5[byte]; | |
out[6] = set & table6[byte]; | |
out[7] = set & table7[byte]; | |
} | |
constexpr void get_neighbor_set_1(uint8_t byte, const ConstexprBitset<256>& set, | |
std::array<ConstexprBitset<256>, 8>& out) { | |
constexpr auto& table1 = hamming_set<1>; | |
out[0] = set & get_identity_bitset(byte); | |
out[1] = set & table1[byte]; | |
} | |
/** | |
* Index sequencer to unroll the loop | |
* */ | |
template<size_t NROWS, size_t... Is> | |
constexpr void get_all_neighbor_sets_arr_impl( | |
const std::array<uint8_t, NROWS>& inputVector, | |
const std::array<ConstexprBitset<256>, NROWS>& inputSets, | |
std::array<std::array<ConstexprBitset<256>, 8>, NROWS>& out, | |
std::index_sequence<Is...>) | |
{ | |
// Fold expression over comma operator — each index expands into its call | |
(get_all_neighbor_sets(inputVector[Is], inputSets[Is], out[Is]), ...); | |
} | |
template<size_t NROWS, size_t... Is> | |
constexpr void get_neighbor_sets_1_arr_impl( | |
const std::array<uint8_t, NROWS>& inputVector, | |
const std::array<ConstexprBitset<256>, NROWS>& inputSets, | |
std::array<std::array<ConstexprBitset<256>, 8>, NROWS>& out, | |
std::index_sequence<Is...>) | |
{ | |
// Fold expression over comma operator — each index expands into its call | |
(get_neighbor_set_1(inputVector[Is], inputSets[Is], out[Is]), ...); | |
} | |
/** | |
* Takes an input vector, as the query, and generates an array of array of bitsets, | |
* the returned value has indices which represent the hamming distance and sets of bytes as vectors | |
* from the query vector. | |
* */ | |
template<size_t NROWS> | |
constexpr void get_all_neighbor_sets_arr( | |
const std::array<uint8_t, NROWS>& inputVector, | |
const std::array<ConstexprBitset<256>, NROWS>& inputSets, | |
std::array<std::array<ConstexprBitset<256>, 8>, NROWS>& out) | |
{ | |
get_all_neighbor_sets_arr_impl(inputVector, inputSets, out, | |
std::make_index_sequence<NROWS>{}); | |
} | |
template<size_t NROWS> | |
constexpr void get_neighbor_sets_1_arr( | |
const std::array<uint8_t, NROWS>& inputVector, | |
const std::array<ConstexprBitset<256>, NROWS>& inputSets, | |
std::array<std::array<ConstexprBitset<256>, 8>, NROWS>& out) | |
{ | |
get_neighbor_sets_1_arr_impl(inputVector, inputSets, out, | |
std::make_index_sequence<NROWS>{}); | |
} | |
template<size_t NROWS, size_t... Is> | |
constexpr bool is_fake_vector_impl(const std::array<std::array<ConstexprBitset<256>, 8>, NROWS>& inputSet, | |
std::index_sequence<Is...>) | |
{ | |
return ((inputSet[Is][0].isAllUnset()) || ...); | |
} | |
template<size_t NROWS> | |
constexpr bool is_fake_vector(const std::array<std::array<ConstexprBitset<256>, 8>, NROWS>& inputSet) { | |
return is_fake_vector_impl(inputSet, std::make_index_sequence<NROWS>{}); | |
} | |
/** | |
* This expands neighbors from the bitsets to flat array, | |
* slower than neighbor sets method | |
* */ | |
constexpr size_t get_all_neighbors(uint8_t byte, const ConstexprBitset<256>& set, | |
std::array<uint8_t, 256>& out) { | |
constexpr auto& table1 = hamming_set<1>; | |
constexpr auto& table2 = hamming_set<2>; | |
constexpr auto& table3 = hamming_set<3>; | |
constexpr auto& table4 = hamming_set<4>; | |
constexpr auto& table5 = hamming_set<5>; | |
constexpr auto& table6 = hamming_set<6>; | |
constexpr auto& table7 = hamming_set<7>; | |
size_t total = 0; | |
const ConstexprBitset myNeighbors1 = set & table1[byte]; | |
const ConstexprBitset myNeighbors2 = set & table2[byte]; | |
const ConstexprBitset myNeighbors3 = set & table3[byte]; | |
const ConstexprBitset myNeighbors4 = set & table4[byte]; | |
const ConstexprBitset myNeighbors5 = set & table5[byte]; | |
const ConstexprBitset myNeighbors6 = set & table6[byte]; | |
const ConstexprBitset myNeighbors7 = set & table7[byte]; | |
total += myNeighbors1.getSetIndexes(out.data() + total); | |
total += myNeighbors2.getSetIndexes(out.data() + total); | |
total += myNeighbors3.getSetIndexes(out.data() + total); | |
total += myNeighbors4.getSetIndexes(out.data() + total); | |
total += myNeighbors5.getSetIndexes(out.data() + total); | |
total += myNeighbors6.getSetIndexes(out.data() + total); | |
total += myNeighbors7.getSetIndexes(out.data() + total); | |
return total; | |
} | |
static void quantizePerfTesting() { | |
const std::array<float, 100> numbers = {randomFloatInRange(0.0, 2.6)}; | |
size_t total = 0; | |
auto start = std::chrono::high_resolution_clock::now(); | |
for (int i = 0; i < 10000000; ++i){ | |
const auto q1 = quantize_unrolled<100, 8>(numbers); | |
total += q1[0]; | |
} | |
auto end = std::chrono::high_resolution_clock::now(); | |
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start); | |
std::cout << "Function 8bit Quantize execution time: " << duration.count() << " microseconds" << std::endl; | |
printf("total is %zu\n", total); | |
} | |
static void quantizePerfTesting16() { | |
const std::array<float, 100> numbers = {randomFloatInRange(0.0, 2.6)}; | |
auto start = std::chrono::high_resolution_clock::now(); | |
for (int i = 0; i < 10000000; ++i){ | |
volatile const auto q1 = quantize_unrolled16<100, 16>(numbers); | |
} | |
auto end = std::chrono::high_resolution_clock::now(); | |
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start); | |
std::cout << "Function 16bit Quantize execution time: " << duration.count() << " microseconds" << std::endl; | |
} | |
static void quantizePerfTesting32() { | |
const std::array<float, 100> numbers = {randomFloatInRange(0.0, 2.6)}; | |
size_t total = 0; | |
auto start = std::chrono::high_resolution_clock::now(); | |
for (int i = 0; i < 10000000; ++i){ | |
const auto q1 = quantize_unrolled32<100>(numbers); | |
total += q1[0]; | |
} | |
auto end = std::chrono::high_resolution_clock::now(); | |
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start); | |
std::cout << "Function 32bit Quantize execution time: " << duration.count() << " microseconds" << std::endl; | |
printf("total is %zu\n", total); | |
} | |
static void endToEndPerfTest() { | |
constexpr size_t testRowSize = 96; | |
static constexpr size_t collectionSize = 100000000; | |
static constexpr size_t queryTestCount = 1000000; | |
std::array<ConstexprBitset<256>, testRowSize> myCollection; | |
// fill collection | |
for (size_t i = 0; i < collectionSize; ++i) | |
{ | |
const std::array<float, testRowSize> generatedRow = make_random_array<testRowSize>(); | |
const std::array<uint8_t, testRowSize> myByteRow = quantize_unrolled<testRowSize, 8>(generatedRow); | |
for (size_t j = 0; j < myByteRow.size(); ++j) | |
{ | |
myCollection[j] &= get_identity_bitset(myByteRow[j]); | |
} | |
} | |
const auto queryRow = make_random_array<testRowSize>(); | |
std::array<std::array<ConstexprBitset<256>, 8>, testRowSize> queryResult; | |
auto start = std::chrono::high_resolution_clock::now(); | |
for (int i = 0; i < queryTestCount; ++i) | |
{ | |
const auto myByteQuery = quantize_unrolled<testRowSize, 8>(queryRow); | |
get_neighbor_sets_1_arr<testRowSize>( myByteQuery, myCollection, queryResult); | |
//PermutationIterator<testRowSize> iter(queryResult); | |
//iter.next(); | |
//Function endToEndPerfTest execution time: 144397 microseconds | |
} | |
auto end = std::chrono::high_resolution_clock::now(); | |
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start); | |
std::cout << "Function " << __FUNCTION__ << " execution time: " << duration.count() << " microseconds" << std::endl; | |
} | |
static void perfTesting(char* argv[]) { | |
{ | |
int int_value = std::stoi(argv[1]); | |
size_t total = 0; | |
auto start = std::chrono::high_resolution_clock::now(); | |
std::array<uint8_t, 256> myArray; | |
ConstexprBitset<256> mySet; | |
mySet.set(39); | |
mySet.set(143); | |
for (int i = 0; i < 10000000; ++i) | |
{ | |
total += get_all_neighbors((uint8_t)int_value, mySet, | |
myArray); | |
} | |
auto end = std::chrono::high_resolution_clock::now(); | |
// Calculate the duration | |
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start); | |
printf("total was %zu\n", total); | |
// Print the elapsed time | |
std::cout << "Function execution time: " << duration.count() << " microseconds" << std::endl; | |
} | |
{ | |
int int_value = std::stoi(argv[1]); | |
auto start = std::chrono::high_resolution_clock::now(); | |
volatile size_t number = 0xff; | |
std::array<ConstexprBitset<256>, 8> myArray; | |
ConstexprBitset<256> mySet; | |
mySet.set(59); | |
mySet.set(113); | |
mySet.set(8); | |
mySet.set(91); | |
int i = 10000000; | |
while(--i) | |
{ | |
get_all_neighbor_sets(i & number, mySet, | |
myArray); | |
} | |
auto end = std::chrono::high_resolution_clock::now(); | |
// Calculate the duration | |
auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start); | |
// Print the elapsed time | |
std::cout << "Function2 execution time: " << duration.count() << " nanoseconds" << std::endl; | |
} | |
/** | |
------------- | |
total was 20000000 | |
Function execution time: 114794 microseconds | |
total was 6988044573398270032, set is 134217728 | |
Function2 execution time: 6623583 nanoseconds | |
* */ | |
} | |
static void perfTesting2(char* argv[]) { | |
{ | |
int int_value = std::stoi(argv[1]); | |
size_t total = 0; | |
auto start = std::chrono::high_resolution_clock::now(); | |
std::array<uint8_t, 256> myArray; | |
ConstexprBitset<256> mySet; | |
mySet.set(39); | |
mySet.set(143); | |
for (int i = 0; i < 10000000; ++i) | |
{ | |
total += get_all_neighbors((uint8_t)int_value, mySet, | |
myArray); | |
} | |
auto end = std::chrono::high_resolution_clock::now(); | |
// Calculate the duration | |
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start); | |
printf("total was %zu\n", total); | |
// Print the elapsed time | |
std::cout << "Function execution time: " << duration.count() << " microseconds" << std::endl; | |
} | |
{ | |
int int_value = std::stoi(argv[1]); | |
volatile size_t number = 0xff; | |
auto start = std::chrono::high_resolution_clock::now(); | |
std::array<ConstexprBitset<256>, 8> myArray; | |
ConstexprBitset<256> mySet; | |
mySet.set(56); | |
mySet.set(113); | |
mySet.set(8); | |
mySet.set(91); | |
for (int i = 0; i < 10000000; ++i) | |
{ | |
get_neighbor_set_1(i & number, mySet, | |
myArray); | |
//total += myArray[1].blocks[1]; | |
} | |
auto end = std::chrono::high_resolution_clock::now(); | |
// Calculate the duration | |
auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start); | |
// Print the elapsed time | |
std::cout << "Function3 test 2 execution time: " << duration.count() << " nanoseconds" << std::endl; | |
} | |
} | |
int main(int argc, char* argv[]) { | |
constexpr uint8_t v = 0b00001111; | |
constexpr auto neighbors = hamming_neighbors<2, v>(); | |
constexpr auto neighbors2 = hamming_neighbors<1, v>(); | |
for (auto n : neighbors) { | |
std::cout << std::bitset<8>(n) << " (" << int(n) << ")\n"; | |
} | |
puts("------------"); | |
for (auto n : neighbors2) { | |
std::cout << std::bitset<8>(n) << " (" << int(n) << ")\n"; | |
} | |
constexpr auto& table1 = hamming_table<1>; | |
constexpr auto& table2 = hamming_table<1>; | |
puts("------------"); | |
for (auto n : table1[0b1101]) { | |
std::cout << std::bitset<8>(n) << " (" << int(n) << ")\n"; | |
} | |
puts("-------------"); | |
//perfTesting(argv); | |
//perfTesting2(argv); | |
//quantizePerfTesting(); | |
//quantizePerfTesting16(); | |
//quantizePerfTesting32(); | |
endToEndPerfTest(); | |
/**total was 20000000 | |
Function execution time: 120658 microseconds | |
i is now 80 | |
Function2 execution time: 3108209 nanoseconds | |
total was 20000000 | |
Function execution time: 109372 microseconds | |
Function3 test 2 execution time: 3118209 nanoseconds | |
Function 8bit Quantize execution time: 35470 microseconds | |
total is 0 | |
Function 16bit Quantize execution time: 39884 microseconds | |
total is 224130000000 | |
Function 32bit Quantize execution time: 119789 microseconds | |
total is 41428131840000000*/ | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment