Skip to content

Instantly share code, notes, and snippets.

@jweinst1
Last active August 31, 2025 23:26
Show Gist options
  • Save jweinst1/e64646920c463d6284c4fdd824aac923 to your computer and use it in GitHub Desktop.
Save jweinst1/e64646920c463d6284c4fdd824aac923 to your computer and use it in GitHub Desktop.
Uses templates and constexpr for compile time hamming lookup
#include <array>
#include <cstdint>
#include <cstddef>
// ---------- ConstexprBitset ----------
template<std::size_t N>
struct ConstexprBitset {
static constexpr std::size_t numBlocks = (N + 63) / 64;
std::array<uint64_t, numBlocks> blocks{};
constexpr void set(std::size_t idx) {
blocks[idx / 64] |= (1ULL << (idx & 63));
}
template<typename T>
constexpr std::size_t getSetIndexes(T* out) const {
std::size_t count = 0;
for (std::size_t block = 0; block < numBlocks; block++) {
uint64_t bits = blocks[block];
while (bits) {
int idx = __builtin_ctzll(bits);
out[count++] = static_cast<T>(block * 64 + idx);
bits &= bits - 1;
}
}
return count;
}
};
// ---------- Product Metafunction ----------
template<std::size_t... Ns>
struct product;
template<>
struct product<> {
static constexpr std::size_t value = 1;
};
template<std::size_t N, std::size_t... Ns>
struct product<N, Ns...> {
static constexpr std::size_t value = N * product<Ns...>::value;
};
// ---------- Generate Neighbors ----------
template<std::size_t NROWS, std::size_t... COUNTS>
constexpr auto generate_neighbors(
const std::array<std::array<uint8_t, 256>, NROWS>& candidates,
const std::array<std::size_t, NROWS>& counts)
{
constexpr std::size_t total_size = product<COUNTS...>::value;
std::array<std::array<uint8_t, NROWS>, total_size> result{};
std::array<std::size_t, NROWS> idx{};
std::size_t outIdx = 0;
bool done = false;
while (!done) {
// capture permutation
for (std::size_t j = 0; j < NROWS; j++) {
result[outIdx][j] = candidates[j][idx[j]];
}
++outIdx;
// advance counters
for (std::size_t pos = NROWS; pos-- > 0;) {
if (++idx[pos] < counts[pos]) {
for (std::size_t j = pos + 1; j < NROWS; j++) idx[j] = 0;
goto continue_outer;
}
}
done = true;
continue_outer:;
}
return result;
}
#include <array>
#include <cstdint>
#include <cstddef>
#include <iostream>
// ========= ConstexprBitset =========
template<std::size_t N>
struct ConstexprBitset {
static constexpr std::size_t numBlocks = (N + 63) / 64;
std::array<uint64_t, numBlocks> blocks{};
constexpr void set(std::size_t idx) {
blocks[idx / 64] |= (1ULL << (idx & 63));
}
constexpr bool empty() const {
for (auto b : blocks) if (b) return false;
return true;
}
template<typename T>
constexpr std::size_t getSetIndexes(T* out) const {
std::size_t count = 0;
for (std::size_t block = 0; block < numBlocks; block++) {
uint64_t bits = blocks[block];
while (bits) {
int idx = __builtin_ctzll(bits); // constexpr_ctzll fallback if needed
out[count++] = static_cast<T>(block * 64 + idx);
bits &= bits - 1;
}
}
return count;
}
};
// ========= PermutationIterator (compile-time sized) =========
template<std::size_t NROWS>
struct PermutationIterator {
static constexpr std::size_t MAX_CANDIDATES = 256;
// rows[i][hamming-distance] = candidate bitset
const std::array<std::array<ConstexprBitset<256>, 8>, NROWS>& rows;
// storage
std::array<std::array<uint8_t, MAX_CANDIDATES>, NROWS> candidates{};
std::array<std::size_t, NROWS> candCounts{};
std::array<std::size_t, NROWS> indices{};
std::array<uint8_t, NROWS> current{};
bool done = false;
constexpr PermutationIterator(const std::array<std::array<ConstexprBitset<256>, 8>, NROWS>& r)
: rows(r)
{
for (std::size_t i = 0; i < NROWS; i++) {
std::size_t total = 0;
for (auto& bs : rows[i]) {
total += bs.getSetIndexes(candidates[i].data() + total);
}
candCounts[i] = total;
if (total == 0) { done = true; return; }
current[i] = candidates[i][0];
}
}
constexpr bool next() {
if (done) return false;
for (std::size_t pos = NROWS; pos-- > 0;) {
if (++indices[pos] < candCounts[pos]) {
current[pos] = candidates[pos][indices[pos]];
for (std::size_t j = pos + 1; j < NROWS; j++) {
indices[j] = 0;
current[j] = candidates[j][0];
}
return true;
}
}
done = true;
return false;
}
constexpr const std::array<uint8_t, NROWS>& value() const { return current; }
};
#include <array>
#include <cstdint>
#include <cstddef>
#include <iostream>
#include <vector>
// ========= ConstexprBitset with array-style getSetIndexes =========
template<std::size_t N>
struct ConstexprBitset {
static constexpr std::size_t numBlocks = (N + 63) / 64;
std::array<uint64_t, numBlocks> blocks{};
constexpr void set(std::size_t idx) {
blocks[idx / 64] |= (1ULL << (idx & 63));
}
constexpr bool empty() const {
for (auto b : blocks) if (b) return false;
return true;
}
template<typename T>
constexpr std::size_t getSetIndexes(T* out) const {
std::size_t count = 0;
for (std::size_t block = 0; block < numBlocks; block++) {
uint64_t bits = blocks[block];
while (bits) {
int idx = __builtin_ctzll(bits); // portable constexpr_ctzll if needed
out[count++] = static_cast<T>(block * 64 + idx);
bits &= bits - 1;
}
}
return count;
}
};
// ========= PermutationIterator with flat arrays =========
struct PermutationIterator {
static constexpr std::size_t MAX_CANDIDATES = 256;
const std::vector<std::array<ConstexprBitset<256>, 8>>& rows;
std::vector<std::array<uint8_t, MAX_CANDIDATES>> candidates; // row -> array of candidates
std::vector<std::size_t> candCounts; // row -> how many valid
std::vector<std::size_t> indices; // row -> current position
std::vector<uint8_t> current; // assembled vector
bool done = false;
PermutationIterator(const std::vector<std::array<ConstexprBitset<256>, 8>>& r)
: rows(r), candidates(r.size()), candCounts(r.size()), indices(r.size(), 0), current(r.size())
{
// Fill flat arrays
for (std::size_t i = 0; i < rows.size(); i++) {
auto& row = rows[i];
std::size_t total = 0;
for (auto& bs : row) {
total += bs.getSetIndexes(candidates[i].data() + total);
}
candCounts[i] = total;
if (total == 0) { done = true; return; }
current[i] = candidates[i][0];
}
}
bool next() {
if (done) return false;
// Odometer increment
for (std::size_t pos = rows.size(); pos-- > 0;) {
if (++indices[pos] < candCounts[pos]) {
current[pos] = candidates[pos][indices[pos]];
// reset lower positions
for (std::size_t j = pos + 1; j < rows.size(); j++) {
indices[j] = 0;
current[j] = candidates[j][0];
}
return true;
}
}
done = true;
return false;
}
const std::vector<uint8_t>& value() const { return current; }
};
#include <array>
#include <cstdint>
#include <cstddef>
#include <iostream>
#include <bitset>
#include <utility>
#include <chrono>
#include <cstdlib>
#include <random>
#include <limits>
static float randomFloatInRange(float min, float max) {
return min + static_cast<float>(rand()) / (static_cast<float>(RAND_MAX / (max - min)));
}
// Generate a random std::array<float, N> within [minVal, maxVal]
template <std::size_t N>
std::array<float, N> make_random_array(float minVal = 0.0f, float maxVal = 1.0f) {
static thread_local std::mt19937 rng(std::random_device{}());
std::uniform_real_distribution<float> dist(minVal, maxVal);
std::array<float, N> arr{};
for (auto &val : arr) {
val = dist(rng);
}
return arr;
}
constexpr int constexpr_ctzll(unsigned long long x) {
#if defined(__GNUC__) || defined(__clang__)
if (x == 0) return 64; // define ctz(0) = 64 (all bits zero)
return __builtin_ctzll(x);
#elif defined(_MSC_VER)
unsigned long idx = 0;
if (_BitScanForward64(&idx, x))
return static_cast<int>(idx);
return 64; // if x == 0
#else
// Portable fallback
if (x == 0) return 64;
int c = 0;
while ((x & 1ULL) == 0) {
x >>= 1;
++c;
}
return c;
#endif
}
template<std::size_t N>
struct ConstexprBitset {
static constexpr std::size_t numBlocks = (N + 63) / 64;
std::array<uint64_t, numBlocks> blocks{};
// -------------------
// Set & Test
// -------------------
constexpr void set(std::size_t idx) {
blocks[idx / 64] |= (1ULL << (idx & 63));
}
constexpr bool test(std::size_t idx) const {
return (blocks[idx / 64] >> (idx & 63)) & 1ULL;
}
// -------------------
// Bitwise operators
// -------------------
constexpr auto operator|(ConstexprBitset const& other) const {
ConstexprBitset res;
for (std::size_t i = 0; i < numBlocks; i++)
res.blocks[i] = blocks[i] | other.blocks[i];
return res;
}
constexpr auto operator&(ConstexprBitset const& other) const {
ConstexprBitset res;
for (std::size_t i = 0; i < numBlocks; i++)
res.blocks[i] = blocks[i] & other.blocks[i];
return res;
}
constexpr auto operator&=(ConstexprBitset const& other) {
for (std::size_t i = 0; i < numBlocks; i++)
blocks[i] &= other.blocks[i];
}
constexpr auto operator^(ConstexprBitset const& other) const {
ConstexprBitset res;
for (std::size_t i = 0; i < numBlocks; i++)
res.blocks[i] = blocks[i] ^ other.blocks[i];
return res;
}
constexpr explicit operator bool() const noexcept {
for (std::size_t i = 0; i < numBlocks; i++) {
if (blocks[i] != 0)
return true;
}
return false;
}
static constexpr ConstexprBitset<256> make_from_byte(uint8_t b) {
ConstexprBitset<256> bs;
bs.set(b); // set exactly that index
return bs;
}
constexpr bool isAllUnset() const noexcept {
return isAllUnsetImpl(std::make_index_sequence<numBlocks>{});
}
template<typename T>
constexpr std::size_t getSetIndexes(T* out) const {
std::size_t count = 0;
for (std::size_t block = 0; block < numBlocks; block++) {
uint64_t bits = blocks[block];
while (bits) {
int idx = constexpr_ctzll(bits);
out[count++] = static_cast<T>(block * 64 + idx);
bits &= bits - 1; // clear lowest set bit
}
}
return count;
}
template <size_t... Is>
constexpr bool isAllUnsetImpl(std::index_sequence<Is...>) const noexcept {
return ((blocks[Is] == 0ull) && ...); // fold expression
}
};
template<uint8_t V>
constexpr ConstexprBitset<256> make_single_bitset() {
ConstexprBitset<256> bs{};
bs.set(V);
return bs;
}
template<std::size_t... Is>
constexpr auto make_identity_table(std::index_sequence<Is...>) {
return std::array<ConstexprBitset<256>, 256>{
make_single_bitset<static_cast<uint8_t>(Is)>()...
};
}
// Public inline constexpr lookup table
inline constexpr auto identity_bitset_table =
make_identity_table(std::make_index_sequence<256>{});
template<uint8_t X>
constexpr const auto& get_identity_bitset_cmpl() {
return identity_bitset_table[X];
}
constexpr const ConstexprBitset<256>& get_identity_bitset(uint8_t x) {
return identity_bitset_table[x];
}
template<size_t NROWS>
constexpr void byte_row_to_bit_sets(const std::array<uint8_t, NROWS>& input,
std::array<ConstexprBitset<256>, NROWS>& out)
{
for (size_t i = 0; i < NROWS; ++i)
{
auto const& bs = get_identity_bitset(input[i]);
out[i] = bs;
}
}
template<size_t entropyDepth, size_t NROWS>
struct EntropySet {
using innerSetType = std::array<std::array<ConstexprBitset<256>, NROWS>, 256>;
static_assert(entropyDepth > 0 && entropyDepth < 257, "Entropy must be within byte range");
std::array<innerSetType, entropyDepth> entropySlots;
constexpr void insert(const std::array<uint8_t, NROWS>& vec) {
}
};
// ========= PermutationIterator (compile-time sized) =========
template<std::size_t NROWS>
struct PermutationIterator {
static constexpr std::size_t MAX_CANDIDATES = 256;
// rows[i][hamming-distance] = candidate bitset
const std::array<std::array<ConstexprBitset<256>, 8>, NROWS>& rows;
// storage
std::array<std::array<uint8_t, MAX_CANDIDATES>, NROWS> candidates{};
std::array<std::size_t, NROWS> candCounts{};
std::array<std::size_t, NROWS> indices{};
std::array<uint8_t, NROWS> current{};
bool done = false;
constexpr PermutationIterator(const std::array<std::array<ConstexprBitset<256>, 8>, NROWS>& r)
: rows(r)
{
for (std::size_t i = 0; i < NROWS; i++) {
std::size_t total = 0;
for (auto& bs : rows[i]) {
total += bs.getSetIndexes(candidates[i].data() + total);
}
candCounts[i] = total;
if (total == 0) { done = true; return; }
current[i] = candidates[i][0];
}
}
constexpr bool next() {
if (done) return false;
for (std::size_t pos = NROWS; pos-- > 0;) {
if (++indices[pos] < candCounts[pos]) {
current[pos] = candidates[pos][indices[pos]];
for (std::size_t j = pos + 1; j < NROWS; j++) {
indices[j] = 0;
current[j] = candidates[j][0];
}
return true;
}
}
done = true;
return false;
}
constexpr const std::array<uint8_t, NROWS>& value() const { return current; }
};
// ---------- helpers (all constexpr) ----------
constexpr std::size_t binomial(unsigned n, unsigned k) {
if (k > n) return 0;
std::size_t res = 1;
for (unsigned i = 1; i <= k; i++) {
res = res * (n - (k - i)) / i;
}
return res;
}
// quantizers
// Dequantize 32-bit unsigned int back to float in [0,1]
constexpr float dequantize_float_32(uint32_t q) {
constexpr float levels = 4294967295.0f; // 2^32 - 1
// Use midpoint reconstruction
return (static_cast<float>(q) + 0.5f) / levels;
}
// Impl with loop unrolling
template <size_t N, size_t... Is>
constexpr auto dequantize_unrolled_impl_32(const std::array<uint32_t, N>& input,
std::index_sequence<Is...>) {
std::array<float, N> out{};
((out[Is] = dequantize_float_32(input[Is])), ...);
return out;
}
// Public wrapper
template <size_t N>
constexpr auto dequantize_unrolled32(const std::array<uint32_t, N>& input) {
return dequantize_unrolled_impl_32<N>(input, std::make_index_sequence<N>{});
}
// Quantize float to 32-bit unsigned integer
constexpr uint32_t quantize_float_32(float val) {
constexpr float levels = 4294967295.0f; // 2^32 - 1, but in float
uint64_t q = static_cast<uint64_t>(val * levels);
return static_cast<uint32_t>(q);
}
// Implementation using index_sequence for loop unrolling
template <size_t N, size_t... Is>
constexpr auto quantize_unrolled_impl_32(const std::array<float, N>& input,
std::index_sequence<Is...>) {
std::array<uint32_t, N> out{};
((out[Is] = quantize_float_32(input[Is])), ...);
return out;
}
// Public wrapper
template <size_t N>
constexpr auto quantize_unrolled32(const std::array<float, N>& input) {
return quantize_unrolled_impl_32<N>(input, std::make_index_sequence<N>{});
}
template<int B>
constexpr uint16_t quantize_float16(float val) {
constexpr int levels = 1 << B;
uint16_t q = static_cast<uint16_t>(val * levels);
if (q >= levels) q = levels - 1;
return q;
}
template <size_t N, int B, size_t... Is>
constexpr auto quantize_unrolled_impl16(const std::array<float, N>& input,
std::index_sequence<Is...>)
{
std::array<uint16_t, N> out{};
((out[Is] = quantize_float16<B>(input[Is])), ...);
return out;
}
template <size_t N, int B>
constexpr auto quantize_unrolled16(const std::array<float, N>& input)
{
return quantize_unrolled_impl16<N, B>(input, std::make_index_sequence<N>{});
}
template<int B>
constexpr float dequantize_float16(uint16_t q) {
constexpr int levels = 1 << B;
// Reconstruct float to midpoint of the quantization bin
return (static_cast<float>(q) + 0.5f) / static_cast<float>(levels);
}
template <size_t N, int B, size_t... Is>
constexpr auto dequantize_unrolled_impl16(const std::array<uint16_t, N>& input,
std::index_sequence<Is...>)
{
std::array<float, N> out{};
((out[Is] = dequantize_float16<B>(input[Is])), ...);
return out;
}
template <size_t N, int B>
constexpr auto dequantize_unrolled16(const std::array<uint16_t, N>& input)
{
return dequantize_unrolled_impl16<N, B>(input, std::make_index_sequence<N>{});
}
template<int B>
constexpr float dequantize_float(uint8_t q) {
constexpr int levels = 1 << B;
// Map quantized int back to float in [0,1)
return (static_cast<float>(q) + 0.5f) / static_cast<float>(levels);
}
template <size_t N, int B, size_t... Is>
constexpr auto dequantize_unrolled_impl(const std::array<uint8_t, (N * B + 7) / 8>& input,
std::index_sequence<Is...>)
{
std::array<float, N> out{};
// One expression per dimension
((out[Is] = dequantize_float<B>(
(input[(Is * B) >> 3] >> ((Is * B) & 7)) & ((1u << B) - 1)
)), ...);
return out;
}
template <size_t N, int B>
constexpr auto dequantize_unrolled(const std::array<uint8_t, (N * B + 7) / 8>& input)
{
return dequantize_unrolled_impl<N, B>(input, std::make_index_sequence<N>{});
}
template<int B>
constexpr uint8_t quantize_float(float val) {
constexpr int levels = 1 << B;
uint8_t q = static_cast<uint8_t>(val * levels);
if (q >= levels) q = levels - 1;
return q;
}
template <size_t N, int B, size_t... Is>
constexpr auto quantize_unrolled_impl(const std::array<float, N>& input,
std::index_sequence<Is...>)
{
constexpr size_t bitsPerByte = 8;
constexpr size_t outSize = (N * B + 7) / bitsPerByte;
std::array<uint8_t, outSize> out{};
// unrolled expression: one line per dimension
((out[(Is * B) >> 3] |= quantize_float<B>(input[Is]) << ((Is * B) & 7)), ...);
return out;
}
template <size_t N, int B>
constexpr auto quantize_unrolled(const std::array<float, N>& input)
{
return quantize_unrolled_impl<N, B>(input, std::make_index_sequence<N>{});
}
// simple constexpr popcount (no loop, uses builtin if available)
constexpr int popcount8(uint8_t x) {
#if defined(__clang__) || defined(__GNUC__)
return __builtin_popcount(x);
#else
int cnt = 0;
for (; x; x &= (x - 1)) ++cnt;
return cnt;
#endif
}
// Generate masks with popcount == N
template<int N>
constexpr auto generate_masks() {
constexpr std::size_t Count = binomial(8, N);
std::array<uint8_t, Count> masks{};
std::size_t idx = 0;
for (int u = 0; u < 256; ++u) {
if (popcount8(static_cast<uint8_t>(u)) == N) {
masks[idx++] = static_cast<uint8_t>(u);
}
}
return masks;
}
// Compute neighbors for a fixed value
template<int N, uint8_t V>
constexpr auto hamming_neighbors() {
constexpr auto masks = generate_masks<N>();
constexpr std::size_t Count = binomial(8, N);
std::array<uint8_t, Count> res{};
for (std::size_t i = 0; i < Count; i++) {
res[i] = static_cast<uint8_t>(V ^ masks[i]);
}
return res; // returns exactly Count neighbors
}
template<int N, uint8_t V>
constexpr auto hamming_neighbor_set() {
constexpr auto masks = generate_masks<N>();
constexpr std::size_t Count = binomial(8, N);
ConstexprBitset<256> res{};
for (std::size_t i = 0; i < Count; i++) {
res.set(static_cast<uint8_t>(V ^ masks[i]));
}
return res;
}
template<int N, std::size_t... Is>
constexpr auto make_table_of_sets(std::index_sequence<Is...>) {
return std::array{ hamming_neighbor_set<N, static_cast<uint8_t>(Is)>()... };
}
template<int N>
constexpr auto precomputed_neighbor_sets() {
return make_table_of_sets<N>(std::make_index_sequence<256>{});
}
template<int N>
inline constexpr auto hamming_set = precomputed_neighbor_sets<N>();
// Full table: 256 entries of arrays of size C(8,N)
template<int N, std::size_t... Is>
constexpr auto make_table(std::index_sequence<Is...>) {
return std::array{ hamming_neighbors<N, static_cast<uint8_t>(Is)>()... };
}
template<int N>
constexpr auto precomputed_neighbors_table() {
return make_table<N>(std::make_index_sequence<256>{});
}
template<int N>
inline constexpr auto hamming_table = precomputed_neighbors_table<N>();
/**
* A lazy retrieval where only the bit sets are got, but the bit placements are not
* extracted yet.
* */
constexpr void get_all_neighbor_sets(uint8_t byte, const ConstexprBitset<256>& set,
std::array<ConstexprBitset<256>, 8>& out) {
constexpr auto& table1 = hamming_set<1>;
constexpr auto& table2 = hamming_set<2>;
constexpr auto& table3 = hamming_set<3>;
constexpr auto& table4 = hamming_set<4>;
constexpr auto& table5 = hamming_set<5>;
constexpr auto& table6 = hamming_set<6>;
constexpr auto& table7 = hamming_set<7>;
out[0] = set & get_identity_bitset(byte);
out[1] = set & table1[byte];
out[2] = set & table2[byte];
out[3] = set & table3[byte];
out[4] = set & table4[byte];
out[5] = set & table5[byte];
out[6] = set & table6[byte];
out[7] = set & table7[byte];
}
constexpr void get_neighbor_set_1(uint8_t byte, const ConstexprBitset<256>& set,
std::array<ConstexprBitset<256>, 8>& out) {
constexpr auto& table1 = hamming_set<1>;
out[0] = set & get_identity_bitset(byte);
out[1] = set & table1[byte];
}
/**
* Index sequencer to unroll the loop
* */
template<size_t NROWS, size_t... Is>
constexpr void get_all_neighbor_sets_arr_impl(
const std::array<uint8_t, NROWS>& inputVector,
const std::array<ConstexprBitset<256>, NROWS>& inputSets,
std::array<std::array<ConstexprBitset<256>, 8>, NROWS>& out,
std::index_sequence<Is...>)
{
// Fold expression over comma operator — each index expands into its call
(get_all_neighbor_sets(inputVector[Is], inputSets[Is], out[Is]), ...);
}
template<size_t NROWS, size_t... Is>
constexpr void get_neighbor_sets_1_arr_impl(
const std::array<uint8_t, NROWS>& inputVector,
const std::array<ConstexprBitset<256>, NROWS>& inputSets,
std::array<std::array<ConstexprBitset<256>, 8>, NROWS>& out,
std::index_sequence<Is...>)
{
// Fold expression over comma operator — each index expands into its call
(get_neighbor_set_1(inputVector[Is], inputSets[Is], out[Is]), ...);
}
/**
* Takes an input vector, as the query, and generates an array of array of bitsets,
* the returned value has indices which represent the hamming distance and sets of bytes as vectors
* from the query vector.
* */
template<size_t NROWS>
constexpr void get_all_neighbor_sets_arr(
const std::array<uint8_t, NROWS>& inputVector,
const std::array<ConstexprBitset<256>, NROWS>& inputSets,
std::array<std::array<ConstexprBitset<256>, 8>, NROWS>& out)
{
get_all_neighbor_sets_arr_impl(inputVector, inputSets, out,
std::make_index_sequence<NROWS>{});
}
template<size_t NROWS>
constexpr void get_neighbor_sets_1_arr(
const std::array<uint8_t, NROWS>& inputVector,
const std::array<ConstexprBitset<256>, NROWS>& inputSets,
std::array<std::array<ConstexprBitset<256>, 8>, NROWS>& out)
{
get_neighbor_sets_1_arr_impl(inputVector, inputSets, out,
std::make_index_sequence<NROWS>{});
}
template<size_t NROWS, size_t... Is>
constexpr bool is_fake_vector_impl(const std::array<std::array<ConstexprBitset<256>, 8>, NROWS>& inputSet,
std::index_sequence<Is...>)
{
return ((inputSet[Is][0].isAllUnset()) || ...);
}
template<size_t NROWS>
constexpr bool is_fake_vector(const std::array<std::array<ConstexprBitset<256>, 8>, NROWS>& inputSet) {
return is_fake_vector_impl(inputSet, std::make_index_sequence<NROWS>{});
}
/**
* This expands neighbors from the bitsets to flat array,
* slower than neighbor sets method
* */
constexpr size_t get_all_neighbors(uint8_t byte, const ConstexprBitset<256>& set,
std::array<uint8_t, 256>& out) {
constexpr auto& table1 = hamming_set<1>;
constexpr auto& table2 = hamming_set<2>;
constexpr auto& table3 = hamming_set<3>;
constexpr auto& table4 = hamming_set<4>;
constexpr auto& table5 = hamming_set<5>;
constexpr auto& table6 = hamming_set<6>;
constexpr auto& table7 = hamming_set<7>;
size_t total = 0;
const ConstexprBitset myNeighbors1 = set & table1[byte];
const ConstexprBitset myNeighbors2 = set & table2[byte];
const ConstexprBitset myNeighbors3 = set & table3[byte];
const ConstexprBitset myNeighbors4 = set & table4[byte];
const ConstexprBitset myNeighbors5 = set & table5[byte];
const ConstexprBitset myNeighbors6 = set & table6[byte];
const ConstexprBitset myNeighbors7 = set & table7[byte];
total += myNeighbors1.getSetIndexes(out.data() + total);
total += myNeighbors2.getSetIndexes(out.data() + total);
total += myNeighbors3.getSetIndexes(out.data() + total);
total += myNeighbors4.getSetIndexes(out.data() + total);
total += myNeighbors5.getSetIndexes(out.data() + total);
total += myNeighbors6.getSetIndexes(out.data() + total);
total += myNeighbors7.getSetIndexes(out.data() + total);
return total;
}
static void quantizePerfTesting() {
const std::array<float, 100> numbers = {randomFloatInRange(0.0, 2.6)};
size_t total = 0;
auto start = std::chrono::high_resolution_clock::now();
for (int i = 0; i < 10000000; ++i){
const auto q1 = quantize_unrolled<100, 8>(numbers);
total += q1[0];
}
auto end = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
std::cout << "Function 8bit Quantize execution time: " << duration.count() << " microseconds" << std::endl;
printf("total is %zu\n", total);
}
static void quantizePerfTesting16() {
const std::array<float, 100> numbers = {randomFloatInRange(0.0, 2.6)};
auto start = std::chrono::high_resolution_clock::now();
for (int i = 0; i < 10000000; ++i){
volatile const auto q1 = quantize_unrolled16<100, 16>(numbers);
}
auto end = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
std::cout << "Function 16bit Quantize execution time: " << duration.count() << " microseconds" << std::endl;
}
static void quantizePerfTesting32() {
const std::array<float, 100> numbers = {randomFloatInRange(0.0, 2.6)};
size_t total = 0;
auto start = std::chrono::high_resolution_clock::now();
for (int i = 0; i < 10000000; ++i){
const auto q1 = quantize_unrolled32<100>(numbers);
total += q1[0];
}
auto end = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
std::cout << "Function 32bit Quantize execution time: " << duration.count() << " microseconds" << std::endl;
printf("total is %zu\n", total);
}
static void endToEndPerfTest() {
constexpr size_t testRowSize = 96;
static constexpr size_t collectionSize = 100000000;
static constexpr size_t queryTestCount = 1000000;
std::array<ConstexprBitset<256>, testRowSize> myCollection;
// fill collection
for (size_t i = 0; i < collectionSize; ++i)
{
const std::array<float, testRowSize> generatedRow = make_random_array<testRowSize>();
const std::array<uint8_t, testRowSize> myByteRow = quantize_unrolled<testRowSize, 8>(generatedRow);
for (size_t j = 0; j < myByteRow.size(); ++j)
{
myCollection[j] &= get_identity_bitset(myByteRow[j]);
}
}
const auto queryRow = make_random_array<testRowSize>();
std::array<std::array<ConstexprBitset<256>, 8>, testRowSize> queryResult;
auto start = std::chrono::high_resolution_clock::now();
for (int i = 0; i < queryTestCount; ++i)
{
const auto myByteQuery = quantize_unrolled<testRowSize, 8>(queryRow);
get_neighbor_sets_1_arr<testRowSize>( myByteQuery, myCollection, queryResult);
//PermutationIterator<testRowSize> iter(queryResult);
//iter.next();
//Function endToEndPerfTest execution time: 144397 microseconds
}
auto end = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
std::cout << "Function " << __FUNCTION__ << " execution time: " << duration.count() << " microseconds" << std::endl;
}
static void perfTesting(char* argv[]) {
{
int int_value = std::stoi(argv[1]);
size_t total = 0;
auto start = std::chrono::high_resolution_clock::now();
std::array<uint8_t, 256> myArray;
ConstexprBitset<256> mySet;
mySet.set(39);
mySet.set(143);
for (int i = 0; i < 10000000; ++i)
{
total += get_all_neighbors((uint8_t)int_value, mySet,
myArray);
}
auto end = std::chrono::high_resolution_clock::now();
// Calculate the duration
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
printf("total was %zu\n", total);
// Print the elapsed time
std::cout << "Function execution time: " << duration.count() << " microseconds" << std::endl;
}
{
int int_value = std::stoi(argv[1]);
auto start = std::chrono::high_resolution_clock::now();
volatile size_t number = 0xff;
std::array<ConstexprBitset<256>, 8> myArray;
ConstexprBitset<256> mySet;
mySet.set(59);
mySet.set(113);
mySet.set(8);
mySet.set(91);
int i = 10000000;
while(--i)
{
get_all_neighbor_sets(i & number, mySet,
myArray);
}
auto end = std::chrono::high_resolution_clock::now();
// Calculate the duration
auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start);
// Print the elapsed time
std::cout << "Function2 execution time: " << duration.count() << " nanoseconds" << std::endl;
}
/**
-------------
total was 20000000
Function execution time: 114794 microseconds
total was 6988044573398270032, set is 134217728
Function2 execution time: 6623583 nanoseconds
* */
}
static void perfTesting2(char* argv[]) {
{
int int_value = std::stoi(argv[1]);
size_t total = 0;
auto start = std::chrono::high_resolution_clock::now();
std::array<uint8_t, 256> myArray;
ConstexprBitset<256> mySet;
mySet.set(39);
mySet.set(143);
for (int i = 0; i < 10000000; ++i)
{
total += get_all_neighbors((uint8_t)int_value, mySet,
myArray);
}
auto end = std::chrono::high_resolution_clock::now();
// Calculate the duration
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
printf("total was %zu\n", total);
// Print the elapsed time
std::cout << "Function execution time: " << duration.count() << " microseconds" << std::endl;
}
{
int int_value = std::stoi(argv[1]);
volatile size_t number = 0xff;
auto start = std::chrono::high_resolution_clock::now();
std::array<ConstexprBitset<256>, 8> myArray;
ConstexprBitset<256> mySet;
mySet.set(56);
mySet.set(113);
mySet.set(8);
mySet.set(91);
for (int i = 0; i < 10000000; ++i)
{
get_neighbor_set_1(i & number, mySet,
myArray);
//total += myArray[1].blocks[1];
}
auto end = std::chrono::high_resolution_clock::now();
// Calculate the duration
auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start);
// Print the elapsed time
std::cout << "Function3 test 2 execution time: " << duration.count() << " nanoseconds" << std::endl;
}
}
int main(int argc, char* argv[]) {
constexpr uint8_t v = 0b00001111;
constexpr auto neighbors = hamming_neighbors<2, v>();
constexpr auto neighbors2 = hamming_neighbors<1, v>();
for (auto n : neighbors) {
std::cout << std::bitset<8>(n) << " (" << int(n) << ")\n";
}
puts("------------");
for (auto n : neighbors2) {
std::cout << std::bitset<8>(n) << " (" << int(n) << ")\n";
}
constexpr auto& table1 = hamming_table<1>;
constexpr auto& table2 = hamming_table<1>;
puts("------------");
for (auto n : table1[0b1101]) {
std::cout << std::bitset<8>(n) << " (" << int(n) << ")\n";
}
puts("-------------");
//perfTesting(argv);
//perfTesting2(argv);
//quantizePerfTesting();
//quantizePerfTesting16();
//quantizePerfTesting32();
endToEndPerfTest();
/**total was 20000000
Function execution time: 120658 microseconds
i is now 80
Function2 execution time: 3108209 nanoseconds
total was 20000000
Function execution time: 109372 microseconds
Function3 test 2 execution time: 3118209 nanoseconds
Function 8bit Quantize execution time: 35470 microseconds
total is 0
Function 16bit Quantize execution time: 39884 microseconds
total is 224130000000
Function 32bit Quantize execution time: 119789 microseconds
total is 41428131840000000*/
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment