Last active
September 4, 2025 00:39
-
-
Save jweinst1/17f6bd624bfe6514e48ef1ba594463ac to your computer and use it in GitHub Desktop.
Runtime calculation of hamming neighbor benchmark
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <array> | |
#include <cstdint> | |
#include <iostream> | |
#include <random> | |
#include <chrono> | |
class RegisterBitset1024 { | |
public: | |
static constexpr size_t NUM_BITS = 1024; | |
static constexpr size_t NUM_REGS = NUM_BITS / 64; | |
std::array<uint64_t, NUM_REGS> bits{}; | |
// Insert a key index (0 <= key_idx < 1024) | |
void insert(size_t key_idx) { | |
size_t reg = key_idx >> 6; // divide by 64 | |
size_t bit = key_idx & 63; // modulo 64 | |
bits[reg] |= (1ULL << bit); | |
} | |
// Check existence | |
bool exists(size_t key_idx) const { | |
size_t reg = key_idx >> 6; // divide by 64 | |
size_t bit = key_idx & 63; // modulo 64 | |
return (bits[reg] & (1ULL << bit)) != 0; | |
} | |
// Optional: count total set bits | |
size_t count() const { | |
size_t cnt = 0; | |
for (auto r : bits) cnt += __builtin_popcountll(r); | |
return cnt; | |
} | |
}; | |
// Demo | |
int main() { | |
RegisterBitset1024 rbs; | |
std::mt19937_64 rng(time(nullptr)); | |
std::uniform_int_distribution<size_t> dist(0, 1023); | |
// Insert 100 random keys | |
for (size_t i = 0; i < 100; ++i) rbs.insert(dist(rng)); | |
// Micro-benchmark existence check | |
size_t sink = 0; | |
auto start = std::chrono::high_resolution_clock::now(); | |
for (size_t q = 0; q < 10000000; ++q) { | |
if (rbs.exists(q & 0b111111)) sink++; | |
} | |
auto end = std::chrono::high_resolution_clock::now(); | |
std::chrono::duration<double> dur = end - start; | |
std::cout << "Existence queries completed in " << dur.count() << " s, sink=" << sink << "\n"; | |
//Existence queries completed in 0.00392025 s, sink=937500 | |
return 0; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <array> | |
#include <cstdint> | |
#include <iostream> | |
#include <random> | |
#include <chrono> | |
#include <bitset> | |
// This test shows how a very compact bitset can be done with minimal false positives | |
// A prefix / suffix of the number is used as an index to some partitioned table, | |
// and then a simple OR is used to check the membership. | |
// the partition is meant to increase entropy. | |
inline void runtime_neighbors(uint64_t x, std::array<uint64_t, 64>& out) { | |
for (size_t i = 0; i < 64; ++i) { | |
out[i] = x ^ (1ULL << i); | |
} | |
} | |
struct u64set { | |
uint64_t branches[256] = {0}; | |
inline void store(uint64_t number) { | |
branches[number & 0xff] |= number; | |
} | |
inline bool contains(uint64_t number) const { | |
return (branches[number & 0xff] & (number)) == number; | |
} | |
inline void print() const { | |
for (int i = 0; i < 256; ++i) | |
{ | |
std::cout << (size_t)i << ")))" << std::bitset<64>(branches[i]) << "\n"; | |
} | |
} | |
}; | |
static void u64setTest() { | |
u64set s; | |
s.store(475478382); | |
s.store(475978782); | |
s.store(475478389); | |
s.store(135478382); | |
s.store(975478319); | |
s.print(); | |
if (s.contains(475978781)) { | |
std::cout << "Contains dup!\n"; | |
} | |
if (s.contains(476978781)) { | |
std::cout << "Contains dup!\n"; | |
} | |
} | |
// Demo | |
int main() { | |
u64setTest(); | |
return 0; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <array> | |
#include <cstdint> | |
#include <iostream> | |
#include <vector> | |
#include <random> | |
#include <chrono> | |
// ============================================================ | |
// Method 1: Precomputed lookup per byte | |
// ============================================================ | |
using ByteNeighbors = std::array<std::array<uint8_t, 8>, 256>; | |
// Build lookup table: for each byte value, store all Hamming-1 flips | |
ByteNeighbors build_byte_neighbors() { | |
ByteNeighbors table{}; | |
for (int b = 0; b < 256; ++b) { | |
for (int i = 0; i < 8; ++i) { | |
table[b][i] = static_cast<uint8_t>(b ^ (1 << i)); | |
} | |
} | |
return table; | |
} | |
// Lookup neighbors of a 64-bit number byte by byte | |
inline void precomputed_neighbors(uint64_t x, | |
const ByteNeighbors& table, | |
std::array<uint64_t, 64>& out) { | |
size_t idx = 0; | |
for (int byte = 0; byte < 8; ++byte) { | |
uint8_t val = (x >> (byte * 8)) & 0xFF; | |
for (int i = 0; i < 8; ++i) { | |
uint64_t flipped = (x & ~(0xFFULL << (byte * 8))) | | |
(uint64_t(table[val][i]) << (byte * 8)); | |
out[idx++] = flipped; | |
} | |
} | |
} | |
// ============================================================ | |
// Method 2: On-the-fly neighbor generation | |
// ============================================================ | |
inline void runtime_neighbors(uint64_t x, std::array<uint64_t, 64>& out) { | |
for (size_t i = 0; i < 64; ++i) { | |
out[i] = x ^ (1ULL << i); | |
} | |
} | |
// ============================================================ | |
// Benchmark | |
// ============================================================ | |
int main() { | |
constexpr size_t N = 10'000'000; // number of queries | |
std::vector<uint64_t> data(N); | |
// Random generator | |
std::mt19937_64 rng(42); | |
std::uniform_int_distribution<uint64_t> dist; | |
for (size_t i = 0; i < N; ++i) { | |
data[i] = dist(rng); | |
} | |
ByteNeighbors table = build_byte_neighbors(); | |
std::array<uint64_t, 64> neighbors; | |
// Benchmark: Precomputed lookup | |
auto t1 = std::chrono::high_resolution_clock::now(); | |
uint64_t sink1 = 0; | |
for (size_t i = 0; i < N; ++i) { | |
precomputed_neighbors(data[i], table, neighbors); | |
sink1 += neighbors[0]; // prevent optimization | |
} | |
auto t2 = std::chrono::high_resolution_clock::now(); | |
// Benchmark: Runtime generation | |
auto t3 = std::chrono::high_resolution_clock::now(); | |
uint64_t sink2 = 0; | |
for (size_t i = 0; i < N; ++i) { | |
runtime_neighbors(data[i], neighbors); | |
sink2 += neighbors[0]; | |
} | |
auto t4 = std::chrono::high_resolution_clock::now(); | |
auto precomputed_time = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1); | |
auto runtime_time = std::chrono::duration_cast<std::chrono::milliseconds>(t4 - t3); | |
std::cout << "Precomputed lookup: " << precomputed_time.count() | |
<< " ms, sink=" << sink1 << "\n"; | |
std::cout << "Runtime generation: " << runtime_time.count() | |
<< " ms, sink=" << sink2 << "\n"; | |
//Precomputed lookup: 181 ms, sink=10185275016095337641 | |
//Runtime generation: 1 ms, sink=10185275016095337641 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment