Skip to content

Instantly share code, notes, and snippets.

@jweinst1
Last active September 4, 2025 00:39
Show Gist options
  • Save jweinst1/17f6bd624bfe6514e48ef1ba594463ac to your computer and use it in GitHub Desktop.
Save jweinst1/17f6bd624bfe6514e48ef1ba594463ac to your computer and use it in GitHub Desktop.
Runtime calculation of hamming neighbor benchmark
#include <array>
#include <cstdint>
#include <iostream>
#include <random>
#include <chrono>
class RegisterBitset1024 {
public:
static constexpr size_t NUM_BITS = 1024;
static constexpr size_t NUM_REGS = NUM_BITS / 64;
std::array<uint64_t, NUM_REGS> bits{};
// Insert a key index (0 <= key_idx < 1024)
void insert(size_t key_idx) {
size_t reg = key_idx >> 6; // divide by 64
size_t bit = key_idx & 63; // modulo 64
bits[reg] |= (1ULL << bit);
}
// Check existence
bool exists(size_t key_idx) const {
size_t reg = key_idx >> 6; // divide by 64
size_t bit = key_idx & 63; // modulo 64
return (bits[reg] & (1ULL << bit)) != 0;
}
// Optional: count total set bits
size_t count() const {
size_t cnt = 0;
for (auto r : bits) cnt += __builtin_popcountll(r);
return cnt;
}
};
// Demo
int main() {
RegisterBitset1024 rbs;
std::mt19937_64 rng(time(nullptr));
std::uniform_int_distribution<size_t> dist(0, 1023);
// Insert 100 random keys
for (size_t i = 0; i < 100; ++i) rbs.insert(dist(rng));
// Micro-benchmark existence check
size_t sink = 0;
auto start = std::chrono::high_resolution_clock::now();
for (size_t q = 0; q < 10000000; ++q) {
if (rbs.exists(q & 0b111111)) sink++;
}
auto end = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> dur = end - start;
std::cout << "Existence queries completed in " << dur.count() << " s, sink=" << sink << "\n";
//Existence queries completed in 0.00392025 s, sink=937500
return 0;
}
#include <array>
#include <cstdint>
#include <iostream>
#include <random>
#include <chrono>
#include <bitset>
// This test shows how a very compact bitset can be done with minimal false positives
// A prefix / suffix of the number is used as an index to some partitioned table,
// and then a simple OR is used to check the membership.
// the partition is meant to increase entropy.
inline void runtime_neighbors(uint64_t x, std::array<uint64_t, 64>& out) {
for (size_t i = 0; i < 64; ++i) {
out[i] = x ^ (1ULL << i);
}
}
struct u64set {
uint64_t branches[256] = {0};
inline void store(uint64_t number) {
branches[number & 0xff] |= number;
}
inline bool contains(uint64_t number) const {
return (branches[number & 0xff] & (number)) == number;
}
inline void print() const {
for (int i = 0; i < 256; ++i)
{
std::cout << (size_t)i << ")))" << std::bitset<64>(branches[i]) << "\n";
}
}
};
static void u64setTest() {
u64set s;
s.store(475478382);
s.store(475978782);
s.store(475478389);
s.store(135478382);
s.store(975478319);
s.print();
if (s.contains(475978781)) {
std::cout << "Contains dup!\n";
}
if (s.contains(476978781)) {
std::cout << "Contains dup!\n";
}
}
// Demo
int main() {
u64setTest();
return 0;
}
#include <array>
#include <cstdint>
#include <iostream>
#include <vector>
#include <random>
#include <chrono>
// ============================================================
// Method 1: Precomputed lookup per byte
// ============================================================
using ByteNeighbors = std::array<std::array<uint8_t, 8>, 256>;
// Build lookup table: for each byte value, store all Hamming-1 flips
ByteNeighbors build_byte_neighbors() {
ByteNeighbors table{};
for (int b = 0; b < 256; ++b) {
for (int i = 0; i < 8; ++i) {
table[b][i] = static_cast<uint8_t>(b ^ (1 << i));
}
}
return table;
}
// Lookup neighbors of a 64-bit number byte by byte
inline void precomputed_neighbors(uint64_t x,
const ByteNeighbors& table,
std::array<uint64_t, 64>& out) {
size_t idx = 0;
for (int byte = 0; byte < 8; ++byte) {
uint8_t val = (x >> (byte * 8)) & 0xFF;
for (int i = 0; i < 8; ++i) {
uint64_t flipped = (x & ~(0xFFULL << (byte * 8))) |
(uint64_t(table[val][i]) << (byte * 8));
out[idx++] = flipped;
}
}
}
// ============================================================
// Method 2: On-the-fly neighbor generation
// ============================================================
inline void runtime_neighbors(uint64_t x, std::array<uint64_t, 64>& out) {
for (size_t i = 0; i < 64; ++i) {
out[i] = x ^ (1ULL << i);
}
}
// ============================================================
// Benchmark
// ============================================================
int main() {
constexpr size_t N = 10'000'000; // number of queries
std::vector<uint64_t> data(N);
// Random generator
std::mt19937_64 rng(42);
std::uniform_int_distribution<uint64_t> dist;
for (size_t i = 0; i < N; ++i) {
data[i] = dist(rng);
}
ByteNeighbors table = build_byte_neighbors();
std::array<uint64_t, 64> neighbors;
// Benchmark: Precomputed lookup
auto t1 = std::chrono::high_resolution_clock::now();
uint64_t sink1 = 0;
for (size_t i = 0; i < N; ++i) {
precomputed_neighbors(data[i], table, neighbors);
sink1 += neighbors[0]; // prevent optimization
}
auto t2 = std::chrono::high_resolution_clock::now();
// Benchmark: Runtime generation
auto t3 = std::chrono::high_resolution_clock::now();
uint64_t sink2 = 0;
for (size_t i = 0; i < N; ++i) {
runtime_neighbors(data[i], neighbors);
sink2 += neighbors[0];
}
auto t4 = std::chrono::high_resolution_clock::now();
auto precomputed_time = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);
auto runtime_time = std::chrono::duration_cast<std::chrono::milliseconds>(t4 - t3);
std::cout << "Precomputed lookup: " << precomputed_time.count()
<< " ms, sink=" << sink1 << "\n";
std::cout << "Runtime generation: " << runtime_time.count()
<< " ms, sink=" << sink2 << "\n";
//Precomputed lookup: 181 ms, sink=10185275016095337641
//Runtime generation: 1 ms, sink=10185275016095337641
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment