Skip to content

Instantly share code, notes, and snippets.

@Voultapher
Last active November 26, 2025 09:31
Show Gist options
  • Select an option

  • Save Voultapher/7694ab2c33cca4fefedf885b99658f31 to your computer and use it in GitHub Desktop.

Select an option

Save Voultapher/7694ab2c33cca4fefedf885b99658f31 to your computer and use it in GitHub Desktop.
// Fast u64 optimized bloom filter by Lukas Bergdoll.
// Copyright (c) 2025, Lukas Bergdoll.
// Use of this source code is governed by the Apache 2.0 license.
#include <cstdlib>
#include <math.h>
#include <stddef.h>
#include <stdint.h>
#include <vector>
// Fast u64 optimized bloom filter by Lukas Bergdoll.
class BloomU64
{
public:
BloomU64(size_t expected_items, float false_positive_rate)
{
// By having a larger space to map into we get a better than requested
// false-positive rate, but that's ok especially for smaller sizes it
// counteracts the inherent quantization.
if (expected_items == 0)
{
hash_num = 0;
return;
}
const auto ln_fpr = log(false_positive_rate);
hash_num = static_cast<uint32_t>(round(-(ln_fpr / log(2))));
const auto total_bits =
static_cast<size_t>(round((-2.08 * ln_fpr) * static_cast<float>(expected_items)));
auto next_power_of_two = static_cast<uint32_t>(ceil(log2(total_bits)));
next_power_of_two = next_power_of_two < 3 ? 3 : next_power_of_two;
shift_num = (sizeof(size_t) * 8) - next_power_of_two;
bits.assign(1ull << (next_power_of_two - 3), 0);
}
void add(uint64_t x)
{
for (size_t i = 0; i < hash_num; ++i)
{
const size_t idx = map_to_range(hash_mix(x, i), shift_num);
set_bit(idx);
}
}
bool contains(uint64_t x) const
{
for (size_t i = 0; i < hash_num; ++i)
{
const size_t idx = map_to_range(hash_mix(x, i), shift_num);
if (!get_bit(idx))
{
return false;
}
}
return true;
}
private:
uint32_t hash_num;
uint32_t shift_num;
std::vector<uint8_t> bits;
void set_bit(size_t idx)
{
bits[idx >> 3] |= static_cast<uint8_t>(1u << (idx & 7));
}
bool get_bit(size_t idx) const
{
return bits[idx >> 3] & static_cast<uint8_t>(1u << (idx & 7));
}
static size_t map_to_range(uint64_t x, uint32_t s)
{
// Based on
// https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
// Essentially a faster modulo, assuming the range we are mapping into
// is a power of two.
constexpr uint64_t GOLDEN_RATIO = 11400714819323197439ULL;
return static_cast<size_t>(x * GOLDEN_RATIO) >> s;
}
static uint64_t hash_mix(uint64_t a, uint64_t b)
{
return (a ^ 0x243f6a8885a308d3ULL) * (b ^ 0xc0ac29b7c97c50ddULL);
}
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment