Voultapher · November 26, 2025 09:31
diff --git a/bloom.h b/bloom.h
 // Copyright (c) 2025, Lukas Bergdoll.
 // Use of this source code is governed by the Apache 2.0 license.

 #include <cstdlib>
 #include <math.h>
 #include <stddef.h>
 #include <stdint.h>
 #include <vector>

 // Fast u64 optimized bloom filter by Lukas Bergdoll.
 class BloomU64
 {
 public:
    BloomU64(size_t expected_items, float false_positive_rate)
    {
        // By having a larger space to map into we get a better than requested
        // false-positive rate, but that's ok especially for smaller sizes it
        // counteracts the inherent quantization.

        if (expected_items == 0)
        {
            hash_num = 0;
            return;
        }

        const auto ln_fpr = log(false_positive_rate);
        hash_num = static_cast<uint32_t>(round(-(ln_fpr / log(2))));

        const auto total_bits =
            static_cast<size_t>(round((-2.08 * ln_fpr) * static_cast<float>(expected_items)));

        auto next_power_of_two = static_cast<uint32_t>(ceil(log2(total_bits)));
        next_power_of_two = next_power_of_two < 3 ? 3 : next_power_of_two;

        shift_num = (sizeof(size_t) * 8) - next_power_of_two;

        bits.assign(1ull << (next_power_of_two - 3), 0);
    }

    void add(uint64_t x)
    {
        for (size_t i = 0; i < hash_num; ++i)
        {
            const size_t idx = map_to_range(hash_mix(x, i), shift_num);
            set_bit(idx);
        }
    }

    bool contains(uint64_t x) const
    {
        for (size_t i = 0; i < hash_num; ++i)
        {
            const size_t idx = map_to_range(hash_mix(x, i), shift_num);
            if (!get_bit(idx))
            {
                return false;
            }
        }
        return true;
    }

 private:
    uint32_t hash_num;
    uint32_t shift_num;
    std::vector<uint8_t> bits;

    void set_bit(size_t idx)
    {
        bits[idx >> 3] |= static_cast<uint8_t>(1u << (idx & 7));
    }

    bool get_bit(size_t idx) const
    {
        return bits[idx >> 3] & static_cast<uint8_t>(1u << (idx & 7));
    }

    static size_t map_to_range(uint64_t x, uint32_t s)
    {
        // Based on
        // https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
        // Essentially a faster modulo, assuming the range we are mapping into
        // is a power of two.
        constexpr uint64_t GOLDEN_RATIO = 11400714819323197439ULL;
        return static_cast<size_t>(x * GOLDEN_RATIO) >> s;
    }

    static uint64_t hash_mix(uint64_t a, uint64_t b)
    {
        return (a ^ 0x243f6a8885a308d3ULL) * (b ^ 0xc0ac29b7c97c50ddULL);
    }
 };
	// Copyright (c) 2025, Lukas Bergdoll.
	// Use of this source code is governed by the Apache 2.0 license.

	#include <cstdlib>
	#include <math.h>
	#include <stddef.h>
	#include <stdint.h>
	#include <vector>

	// Fast u64 optimized bloom filter by Lukas Bergdoll.
	class BloomU64
	{
	public:
	BloomU64(size_t expected_items, float false_positive_rate)
	{
	// By having a larger space to map into we get a better than requested
	// false-positive rate, but that's ok especially for smaller sizes it
	// counteracts the inherent quantization.

	if (expected_items == 0)
	{
	hash_num = 0;
	return;
	}

	const auto ln_fpr = log(false_positive_rate);
	hash_num = static_cast<uint32_t>(round(-(ln_fpr / log(2))));

	const auto total_bits =
	static_cast<size_t>(round((-2.08 * ln_fpr) * static_cast<float>(expected_items)));

	auto next_power_of_two = static_cast<uint32_t>(ceil(log2(total_bits)));
	next_power_of_two = next_power_of_two < 3 ? 3 : next_power_of_two;

	shift_num = (sizeof(size_t) * 8) - next_power_of_two;

	bits.assign(1ull << (next_power_of_two - 3), 0);
	}

	void add(uint64_t x)
	{
	for (size_t i = 0; i < hash_num; ++i)
	{
	const size_t idx = map_to_range(hash_mix(x, i), shift_num);
	set_bit(idx);
	}
	}

	bool contains(uint64_t x) const
	{
	for (size_t i = 0; i < hash_num; ++i)
	{
	const size_t idx = map_to_range(hash_mix(x, i), shift_num);
	if (!get_bit(idx))
	{
	return false;
	}
	}
	return true;
	}

	private:
	uint32_t hash_num;
	uint32_t shift_num;
	std::vector<uint8_t> bits;

	void set_bit(size_t idx)
	{
	bits[idx >> 3] \|= static_cast<uint8_t>(1u << (idx & 7));
	}

	bool get_bit(size_t idx) const
	{
	return bits[idx >> 3] & static_cast<uint8_t>(1u << (idx & 7));
	}

	static size_t map_to_range(uint64_t x, uint32_t s)
	{
	// Based on
	// https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
	// Essentially a faster modulo, assuming the range we are mapping into
	// is a power of two.
	constexpr uint64_t GOLDEN_RATIO = 11400714819323197439ULL;
	return static_cast<size_t>(x * GOLDEN_RATIO) >> s;
	}

	static uint64_t hash_mix(uint64_t a, uint64_t b)
	{
	return (a ^ 0x243f6a8885a308d3ULL) * (b ^ 0xc0ac29b7c97c50ddULL);
	}
	};
No results found