apples · May 5, 2020 06:03
diff --git a/efraimidis_spirakis.cpp b/efraimidis_spirakis.cpp
 #include <iostream>
 #include <random>
 #include <vector>
 #include <queue>
 #include <cmath>
 #include <array>
 #include <algorithm>
 #include <iterator>

 /*
 Pavlos S. Efraimidis, Paul G. Spirakis,
 Weighted random sampling with a reservoir,
 Information Processing Letters,
 Volume 97, Issue 5,
 2006,
 Pages 181-185,
 ISSN 0020-0190,
 https://doi.org/10.1016/j.ipl.2005.11.003.
 */

 /// input item
 struct weighted_item {
    int value = 0;
    float weight = 0;
 };

 /// scored item for the queue
 struct scored_item {
    weighted_item item = {};
    float score = 0;
 };

 /// queue will contain items with the lowest scores (highest rolls)
 auto operator<(scored_item const& a, scored_item const& b) {
    return a.score < b.score;
 }

 /// implements Algorithm A-Res from Efraimidis-Spirakis
 template <typename Rng>
 auto efraimidis_spirakis_sampler(
    std::vector<weighted_item> const& items, /// input sequence
    int const result_size, /// maximum size of results
    Rng& rng /// random number generator
 ) {
    auto reservoir = std::priority_queue<scored_item>{};
    auto random_dist = std::uniform_real_distribution{0.0f, 1.0f};

    for (auto const& item : items) {
        // score is logarithmically congruent to the "key" in Efraimidis-Spirakis
        auto score = -std::log(random_dist(rng)) / float(item.weight);

        if (reservoir.size() < result_size) {
            // if the queue isn't full, then there's nothing to replace
            reservoir.push({ item, score });
        } else {
            // reservoir will always contain the items with the lowest scores
            if (score < reservoir.top().score) {
                reservoir.pop();
                reservoir.push({ item, score });
            }
        }
    }

    auto results = std::vector<weighted_item>{};

    results.reserve(reservoir.size());

    while (!reservoir.empty()) {
        results.push_back(reservoir.top().item);
        reservoir.pop();
    }

    return results;
 }

 /// implements Algorithm A-ExpJ from Efraimidis-Spirakis
 template <typename Rng>
 auto efraimidis_spirakis_jump_sampler(
    std::vector<weighted_item> const& items, /// input sequence
    int const result_size, /// maximum size of results
    Rng& rng /// random number generator
 ) {
    auto reservoir = std::priority_queue<scored_item>{};
    auto random_dist = std::uniform_real_distribution{0.0f, 1.0f};

    auto get_score = [&rng, &random_dist](auto const& item){
        return std::pow(random_dist(rng), 1.0f / float(item.weight));
    };

    auto i = begin(items);
    auto e = end(items);

    for (; i != e && reservoir.size() < result_size; ++i) {
        auto const& item = *i;
        auto score = get_score(item);

        reservoir.push({ item, -score });
    }

    auto skip_weight = std::log(random_dist(rng))/std::log(-reservoir.top().score);

    for (; i != e; ++i) {
        auto const& item = *i;

        skip_weight -= item.weight;

        if (skip_weight <= 0) {
            auto t = std::pow(-reservoir.top().score, item.weight);
            auto adjusted_dist = std::uniform_real_distribution{t, 1.0f};
            auto adjusted_score = std::pow(adjusted_dist(rng), 1.0f / item.weight);

            reservoir.pop();
            reservoir.push({ item, -adjusted_score });

            skip_weight = std::log(random_dist(rng))/std::log(-reservoir.top().score);
        }
    }

    auto results = std::vector<weighted_item>{};
    results.reserve(reservoir.size());

    while (!reservoir.empty()) {
        results.push_back(reservoir.top().item);
        reservoir.pop();
    }

    return results;
 }

 template <typename Rng>
 class rng_counter {
    public:
    rng_counter(Rng& rng) : rng(&rng) {}

    using result_type = typename Rng::result_type;

    result_type operator()() {
        ++count;
        return (*rng)();
    }

    static constexpr result_type min() { return Rng::min(); }
    static constexpr result_type max() { return Rng::max(); }

    int get_count() const { return count; }

    private:
    Rng* rng;
    int count;
 };

 int main() {
    auto rng = std::mt19937_64{std::random_device{}()};

    auto weight_dist = std::uniform_int_distribution{1, 100};

    auto items = std::vector<weighted_item>{};

    auto N_items = 10000;

    for (int i = 0; i < N_items; ++i) {
        items.push_back({ i, float(weight_dist(rng)) });
    }

    auto hits = std::vector<int>{}; /// size must match number of items

    std::fill_n(std::back_inserter(hits), N_items, 0);

    auto N = 10'000; /// number of test runs for averaging

    auto count_rng = rng_counter(rng);

    for (int i = 0; i < N; ++i) {
        auto results = efraimidis_spirakis_jump_sampler(items, 2, count_rng);

        for (auto const& item : results) {
            ++hits[item.value];
        }
    }

    // print results
    for (int i = 0; i < N_items; ++i) {
        std::cout
            << i << "\t"
            << items[i].weight << "\t"
            << (100*hits[i]/N) << "%" << std::endl;
    }

    std::cout << "\nAvg. rng calls per run: " << (count_rng.get_count() / double(N)) << std::endl;
 }
	#include <iostream>
	#include <random>
	#include <vector>
	#include <queue>
	#include <cmath>
	#include <array>
	#include <algorithm>
	#include <iterator>

	/*
	Pavlos S. Efraimidis, Paul G. Spirakis,
	Weighted random sampling with a reservoir,
	Information Processing Letters,
	Volume 97, Issue 5,
	2006,
	Pages 181-185,
	ISSN 0020-0190,
	https://doi.org/10.1016/j.ipl.2005.11.003.
	*/

	/// input item
	struct weighted_item {
	int value = 0;
	float weight = 0;
	};

	/// scored item for the queue
	struct scored_item {
	weighted_item item = {};
	float score = 0;
	};

	/// queue will contain items with the lowest scores (highest rolls)
	auto operator<(scored_item const& a, scored_item const& b) {
	return a.score < b.score;
	}

	/// implements Algorithm A-Res from Efraimidis-Spirakis
	template <typename Rng>
	auto efraimidis_spirakis_sampler(
	std::vector<weighted_item> const& items, /// input sequence
	int const result_size, /// maximum size of results
	Rng& rng /// random number generator
	) {
	auto reservoir = std::priority_queue<scored_item>{};
	auto random_dist = std::uniform_real_distribution{0.0f, 1.0f};

	for (auto const& item : items) {
	// score is logarithmically congruent to the "key" in Efraimidis-Spirakis
	auto score = -std::log(random_dist(rng)) / float(item.weight);

	if (reservoir.size() < result_size) {
	// if the queue isn't full, then there's nothing to replace
	reservoir.push({ item, score });
	} else {
	// reservoir will always contain the items with the lowest scores
	if (score < reservoir.top().score) {
	reservoir.pop();
	reservoir.push({ item, score });
	}
	}
	}

	auto results = std::vector<weighted_item>{};

	results.reserve(reservoir.size());

	while (!reservoir.empty()) {
	results.push_back(reservoir.top().item);
	reservoir.pop();
	}

	return results;
	}

	/// implements Algorithm A-ExpJ from Efraimidis-Spirakis
	template <typename Rng>
	auto efraimidis_spirakis_jump_sampler(
	std::vector<weighted_item> const& items, /// input sequence
	int const result_size, /// maximum size of results
	Rng& rng /// random number generator
	) {
	auto reservoir = std::priority_queue<scored_item>{};
	auto random_dist = std::uniform_real_distribution{0.0f, 1.0f};

	auto get_score = [&rng, &random_dist](auto const& item){
	return std::pow(random_dist(rng), 1.0f / float(item.weight));
	};

	auto i = begin(items);
	auto e = end(items);

	for (; i != e && reservoir.size() < result_size; ++i) {
	auto const& item = *i;
	auto score = get_score(item);

	reservoir.push({ item, -score });
	}

	auto skip_weight = std::log(random_dist(rng))/std::log(-reservoir.top().score);

	for (; i != e; ++i) {
	auto const& item = *i;

	skip_weight -= item.weight;

	if (skip_weight <= 0) {
	auto t = std::pow(-reservoir.top().score, item.weight);
	auto adjusted_dist = std::uniform_real_distribution{t, 1.0f};
	auto adjusted_score = std::pow(adjusted_dist(rng), 1.0f / item.weight);

	reservoir.pop();
	reservoir.push({ item, -adjusted_score });

	skip_weight = std::log(random_dist(rng))/std::log(-reservoir.top().score);
	}
	}

	auto results = std::vector<weighted_item>{};
	results.reserve(reservoir.size());

	while (!reservoir.empty()) {
	results.push_back(reservoir.top().item);
	reservoir.pop();
	}

	return results;
	}

	template <typename Rng>
	class rng_counter {
	public:
	rng_counter(Rng& rng) : rng(&rng) {}

	using result_type = typename Rng::result_type;

	result_type operator()() {
	++count;
	return (*rng)();
	}

	static constexpr result_type min() { return Rng::min(); }
	static constexpr result_type max() { return Rng::max(); }

	int get_count() const { return count; }

	private:
	Rng* rng;
	int count;
	};

	int main() {
	auto rng = std::mt19937_64{std::random_device{}()};

	auto weight_dist = std::uniform_int_distribution{1, 100};

	auto items = std::vector<weighted_item>{};

	auto N_items = 10000;

	for (int i = 0; i < N_items; ++i) {
	items.push_back({ i, float(weight_dist(rng)) });
	}

	auto hits = std::vector<int>{}; /// size must match number of items

	std::fill_n(std::back_inserter(hits), N_items, 0);

	auto N = 10'000; /// number of test runs for averaging

	auto count_rng = rng_counter(rng);

	for (int i = 0; i < N; ++i) {
	auto results = efraimidis_spirakis_jump_sampler(items, 2, count_rng);

	for (auto const& item : results) {
	++hits[item.value];
	}
	}

	// print results
	for (int i = 0; i < N_items; ++i) {
	std::cout
	<< i << "\t"
	<< items[i].weight << "\t"
	<< (100*hits[i]/N) << "%" << std::endl;
	}

	std::cout << "\nAvg. rng calls per run: " << (count_rng.get_count() / double(N)) << std::endl;
	}
No results found