Created
May 5, 2020 06:03
-
-
Save apples/e3fd9785a260e86f1f669b4f4b3f0a05 to your computer and use it in GitHub Desktop.
Implementations of algorithms A-Res and A-ExpJ from Efraimidis and Spirakis.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <iostream> | |
| #include <random> | |
| #include <vector> | |
| #include <queue> | |
| #include <cmath> | |
| #include <array> | |
| #include <algorithm> | |
| #include <iterator> | |
| /* | |
| Pavlos S. Efraimidis, Paul G. Spirakis, | |
| Weighted random sampling with a reservoir, | |
| Information Processing Letters, | |
| Volume 97, Issue 5, | |
| 2006, | |
| Pages 181-185, | |
| ISSN 0020-0190, | |
| https://doi.org/10.1016/j.ipl.2005.11.003. | |
| */ | |
| /// input item | |
| struct weighted_item { | |
| int value = 0; | |
| float weight = 0; | |
| }; | |
| /// scored item for the queue | |
| struct scored_item { | |
| weighted_item item = {}; | |
| float score = 0; | |
| }; | |
| /// queue will contain items with the lowest scores (highest rolls) | |
| auto operator<(scored_item const& a, scored_item const& b) { | |
| return a.score < b.score; | |
| } | |
| /// implements Algorithm A-Res from Efraimidis-Spirakis | |
| template <typename Rng> | |
| auto efraimidis_spirakis_sampler( | |
| std::vector<weighted_item> const& items, /// input sequence | |
| int const result_size, /// maximum size of results | |
| Rng& rng /// random number generator | |
| ) { | |
| auto reservoir = std::priority_queue<scored_item>{}; | |
| auto random_dist = std::uniform_real_distribution{0.0f, 1.0f}; | |
| for (auto const& item : items) { | |
| // score is logarithmically congruent to the "key" in Efraimidis-Spirakis | |
| auto score = -std::log(random_dist(rng)) / float(item.weight); | |
| if (reservoir.size() < result_size) { | |
| // if the queue isn't full, then there's nothing to replace | |
| reservoir.push({ item, score }); | |
| } else { | |
| // reservoir will always contain the items with the lowest scores | |
| if (score < reservoir.top().score) { | |
| reservoir.pop(); | |
| reservoir.push({ item, score }); | |
| } | |
| } | |
| } | |
| auto results = std::vector<weighted_item>{}; | |
| results.reserve(reservoir.size()); | |
| while (!reservoir.empty()) { | |
| results.push_back(reservoir.top().item); | |
| reservoir.pop(); | |
| } | |
| return results; | |
| } | |
| /// implements Algorithm A-ExpJ from Efraimidis-Spirakis | |
| template <typename Rng> | |
| auto efraimidis_spirakis_jump_sampler( | |
| std::vector<weighted_item> const& items, /// input sequence | |
| int const result_size, /// maximum size of results | |
| Rng& rng /// random number generator | |
| ) { | |
| auto reservoir = std::priority_queue<scored_item>{}; | |
| auto random_dist = std::uniform_real_distribution{0.0f, 1.0f}; | |
| auto get_score = [&rng, &random_dist](auto const& item){ | |
| return std::pow(random_dist(rng), 1.0f / float(item.weight)); | |
| }; | |
| auto i = begin(items); | |
| auto e = end(items); | |
| for (; i != e && reservoir.size() < result_size; ++i) { | |
| auto const& item = *i; | |
| auto score = get_score(item); | |
| reservoir.push({ item, -score }); | |
| } | |
| auto skip_weight = std::log(random_dist(rng))/std::log(-reservoir.top().score); | |
| for (; i != e; ++i) { | |
| auto const& item = *i; | |
| skip_weight -= item.weight; | |
| if (skip_weight <= 0) { | |
| auto t = std::pow(-reservoir.top().score, item.weight); | |
| auto adjusted_dist = std::uniform_real_distribution{t, 1.0f}; | |
| auto adjusted_score = std::pow(adjusted_dist(rng), 1.0f / item.weight); | |
| reservoir.pop(); | |
| reservoir.push({ item, -adjusted_score }); | |
| skip_weight = std::log(random_dist(rng))/std::log(-reservoir.top().score); | |
| } | |
| } | |
| auto results = std::vector<weighted_item>{}; | |
| results.reserve(reservoir.size()); | |
| while (!reservoir.empty()) { | |
| results.push_back(reservoir.top().item); | |
| reservoir.pop(); | |
| } | |
| return results; | |
| } | |
| template <typename Rng> | |
| class rng_counter { | |
| public: | |
| rng_counter(Rng& rng) : rng(&rng) {} | |
| using result_type = typename Rng::result_type; | |
| result_type operator()() { | |
| ++count; | |
| return (*rng)(); | |
| } | |
| static constexpr result_type min() { return Rng::min(); } | |
| static constexpr result_type max() { return Rng::max(); } | |
| int get_count() const { return count; } | |
| private: | |
| Rng* rng; | |
| int count; | |
| }; | |
| int main() { | |
| auto rng = std::mt19937_64{std::random_device{}()}; | |
| auto weight_dist = std::uniform_int_distribution{1, 100}; | |
| auto items = std::vector<weighted_item>{}; | |
| auto N_items = 10000; | |
| for (int i = 0; i < N_items; ++i) { | |
| items.push_back({ i, float(weight_dist(rng)) }); | |
| } | |
| auto hits = std::vector<int>{}; /// size must match number of items | |
| std::fill_n(std::back_inserter(hits), N_items, 0); | |
| auto N = 10'000; /// number of test runs for averaging | |
| auto count_rng = rng_counter(rng); | |
| for (int i = 0; i < N; ++i) { | |
| auto results = efraimidis_spirakis_jump_sampler(items, 2, count_rng); | |
| for (auto const& item : results) { | |
| ++hits[item.value]; | |
| } | |
| } | |
| // print results | |
| for (int i = 0; i < N_items; ++i) { | |
| std::cout | |
| << i << "\t" | |
| << items[i].weight << "\t" | |
| << (100*hits[i]/N) << "%" << std::endl; | |
| } | |
| std::cout << "\nAvg. rng calls per run: " << (count_rng.get_count() / double(N)) << std::endl; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment