Skip to content

Instantly share code, notes, and snippets.

@apples
Created May 5, 2020 06:03
Show Gist options
  • Select an option

  • Save apples/e3fd9785a260e86f1f669b4f4b3f0a05 to your computer and use it in GitHub Desktop.

Select an option

Save apples/e3fd9785a260e86f1f669b4f4b3f0a05 to your computer and use it in GitHub Desktop.
Implementations of algorithms A-Res and A-ExpJ from Efraimidis and Spirakis.
#include <iostream>
#include <random>
#include <vector>
#include <queue>
#include <cmath>
#include <array>
#include <algorithm>
#include <iterator>
/*
Pavlos S. Efraimidis, Paul G. Spirakis,
Weighted random sampling with a reservoir,
Information Processing Letters,
Volume 97, Issue 5,
2006,
Pages 181-185,
ISSN 0020-0190,
https://doi.org/10.1016/j.ipl.2005.11.003.
*/
/// input item
struct weighted_item {
int value = 0;
float weight = 0;
};
/// scored item for the queue
struct scored_item {
weighted_item item = {};
float score = 0;
};
/// queue will contain items with the lowest scores (highest rolls)
auto operator<(scored_item const& a, scored_item const& b) {
return a.score < b.score;
}
/// implements Algorithm A-Res from Efraimidis-Spirakis
template <typename Rng>
auto efraimidis_spirakis_sampler(
std::vector<weighted_item> const& items, /// input sequence
int const result_size, /// maximum size of results
Rng& rng /// random number generator
) {
auto reservoir = std::priority_queue<scored_item>{};
auto random_dist = std::uniform_real_distribution{0.0f, 1.0f};
for (auto const& item : items) {
// score is logarithmically congruent to the "key" in Efraimidis-Spirakis
auto score = -std::log(random_dist(rng)) / float(item.weight);
if (reservoir.size() < result_size) {
// if the queue isn't full, then there's nothing to replace
reservoir.push({ item, score });
} else {
// reservoir will always contain the items with the lowest scores
if (score < reservoir.top().score) {
reservoir.pop();
reservoir.push({ item, score });
}
}
}
auto results = std::vector<weighted_item>{};
results.reserve(reservoir.size());
while (!reservoir.empty()) {
results.push_back(reservoir.top().item);
reservoir.pop();
}
return results;
}
/// implements Algorithm A-ExpJ from Efraimidis-Spirakis
template <typename Rng>
auto efraimidis_spirakis_jump_sampler(
std::vector<weighted_item> const& items, /// input sequence
int const result_size, /// maximum size of results
Rng& rng /// random number generator
) {
auto reservoir = std::priority_queue<scored_item>{};
auto random_dist = std::uniform_real_distribution{0.0f, 1.0f};
auto get_score = [&rng, &random_dist](auto const& item){
return std::pow(random_dist(rng), 1.0f / float(item.weight));
};
auto i = begin(items);
auto e = end(items);
for (; i != e && reservoir.size() < result_size; ++i) {
auto const& item = *i;
auto score = get_score(item);
reservoir.push({ item, -score });
}
auto skip_weight = std::log(random_dist(rng))/std::log(-reservoir.top().score);
for (; i != e; ++i) {
auto const& item = *i;
skip_weight -= item.weight;
if (skip_weight <= 0) {
auto t = std::pow(-reservoir.top().score, item.weight);
auto adjusted_dist = std::uniform_real_distribution{t, 1.0f};
auto adjusted_score = std::pow(adjusted_dist(rng), 1.0f / item.weight);
reservoir.pop();
reservoir.push({ item, -adjusted_score });
skip_weight = std::log(random_dist(rng))/std::log(-reservoir.top().score);
}
}
auto results = std::vector<weighted_item>{};
results.reserve(reservoir.size());
while (!reservoir.empty()) {
results.push_back(reservoir.top().item);
reservoir.pop();
}
return results;
}
template <typename Rng>
class rng_counter {
public:
rng_counter(Rng& rng) : rng(&rng) {}
using result_type = typename Rng::result_type;
result_type operator()() {
++count;
return (*rng)();
}
static constexpr result_type min() { return Rng::min(); }
static constexpr result_type max() { return Rng::max(); }
int get_count() const { return count; }
private:
Rng* rng;
int count;
};
int main() {
auto rng = std::mt19937_64{std::random_device{}()};
auto weight_dist = std::uniform_int_distribution{1, 100};
auto items = std::vector<weighted_item>{};
auto N_items = 10000;
for (int i = 0; i < N_items; ++i) {
items.push_back({ i, float(weight_dist(rng)) });
}
auto hits = std::vector<int>{}; /// size must match number of items
std::fill_n(std::back_inserter(hits), N_items, 0);
auto N = 10'000; /// number of test runs for averaging
auto count_rng = rng_counter(rng);
for (int i = 0; i < N; ++i) {
auto results = efraimidis_spirakis_jump_sampler(items, 2, count_rng);
for (auto const& item : results) {
++hits[item.value];
}
}
// print results
for (int i = 0; i < N_items; ++i) {
std::cout
<< i << "\t"
<< items[i].weight << "\t"
<< (100*hits[i]/N) << "%" << std::endl;
}
std::cout << "\nAvg. rng calls per run: " << (count_rng.get_count() / double(N)) << std::endl;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment