Skip to content

Instantly share code, notes, and snippets.

@slwu89
Last active June 20, 2018 23:07
Show Gist options
  • Save slwu89/abe59992df437b2a19eb2afb8aa4d3af to your computer and use it in GitHub Desktop.
Save slwu89/abe59992df437b2a19eb2afb8aa4d3af to your computer and use it in GitHub Desktop.
testing various flavors of multinomial sampling algorithms
#include <Rcpp.h>
#include <random>
#include <vector>
using namespace Rcpp;
// [[Rcpp::plugins(cpp11)]]
typedef std::mt19937 RNG;
inline double beta_1_b(double b, RNG& generator)
{
std::uniform_real_distribution<double> stdunif(0.0, 1.0);
return 1.0 - pow(stdunif(generator), 1.0/b);
}
/* ################################################################################
* Startek, M. (2016). An asymptotically optimal, online algorithm for weighted random sampling with replacement, 1–11. Retrieved from http://arxiv.org/abs/1611.00532
################################################################################ */
// [[Rcpp::export]]
std::vector<int> multinom_online(const std::vector<double>& probs,int samples, double switchover=1.0){
std::random_device rd;
std::mt19937 gen(rd());
std::vector<int> sample(probs.size(),0);
double pprob = 0.0;
double cprob = 0.0;
unsigned int pidx = 0;
while(samples > 0)
{
pprob += probs[pidx];
while(((pprob - cprob) * samples / (1.0 - cprob)) < switchover)
{
cprob += beta_1_b(samples,gen) * (1.0 - cprob);
while(pprob < cprob)
pprob += probs[++pidx];
if(sample.size() == pidx)
sample[pidx] = 1;
else
sample[pidx] += 1;
samples--;
if(samples == 0)
break;
}
if(samples == 0)
break;
double p = (pprob-cprob)/(1.0-cprob);
int nrtaken(0);
if(p >= 1.){
nrtaken = size;
} else {
std::binomial_distribution<int> rbinom(size, p);
nrtaken = rbinom(rng);
}
if(nrtaken > 0)
{
if(sample.size() == pidx)
sample[pidx] = nrtaken;
else
sample[pidx] += nrtaken;
}
samples -= nrtaken;
pidx++;
cprob = pprob;
}
return sample;
}
/* ################################################################################
* conditional binomial sampling
################################################################################ */
// [[Rcpp::export]]
std::vector<int> multinom_condbinom(const std::vector<double>& probs,int samples){
std::random_device rd;
std::mt19937 rng(rd());
int K = probs.size(); /* number of bins */
std::vector<int>sample(K,0); /* multinomial random variable (vector with K bins) */
int n(samples); /* number of things sampled so far */
double p_tot(0.); /* total probability */
for(auto &it : probs){
p_tot += it;
}
/* generate first K-1 dimensions of multinomial RV via conditional binomial approach */
for(size_t k=0; k<K-1; k++){
if(probs[k] > 0.){
double p = probs[k]/p_tot;
std::binomial_distribution<int>rbinom(n,p);
sample[k] = ((p < 1.) ? rbinom(rng) : n);
n -= sample[k];
} else {
sample[k] = 0;
}
if(n <= 0){
return sample;
}
p_tot -= probs[k];
}
sample[K-1] = n;
return sample;
};
/*** R
probs = rlnorm(n = 20)
probs = probs/sum(probs)
samples = 500
testo = t(replicate(n = 1e4,expr = multinom_online(probs,samples)))
testc = t(replicate(n = 1e4,expr = multinom_condbinom(probs,samples)))
rtest = t(replicate(n = 1e4,expr = rmultinom(1,samples,probs),simplify = "matrix"))
colMeans(testo)
colMeans(testc)
colMeans(rtest)
makeParams <- function(n,bins){
probs = rlnorm(bins); probs = probs/sum(probs)
return(
list(
samples=n,
probs=probs
)
)
}
n_low_bins_high <- makeParams(n = 10,bins = 1e3)
n_high_bins_low <- makeParams(n = 1e3,bins = 10)
library(microbenchmark)
microbenchmark(
multinom_online(n_low_bins_high$probs,n_low_bins_high$samples),
multinom_condbinom(n_low_bins_high$probs,n_low_bins_high$samples)
)
microbenchmark(
multinom_online(n_high_bins_low$probs,n_high_bins_low$samples),
multinom_condbinom(n_high_bins_low$probs,n_high_bins_low$samples)
)
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment