Last active
June 20, 2018 23:07
-
-
Save slwu89/abe59992df437b2a19eb2afb8aa4d3af to your computer and use it in GitHub Desktop.
testing various flavors of multinomial sampling algorithms
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <Rcpp.h> | |
| #include <random> | |
| #include <vector> | |
| using namespace Rcpp; | |
| // [[Rcpp::plugins(cpp11)]] | |
| typedef std::mt19937 RNG; | |
| inline double beta_1_b(double b, RNG& generator) | |
| { | |
| std::uniform_real_distribution<double> stdunif(0.0, 1.0); | |
| return 1.0 - pow(stdunif(generator), 1.0/b); | |
| } | |
| /* ################################################################################ | |
| * Startek, M. (2016). An asymptotically optimal, online algorithm for weighted random sampling with replacement, 1–11. Retrieved from http://arxiv.org/abs/1611.00532 | |
| ################################################################################ */ | |
| // [[Rcpp::export]] | |
| std::vector<int> multinom_online(const std::vector<double>& probs,int samples, double switchover=1.0){ | |
| std::random_device rd; | |
| std::mt19937 gen(rd()); | |
| std::vector<int> sample(probs.size(),0); | |
| double pprob = 0.0; | |
| double cprob = 0.0; | |
| unsigned int pidx = 0; | |
| while(samples > 0) | |
| { | |
| pprob += probs[pidx]; | |
| while(((pprob - cprob) * samples / (1.0 - cprob)) < switchover) | |
| { | |
| cprob += beta_1_b(samples,gen) * (1.0 - cprob); | |
| while(pprob < cprob) | |
| pprob += probs[++pidx]; | |
| if(sample.size() == pidx) | |
| sample[pidx] = 1; | |
| else | |
| sample[pidx] += 1; | |
| samples--; | |
| if(samples == 0) | |
| break; | |
| } | |
| if(samples == 0) | |
| break; | |
| double p = (pprob-cprob)/(1.0-cprob); | |
| int nrtaken(0); | |
| if(p >= 1.){ | |
| nrtaken = size; | |
| } else { | |
| std::binomial_distribution<int> rbinom(size, p); | |
| nrtaken = rbinom(rng); | |
| } | |
| if(nrtaken > 0) | |
| { | |
| if(sample.size() == pidx) | |
| sample[pidx] = nrtaken; | |
| else | |
| sample[pidx] += nrtaken; | |
| } | |
| samples -= nrtaken; | |
| pidx++; | |
| cprob = pprob; | |
| } | |
| return sample; | |
| } | |
| /* ################################################################################ | |
| * conditional binomial sampling | |
| ################################################################################ */ | |
| // [[Rcpp::export]] | |
| std::vector<int> multinom_condbinom(const std::vector<double>& probs,int samples){ | |
| std::random_device rd; | |
| std::mt19937 rng(rd()); | |
| int K = probs.size(); /* number of bins */ | |
| std::vector<int>sample(K,0); /* multinomial random variable (vector with K bins) */ | |
| int n(samples); /* number of things sampled so far */ | |
| double p_tot(0.); /* total probability */ | |
| for(auto &it : probs){ | |
| p_tot += it; | |
| } | |
| /* generate first K-1 dimensions of multinomial RV via conditional binomial approach */ | |
| for(size_t k=0; k<K-1; k++){ | |
| if(probs[k] > 0.){ | |
| double p = probs[k]/p_tot; | |
| std::binomial_distribution<int>rbinom(n,p); | |
| sample[k] = ((p < 1.) ? rbinom(rng) : n); | |
| n -= sample[k]; | |
| } else { | |
| sample[k] = 0; | |
| } | |
| if(n <= 0){ | |
| return sample; | |
| } | |
| p_tot -= probs[k]; | |
| } | |
| sample[K-1] = n; | |
| return sample; | |
| }; | |
| /*** R | |
| probs = rlnorm(n = 20) | |
| probs = probs/sum(probs) | |
| samples = 500 | |
| testo = t(replicate(n = 1e4,expr = multinom_online(probs,samples))) | |
| testc = t(replicate(n = 1e4,expr = multinom_condbinom(probs,samples))) | |
| rtest = t(replicate(n = 1e4,expr = rmultinom(1,samples,probs),simplify = "matrix")) | |
| colMeans(testo) | |
| colMeans(testc) | |
| colMeans(rtest) | |
| makeParams <- function(n,bins){ | |
| probs = rlnorm(bins); probs = probs/sum(probs) | |
| return( | |
| list( | |
| samples=n, | |
| probs=probs | |
| ) | |
| ) | |
| } | |
| n_low_bins_high <- makeParams(n = 10,bins = 1e3) | |
| n_high_bins_low <- makeParams(n = 1e3,bins = 10) | |
| library(microbenchmark) | |
| microbenchmark( | |
| multinom_online(n_low_bins_high$probs,n_low_bins_high$samples), | |
| multinom_condbinom(n_low_bins_high$probs,n_low_bins_high$samples) | |
| ) | |
| microbenchmark( | |
| multinom_online(n_high_bins_low$probs,n_high_bins_low$samples), | |
| multinom_condbinom(n_high_bins_low$probs,n_high_bins_low$samples) | |
| ) | |
| */ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment