romainfrancois · November 7, 2017 11:41 · romainfrancois · Nov 8, 2017
diff --git a/nacount.cpp b/nacount.cpp
 #include <Rcpp.h>
 #include <RcppParallel.h>

 // [[Rcpp::depends(RcppParallel)]]
 // [[Rcpp::plugins(cpp11)]]
 using namespace Rcpp;

 // [[Rcpp::export]]
 int count_baseline( NumericVector x ){
  return std::count( x.begin(), x.end(), 3.0 ) ;
 }

 // [[Rcpp::export]]
 int count_na_rapi( NumericVector x ){
  return std::count_if( x.begin(), x.end(), R_IsNA ) ;
 }

 // [[Rcpp::export]]
 int count_na_rcpp( NumericVector x ){
  return std::count_if( x.begin(), x.end(), NumericVector::is_na ) ;
 }

 // [[Rcpp::export]]
 int count_na_proposed( NumericVector x ){
  // interpret the vector as a vector of 64 bits integers
  uint64_t* p = reinterpret_cast<uint64_t*>(x.begin()) ;
  
  // get the 64 bit integer for a quiet NA
  uint64_t na = *reinterpret_cast<uint64_t*>(&NA_REAL) ;
  
  // the mask to nuke the 13th bit
  uint64_t mask = ~( (uint64_t(1) << 51 ) );
  
  // using a lambda for the actual work
  return std::count_if(p, p + x.size(), [na,mask](uint64_t y){
    return ( y & mask ) == na ;
  }) ;
  
 }

 // [[Rcpp::export]]
 int par_count_baseline( NumericVector x ){
  
  auto count_chunk = [=](const tbb::blocked_range<double*>& r, int init) -> int {
    return init + std::count( r.begin(), r.end(), 3.0 );
  } ;
  
  return tbb::parallel_reduce( 
    tbb::blocked_range<double*>(x.begin(), x.end()),
    0, 
    count_chunk, 
    []( int x, int y){ return x+y; }
  ) ;
  
 }

 // [[Rcpp::export]]
 int par_count_rapi( NumericVector x ){
  
  auto count_chunk = [=](const tbb::blocked_range<double*>& r, int init) -> int {
    return init + std::count_if( r.begin(), r.end(), R_IsNA );
  } ;
  
  return tbb::parallel_reduce( 
    tbb::blocked_range<double*>(x.begin(), x.end()),
    0, 
    count_chunk, 
    []( int x, int y){ return x+y; }
  ) ;
  
 }

 // [[Rcpp::export]]
 int par_count_na_rcpp( NumericVector x ){
  
  auto count_chunk = [=](const tbb::blocked_range<double*>& r, int init) -> int {
    return init + std::count_if( r.begin(), r.end(), NumericVector::is_na );
  } ;
  
  return tbb::parallel_reduce( 
    tbb::blocked_range<double*>(x.begin(), x.end()),
    0, 
    count_chunk, 
    []( int x, int y){ return x+y; }
  ) ;
  
 }

 // [[Rcpp::export]]
 int par_count_na_proposed( NumericVector x ){
  
  // interpret the vector as a vector of 64 bits integers
  uint64_t* p = reinterpret_cast<uint64_t*>(x.begin()) ;
  
  // get the 64 bit integer for a quiet NA
  uint64_t na = *reinterpret_cast<uint64_t*>(&NA_REAL) ;
  
  // the mask to nuke the 13th bit
  uint64_t mask = ~( (uint64_t(1) << 51 ) );
  
  auto count_chunk = [=](const tbb::blocked_range<uint64_t*>& r, int init) -> int {
    return init + std::count_if( r.begin(), r.end(), [=](uint64_t y){
      return ( y & mask ) == na ;
    }) ;
  };
  
  return tbb::parallel_reduce( 
    tbb::blocked_range<uint64_t*>(p, p + x.size()),
    0, 
    count_chunk, 
    []( int x, int y){ return x+y; }
  ) ;
  
 }

 /*** R
 library(microbenchmark)

 bench <- function(n){
  x <- rnorm(n)
  x[ sample(n, n/10) ] <- NA 
  
  microbenchmark(
    R = sum(is.na(x)),
    count_baseline = count_baseline(x), 
    count_na_rapi = count_na_rapi(x), 
    count_na_rcpp = count_na_rcpp(x), 
    count_na_proposed = count_na_proposed(x), 
    
    par_count_baseline = par_count_baseline(x), 
    par_count_rapi = par_count_rapi(x), 
    par_count_na_rcpp = par_count_na_rcpp(x), 
    par_count_na_proposed = par_count_na_proposed(x)
  )
 }
 # bench(1e5)
 # bench(1e6)
 */
	#include <Rcpp.h>
	#include <RcppParallel.h>

	// [[Rcpp::depends(RcppParallel)]]
	// [[Rcpp::plugins(cpp11)]]
	using namespace Rcpp;

	// [[Rcpp::export]]
	int count_baseline( NumericVector x ){
	return std::count( x.begin(), x.end(), 3.0 ) ;
	}

	// [[Rcpp::export]]
	int count_na_rapi( NumericVector x ){
	return std::count_if( x.begin(), x.end(), R_IsNA ) ;
	}

	// [[Rcpp::export]]
	int count_na_rcpp( NumericVector x ){
	return std::count_if( x.begin(), x.end(), NumericVector::is_na ) ;
	}

	// [[Rcpp::export]]
	int count_na_proposed( NumericVector x ){
	// interpret the vector as a vector of 64 bits integers
	uint64_t* p = reinterpret_cast<uint64_t*>(x.begin()) ;

	// get the 64 bit integer for a quiet NA
	uint64_t na = reinterpret_cast<uint64_t>(&NA_REAL) ;

	// the mask to nuke the 13th bit
	uint64_t mask = ~( (uint64_t(1) << 51 ) );

	// using a lambda for the actual work
	return std::count_if(p, p + x.size(), [na,mask](uint64_t y){
	return ( y & mask ) == na ;
	}) ;

	}

	// [[Rcpp::export]]
	int par_count_baseline( NumericVector x ){

	auto count_chunk = [=](const tbb::blocked_range<double*>& r, int init) -> int {
	return init + std::count( r.begin(), r.end(), 3.0 );
	} ;

	return tbb::parallel_reduce(
	tbb::blocked_range<double*>(x.begin(), x.end()),
	0,
	count_chunk,
	[]( int x, int y){ return x+y; }
	) ;

	}

	// [[Rcpp::export]]
	int par_count_rapi( NumericVector x ){

	auto count_chunk = [=](const tbb::blocked_range<double*>& r, int init) -> int {
	return init + std::count_if( r.begin(), r.end(), R_IsNA );
	} ;

	return tbb::parallel_reduce(
	tbb::blocked_range<double*>(x.begin(), x.end()),
	0,
	count_chunk,
	[]( int x, int y){ return x+y; }
	) ;

	}

	// [[Rcpp::export]]
	int par_count_na_rcpp( NumericVector x ){

	auto count_chunk = [=](const tbb::blocked_range<double*>& r, int init) -> int {
	return init + std::count_if( r.begin(), r.end(), NumericVector::is_na );
	} ;

	return tbb::parallel_reduce(
	tbb::blocked_range<double*>(x.begin(), x.end()),
	0,
	count_chunk,
	[]( int x, int y){ return x+y; }
	) ;

	}

	// [[Rcpp::export]]
	int par_count_na_proposed( NumericVector x ){

	// interpret the vector as a vector of 64 bits integers
	uint64_t* p = reinterpret_cast<uint64_t*>(x.begin()) ;

	// get the 64 bit integer for a quiet NA
	uint64_t na = reinterpret_cast<uint64_t>(&NA_REAL) ;

	// the mask to nuke the 13th bit
	uint64_t mask = ~( (uint64_t(1) << 51 ) );

	auto count_chunk = [=](const tbb::blocked_range<uint64_t*>& r, int init) -> int {
	return init + std::count_if( r.begin(), r.end(), [=](uint64_t y){
	return ( y & mask ) == na ;
	}) ;
	};

	return tbb::parallel_reduce(
	tbb::blocked_range<uint64_t*>(p, p + x.size()),
	0,
	count_chunk,
	[]( int x, int y){ return x+y; }
	) ;

	}

	/*** R
	library(microbenchmark)

	bench <- function(n){
	x <- rnorm(n)
	x[ sample(n, n/10) ] <- NA

	microbenchmark(
	R = sum(is.na(x)),
	count_baseline = count_baseline(x),
	count_na_rapi = count_na_rapi(x),
	count_na_rcpp = count_na_rcpp(x),
	count_na_proposed = count_na_proposed(x),

	par_count_baseline = par_count_baseline(x),
	par_count_rapi = par_count_rapi(x),
	par_count_na_rcpp = par_count_na_rcpp(x),
	par_count_na_proposed = par_count_na_proposed(x)
	)
	}
	# bench(1e5)
	# bench(1e6)
	*/