Skip to content

Instantly share code, notes, and snippets.

@romainfrancois
Created November 7, 2017 20:31
Show Gist options
  • Save romainfrancois/3254264b34100cda40846f9e1fbe29c9 to your computer and use it in GitHub Desktop.
Save romainfrancois/3254264b34100cda40846f9e1fbe29c9 to your computer and use it in GitHub Desktop.
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <Rcpp.h>
using namespace Rcpp ;
// [[Rcpp::plugins(cpp11)]]
// Returns the raw 64-bit pattern of a double.
//
// The bytes are copied with std::memcpy rather than read through a
// reinterpret_cast'ed pointer: accessing a double object through a uint64_t
// lvalue breaks the strict-aliasing rule and is undefined behaviour, whereas
// memcpy between two objects of equal size is well defined (and is normally
// optimised down to a plain move).
inline uint64_t as_uint64_t(double x){
  static_assert( sizeof(uint64_t) == sizeof(double), "double must be 64 bits wide" ) ;
  uint64_t bits ;
  std::memcpy( &bits, &x, sizeof(bits) ) ;
  return bits ;
}
// Bit pattern of R's NA_REAL constant, as a 64-bit unsigned integer.
//
// NOTE(review): in IEEE 754 terms a NaN whose most significant mantissa bit
// is clear is normally called a *signaling* NaN, so the quiet/signaling names
// used in this file look swapped. The name is kept because callers use it.
inline uint64_t quiet_na(){
  const uint64_t na_bits = as_uint64_t(NA_REAL) ;
  return na_bits ;
}
// Bit pattern of NA_REAL with the most significant mantissa bit (bit 51 of
// an IEEE 754 binary64 value) switched on.
//
// This is the pattern that `NA_REAL + 1` produces on hardware that quiets the
// NaN while preserving its payload. The bit is set explicitly instead of
// going through floating-point arithmetic, because the result of arithmetic
// on a NaN (which payload survives, whether the compiler folds it, fast-math
// flags) is platform dependent, while the integer OR is not.
inline uint64_t signaling_na(){
  const uint64_t top_mantissa_bit = static_cast<uint64_t>(1) << 51 ;
  return as_uint64_t(NA_REAL) | top_mantissa_bit ;
}
// Mask that keeps every bit except the one(s) in which signaling_na() and
// quiet_na() differ: the complement of (signaling_na() - quiet_na()).
// AND-ing a value with this mask maps both NA variants onto quiet_na().
inline uint64_t na_mask(){
  const uint64_t with_bit = signaling_na() ;
  const uint64_t without_bit = quiet_na() ;
  const uint64_t differing = with_bit - without_bit ;
  return ~differing ;
}
// Counts the elements of `x` whose bit pattern equals NA_REAL once the bits
// cleared by na_mask() are ignored.
//
// Only that NA pattern is matched: other NaN values are not counted, whereas
// R's is.na() is also TRUE for NaN.
//
// The return type is int, so the count is narrowed from the iterator
// difference type; it cannot represent more than INT_MAX matches.
// [[Rcpp::export]]
int sum_is_na( NumericVector x ){
  // NA as produced by quiet_na(), as a uint64_t
  const uint64_t na = quiet_na() ;
  // complement of the bit(s) in which the two NA variants differ
  const uint64_t mask = na_mask() ;

  // Walk the doubles themselves and copy each one into an integer with
  // memcpy. Iterating the buffer through a uint64_t* would read double
  // objects through an incompatible lvalue type (strict-aliasing violation).
  const auto hits = std::count_if(
    x.begin(), x.end(),
    [na,mask]( const double& value ){
      uint64_t bits ;
      std::memcpy( &bits, &value, sizeof(bits) ) ;
      return (bits & mask) == na ;
    }
  ) ;
  return static_cast<int>(hits) ;
}
// Reinterprets a 64-bit pattern as a double (no numeric conversion).
//
// The bytes are copied with std::memcpy rather than read through a
// reinterpret_cast'ed pointer: accessing a uint64_t object through a double
// lvalue breaks the strict-aliasing rule and is undefined behaviour.
inline double as_double( uint64_t x){
  static_assert( sizeof(double) == sizeof(uint64_t), "double must be 64 bits wide" ) ;
  double value ;
  std::memcpy( &value, &x, sizeof(value) ) ;
  return value ;
}
// Exposes na_mask() to R: the mask's 64 bits are returned reinterpreted as a
// double, so the R side can inspect the bit pattern.
// [[Rcpp::export]]
double NA_mask(){
  const uint64_t mask_bits = na_mask() ;
  return as_double(mask_bits) ;
}
/*** R
# Print the bit patterns of NA, NA + 1 and the mask returned by NA_mask().
seven31::reveal( NA, NA+1, NA_mask() )
library(microbenchmark)
# Compare base R's sum(is.na(x)) with the compiled sum_is_na(x).
#   n: length of the test vector (default 1e6).
# Returns the microbenchmark() result.
bench <- function(n = 1e6){
# n random normal values
x <- rnorm(n)
# overwrite n/10 randomly sampled positions with NA
x[ sample(n, n/10) ] <- NA
microbenchmark(
sum(is.na(x)),
sum_is_na(x)
)
}
# run the comparison on one million values
bench(1e6)
*/
@romainfrancois
Copy link
Author

>   seven31::reveal( NA, NA+1, NA_mask() )
0 11111111111 ( NaN ) 0000000000000000000000000000000000000000011110100010 : NA
0 11111111111 ( NaN ) 1000000000000000000000000000000000000000011110100010 : NA + 1
1 11111111111 ( NaN ) 0111111111111111111111111111111111111111111111111111 : NA_mask()
> 
> bench(1e6)
Unit: microseconds
          expr      min       lq      mean    median        uq      max neval cld
 sum(is.na(x)) 1864.742 2042.391 2821.2612 2131.4925 2827.3220 5757.621   100   b
  sum_is_na(x)  880.911  923.280  946.8286  937.3885  965.7565 1107.136   100  a 
> 
> bench(1e7)
Unit: milliseconds
          expr       min       lq      mean    median        uq       max neval cld
 sum(is.na(x)) 22.070579 23.54548 29.371915 30.271672 32.112142 147.06545   100   b
  sum_is_na(x)  6.416144  6.65291  7.110524  6.858852  7.203312  10.21368   100  a 

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment