Last active
January 1, 2016 16:09
-
-
Save kevinushey/8168394 to your computer and use it in GitHub Desktop.
Faster NA checking with punning
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <Rcpp.h> | |
| using namespace Rcpp; | |
| union DoublePunner { | |
| double d; | |
| uint64_t p; | |
| }; | |
| static DoublePunner NA_PUNNED = { NA_REAL }; | |
| // [[Rcpp::export]] | |
| LogicalVector IsNA_(NumericVector x) { | |
| int n = x.size(); | |
| LogicalVector output = no_init(n); | |
| for (int i=0; i < n; ++i) { | |
| DoublePunner xi = { x[i] }; | |
| output[i] = xi.p == NA_PUNNED.p; | |
| } | |
| return output; | |
| } | |
| // [[Rcpp::export]] | |
| LogicalVector memcmp_IsNA_(NumericVector x) { | |
| int n = x.size(); | |
| LogicalVector output = no_init(n); | |
| for (int i=0; i < n; ++i) { | |
| output[i] = memcmp( | |
| (char*) (&x[i]), | |
| (char*) (&NA_REAL), | |
| sizeof(double) | |
| ) == 0; | |
| } | |
| return output; | |
| } | |
| // [[Rcpp::export]] | |
| LogicalVector R_IsNA_(NumericVector x) { | |
| int n = x.size(); | |
| LogicalVector output = no_init(n); | |
| for (int i=0; i < n; ++i) { | |
| output[i] = R_IsNA(x[i]); | |
| } | |
| return output; | |
| } | |
/*** R
library(microbenchmark)

# Two extremes: a vector with no missing values and one that is entirely NA.
noNA <- rnorm(1E6)
allNA <- rep(NA_real_, 1E6)

# Print whether the three implementations agree. Both inputs are checked:
# on noNA alone, an implementation that never returns TRUE would still pass.
identical( IsNA_(noNA), R_IsNA_(noNA) )
identical( IsNA_(noNA), memcmp_IsNA_(noNA) )
identical( IsNA_(allNA), R_IsNA_(allNA) )
identical( IsNA_(allNA), memcmp_IsNA_(allNA) )

# Timings when no element is NA.
microbenchmark(
  IsNA_(noNA),
  memcmp_IsNA_(noNA),
  R_IsNA_(noNA)
)

# Timings when every element is NA.
microbenchmark(
  IsNA_(allNA),
  memcmp_IsNA_(allNA),
  R_IsNA_(allNA)
)
*/
| # > microbenchmark( | |
| # + IsNA_(noNA), | |
| # + memcmp_IsNA_(noNA), | |
| # + R_IsNA_(noNA) | |
| # + ) | |
| # Unit: microseconds | |
| # expr min lq median uq max neval | |
| # IsNA_(noNA) 884.262 1099.243 1174.779 1270.940 5260.795 100 | |
| # memcmp_IsNA_(noNA) 933.734 1164.753 1187.450 1282.278 4796.834 100 | |
| # R_IsNA_(noNA) 3198.864 3481.547 3507.371 3661.630 6710.078 100 | |
| # | |
| # > microbenchmark( | |
| # + IsNA_(allNA), | |
| # + memcmp_IsNA_(allNA), | |
| # + R_IsNA_(allNA) | |
| # + ) | |
| # Unit: microseconds | |
| # expr min lq median uq max neval | |
| # IsNA_(allNA) 888.197 1131.228 1171.039 1259.298 4852.233 100 | |
| # memcmp_IsNA_(allNA) 915.552 1140.643 1187.067 1254.516 4892.021 100 | |
| # R_IsNA_(allNA) 2941.444 3200.485 3226.592 3415.858 7474.285 100 |
Author
Done, seems independent (more or less); more importantly, type punning still wins in each case.
Author
One could get identical performance with probably more readable semantics simply using memcmp.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
It would be interesting to split the tests in two. What is the performance when: