Created
December 30, 2013 19:56
-
-
Save romainfrancois/8187193 to your computer and use it in GitHub Desktop.
unordered pairs. This thread http://permalink.gmane.org/gmane.comp.lang.r.rcpp/6562 on Rcpp-devel
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ndiff <- 2597797 | |
f <- function(){ | |
n <- round(runif( 1, min = 4, max = 10 )) | |
paste( sample(letters, n, replace = TRUE ), collapse = "" ) | |
} | |
a <- replicate( ndiff, f() ) | |
b <- replicate( ndiff, f() ) | |
rows <- sample( 1:ndiff, 3018671, replace = T ) | |
cols <- sample( c(T,F), 3018671, replace = T ) | |
m <- cbind( | |
ifelse(cols, a, b), | |
ifelse(cols, b, a) | |
)[rows,] | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <Rcpp.h> | |
using namespace Rcpp ; | |
class StringPairHash{ | |
public: | |
StringPairHash( const StringMatrix& data_, int colA, int colB ) : data(data_){ | |
int nr = data.nrow() ; | |
SEXP* start = Rcpp::internal::r_vector_start<STRSXP>(data) ; | |
columnA = start + nr*(colA-1) ; | |
columnB = start + nr*(colB-1) ; | |
} | |
size_t operator()(int i){ | |
SEXP x = columnA[i] ; | |
SEXP y = columnB[i] ; | |
if( x < y ){ | |
return hash(x,y) ; | |
} else { | |
return hash(y,x) ; | |
} | |
} | |
private: | |
inline size_t hash(SEXP first, SEXP second){ | |
std::hash<SEXP> hasher ; | |
size_t seed = hasher(first) ; | |
seed ^= hasher(second) + 0x9e3779b9 + (seed<<6) + (seed>>2); | |
return seed ; | |
} | |
StringMatrix data ; | |
SEXP* columnA ; | |
SEXP* columnB ; | |
} ; | |
class StringPairEqual{ | |
public: | |
StringPairEqual( const StringMatrix& data_, int colA, int colB ) : data(data_){ | |
int nr = data.nrow() ; | |
SEXP* start = Rcpp::internal::r_vector_start<STRSXP>(data) ; | |
columnA = start + nr*(colA-1) ; | |
columnB = start + nr*(colB-1) ; | |
} | |
bool operator()(int i, int j){ | |
SEXP xA = columnA[i], xB = columnB[i] ; | |
SEXP yA = columnA[j], yB = columnB[j] ; | |
return ( xA == yA && xB == yB ) || ( xA == yB && xB == yA ) ; | |
} | |
private: | |
StringMatrix data ; | |
SEXP* columnA ; | |
SEXP* columnB ; | |
} ; | |
typedef std::unordered_set<int,StringPairHash,StringPairEqual> Set ; | |
// [[Rcpp::export]] | |
SEXP unordered_pairs( StringMatrix input, int colA, int colB ){ | |
StringPairHash hasher( input, colA, colB) ; | |
StringPairEqual equal( input, colA, colB) ; | |
Set set( 1024, hasher, equal ) ; | |
int nr = input.nrow() ; | |
for( int i=0; i<nr; i++) | |
set.insert(i) ; | |
int n = set.size() ; | |
StringMatrix out(n,2) ; | |
Set::const_iterator it = set.begin() ; | |
for( int i=0; i<n; i++, ++it){ | |
int j = *it ; | |
out(i,0) = input(i,colA-1) ; | |
out(i,1) = input(i,colB-1) ; | |
} | |
return out ; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
> microbenchmark( unordered_pairs( m, 1, 2 ), times=10 ) | |
Unit: seconds | |
expr min lq median uq max neval | |
unordered_pairs(m, 1, 2) 2.534141 2.536889 2.556607 2.714883 2.804312 10 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment