Last active
August 27, 2019 02:48
-
-
Save stufield/690ee1c5e8d85017423aa8be457d6a55 to your computer and use it in GitHub Desktop.
SeqId Matching with Rcpp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <Rcpp.h> | |
| #include <unordered_map> | |
| using namespace Rcpp; | |
| using namespace std; | |
| /* Trying to speed up matchSeqIds() | |
| Generate a C++ version called matchseq_cpp() | |
| Install inside matchSeqIds() - a wrapper around matchseq_cpp() | |
| */ | |
| // [[Rcpp::export]] | |
| Rcpp::StringVector | |
| matchseq_cpp(CharacterVector x, /* string containing SeqId entries */ | |
| CharacterVector y, /* string containing SeqIds to match x */ | |
| bool order_by_x = true) {. /* logical on ordering */ | |
| Function getSeqId("getSeqId"); /* import external getSeqId() function */ | |
| Function base_intersect("intersect"); /* cannot use sugar; order differs; small speed cost tho */ | |
| CharacterVector x_seqs = getSeqId(x); /* get SeqIds; non-seqs are NA */ | |
| x_seqs = Rcpp::na_omit(x_seqs); /* rm NAs */ | |
| CharacterVector y_seqs = getSeqId(y); /* get SeqIds from 'y' */ | |
| int L = y.size(); | |
| std::unordered_map< std::string, std::string > hashmap; /* initialize hashmap */ | |
| for(int i = 0; i < L; i++) { | |
| hashmap.insert(std::make_pair(y_seqs[i], y[i])); /* populate hashmap */ | |
| } | |
| CharacterVector ord_seqs(L); /* initiate vector for intersect */ | |
| if (order_by_x) { | |
| ord_seqs = base_intersect(x_seqs, y_seqs); /* get SeqId intersection of x & y */ | |
| } else { | |
| ord_seqs = base_intersect(y_seqs, x_seqs); /* get SeqId intersection of y & x */ | |
| } | |
| int n = ord_seqs.size(); | |
| Rcpp::StringVector res(n); | |
| for(int j = 0; j < n; j++) { | |
| /* get the corresponding 'y' values */ | |
| res[j] = hashmap[ Rcpp::as< std::string >(ord_seqs[j]) ]; | |
| } | |
| return res; | |
| } | |
| /***R | |
| x <- c("seq.4554.56", "seq.3714.49", "PlateId") | |
| y <- c("Group", "3714-49", "Assay", "4554-56") | |
| bench::mark( | |
| cpp = matchseq_cpp(x, y), | |
| orig = matchSeqIds(x, y), | |
| iterations = 1000 | |
| ) %>% summary(relative = TRUE) | |
| */ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment