Created
February 10, 2014 19:21
-
-
Save kenahoo/8922376 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <Rcpp.h> | |
using namespace Rcpp; | |
//' Find "detectable change points" in a numeric data frame | |
//' | |
//' If you have a huge data frame that you want to do a line-plot of, this | |
//' function can help down-sample the data to remove consecutive redundancies | |
//' that would be undetectable in a plot whose resolution is higher than | |
//' \code{thresh}. It's similar *in spirit* to something like | |
//' \code{x <- diff(as.matrix(df)); which(apply(x, 1, function(r) any(r>thresh)))}, | |
//' but much faster, doesn't need additional memory, and catches slow drifts | |
//' that the aforecoded would miss. | |
//' | |
//' Our algorithm selects the first row in \code{df}, then progressively selects | |
//' the next row whose Manhattan distance is more than \code{thresh} away from | |
//' the previous selected row. The first and last rows are always selected. | |
//' | |
//' @param df A numeric data frame. | |
//' @param thresh The threshold to screen by. | |
//' @return A numeric vector of indices into \code{df}. | |
//' | |
//' @export | |
//' | |
// [[Rcpp::export]] | |
std::vector<long> downsampleForPlot(DataFrame df, std::vector<double> thresh) { | |
std::vector<double>::size_type m = df.length(); | |
if (m>1 && thresh.size() == 1) { | |
thresh.resize(m, thresh[0]); | |
} else if (m != thresh.size()) { | |
Rcpp::stop("Vector of thresholds must be length 1 or the same length as the number of columns in 'df'"); | |
} | |
std::vector<long> out(0); | |
int n = df.nrows(); | |
if (n==0) return out; | |
int last_i = 0; | |
out.push_back(last_i + 1); | |
NumericVector cols[m]; | |
for(size_t j=0; j<m; j++) { | |
cols[j] = df[j]; | |
} | |
for(int i=1; i<n; i++) { | |
for(size_t j=0; j<m; j++) { | |
if(fabs(cols[j][i]-cols[j][last_i])>thresh[j]) { | |
out.push_back(i+1); | |
last_i = i; | |
break; | |
} | |
} | |
} | |
// Select the nth row if it wasn't already selected. | |
if(last_i < n-1) | |
out.push_back(n); | |
return out; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment