Created
January 22, 2014 17:54
-
-
Save RanaivosonHerimanitra/8563752 to your computer and use it in GitHub Desktop.
Improvement of the previous "impute" code: instead of looping over every row a second time, the indices of the missing rows are recorded during the first pass...
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <Rcpp.h> | |
using namespace Rcpp; | |
// Mean-impute the missing values of every column of a data frame.
//
// Each column is scanned once: the sum of the observed (non-NA) values is
// accumulated and the row index of every NA entry is recorded. The recorded
// rows are then overwritten with the column mean, so the column never has to
// be scanned a second time.
//
// @param df  Input data frame. Every column is converted to NumericVector,
//            so the columns are expected to be numeric.
// @return    A list holding a deep copy of df in which each NA entry has been
//            replaced by the mean of the observed values of its column.
//            A column without any observed value is returned unchanged.
// [[Rcpp::export]]
List modiframe2(DataFrame& df ) {
    const int nrow = df.nrows();
    const int ncol = df.size();

    // Deep copy: all writes below go to this copy, not to the caller's data.
    List output = clone(df);

    // Row indices of the NA entries of the current column. Allocated once and
    // reused (clear() keeps the capacity) instead of reallocating per column.
    std::vector<int> missing_rows;
    missing_rows.reserve(nrow);

    for (int i = 0; i < ncol; ++i) {
        // Take the column from the copy; taking it from df would let the
        // imputation write through to the caller's vector.
        NumericVector column = output[i];

        // Per-column state: the sum must restart at zero for every column.
        double sum = 0.0;
        missing_rows.clear();

        for (int j = 0; j < nrow; ++j) {
            if (R_IsNA(column[j])) {
                missing_rows.push_back(j);
            } else {
                sum += column[j];
            }
        }

        // Nothing to impute, or no observed value to build a mean from
        // (this also covers a data frame with zero rows).
        const int observed = nrow - static_cast<int>(missing_rows.size());
        if (!missing_rows.empty() && observed > 0) {
            // Mean over the observed values only; dividing by nrow would
            // count the missing rows as zeros.
            const double mean = sum / observed;
            for (std::size_t u = 0; u < missing_rows.size(); ++u) {
                column[missing_rows[u]] = mean;
            }
        }

        // Store the (possibly type-converted) column back into the result.
        output[i] = column;
    }
    return output;
}
/*** R | |
library(microbenchmark) | |
microbenchmark(modiframe(train), modiframe2(train) ) | |
#Unit: milliseconds | |
# expr min lq median uq max neval | |
# modiframe(train) 855.1824 880.3109 993.4610 2060.079 2673.451 100 | |
# modiframe2(train) 621.3642 716.5141 780.8182 1632.077 2285.674 100 | |
*/ | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment