Last active
April 9, 2017 06:34
-
-
Save artemklevtsov/a6c5c4fb19416daa59943f7fb1f9dc96 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <Rcpp.h> | |
using namespace Rcpp; | |
RObject get_elem(const RObject& x, std::size_t i) { | |
switch(x.sexp_type()) { | |
case INTSXP: { | |
RObject res = wrap(INTEGER(x)[i]); | |
if (Rf_isFactor(x)) { | |
res.attr("class") = "factor"; | |
res.attr("levels") = x.attr("levels"); | |
} | |
return res; | |
} | |
case REALSXP: return wrap(REAL(x)[i]); | |
case CPLXSXP: return wrap(COMPLEX(x)[i]); | |
case LGLSXP: return wrap(LOGICAL(x)[i]); | |
case STRSXP: return wrap(CHAR(STRING_ELT(x, i))); | |
default: stop("Unsupported type '%s'.", type2name(x)); | |
} | |
} | |
// [[Rcpp::export]] | |
List df2lst(const DataFrame& x) { | |
std::size_t nrows = x.rows(); | |
std::size_t ncols = x.cols(); | |
CharacterVector nms = x.names(); | |
List res(no_init(nrows)); | |
for (std::size_t i = 0; i < nrows; ++i) { | |
List tmp(no_init(ncols)); | |
for (std::size_t j = 0; j < ncols; ++j) | |
tmp[j] = get_elem(VECTOR_ELT(x, j), i); | |
tmp.attr("class") = "data.frame"; | |
tmp.attr("row.names") = 1; | |
tmp.attr("names") = nms; | |
res[i] = tmp; | |
} | |
res.attr("names") = x.attr("row.names"); | |
return res; | |
} | |
// [[Rcpp::export]] | |
List df2lst2(const DataFrame& x) { | |
std::size_t nrows = x.rows(); | |
std::size_t ncols = x.cols(); | |
CharacterVector nms = x.names(); | |
List res(no_init(nrows)); | |
for (std::size_t i = 0; i < nrows; ++i) { | |
List tmp(no_init(ncols)); | |
for (std::size_t j = 0; j < ncols; ++j) { | |
if (is<IntegerVector>(x[j])) { | |
IntegerVector t = as<IntegerVector>(x[j]); | |
if (Rf_isFactor(t)) { | |
RObject t2 = wrap(t[i]); | |
t2.attr("class") = "factor"; | |
t2.attr("levels") = t.attr("levels"); | |
tmp[j] = t2; | |
} else | |
tmp[j] = t[i]; | |
} else if (is<LogicalVector>(x[j])) | |
tmp[j] = as<LogicalVector>(x[j])[i]; | |
else if (is<NumericVector>(x[j])) | |
tmp[j] = as<NumericVector>(x[j])[i]; | |
else if (is<ComplexVector>(x[j])) | |
tmp[j] = as<ComplexVector>(x[j])[i]; | |
else if (is<CharacterVector>(x[j])) | |
tmp[j] = as<std::string>(as<CharacterVector>(x[j])[i]); | |
else | |
stop("Unsupported type '%s'.", type2name(x)); | |
} | |
tmp.attr("class") = "data.frame"; | |
tmp.attr("row.names") = 1; | |
tmp.attr("names") = nms; | |
res[i] = tmp; | |
} | |
res.attr("names") = x.attr("row.names"); | |
return res; | |
} | |
// [[Rcpp::export]] | |
List df2lst3(const DataFrame& x) { | |
std::size_t nrows = x.rows(); | |
std::size_t ncols = x.cols(); | |
CharacterVector nms = x.names(); | |
List res(no_init(nrows)); | |
for (std::size_t i = 0; i < nrows; ++i) { | |
List tmp(no_init(ncols)); | |
for (std::size_t j = 0; j < ncols; ++j) { | |
switch(TYPEOF(x[j])) { | |
case INTSXP: { | |
IntegerVector t = as<IntegerVector>(x[j]); | |
if (Rf_isFactor(t)) { | |
RObject t2 = wrap(t[i]); | |
t2.attr("class") = "factor"; | |
t2.attr("levels") = t.attr("levels"); | |
tmp[j] = t2; | |
} else | |
tmp[j] = t[i]; | |
break; | |
} | |
case LGLSXP: { | |
tmp[j] = as<LogicalVector>(x[j])[i]; | |
break; | |
} | |
case CPLXSXP: { | |
tmp[j] = as<ComplexVector>(x[j])[i]; | |
break; | |
} | |
case REALSXP: { | |
tmp[j] = as<NumericVector>(x[j])[i]; | |
break; | |
} | |
case STRSXP: { | |
tmp[j] = as<std::string>(as<CharacterVector>(x[j])[i]); | |
break; | |
} | |
default: stop("Unsupported type '%s'.", type2name(x)); | |
} | |
} | |
tmp.attr("class") = "data.frame"; | |
tmp.attr("row.names") = 1; | |
tmp.attr("names") = nms; | |
res[i] = tmp; | |
} | |
res.attr("names") = x.attr("row.names"); | |
return res; | |
} | |
/***R | |
data("Batting", package = "Lahman") | |
x <- Batting[1:10000, 1:20] | |
library(benchr) | |
library(purrr) | |
benchmark( | |
purrr = by_row(x, function(v) list(v)[[1L]], .collate = "list")$.out, | |
rcpp1 = df2lst(x), | |
rcpp2 = df2lst2(x), | |
rcpp3 = df2lst3(x) | |
) | |
*/ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <Rcpp.h> | |
using namespace Rcpp; | |
// [[Rcpp::export]] | |
List df2lst(const DataFrame& x) { | |
std::size_t nrows = x.rows(); | |
std::size_t ncols = x.cols(); | |
CharacterVector nms = x.names(); | |
List res(no_init(nrows)); | |
for (std::size_t i = 0; i < nrows; ++i) { | |
List tmp(no_init(ncols)); | |
for (std::size_t j = 0; j < ncols; ++j) { | |
switch(TYPEOF(x[j])) { | |
case INTSXP: { | |
if (Rf_isFactor(x[j])) { | |
IntegerVector t = as<IntegerVector>(x[j]); | |
RObject t2 = wrap(t[i]); | |
t2.attr("class") = "factor"; | |
t2.attr("levels") = t.attr("levels"); | |
tmp[j] = t2; | |
} else { | |
tmp[j] = as<IntegerVector>(x[j])[i]; | |
} | |
break; | |
} | |
case LGLSXP: { | |
tmp[j] = as<LogicalVector>(x[j])[i]; | |
break; | |
} | |
case CPLXSXP: { | |
tmp[j] = as<ComplexVector>(x[j])[i]; | |
break; | |
} | |
case REALSXP: { | |
tmp[j] = as<NumericVector>(x[j])[i]; | |
break; | |
} | |
case STRSXP: { | |
tmp[j] = as<std::string>(as<CharacterVector>(x[j])[i]); | |
break; | |
} | |
default: stop("Unsupported type '%s'.", type2name(x)); | |
} | |
} | |
tmp.attr("class") = "data.frame"; | |
tmp.attr("row.names") = 1; | |
tmp.names() = nms; | |
res[i] = tmp; | |
} | |
res.names() = x.attr("row.names"); | |
return res; | |
} | |
// [[Rcpp::export]] | |
DataFrame lst2df(const ListOf<DataFrame>& x) { | |
std::size_t nrows = x.size(); | |
std::size_t ncols = x[0].size(); | |
List res(no_init(ncols)); | |
for (std::size_t j = 0; j < ncols; ++j) { | |
switch(TYPEOF(x[0][j])) { | |
case INTSXP: { | |
if (Rf_isFactor(x[0][j])) { | |
IntegerVector t = IntegerVector(no_init(nrows)); | |
t.attr("class") = "factor"; | |
RObject t2 = x[0][j]; | |
t.attr("levels") = t2.attr("levels"); | |
res[j] = t; | |
} else | |
res[j] = IntegerVector(no_init(nrows)); | |
break; | |
} | |
case LGLSXP: { | |
res[j] = LogicalVector(no_init(nrows)); | |
break; | |
} | |
case REALSXP: { | |
res[j] = NumericVector(no_init(nrows)); | |
break; | |
} | |
case CPLXSXP: { | |
res[j] = ComplexVector(no_init(nrows)); | |
break; | |
} | |
case STRSXP: { | |
res[j] = CharacterVector(no_init(nrows)); | |
break; | |
} | |
default: stop("Unsupported type '%s'.", type2name(x)); | |
} | |
} | |
for (std::size_t j = 0; j < ncols; ++j) { | |
switch(TYPEOF(res[j])) { | |
case INTSXP: { | |
IntegerVector t = res[j]; | |
for (std::size_t i = 0; i < nrows; ++i) | |
t[i] = x[i][j]; | |
break; | |
} | |
case LGLSXP: { | |
LogicalVector t = res[j]; | |
for (std::size_t i = 0; i < nrows; ++i) | |
t[i] = x[i][j]; | |
break; | |
} | |
case CPLXSXP: { | |
ComplexVector t = res[j]; | |
for (std::size_t i = 0; i < nrows; ++i) | |
t[i] = x[i][j]; | |
break; | |
} | |
case REALSXP: { | |
NumericVector t = res[j]; | |
for (std::size_t i = 0; i < nrows; ++i) | |
t[i] = x[i][j]; | |
break; | |
} | |
case STRSXP: { | |
CharacterVector t = res[j]; | |
for (std::size_t i = 0; i < nrows; ++i) | |
t[i] = as<String>(x[i][j]); | |
break; | |
} | |
} | |
} | |
res.attr("class") = "data.frame"; | |
res.names() = x[0].names(); | |
res.attr("row.names") = IntegerVector::create(NA_INTEGER, -(nrows)); | |
return res; | |
} | |
/***R | |
data("Batting", package = "Lahman") | |
x <- df2lst(Batting[1:10000, 1:20]) | |
library(benchr) | |
library(dplyr) | |
benchmark( | |
lst2df(x), | |
bind_rows(x) | |
) | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uh oh!
There was an error while loading. Please reload this page.