Skip to content

Instantly share code, notes, and snippets.

@artemklevtsov
Last active April 9, 2017 06:34
Show Gist options
  • Save artemklevtsov/a6c5c4fb19416daa59943f7fb1f9dc96 to your computer and use it in GitHub Desktop.
Save artemklevtsov/a6c5c4fb19416daa59943f7fb1f9dc96 to your computer and use it in GitHub Desktop.
#include <Rcpp.h>
using namespace Rcpp;
RObject get_elem(const RObject& x, std::size_t i) {
switch(x.sexp_type()) {
case INTSXP: {
RObject res = wrap(INTEGER(x)[i]);
if (Rf_isFactor(x)) {
res.attr("class") = "factor";
res.attr("levels") = x.attr("levels");
}
return res;
}
case REALSXP: return wrap(REAL(x)[i]);
case CPLXSXP: return wrap(COMPLEX(x)[i]);
case LGLSXP: return wrap(LOGICAL(x)[i]);
case STRSXP: return wrap(CHAR(STRING_ELT(x, i)));
default: stop("Unsupported type '%s'.", type2name(x));
}
}
// [[Rcpp::export]]
List df2lst(const DataFrame& x) {
std::size_t nrows = x.rows();
std::size_t ncols = x.cols();
CharacterVector nms = x.names();
List res(no_init(nrows));
for (std::size_t i = 0; i < nrows; ++i) {
List tmp(no_init(ncols));
for (std::size_t j = 0; j < ncols; ++j)
tmp[j] = get_elem(VECTOR_ELT(x, j), i);
tmp.attr("class") = "data.frame";
tmp.attr("row.names") = 1;
tmp.attr("names") = nms;
res[i] = tmp;
}
res.attr("names") = x.attr("row.names");
return res;
}
// [[Rcpp::export]]
List df2lst2(const DataFrame& x) {
std::size_t nrows = x.rows();
std::size_t ncols = x.cols();
CharacterVector nms = x.names();
List res(no_init(nrows));
for (std::size_t i = 0; i < nrows; ++i) {
List tmp(no_init(ncols));
for (std::size_t j = 0; j < ncols; ++j) {
if (is<IntegerVector>(x[j])) {
IntegerVector t = as<IntegerVector>(x[j]);
if (Rf_isFactor(t)) {
RObject t2 = wrap(t[i]);
t2.attr("class") = "factor";
t2.attr("levels") = t.attr("levels");
tmp[j] = t2;
} else
tmp[j] = t[i];
} else if (is<LogicalVector>(x[j]))
tmp[j] = as<LogicalVector>(x[j])[i];
else if (is<NumericVector>(x[j]))
tmp[j] = as<NumericVector>(x[j])[i];
else if (is<ComplexVector>(x[j]))
tmp[j] = as<ComplexVector>(x[j])[i];
else if (is<CharacterVector>(x[j]))
tmp[j] = as<std::string>(as<CharacterVector>(x[j])[i]);
else
stop("Unsupported type '%s'.", type2name(x));
}
tmp.attr("class") = "data.frame";
tmp.attr("row.names") = 1;
tmp.attr("names") = nms;
res[i] = tmp;
}
res.attr("names") = x.attr("row.names");
return res;
}
// [[Rcpp::export]]
List df2lst3(const DataFrame& x) {
std::size_t nrows = x.rows();
std::size_t ncols = x.cols();
CharacterVector nms = x.names();
List res(no_init(nrows));
for (std::size_t i = 0; i < nrows; ++i) {
List tmp(no_init(ncols));
for (std::size_t j = 0; j < ncols; ++j) {
switch(TYPEOF(x[j])) {
case INTSXP: {
IntegerVector t = as<IntegerVector>(x[j]);
if (Rf_isFactor(t)) {
RObject t2 = wrap(t[i]);
t2.attr("class") = "factor";
t2.attr("levels") = t.attr("levels");
tmp[j] = t2;
} else
tmp[j] = t[i];
break;
}
case LGLSXP: {
tmp[j] = as<LogicalVector>(x[j])[i];
break;
}
case CPLXSXP: {
tmp[j] = as<ComplexVector>(x[j])[i];
break;
}
case REALSXP: {
tmp[j] = as<NumericVector>(x[j])[i];
break;
}
case STRSXP: {
tmp[j] = as<std::string>(as<CharacterVector>(x[j])[i]);
break;
}
default: stop("Unsupported type '%s'.", type2name(x));
}
}
tmp.attr("class") = "data.frame";
tmp.attr("row.names") = 1;
tmp.attr("names") = nms;
res[i] = tmp;
}
res.attr("names") = x.attr("row.names");
return res;
}
/***R
data("Batting", package = "Lahman")
x <- Batting[1:10000, 1:20]
library(benchr)
library(purrr)
benchmark(
purrr = by_row(x, function(v) list(v)[[1L]], .collate = "list")$.out,
rcpp1 = df2lst(x),
rcpp2 = df2lst2(x),
rcpp3 = df2lst3(x)
)
*/
#include <Rcpp.h>
using namespace Rcpp;
// [[Rcpp::export]]
List df2lst(const DataFrame& x) {
std::size_t nrows = x.rows();
std::size_t ncols = x.cols();
CharacterVector nms = x.names();
List res(no_init(nrows));
for (std::size_t i = 0; i < nrows; ++i) {
List tmp(no_init(ncols));
for (std::size_t j = 0; j < ncols; ++j) {
switch(TYPEOF(x[j])) {
case INTSXP: {
if (Rf_isFactor(x[j])) {
IntegerVector t = as<IntegerVector>(x[j]);
RObject t2 = wrap(t[i]);
t2.attr("class") = "factor";
t2.attr("levels") = t.attr("levels");
tmp[j] = t2;
} else {
tmp[j] = as<IntegerVector>(x[j])[i];
}
break;
}
case LGLSXP: {
tmp[j] = as<LogicalVector>(x[j])[i];
break;
}
case CPLXSXP: {
tmp[j] = as<ComplexVector>(x[j])[i];
break;
}
case REALSXP: {
tmp[j] = as<NumericVector>(x[j])[i];
break;
}
case STRSXP: {
tmp[j] = as<std::string>(as<CharacterVector>(x[j])[i]);
break;
}
default: stop("Unsupported type '%s'.", type2name(x));
}
}
tmp.attr("class") = "data.frame";
tmp.attr("row.names") = 1;
tmp.names() = nms;
res[i] = tmp;
}
res.names() = x.attr("row.names");
return res;
}
// [[Rcpp::export]]
DataFrame lst2df(const ListOf<DataFrame>& x) {
std::size_t nrows = x.size();
std::size_t ncols = x[0].size();
List res(no_init(ncols));
for (std::size_t j = 0; j < ncols; ++j) {
switch(TYPEOF(x[0][j])) {
case INTSXP: {
if (Rf_isFactor(x[0][j])) {
IntegerVector t = IntegerVector(no_init(nrows));
t.attr("class") = "factor";
RObject t2 = x[0][j];
t.attr("levels") = t2.attr("levels");
res[j] = t;
} else
res[j] = IntegerVector(no_init(nrows));
break;
}
case LGLSXP: {
res[j] = LogicalVector(no_init(nrows));
break;
}
case REALSXP: {
res[j] = NumericVector(no_init(nrows));
break;
}
case CPLXSXP: {
res[j] = ComplexVector(no_init(nrows));
break;
}
case STRSXP: {
res[j] = CharacterVector(no_init(nrows));
break;
}
default: stop("Unsupported type '%s'.", type2name(x));
}
}
for (std::size_t j = 0; j < ncols; ++j) {
switch(TYPEOF(res[j])) {
case INTSXP: {
IntegerVector t = res[j];
for (std::size_t i = 0; i < nrows; ++i)
t[i] = x[i][j];
break;
}
case LGLSXP: {
LogicalVector t = res[j];
for (std::size_t i = 0; i < nrows; ++i)
t[i] = x[i][j];
break;
}
case CPLXSXP: {
ComplexVector t = res[j];
for (std::size_t i = 0; i < nrows; ++i)
t[i] = x[i][j];
break;
}
case REALSXP: {
NumericVector t = res[j];
for (std::size_t i = 0; i < nrows; ++i)
t[i] = x[i][j];
break;
}
case STRSXP: {
CharacterVector t = res[j];
for (std::size_t i = 0; i < nrows; ++i)
t[i] = as<String>(x[i][j]);
break;
}
}
}
res.attr("class") = "data.frame";
res.names() = x[0].names();
res.attr("row.names") = IntegerVector::create(NA_INTEGER, -(nrows));
return res;
}
/***R
data("Batting", package = "Lahman")
x <- df2lst(Batting[1:10000, 1:20])
library(benchr)
library(dplyr)
benchmark(
lst2df(x),
bind_rows(x)
)
*/
@artemklevtsov
Copy link
Author

artemklevtsov commented Apr 9, 2017

Benchmark summary:
Time units : milliseconds 
         expr n.eval   min lw.qu median  mean up.qu   max total relative
    lst2df(x)    100  36.8    38   39.1  39.4  40.2  47.3  3940     1.00
 bind_rows(x)    100 242.0   256  264.0 264.0 269.0 311.0 26400     6.75

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment