Last active
March 19, 2017 05:14
-
-
Save jimhester/e725e1ad50a5a62f3dee to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# benchmarks from http://www.win-vector.com/blog/2015/07/efficient-accumulation-in-r/#more-3295 | |
mkRow <- function(nCol) { | |
x <- as.list(rnorm(nCol)) | |
# make row mixed types by changing first column to string | |
x[[1]] <- ifelse(x[[1]]>0,'pos','neg') | |
names(x) <- paste('x',seq_len(nCol),sep='.') | |
x | |
} | |
mkFrameForLoop <- function(nRow,nCol) { | |
set.seed(0) | |
d <- c() | |
for(i in seq_len(nRow)) { | |
ri <- mkRow(nCol) | |
di <- data.frame(ri, | |
stringsAsFactors=FALSE) | |
d <- rbind(d,di) | |
} | |
d | |
} | |
mkFrameList <- function(nRow,nCol) { | |
set.seed(0) | |
d <- lapply(seq_len(nRow),function(i) { | |
ri <- mkRow(nCol) | |
data.frame(ri, | |
stringsAsFactors=FALSE) | |
}) | |
do.call(rbind,d) | |
} | |
mkFrameBindRows <- function(nRow,nCol) { | |
set.seed(0) | |
d <- lapply(seq_len(nRow),function(i) { | |
ri <- mkRow(nCol) | |
data.frame(ri, | |
stringsAsFactors=FALSE) | |
}) | |
dplyr::bind_rows(d) | |
} | |
mkFrameVector <- function(nRow,nCol) { | |
set.seed(0) | |
res <- c(list(character(nRow)), replicate(nCol - 1, numeric(nRow), simplify = FALSE)) | |
for(i in seq_len(nRow)) { | |
ri <- mkRow(nCol) | |
for(j in seq_along(ri)) { | |
res[[j]][i] <- ri[[j]] | |
} | |
} | |
# fix the names | |
names(res) <- paste('x', | |
seq_len(nCol),sep='.') | |
data.frame(res, stringsAsFactors = FALSE) | |
} | |
mkRow2 <- function(nCol, fileloc) { | |
x <- as.list(rnorm(nCol)) | |
# make row mixed types by changing first column to string | |
x[[1]] <- ifelse(x[[1]]>0,'pos','neg') | |
writeLines(paste(x, collapse=" "), con = fileloc) | |
} | |
avoidAggregate <- function(nRow,nCol) { | |
set.seed(0) | |
some.file <- tempfile() | |
file.create(some.file) | |
cf <- file(some.file, open="a") | |
lapply(seq_len(nRow),function(i) {mkRow2(nCol, cf)}) | |
close(cf) | |
numnumer <- rep("numeric", nCol - 1) | |
res <- read.table(some.file, nrows=nRow, | |
colClasses= c("character", numnumer)) | |
names(res) <- paste('x', | |
seq_len(nCol),sep='.') | |
res | |
} | |
mkRowC <- function(nCol) { | |
xN <- rnorm(nCol) | |
x <- as.character(xN) | |
x[[1]] <- ifelse(xN[[1]]>0,'pos','neg') | |
x | |
} | |
mkFrameMat <- function(nRow,nCol) { | |
set.seed(0) | |
d <- matrix(data="", | |
nrow=nRow,ncol=nCol) | |
for(i in seq_len(nRow)) { | |
ri <- mkRowC(nCol) | |
d[i,] <- ri | |
} | |
d <- data.frame(d, | |
stringsAsFactors=FALSE) | |
if(nCol>1) { | |
for(i in 2:nCol) { | |
d[[i]] <- as.numeric(d[[i]]) | |
} | |
} | |
names(d) <- paste('x', | |
seq_len(nCol),sep='.') | |
d | |
} | |
library(microbenchmark) | |
library(compile) | |
rows = 10000 | |
columns = 100 | |
microbenchmark(a <- mkFrameForLoop(rows, columns), | |
b <- mkFrameList(rows, columns), | |
d <- mkFrameBindRows(rows, columns), | |
e <- avoidAggregate(rows, columns), | |
ec <- cmpfun(avoidAggregate)(rows, columns), | |
f <- mkFrameVector(rows, columns), | |
fc <- cmpfun(mkFrameVector)(rows, columns), | |
g <- mkFrameMat(rows, columns), | |
gc <- cmpfun(mkFrameMat)(rows, columns), | |
times = 1) | |
# Unit: milliseconds | |
# expr min lq mean median uq max neval | |
# a <- mkFrameForLoop(rows, columns) 145496.8312 145496.8312 145496.8312 145496.8312 145496.8312 145496.8312 1 | |
# b <- mkFrameList(rows, columns) 63811.0009 63811.0009 63811.0009 63811.0009 63811.0009 63811.0009 1 | |
# d <- mkFrameBindRows(rows, columns) 42721.5881 42721.5881 42721.5881 42721.5881 42721.5881 42721.5881 1 | |
# e <- avoidAggregate(rows, columns) 3618.5261 3618.5261 3618.5261 3618.5261 3618.5261 3618.5261 1 | |
# ec <- cmpfun(avoidAggregate)(rows, columns) 3666.2685 3666.2685 3666.2685 3666.2685 3666.2685 3666.2685 1 | |
# f <- mkFrameVector(rows, columns) 2809.1274 2809.1274 2809.1274 2809.1274 2809.1274 2809.1274 1 | |
# fc <- cmpfun(mkFrameVector)(rows, columns) 689.0486 689.0486 689.0486 689.0486 689.0486 689.0486 1 | |
# g <- mkFrameMat(rows, columns) 1182.1522 1182.1522 1182.1522 1182.1522 1182.1522 1182.1522 1 | |
# gc <- cmpfun(mkFrameMat)(rows, columns) 1932.6838 1932.6838 1932.6838 1932.6838 1932.6838 1932.6838 1 | |
all.equal(a, b) | |
all.equal(a, d, check.attributes = FALSE) # dplyr adds a few class names to the data frame | |
all.equal(a, e) | |
all.equal(a, ec) | |
all.equal(a, f) | |
all.equal(a, fc) | |
all.equal(a, g) | |
all.equal(a, gc) | |
# run the fast ones a few times to get more accurate timings | |
# Unit: milliseconds | |
# expr min lq mean median uq max neval cld | |
# e <- avoidAggregate(rows, columns) 3460.6368 3492.7273 3572.8147 3556.015 3675.985 3678.709 5 d | |
# ec <- cmpfun(avoidAggregate)(rows, columns) 3424.0553 3492.4201 3548.3598 3570.829 3596.581 3657.914 5 d | |
# f <- mkFrameVector(rows, columns) 2115.1614 2400.8824 2813.8144 2710.366 3155.541 3687.122 5 c | |
# fc <- cmpfun(mkFrameVector)(rows, columns) 575.5463 631.7447 708.3951 635.345 665.834 1033.506 5 a | |
# g <- mkFrameMat(rows, columns) 1279.0403 1286.2203 1328.8007 1322.776 1343.263 1412.704 5 b | |
# gc <- cmpfun(mkFrameMat)(rows, columns) 1228.0720 1299.3356 1419.9762 1330.457 1423.105 1818.911 5 b | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
You may mean
library(compiler)
rather thanlibrary(compile)
or is compile a non-CRAN package?