Skip to content

Instantly share code, notes, and snippets.

@mrdwab
Created July 23, 2014 17:43
Show Gist options
  • Save mrdwab/9464e97957e843f3eb91 to your computer and use it in GitHub Desktop.
Save mrdwab/9464e97957e843f3eb91 to your computer and use it in GitHub Desktop.
Col1 <- c("a", "b","b",NA)
Col2 <- c(NA, "a", "c", NA)
Col3 <- c(NA,NA,"b", "a")
dat <- data.frame(Col1, Col2, Col3)
fun1 <- function() {
cbind(dat,
apply(table(cbind(rn = 1:nrow(dat),
stack(lapply(dat, as.character)))),
c(1, 2), sum))
}
fun2 <- function() {
levs <- unique(unlist(dat))
count <- t(apply(dat, 1, function(x) table(factor(x, levels = levs))))
cbind(dat, count)
}
fun3 <- function() {
cbind(dat,aggregate(value~Var2, melt(t(dat)), FUN=table)[,-1])
}
fun4 <- function() {
unq_values <- unique(unlist(dat))
unq_values <- unq_values[!is.na(unq_values)]
freq_vec <- function(u) apply(dat, 1, function(x) sum(grepl(u, x)))
cbind(dat, sapply(unq_values, freq_vec))
}
library(microbenchmark)
library(reshape2)
microbenchmark(fun1(), fun2(), fun3(), fun4())
# Unit: milliseconds
# expr min lq median uq max neval
# fun1() 1.882373 1.981502 2.031227 2.074144 4.193716 100
# fun2() 2.201289 2.271821 2.316432 2.346138 5.147774 100
# fun3() 6.565937 6.821392 6.928942 7.078843 11.700034 100
# fun4() 2.043613 2.120811 2.151803 2.206342 5.283656 100
dat <- do.call(rbind, replicate(5000, dat, FALSE))
dim(dat)
# [1] 20000 3
system.time(fun1())
# user system elapsed
# 0.657 0.004 0.662
system.time(fun2())
# user system elapsed
# 7.730 0.029 7.787
system.time(fun3())
# user system elapsed
# 16.795 0.063 16.887
system.time(fun4())
# user system elapsed
# 2.128 0.011 2.141
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment