Skip to content

Instantly share code, notes, and snippets.

@mrdwab
Last active December 18, 2015 08:40
Show Gist options
  • Select an option

  • Save mrdwab/3ff4666fc04a5e146d6f to your computer and use it in GitHub Desktop.

Select an option

Save mrdwab/3ff4666fc04a5e146d6f to your computer and use it in GitHub Desktop.
# Spread list columns into one plain column per element position.
#
# Arguments:
#   indt  A data.table, or an object that as.data.table() can convert.
#   cols  Character vector naming the list columns to spread.
#   drop  If TRUE, the original list columns named in `cols` are removed
#         after the new columns have been added.
#
# Returns:
#   The data.table with new columns named "<col>_1", "<col>_2", ...; each
#   list column gets as many new columns as its longest element.
#
# Note: the new columns are added with `:=`, so when `indt` already is a
# data.table it is updated in place rather than copied.
flatten <- function(indt, cols, drop = FALSE) {
  if (!is.data.table(indt)) indt <- as.data.table(indt)
  # Longest element per list column = number of new columns it needs.
  widths <- unlist(
    indt[, lapply(.SD, function(v) max(lengths(v))), .SDcols = cols]
  )
  nams <- paste(rep(cols, widths), sequence(widths), sep = "_")
  # transpose() turns each list column into a list of equal-length vectors,
  # one per element position; unlist(recursive = FALSE) lines them up with
  # `nams`.
  indt[, (nams) := unlist(lapply(.SD, transpose), recursive = FALSE),
       .SDcols = cols]
  # The widening above is done exactly once; dropping only has to remove the
  # source columns instead of repeating the whole assignment.
  if (isTRUE(drop)) {
    indt[, (cols) := NULL]
  }
  indt[]
}
library(splitstackshape)
# Build the sample data one column at a time: each step splits one
# delimited column of `concat.test` into a list column and drops the
# original text column.
df <- cSplit_l(concat.test, "Likes", ",", drop = TRUE)
df <- cSplit_l(df, "Siblings", " , ", drop = TRUE)
df <- cSplit_l(df, "Hates", ";", drop = TRUE)
names(df) <- c("Name", "Likes", "Siblings", "Hates")
## Bigger data...
# Stack 1000 copies of `df` on top of each other.
df_copies <- replicate(1000, df, simplify = FALSE)
df2 <- do.call(rbind, df_copies)
## Tests
# NOTE(review): flatten() adds columns with `:=`; if `df`/`df2` are
# data.tables these calls may change them in place before the later
# benchmarks run -- confirm.
list_cols <- c("Likes", "Siblings", "Hates")
system.time(flatten(df, list_cols))
system.time(flatten(df2, list_cols, TRUE))
# Apply the helper `f` to every column of `indf`, then rebuild a plain
# data.frame from the converted columns.
#
# `f` is not defined in this snippet; it has to exist in the environment
# this function is defined in (or one of its parents) when it is called.
rawrFun <- function(indf) {
  converted <- lapply(indf, f)
  # Assigning through `[]` keeps the container and its column names and
  # only swaps the column contents.
  indf[] <- converted
  do.call(data.frame, indf)
}
# Time rawrFun() on the small sample and on the 1000x replicated data.
system.time(rawrFun(df))
system.time(rawrFun(df2))
# Expand every list column of `indf` into one column per element position
# and bind the pieces back together with the remaining columns.
#
# Arguments:
#   indf  A data.frame whose columns are plain vectors and/or lists of
#         vectors.
#
# Returns:
#   A data.frame with one row per input row. Non-list columns are passed
#   through unchanged; each list column is replaced by as many columns as
#   its longest element, with shorter elements padded with NA.
HerokaFun <- function(indf) {
  widen <- function(x) {
    # Not a list: return as is.
    if (!is.list(x)) {
      return(x)
    }
    # The longest element decides the column count; max(0L, ...) keeps an
    # empty list column from yielding -Inf.
    width <- max(0L, lengths(x))
    # Indexing past the end of a vector gives NA, which pads short elements.
    # seq_len() is used because seq() would count down for widths below 1.
    padded <- lapply(x, `[`, seq_len(width))
    # rbind() yields one row per element for any width. t(sapply(...))
    # collapsed to a single row when every element had length 1, because
    # sapply() then simplifies its result to a plain vector.
    data.frame(do.call(rbind, padded))
  }
  do.call(cbind.data.frame, lapply(indf, widen))
}
# Time HerokaFun() on the small sample and on the 1000x replicated data.
system.time(HerokaFun(df))
system.time(HerokaFun(df2))
@mrdwab
Copy link
Copy Markdown
Author

mrdwab commented Dec 18, 2015

Timing:

## Tests
## Each call is followed by the system.time() output the author recorded
## (times in seconds).
system.time(flatten(df, c("Likes", "Siblings", "Hates")))
#    user  system elapsed 
#       0       0       0 
system.time(flatten(df2, c("Likes", "Siblings", "Hates"), TRUE))
#    user  system elapsed 
#    0.08    0.00    0.06 

system.time(rawrFun(df))
#    user  system elapsed 
#       0       0       0 
system.time(rawrFun(df2))
#    user  system elapsed 
#   46.65    0.00   46.72 

system.time(HerokaFun(df))
#    user  system elapsed 
#       0       0       0 
system.time(HerokaFun(df2))
#    user  system elapsed 
#    0.29    0.00    0.29 

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment