Skip to content

Instantly share code, notes, and snippets.

@artemklevtsov
Last active January 28, 2017 03:47
Show Gist options
  • Save artemklevtsov/f36aaf101062d4b420d8 to your computer and use it in GitHub Desktop.
Save artemklevtsov/f36aaf101062d4b420d8 to your computer and use it in GitHub Desktop.
Improved version of the reshape2::melt function
melt2 <- function(data, id.vars, measure.vars, variable.name = "variable", value.name = "value", na.rm = FALSE) {
varnames <- names(data)
n <- .row_names_info(data, 2L)
if (!missing(id.vars) && is.numeric(id.vars))
id.vars <- varnames[id.vars]
if (!missing(measure.vars) && is.numeric(measure.vars))
measure.vars <- varnames[measure.vars]
if (missing(id.vars) && missing(measure.vars)) {
discrete <- vapply(data, function(x) is.factor(x) || is.character(x) || is.logical(x),
FUN.VALUE = logical(1L), USE.NAMES = FALSE)
id.vars <- varnames[discrete]
measure.vars <- varnames[!discrete]
}
else if (missing(id.vars))
id.vars <- varnames[!varnames %in% measure.vars]
else if (missing(measure.vars))
measure.vars <- varnames[!varnames %in% id.vars]
if (!length(measure.vars))
return(data[, id.vars])
variable <- rep.int(measure.vars, rep(n, length(measure.vars)))
nrows <- length(variable)
value <- unlist(.subset(data, measure.vars), recursive = FALSE, use.names = FALSE)
if (is.factor(value))
value <- as.character(value)
if (length(id.vars)) {
ids <- lapply(id.vars, function(id) rep.int(.subset2(data, id), length(measure.vars)))
res <- stats::setNames(c(ids, list(variable, value)), c(id.vars, variable.name, value.name))
} else
res <- stats::setNames(list(variable, value), c(variable.name, value.name))
if (na.rm && anyNA(value)) {
ind <- which(is.na(value))
res <- lapply(res, .subset, -ind)
nrows <- nrows - length(ind)
}
class(res) <- "data.frame"
attr(res, "row.names") = .set_row_names(nrows)
return(res)
}
@artemklevtsov
Copy link
Author

R> benchmark(
..     melt = melt(Batting, id.vars = c("playerID", "teamID", "lgID"), na.rm = TRUE),
..     melt2 = melt2(Batting, id.vars = c("playerID", "teamID", "lgID"), na.rm = TRUE),
..     gather = gather(Batting, variable, value, -playerID, -teamID, -lgID, na.rm = TRUE)
.. )
Benchmark summary:
Time units : milliseconds 
   expr n.eval   min lw.qu median mean up.qu max total relative
   melt    100 104.0 112.0  115.0  123   122 193 12300     1.26
  melt2    100  76.5  87.2   91.3  102   102 186 10200     1.00
 gather    100 158.0 170.0  182.0  196   226 285 19600     1.99
R> benchmark(
..     melt = melt(Batting, id.vars = c("playerID", "teamID", "lgID")),
..     melt2 = melt2(Batting, id.vars = c("playerID", "teamID", "lgID")),
..     gather = gather(Batting, variable, value, -playerID, -teamID, -lgID)
.. )
Benchmark summary:
Time units : milliseconds 
   expr n.eval  min lw.qu median mean up.qu  max total relative
   melt    100 13.7  13.8   13.9 16.1  14.4 86.6  1610     1.00
  melt2    100 21.0  21.2   21.4 25.5  29.5 96.2  2550     1.54
 gather    100 23.0  23.1   23.3 24.6  23.7 38.1  2460     1.68

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment