Created
October 22, 2014 21:20
-
-
Save hadley/2751ba61d1c7f4eaacab to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
recode <- function(df, ..., match = c("first", "last")) { | |
match <- match.arg(match) | |
cases <- lapply(list(...), as.case) | |
if (identical(match, "last")) cases <- rev(cases) | |
n <- nrow(df) | |
out <- rep(NA, length(n)) # logical will be upcast as needed | |
# Simple loop-y implementation | |
for (i in seq_len(n)) { | |
row <- df[i, ] | |
for (j in seq_along(cases)) { | |
case <- cases[[j]] | |
res <- eval(case$expr, row, case$env) | |
if (isTRUE(res)) { | |
val <- eval(case$val, row, case$env) | |
out[[i]] <- val | |
break | |
} | |
} | |
} | |
out | |
} | |
# Case data structure ---------------------------------------------------------- | |
case <- function(expr, val, env) { | |
structure(list(expr = expr, val = val, env = env), class = "case") | |
} | |
as.case <- function(x) UseMethod("as.case") | |
as.case.case <- function(x) x | |
as.case.formula <- function(x) { | |
if (length(x) == 3) { | |
case(x[[2]], x[[3]], environment(x)) | |
} else if (length(x) == 2) { | |
case(TRUE, x[[2]], environment(x)) | |
} else { | |
stop("Invalid formual") | |
} | |
} | |
print.case <- function(x, ...) { | |
cat("<case>\n") | |
cat(" expr: ", deparse(x$expr), "\n", sep = "") | |
cat(" val: ", x$val, "\n", sep = "") | |
cat(" env: ", format(x$env), "\n", sep = "") | |
} | |
# Examples --------------------------------------------------------------------- | |
recode(mtcars, | |
mpg < 20 ~ "a", | |
vs == 1 ~ "b" | |
) | |
recode(mtcars, | |
mpg < 20 ~ "a", | |
vs == 1 ~ "b", | |
~ "c" | |
) | |
recode(mtcars, | |
mpg < 20 ~ mpg, | |
~ mpg + 100 | |
) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi hadley! I'm wondering if there's any possible that
{dplyr}
incorporate this functionrecode
in the future. As a fan of{dplyr}
, I think it's a very useful and easy-to-use function. If not, how should I use the existing function in{dplyr}
for the similar circumstances? I'm very grateful for the development of such wonderful tool in data manipulation!