Skip to content

Instantly share code, notes, and snippets.

@lionel-
Created July 22, 2019 12:30
Show Gist options
  • Save lionel-/738888205b5351ab98b3ea91dbda7c4a to your computer and use it in GitHub Desktop.
Save lionel-/738888205b5351ab98b3ea91dbda7c4a to your computer and use it in GitHub Desktop.
Recoding values
# The general idea is to supply a recoding specification through a
# data frame of keys and values. They keys are a generalisation of
# names, they can be any type.
keys <- function(key, value) {
tibble::tibble(.key = key, .value = value)
}
dribbleys <- function(...) {
tibble::tribble(~ .key, ~ .value, ...)
}
is_keys <- function(x) {
is.data.frame(x) && all(names(x) %in% c(".key", ".value"))
}
keys(0:2, c(4L, 6L, 8L))
#> # A tibble: 3 x 2
#> .key .value
#> <int> <int>
#> 1 0 4
#> 2 1 6
#> 3 2 8
dribbleys(
0L, 4L,
1L, 6L,
2L, 8L
)
#> # A tibble: 3 x 2
#> .key .value
#> <int> <int>
#> 1 0 4
#> 2 1 6
#> 3 2 8
vec_recode <- function(x, spec, ..., default = NULL, ptype = NULL) {
ellipsis::check_dots_empty(...)
if (!is_keys(spec)) {
abort("`spec` must be a data frame with `.key` and `.value` columns")
}
key <- spec$.key
value <- spec$.value
ptype <- ptype %||% vec_ptype_common(key, default)
default <- default %||% x
c(x, key) %<-% vec_cast_common(x, key, .to = ptype)
# Handle list-columns so multiple values can be mapped to a single key
if (is_bare_list(value)) {
value <- vec_cast_common(!!!value, .to = ptype)
} else {
value <- vec_cast(value, to = ptype)
}
out <- vec_init(x, vec_size(x))
done <- rep_along(out, FALSE)
for (i in seq_along(value)) {
where <- vec_in(x, value[[i]])
done <- done | where
vec_slice(out, where) <- key[[i]]
}
todo <- !done
if (any(todo)) {
default <- vec_recycle(default, vec_size(x))
vec_slice(out, todo) <- vec_slice(default, todo)
}
out
}
environment(vec_recode) <- rlang::ns_env("vctrs")
vec_recode(mtcars$cyl, keys(0:2, c(4L, 6L, 8L)))
#> [1] 1 1 0 1 2 1 2 0 0 1 1 2 2 2 2 2 2 0 0 0 0 2 2 2 2 0 0 0 2 1 2 0
vec_recode(mtcars$cyl, keys(0:1, c(4L, 6L)))
#> [1] 1 1 0 1 8 1 8 0 0 1 1 8 8 8 8 8 8 0 0 0 0 8 8 8 8 0 0 0 8 1 8 0
vec_recode(mtcars$cyl, keys(0:1, c(4L, 6L)), default = 1.5)
#> [1] 1.0 1.0 0.0 1.0 1.5 1.0 1.5 0.0 0.0 1.0 1.0 1.5 1.5 1.5 1.5 1.5 1.5 0.0
#> [19] 0.0 0.0 0.0 1.5 1.5 1.5 1.5 0.0 0.0 0.0 1.5 1.0 1.5 0.0
x <- c("foo", "bar", NA, "foo")
spec <- keys(c("FOO", "missing"), c("foo", NA))
vec_recode(x, spec, default = "default")
#> [1] "FOO" "default" "missing" "FOO"
# Corresponding dplyr code:
dplyr::recode(mtcars$cyl, `4` = 0, `6` = 1, `8` = 2)
dplyr::recode(mtcars$cyl, `4` = 0, `6` = 1)
dplyr::recode(mtcars$cyl, `4` = 0, `6` = 1, .default = 1.5)
dplyr::recode(c("foo", "bar", NA, "foo"), `foo` = "FOO", .default = "default", .missing = "missing")
# Can also recode multiple values to a single key:
spec <- dribbleys(
0, c(4, 6),
1, 8
)
vec_recode(mtcars$cyl, spec)
#> [1] 0 0 0 0 1 0 1 0 0 0 0 1 1 1 1 1 1 0 0 0 0 1 1 1 1 0 0 0 1 0 1 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment