Last active
June 15, 2019 20:39
-
-
Save r2evans/5503cc5df9be9179d4ae80e7ef91308e to your computer and use it in GitHub Desktop.
Combination of R's Reduce function with Map's k-ary arguments
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' 'Reduce' with arbitrary number of arguments | |
#' | |
#' Applies a function to the corresponding elements of given vectors, | |
#' in a reductionist way. (This is *not* related to the [Apache Hadoop | |
#' MapReduce](https://hadoop.apache.org/) project ... while this may | |
#' suggest the name 'MapReduce' is a poor choice for this function, it | |
#' is a logical combination of R's [Map()] and [Reduce()] functions.) | |
#' | |
#' @details | |
#' | |
#' The function is called with 'k' vectors as arguments, and the | |
#' function should return either a list of k single values, or a | |
#' single return value. If a singleton, then the value is assumed to | |
#' replace the 'i'th value within the first argument (list or vector), | |
#' and the next iteration's "previous" values will be take from this | |
#' and this i'th value from all vectors except the first; if a | |
#' k-length list, then this list is used as the next iteration's | |
#' "previous" (the other value in each of the k argument vectors). | |
#' | |
#' If 'init' is given, this logically adds it to the start (when | |
#' proceeding left-to-right) or the end of the input vectors, | |
#' respectively. If these possibly-augmented vectors have n > 1 | |
#' vectors, 'MapReduce' successively applies f to the first (last) 2 | |
#' values of each vector. | |
#' | |
#' @section Translation from 'Reduce': | |
#' | |
#' Not all functions will translate directly from [Reduce()]. Many | |
#' functions that work well with 'Reduce' expect exactly two | |
#' arguments, with an effective length of 1 each; one example is '+' | |
#' and other binary operators. In contrast, 'MapReduce' expects k | |
#' vectors and will operate on all vectors (of length 2) in a call. | |
#' | |
#' While the use of binary operators as the sole function passed to | |
#' 'MapReduce' is ignoring the 'Map' component, to translate binary | |
#' operators, one must wrap the function so that it expects a single | |
#' argument of length two. For example | |
#' | |
#' ``` | |
#' binary_func <- function(a, b) `+`(a, b) # Reduce | |
#' binary_func2 <- function(u) `+`(u[1], u[2]) # MapReduce | |
#' ``` | |
#' | |
#' Author: Bill Evans ([email protected]) | |
#' | |
#' License: MIT: use as you will, no warranty, keep this license and | |
#' citation if re-distributed | |
#' | |
#' @param f a function of the 'k' arity if this is called with k | |
#' arguments. | |
#' @param ... vectors or lists | |
#' @param init an R object of the same kind as the elements of 'x'. | |
#' @param right a logical indicating whether to proceed from left to | |
#' right (default) or from right to left. | |
#' @param accumulate a logical indicating whether the successive | |
#' reduce combinations should be accumulated. By default, only the | |
#' final combination is used. | |
#' @return 'list', possibly with nested lists, and further if | |
#' 'accumulate' is true | |
#' @export | |
#' @md | |
#' @examples | |
#' ### Adaptations from [Reduce()] documentation: | |
#' x <- list(1, 2, 3) | |
#' Reduce("+", x) | |
#' # "+" is not a good function for MapReduce, as it expects two | |
#' # separate inputs instead of one or more vectors, length 2; so | |
#' # instead, we must adapt | |
#' MapReduce(function(a) a[1] + a[2], x) | |
#' # perhaps [sum()] is better | |
#' MapReduce(sum, x) | |
#' | |
#' Reduce("+", x, accumulate = TRUE) | |
#' MapReduce(sum, x, accumulate = TRUE) | |
#' | |
#' cfrac_binary <- function(u, v) u + 1 / v | |
#' cfrac_unary <- function(u) u[1] + 1 / u[2] | |
#' ## Continued fraction approximation for pi: | |
#' x <- c(3, 7, 15, 1, 292) | |
#' Reduce(cfrac_binary, x, right = TRUE) | |
#' MapReduce(cfrac_unary, x, right = TRUE, simplify = TRUE) | |
#' | |
#' ## Continued fraction approximation for Euler's number (e): | |
#' x <- c(2, 1, 2, 1, 1, 4, 1, 1, 6, 1, 1, 8) | |
#' Reduce(cfrac_binary, x, right = TRUE) | |
#' MapReduce(cfrac_unary, x, right = TRUE, simplify = TRUE) | |
#' | |
#' ### Extensions, allowing for more arguments | |
#' | |
#' # suggested by https://stackoverflow.com/q/56612304 | |
#' if (require("data.table")) { | |
#' | |
#' nT <- 5 | |
#' int <- rep(1.1, 5) | |
#' loan <- data.table(loan.age = seq(0:(nT-1)), payment = c(5000, -rep(1000,(nT-1)))) | |
#' f <- function(payments, interests) { | |
#' list(payments[2] + interests[2] * payments[1], interests[2]) | |
#' } | |
#' | |
#' loan[, c("interest", "balance") := 0 | |
#' ][,balance := MapReduce(f, payment, int, accumulate = TRUE, simplify = TRUE) | |
#' ][,interest := c(0, diff(balance) - payment[-1]) | |
#' ] | |
#' | |
#' ### that was boring, 'int' was unchanging; here's the impetus for MapReduce | |
#' | |
#' set.seed(2) | |
#' (int <- rnorm(nT, mean = 0.1, sd = 0.02) + 1) | |
#' # [1] 1.082062 1.103697 1.131757 1.077392 1.098395 | |
#' | |
#' # refresh 'loan' | |
#' loan <- data.table(loan.age = seq(0:(nT-1)), payment = c(5000, -rep(1000,(nT-1)))) | |
#' | |
#' loan[, c("interest", "balance") := 0 | |
#' ][,balance := MapReduce(f, payment, int, accumulate = TRUE, simplify = TRUE) | |
#' ][,interest := c(0, diff(balance) - payment[-1]) | |
#' ] | |
#' | |
#' } | |
MapReduce <- function (f, ..., init, right = FALSE, accumulate = FALSE, simplify = FALSE) { | |
mis <- missing(init) | |
xs <- list(...) | |
lens <- lengths(xs) | |
len1 <- max(lens) | |
if (length(lens) == 0L || len1 == 0L) | |
return(if (mis) NULL else init) | |
if (!all(lens %in% c(len1, 1L))) { | |
stop("all arguments must be same length or length 1") | |
} | |
xs[lens == 1L] <- Map(replicate, len1[any(lens == 1L)], xs[lens == 1L]) | |
f <- match.fun(f) | |
xs <- lapply(xs, function(x) if (!is.vector(x) || is.object(x)) as.list(x) else x) | |
ind <- seq_len(len1) | |
if (mis) { | |
if (right) { | |
init <- lapply(xs, `[[`, len1) | |
ind <- ind[-len1] | |
} else { | |
init <- lapply(xs, `[[`, 1L) | |
ind <- ind[-1L] | |
} | |
} | |
if (!accumulate) { | |
if (right) { | |
for (i in rev(ind)) { | |
out <- do.call(forceAndCall, c(list(2, f), Map(c, lapply(xs, `[[`, i), init))) | |
if (is.list(out)) { | |
if (length(out) == length(init)) { | |
init <- out | |
} else { | |
stop("function output is not length 1 or same length as 'init'") | |
} | |
} else { | |
xs[[1]][[i]] <- out | |
init <- lapply(xs, `[[`, i) | |
} | |
} | |
} else { | |
for (i in ind) { | |
out <- do.call(forceAndCall, c(list(2, f), Map(c, init, lapply(xs, `[[`, i)))) | |
if (is.list(out)) { | |
if (length(out) == length(init)) { | |
init <- out | |
} else { | |
stop("function output is not length 1 or same length as 'init'") | |
} | |
} else { | |
xs[[1]][[i]] <- out | |
init <- lapply(xs, `[[`, i) | |
} | |
} | |
} | |
if (!isFALSE(simplify) && length(out)) | |
init <- simplify2array(init[[1]], higher = (simplify == "array")) | |
init | |
} else { | |
len <- length(ind) + 1L | |
out <- vector("list", len) | |
if (mis) { | |
if (right) { | |
out[[len]] <- init | |
for (i in rev(ind)) { | |
out1 <- do.call(forceAndCall, c(list(2, f), Map(c, lapply(xs, `[[`, i), init))) | |
if (is.list(out1)) { | |
if (length(out1) == length(init)) { | |
out[[i]] <- init <- out1 | |
} else { | |
stop("function output is not a list length 1 or same length as 'init'") | |
} | |
} else { | |
xs[[1]][[i]] <- out1 | |
out[[i]] <- init <- lapply(xs, `[[`, i) | |
} | |
} | |
} else { | |
out[[1L]] <- init | |
for (i in ind) { | |
out1 <- do.call(forceAndCall, c(list(2, f), Map(c, init, lapply(xs, `[[`, i)))) | |
if (is.list(out1)) { | |
if (length(out1) == length(init)) { | |
out[[i]] <- init <- out1 | |
} else { | |
stop("function output is not length 1 or same length as 'init'") | |
} | |
} else { | |
xs[[1]][[i]] <- out1 | |
out[[i]] <- init <- lapply(xs, `[[`, i) | |
} | |
} | |
} | |
} else { | |
if (right) { | |
out[[len]] <- init | |
for (i in rev(ind)) { | |
out1 <- do.call(forceAndCall, c(list(2, f), Map(c, lapply(xs, `[[`, i), init))) | |
if (is.list(out1)) { | |
if (length(out1) == length(init)) { | |
out[[i]] <- init <- out1 | |
} else { | |
stop("function output is not a list length 1 or same length as 'init'") | |
} | |
} else { | |
xs[[1]][[i]] <- out1 | |
out[[i]] <- init <- lapply(xs, `[[`, i) | |
} | |
} | |
} else { | |
for (i in ind) { | |
out[[i]] <- init | |
out1 <- do.call(forceAndCall, c(list(2, f), Map(c, init, lapply(xs, `[[`, i)))) | |
if (is.list(out1)) { | |
if (length(out1) == length(init)) { | |
init <- out1 | |
} else { | |
stop("function output is not length 1 or same length as 'init'") | |
} | |
} else { | |
xs[[1]][[i]] <- out1 | |
init <- lapply(xs, `[[`, i) | |
} | |
} | |
out[[len]] <- init | |
} | |
} | |
if (all(lengths(out) == 1L)) | |
out <- unlist(out, recursive = FALSE) | |
if (!isFALSE(simplify) && length(out)) | |
out <- simplify2array(lapply(out, `[[`, 1), higher = (simplify == "array")) | |
out | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment