r2evans · June 15, 2019 20:39
diff --git a/MapReduce.R b/MapReduce.R
 #' 'Reduce' with arbitrary number of arguments
 #'
 #' Applies a function to the corresponding elements of given vectors,
 #' in a reductionist way. (This is *not* related to the [Apache Hadoop
 #' MapReduce](https://hadoop.apache.org/) project ... while this may
 #' suggest the name 'MapReduce' is a poor choice for this function, it
 #' is a logical combination of R's [Map()] and [Reduce()] functions.)
 #'
 #' @details
 #'
 #' The function is called with 'k' vectors as arguments, and the
 #' function should return either a list of k single values, or a
 #' single return value. If a singleton, then the value is assumed to
 #' replace the 'i'th value within the first argument (list or vector),
 #' and the next iteration's "previous" values will be take from this
 #' and this i'th value from all vectors except the first; if a
 #' k-length list, then this list is used as the next iteration's
 #' "previous" (the other value in each of the k argument vectors).
 #'
 #' If 'init' is given, this logically adds it to the start (when
 #' proceeding left-to-right) or the end of the input vectors,
 #' respectively. If these possibly-augmented vectors have n > 1
 #' vectors, 'MapReduce' successively applies f to the first (last) 2
 #' values of each vector.
 #'
 #' @section Translation from 'Reduce':
 #'
 #' Not all functions will translate directly from [Reduce()]. Many
 #' functions that work well with 'Reduce' expect exactly two
 #' arguments, with an effective length of 1 each; one example is '+'
 #' and other binary operators. In contrast, 'MapReduce' expects k
 #' vectors and will operate on all vectors (of length 2) in a call.
 #'
 #' While the use of binary operators as the sole function passed to
 #' 'MapReduce' is ignoring the 'Map' component, to translate binary
 #' operators, one must wrap the function so that it expects a single
 #' argument of length two. For example
 #'
 #' ```
 #' binary_func  <- function(a, b) `+`(a, b)    # Reduce
 #' binary_func2 <- function(u) `+`(u[1], u[2]) # MapReduce
 #' ```
 #'
 #' Author: Bill Evans ([email protected])
 #' 
 #' License: MIT: use as you will, no warranty, keep this license and
 #' citation if re-distributed
 #' 
 #' @param f a function of the 'k' arity if this is called with k
 #'   arguments.
 #' @param ... vectors or lists
 #' @param init an R object of the same kind as the elements of 'x'.
 #' @param right a logical indicating whether to proceed from left to
 #'   right (default) or from right to left.
 #' @param accumulate a logical indicating whether the successive
 #'   reduce combinations should be accumulated. By default, only the
 #'   final combination is used.
 #' @return 'list', possibly with nested lists, and further if
 #'   'accumulate' is true
 #' @export
 #' @md
 #' @examples
 #' ### Adaptations from [Reduce()] documentation:
 #' x <- list(1, 2, 3)
 #' Reduce("+", x)
 #' # "+" is not a good function for MapReduce, as it expects two
 #' # separate inputs instead of one or more vectors, length 2; so
 #' # instead, we must adapt
 #' MapReduce(function(a) a[1] + a[2], x)
 #' # perhaps [sum()] is better
 #' MapReduce(sum, x)
 #' 
 #' Reduce("+", x, accumulate = TRUE)
 #' MapReduce(sum, x, accumulate = TRUE)
 #'
 #' cfrac_binary <- function(u, v) u + 1 / v
 #' cfrac_unary <- function(u) u[1] + 1 / u[2]
 #' ## Continued fraction approximation for pi:
 #' x <- c(3, 7, 15, 1, 292)
 #' Reduce(cfrac_binary, x, right = TRUE)
 #' MapReduce(cfrac_unary, x, right = TRUE, simplify = TRUE)
 #'
 #' ## Continued fraction approximation for Euler's number (e):
 #' x <- c(2, 1, 2, 1, 1, 4, 1, 1, 6, 1, 1, 8)
 #' Reduce(cfrac_binary, x, right = TRUE)
 #' MapReduce(cfrac_unary, x, right = TRUE, simplify = TRUE)
 #'
 #' ### Extensions, allowing for more arguments
 #'
 #' # suggested by https://stackoverflow.com/q/56612304
 #' if (require("data.table")) {
 #' 
 #'   nT <- 5
 #'   int <- rep(1.1, 5)
 #'   loan <- data.table(loan.age = seq(0:(nT-1)), payment = c(5000, -rep(1000,(nT-1))))
 #'   f <- function(payments, interests) {
 #'     list(payments[2] + interests[2] * payments[1], interests[2])
 #'   }
 #' 
 #'   loan[, c("interest", "balance") := 0
 #'        ][,balance := MapReduce(f, payment, int, accumulate = TRUE, simplify = TRUE)
 #'          ][,interest := c(0, diff(balance) - payment[-1])
 #'            ]
 #'
 #'   ### that was boring, 'int' was unchanging; here's the impetus for MapReduce
 #' 
 #'   set.seed(2)
 #'   (int <- rnorm(nT, mean = 0.1, sd = 0.02) + 1)
 #'   # [1] 1.082062 1.103697 1.131757 1.077392 1.098395
 #' 
 #'   # refresh 'loan'
 #'   loan <- data.table(loan.age = seq(0:(nT-1)), payment = c(5000, -rep(1000,(nT-1))))
 #' 
 #'   loan[, c("interest", "balance") := 0
 #'        ][,balance := MapReduce(f, payment, int, accumulate = TRUE, simplify = TRUE)
 #'          ][,interest := c(0, diff(balance) - payment[-1])
 #'            ]
 #' 
 #' }
 MapReduce <- function (f, ..., init, right = FALSE, accumulate = FALSE, simplify = FALSE) {
  mis <- missing(init)
  xs <- list(...)
  lens <- lengths(xs)
  len1 <- max(lens)
  if (length(lens) == 0L || len1 == 0L) 
    return(if (mis) NULL else init)
  if (!all(lens %in% c(len1, 1L))) {
    stop("all arguments must be same length or length 1")
  }
  xs[lens == 1L] <- Map(replicate, len1[any(lens == 1L)], xs[lens == 1L])
  f <- match.fun(f)
  xs <- lapply(xs, function(x) if (!is.vector(x) || is.object(x)) as.list(x) else x)
  ind <- seq_len(len1)
  if (mis) {
    if (right) {
      init <- lapply(xs, `[[`, len1)
      ind <- ind[-len1]
    } else {
      init <- lapply(xs, `[[`, 1L)
      ind <- ind[-1L]
    }
  }
  if (!accumulate) {
    if (right) {
      for (i in rev(ind)) {
        out <- do.call(forceAndCall, c(list(2, f), Map(c, lapply(xs, `[[`, i), init)))
        if (is.list(out)) {
          if (length(out) == length(init)) {
            init <- out
          } else {
            stop("function output is not length 1 or same length as 'init'")
          }
        } else {
          xs[[1]][[i]] <- out
          init <- lapply(xs, `[[`, i)
        }
      }
    } else {
      for (i in ind) {
        out <- do.call(forceAndCall, c(list(2, f), Map(c, init, lapply(xs, `[[`, i))))
        if (is.list(out)) {
          if (length(out) == length(init)) {
            init <- out
          } else {
            stop("function output is not length 1 or same length as 'init'")
          }
        } else {
          xs[[1]][[i]] <- out
          init <- lapply(xs, `[[`, i)
        }
      }
    }
    if (!isFALSE(simplify) && length(out))
      init <- simplify2array(init[[1]], higher = (simplify == "array"))
    init
  } else {
    len <- length(ind) + 1L
    out <- vector("list", len)
    if (mis) {
      if (right) {
        out[[len]] <- init
        for (i in rev(ind)) {
          out1 <- do.call(forceAndCall, c(list(2, f), Map(c, lapply(xs, `[[`, i), init)))
          if (is.list(out1)) {
            if (length(out1) == length(init)) {
              out[[i]] <- init <- out1
            } else {
              stop("function output is not a list length 1 or same length as 'init'")
            }
          } else {
            xs[[1]][[i]] <- out1
            out[[i]] <- init <- lapply(xs, `[[`, i)
          }
        }
      } else {
        out[[1L]] <- init
        for (i in ind) {
          out1 <- do.call(forceAndCall, c(list(2, f), Map(c, init, lapply(xs, `[[`, i))))
          if (is.list(out1)) {
            if (length(out1) == length(init)) {
              out[[i]] <- init <- out1
            } else {
              stop("function output is not length 1 or same length as 'init'")
            }
          } else {
            xs[[1]][[i]] <- out1
            out[[i]] <- init <- lapply(xs, `[[`, i)
          }
        }
      }
    } else {
      if (right) {
        out[[len]] <- init
        for (i in rev(ind)) {
          out1 <- do.call(forceAndCall, c(list(2, f), Map(c, lapply(xs, `[[`, i), init)))
          if (is.list(out1)) {
            if (length(out1) == length(init)) {
              out[[i]] <- init <- out1
            } else {
              stop("function output is not a list length 1 or same length as 'init'")
            }
          } else {
            xs[[1]][[i]] <- out1
            out[[i]] <- init <- lapply(xs, `[[`, i)
          }
        }
      } else {
        for (i in ind) {
          out[[i]] <- init
          out1 <- do.call(forceAndCall, c(list(2, f), Map(c, init, lapply(xs, `[[`, i))))
          if (is.list(out1)) {
            if (length(out1) == length(init)) {
              init <- out1
            } else {
              stop("function output is not length 1 or same length as 'init'")
            }
          } else {
            xs[[1]][[i]] <- out1
            init <- lapply(xs, `[[`, i)
          }
        }
        out[[len]] <- init
      }
    }
    if (all(lengths(out) == 1L)) 
      out <- unlist(out, recursive = FALSE)
    if (!isFALSE(simplify) && length(out))
      out <- simplify2array(lapply(out, `[[`, 1), higher = (simplify == "array"))
    out
  }
 }
	#' 'Reduce' with arbitrary number of arguments
	#'
	#' Applies a function to the corresponding elements of given vectors,
	#' in a reductionist way. (This is not related to the [Apache Hadoop
	#' MapReduce](https://hadoop.apache.org/) project ... while this may
	#' suggest the name 'MapReduce' is a poor choice for this function, it
	#' is a logical combination of R's [Map()] and [Reduce()] functions.)
	#'
	#' @details
	#'
	#' The function is called with 'k' vectors as arguments, and the
	#' function should return either a list of k single values, or a
	#' single return value. If a singleton, then the value is assumed to
	#' replace the 'i'th value within the first argument (list or vector),
	#' and the next iteration's "previous" values will be take from this
	#' and this i'th value from all vectors except the first; if a
	#' k-length list, then this list is used as the next iteration's
	#' "previous" (the other value in each of the k argument vectors).
	#'
	#' If 'init' is given, this logically adds it to the start (when
	#' proceeding left-to-right) or the end of the input vectors,
	#' respectively. If these possibly-augmented vectors have n > 1
	#' vectors, 'MapReduce' successively applies f to the first (last) 2
	#' values of each vector.
	#'
	#' @section Translation from 'Reduce':
	#'
	#' Not all functions will translate directly from [Reduce()]. Many
	#' functions that work well with 'Reduce' expect exactly two
	#' arguments, with an effective length of 1 each; one example is '+'
	#' and other binary operators. In contrast, 'MapReduce' expects k
	#' vectors and will operate on all vectors (of length 2) in a call.
	#'
	#' While the use of binary operators as the sole function passed to
	#' 'MapReduce' is ignoring the 'Map' component, to translate binary
	#' operators, one must wrap the function so that it expects a single
	#' argument of length two. For example
	#'
	#' ```
	#' binary_func <- function(a, b) `+`(a, b) # Reduce
	#' binary_func2 <- function(u) `+`(u[1], u[2]) # MapReduce
	#' ```
	#'
	#' Author: Bill Evans ([email protected])
	#'
	#' License: MIT: use as you will, no warranty, keep this license and
	#' citation if re-distributed
	#'
	#' @param f a function of the 'k' arity if this is called with k
	#' arguments.
	#' @param ... vectors or lists
	#' @param init an R object of the same kind as the elements of 'x'.
	#' @param right a logical indicating whether to proceed from left to
	#' right (default) or from right to left.
	#' @param accumulate a logical indicating whether the successive
	#' reduce combinations should be accumulated. By default, only the
	#' final combination is used.
	#' @return 'list', possibly with nested lists, and further if
	#' 'accumulate' is true
	#' @export
	#' @md
	#' @examples
	#' ### Adaptations from [Reduce()] documentation:
	#' x <- list(1, 2, 3)
	#' Reduce("+", x)
	#' # "+" is not a good function for MapReduce, as it expects two
	#' # separate inputs instead of one or more vectors, length 2; so
	#' # instead, we must adapt
	#' MapReduce(function(a) a[1] + a[2], x)
	#' # perhaps [sum()] is better
	#' MapReduce(sum, x)
	#'
	#' Reduce("+", x, accumulate = TRUE)
	#' MapReduce(sum, x, accumulate = TRUE)
	#'
	#' cfrac_binary <- function(u, v) u + 1 / v
	#' cfrac_unary <- function(u) u[1] + 1 / u[2]
	#' ## Continued fraction approximation for pi:
	#' x <- c(3, 7, 15, 1, 292)
	#' Reduce(cfrac_binary, x, right = TRUE)
	#' MapReduce(cfrac_unary, x, right = TRUE, simplify = TRUE)
	#'
	#' ## Continued fraction approximation for Euler's number (e):
	#' x <- c(2, 1, 2, 1, 1, 4, 1, 1, 6, 1, 1, 8)
	#' Reduce(cfrac_binary, x, right = TRUE)
	#' MapReduce(cfrac_unary, x, right = TRUE, simplify = TRUE)
	#'
	#' ### Extensions, allowing for more arguments
	#'
	#' # suggested by https://stackoverflow.com/q/56612304
	#' if (require("data.table")) {
	#'
	#' nT <- 5
	#' int <- rep(1.1, 5)
	#' loan <- data.table(loan.age = seq(0:(nT-1)), payment = c(5000, -rep(1000,(nT-1))))
	#' f <- function(payments, interests) {
	#' list(payments[2] + interests[2] * payments[1], interests[2])
	#' }
	#'
	#' loan[, c("interest", "balance") := 0
	#' ][,balance := MapReduce(f, payment, int, accumulate = TRUE, simplify = TRUE)
	#' ][,interest := c(0, diff(balance) - payment[-1])
	#' ]
	#'
	#' ### that was boring, 'int' was unchanging; here's the impetus for MapReduce
	#'
	#' set.seed(2)
	#' (int <- rnorm(nT, mean = 0.1, sd = 0.02) + 1)
	#' # [1] 1.082062 1.103697 1.131757 1.077392 1.098395
	#'
	#' # refresh 'loan'
	#' loan <- data.table(loan.age = seq(0:(nT-1)), payment = c(5000, -rep(1000,(nT-1))))
	#'
	#' loan[, c("interest", "balance") := 0
	#' ][,balance := MapReduce(f, payment, int, accumulate = TRUE, simplify = TRUE)
	#' ][,interest := c(0, diff(balance) - payment[-1])
	#' ]
	#'
	#' }
	MapReduce <- function (f, ..., init, right = FALSE, accumulate = FALSE, simplify = FALSE) {
	mis <- missing(init)
	xs <- list(...)
	lens <- lengths(xs)
	len1 <- max(lens)
	if (length(lens) == 0L \|\| len1 == 0L)
	return(if (mis) NULL else init)
	if (!all(lens %in% c(len1, 1L))) {
	stop("all arguments must be same length or length 1")
	}
	xs[lens == 1L] <- Map(replicate, len1[any(lens == 1L)], xs[lens == 1L])
	f <- match.fun(f)
	xs <- lapply(xs, function(x) if (!is.vector(x) \|\| is.object(x)) as.list(x) else x)
	ind <- seq_len(len1)
	if (mis) {
	if (right) {
	init <- lapply(xs, `[[`, len1)
	ind <- ind[-len1]
	} else {
	init <- lapply(xs, `[[`, 1L)
	ind <- ind[-1L]
	}
	}
	if (!accumulate) {
	if (right) {
	for (i in rev(ind)) {
	out <- do.call(forceAndCall, c(list(2, f), Map(c, lapply(xs, `[[`, i), init)))
	if (is.list(out)) {
	if (length(out) == length(init)) {
	init <- out
	} else {
	stop("function output is not length 1 or same length as 'init'")
	}
	} else {
	xs[[1]][[i]] <- out
	init <- lapply(xs, `[[`, i)
	}
	}
	} else {
	for (i in ind) {
	out <- do.call(forceAndCall, c(list(2, f), Map(c, init, lapply(xs, `[[`, i))))
	if (is.list(out)) {
	if (length(out) == length(init)) {
	init <- out
	} else {
	stop("function output is not length 1 or same length as 'init'")
	}
	} else {
	xs[[1]][[i]] <- out
	init <- lapply(xs, `[[`, i)
	}
	}
	}
	if (!isFALSE(simplify) && length(out))
	init <- simplify2array(init[[1]], higher = (simplify == "array"))
	init
	} else {
	len <- length(ind) + 1L
	out <- vector("list", len)
	if (mis) {
	if (right) {
	out[[len]] <- init
	for (i in rev(ind)) {
	out1 <- do.call(forceAndCall, c(list(2, f), Map(c, lapply(xs, `[[`, i), init)))
	if (is.list(out1)) {
	if (length(out1) == length(init)) {
	out[[i]] <- init <- out1
	} else {
	stop("function output is not a list length 1 or same length as 'init'")
	}
	} else {
	xs[[1]][[i]] <- out1
	out[[i]] <- init <- lapply(xs, `[[`, i)
	}
	}
	} else {
	out[[1L]] <- init
	for (i in ind) {
	out1 <- do.call(forceAndCall, c(list(2, f), Map(c, init, lapply(xs, `[[`, i))))
	if (is.list(out1)) {
	if (length(out1) == length(init)) {
	out[[i]] <- init <- out1
	} else {
	stop("function output is not length 1 or same length as 'init'")
	}
	} else {
	xs[[1]][[i]] <- out1
	out[[i]] <- init <- lapply(xs, `[[`, i)
	}
	}
	}
	} else {
	if (right) {
	out[[len]] <- init
	for (i in rev(ind)) {
	out1 <- do.call(forceAndCall, c(list(2, f), Map(c, lapply(xs, `[[`, i), init)))
	if (is.list(out1)) {
	if (length(out1) == length(init)) {
	out[[i]] <- init <- out1
	} else {
	stop("function output is not a list length 1 or same length as 'init'")
	}
	} else {
	xs[[1]][[i]] <- out1
	out[[i]] <- init <- lapply(xs, `[[`, i)
	}
	}
	} else {
	for (i in ind) {
	out[[i]] <- init
	out1 <- do.call(forceAndCall, c(list(2, f), Map(c, init, lapply(xs, `[[`, i))))
	if (is.list(out1)) {
	if (length(out1) == length(init)) {
	init <- out1
	} else {
	stop("function output is not length 1 or same length as 'init'")
	}
	} else {
	xs[[1]][[i]] <- out1
	init <- lapply(xs, `[[`, i)
	}
	}
	out[[len]] <- init
	}
	}
	if (all(lengths(out) == 1L))
	out <- unlist(out, recursive = FALSE)
	if (!isFALSE(simplify) && length(out))
	out <- simplify2array(lapply(out, `[[`, 1), higher = (simplify == "array"))
	out
	}
	}