Last active
December 23, 2015 05:41
-
-
Save dholstius/b3ed3e66a194c9c332dc to your computer and use it in GitHub Desktop.
(Left) join with custom comparator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' (Left) join with a custom comparator | |
#' | |
#' @param left data.frame | |
#' @param right data.frame | |
#' @param by names of columns to join by | |
#' @param fun custom comparator (see examples) | |
#' | |
#' @examples | |
#' my_df <- data.frame(cyl = c(4, 4, 6, 8), vs = c(0, 1, NA, NA), foo = c("A", "B", "C", "D")) | |
#' my_fun <- function (e1, e2) (e1 == e2) | is.na(e2) | |
#' fun_join(mtcars, my_df, by = c("cyl", "vs"), fun = my_fun) | |
#' | |
#' @export | |
fun_join <- function (left, right, by = intersect(names(left), names(right)), fun = `==`) { | |
left_data <- as.data.frame(left) | |
right_data <- as.data.frame(right) | |
# Stack of (logical) matrices. FIXME: Optimize this! | |
dn <- list(left = row.names(left_data), right = row.names(right_data), by = by) | |
A <- array(NA, dim = lengths(dn), dimnames = as.vector(dn)) | |
for (j in by) { | |
xl <- left_data[, j, drop = TRUE] | |
xr <- right_data[, j, drop = TRUE] | |
A[, , j] <- outer(xl, xr, fun) | |
} | |
# Matching rows | |
i <- which(apply(A, c("left", "right"), all), arr.ind = TRUE) | |
ir <- i[, "right", drop = TRUE] | |
il <- i[, "left", drop = TRUE] | |
# Don't need any `by` columns from right_data | |
jr <- setdiff(names(right_data), by) | |
jl <- names(left_data) | |
joined <- cbind(left_data[il, jl], right_data[ir, jr, drop = FALSE]) | |
return(joined) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
My immediate use case was to allow wildcard matching --- as in the example, where
NA
is effectively treated as a wildcard.