Last active
February 18, 2024 20:19
-
-
Save dmurdoch/59a14d10ac7f8e909e7fa596e21bb89d to your computer and use it in GitHub Desktop.
Search R package for duplicate definitions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# While in the top level directory of a package, run `dupnames()` to check the source for multiple | |
# definitions of the same identifier. The *last* definition is the one that will be included in your | |
# package; earlier ones are overwritten. | |
containsNames <- function(expr, names) { | |
if (is.name(expr) && as.character(expr) %in% names) | |
return(TRUE) | |
if (is.call(expr)) { | |
fn <- expr[[1]] | |
if (is.name(fn) && as.character(fn) == "function") | |
return(FALSE) | |
for (i in seq_along(expr)) | |
if (containsNames(expr[[i]], names)) | |
return(TRUE) | |
} | |
FALSE | |
} | |
dupnames <- function(path = ".", ignore = NULL) { | |
Rfiles <- pkgload:::find_code(path) | |
allnames <- data.frame(names=character(), filename=character(), line = numeric()) | |
result <- NULL | |
ignore0 <- ignore | |
# Iterate over all source files | |
for (f in Rfiles) { | |
# We'll add removed variables to the ignore list, just | |
# for this file | |
ignore <- ignore0 | |
exprs <- parse(f, keep.source = TRUE) | |
# First pass: add removals to ignorable variables | |
for (i in seq_along(exprs)) { | |
expr <- exprs[[i]] | |
if (is.call(expr) && | |
length(expr) > 1 && | |
is.name(expr[[1]]) && | |
as.character(expr[[1]]) %in% c("rm", "remove")) { | |
argnames <- names(expr) | |
# We ignore removals from a different target | |
if (any(argnames %in% c("pos", "envir", "inherits"))) | |
next | |
keep <- rep(TRUE, length(expr)) | |
keep[1] <- FALSE | |
if (any(argnames == "list")) { | |
keep[argnames == "list"] <- FALSE | |
list <- try(eval(expr[["list"]])) | |
if (!inherits(list, "try-error")) | |
ignore <- c(ignore, list) | |
} | |
for (j in which(keep)) { | |
name <- NULL | |
if (is.name(expr[[j]])) | |
name <- as.character(expr[[j]]) | |
else if (is.character(expr[[j]]) && | |
length(expr[[j]]) == 1) | |
name <- expr[[j]] | |
if (!is.null(name)) | |
ignore <- c(ignore, name) | |
} | |
} | |
} | |
# Second pass: collect all assignments | |
locs <- getSrcLocation(exprs) | |
names <- character(length(exprs)) | |
lines <- numeric(length(exprs)) | |
for (i in seq_along(exprs)) { | |
expr <- exprs[[i]] | |
if (is.call(expr) && | |
length(expr) > 1 && | |
is.name(expr[[1]]) && | |
as.character(expr[[1]]) %in% c("<-", "=") && | |
is.name(expr[[2]])) { | |
ident <- as.character(expr[[2]]) | |
# Have we been told to ignore this identifier? | |
if (ident %in% ignore) | |
next | |
# Does the expression modify the name or use the | |
# ignored identifiers? Ignore that. | |
modifies <- FALSE | |
for (j in seq_along(expr)[-(1:2)]) | |
if (containsNames(expr[[j]], c(ident, ignore))) { | |
modifies <- TRUE | |
break | |
} | |
if (modifies) | |
next | |
names[i] <- ident | |
lines[i] <- locs[i] | |
} | |
} | |
# Compare assignments to ones we already have | |
keep <- names != "" | |
if (any(keep)) { | |
names <- names[keep] | |
lines <- lines[keep] | |
prev <- nrow(allnames) | |
allnames <- rbind(allnames, data.frame(name = names, filename = basename(f), line = lines)) | |
dups <- which(duplicated(allnames$name)) | |
dups <- dups[dups > prev] | |
if (any(dups)) { | |
origfile <- character(length(dups)) | |
origline <- numeric(length(dups)) | |
for (i in seq_along(dups)) { | |
prev <- which(allnames$name == allnames$name[dups[i]])[1] | |
origfile[i] <- allnames$filename[prev] | |
origline[i] <- allnames$line[prev] | |
} | |
result <- rbind(result, | |
data.frame(name = allnames$name[dups], | |
first = paste(origfile, origline, sep=":"), | |
dup = paste(allnames$filename[dups], allnames$line[dups], sep = ":"))) | |
} | |
} | |
} | |
result | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is some code to look for mistakes in package writing, where a function or other object is accidentally created twice. This is not necessarily a mistake; sometimes the object is created and then modified.
An attempt is made to detect the case of modifying the object by seeing if the object itself is used in the second definition. This won't always spot modifications, which sometimes use other variables. Setting the
ignore
argument to the names of such variables will also skip over those definitions.