Last active
October 18, 2016 11:44
-
-
Save talegari/d9cafb30037e8412e6eda0b17cd9a729 to your computer and use it in GitHub Desktop.
lapply kind of behaviour on files of a directory
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
####################################################################### | |
# dir_apply | |
####################################################################### | |
# | |
# Description ---- | |
# | |
# lapply kind of behaviour on files of a directory. | |
# | |
# Arguments ---- | |
# | |
# directory : (string) directory containing input files. | |
# | |
# fun : (function object or string) function whose input is the | |
# filename as a string and additional arguments if any. | |
# Defaults to "read.csv". | |
# Function name or the function name as the string may be given. | |
# | |
# extensions : (character vector) extension names. | |
# Defaults to "csv". | |
# | |
# parallel : (flag) whether multiple cores may be used in parallel. | |
# Defaults to FALSE. | |
# If TRUE, `mclapply` call is made. | |
# Else a `lapply` call is made. | |
# | |
# preSchedule : (flag) supplied to mc.preschedule parameter of `mclapply`. | |
# Defaults to TRUE. | |
# Applicable only if parallel is TRUE. | |
# | |
# cores : (positive integer) number of cores to use. | |
# Defaults to -1, indicating the use of all cores available. | |
# Applicable only if parallel is TRUE. | |
# | |
# quietly : (flag) if FALSE(default), messages are not suppressed. | |
# ... : additional arguments to fun, passed asis and unevaluated. | |
# | |
# Value ---- | |
# | |
# returns a list with names same as the filenames in the directory. | |
# | |
# Imports/Depends ---- | |
# | |
# Packages `assertthat`, `tools`, `parallel` | |
# The function does not work on windows if parallel is TRUE | |
# | |
# Details ---- | |
# | |
# you might like fun to have side effects and return NULL ! | |
# | |
# Author ---- | |
# | |
# Srikanth KS (talegari) | |
# | |
dir_apply <- function(directory | |
, fun = "read.csv" | |
, extensions = c("csv") | |
, parallel = FALSE | |
, quietly = FALSE | |
, preSchedule = TRUE | |
, cores = -1L | |
, ...){ | |
# assertions | |
stopifnot(require("assertthat")) | |
assert_that(require("tools")) | |
assert_that(file_test("-d", directory)) | |
directory <- normalizePath(directory) | |
assert_that(is.readable(directory)) | |
assert_that(class(extensions) == "character") | |
assert_that(is.function(match.fun(fun))) | |
assert_that(is.flag(parallel)) | |
if(parallel){ | |
assert_that(require("parallel")) | |
assert_that(is.flag(preSchedule)) | |
assert_that(is.count(cores) || cores == -1L) | |
if(cores == -1L){ | |
cores <- detectCores() | |
} else { | |
cores <- min(detectCores(), cores) | |
} | |
} | |
filenames = list_files_with_exts(directory, extensions) | |
if(length(filenames) == 0 && quietly == FALSE){ | |
message("dir_apply: No files with given extension was found") | |
return(list()) | |
} | |
# lapply call | |
if(parallel == FALSE){ | |
oList <- lapply(filenames | |
, function(x){try(match.fun(fun)(x,...), silent = TRUE)} | |
) | |
} else { | |
oList <- mclapply(filenames | |
, function(x){try(match.fun(fun)(x,...), silent = TRUE)} | |
, mc.preschedule = preSchedule | |
, mc.cores = cores | |
) | |
} | |
names(oList) <- file_path_sans_ext(basename(filenames)) | |
nTryErrors <- sum(vapply(oList, is.error, logical(1))) | |
if(nTryErrors != 0 && quietly == FALSE){ | |
message("dir_apply: number of try errors is ", nTryErrors) | |
} | |
return(oList) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment