Skip to content

Instantly share code, notes, and snippets.

@dpastoor
Last active January 16, 2017 18:08
Show Gist options
  • Save dpastoor/cdd38ee4341a6d0532ca5051296a0753 to your computer and use it in GitHub Desktop.
Save dpastoor/cdd38ee4341a6d0532ca5051296a0753 to your computer and use it in GitHub Desktop.
chunked processing to optimize the number of future threads spawned
library(stringr)
library(tibble)
library(tidyverse)
# subdirectories in rundir_001 each contain a fort.50 file that should be read in, thinned, and saved to satchel with additional
# information about the run appended to each chain's output
dirs <- list.dirs("../modeling/rundir_001/", recursive = F)
# match runs such as run008c1_est_01 as chain runs correspond to run<x>c<y>
run_desc_regex <- "run(\\d+)_nids_(\\d+)_(.+)_c(\\d+)_est_(\\d+)"
is_chain_dir <- grepl(pattern = run_desc_regex, dirs)
chain_dirs <- dirs[is_chain_dir]
chain_df <- data_frame(dir = chain_dirs)
chain_info <- chain_df %>% bind_cols(
map_df(str_match_all(chain_dirs, run_desc_regex),
~ data_frame(
runno = as_numeric(.[,2]),
nids = as_numeric(.[,3]),
scenario = .[,4],
chain = as_numeric(.[,5]),
est = as_numeric(.[,6]))
)
)
library(future)
library(tidyverse)
library(PKPDmisc)
library(satchel)
satchel <- Satchel$new("ex", "data/derived/satchel")
plan(multiprocess)
processed_res <- chain_info %>% chunk_df() %>%
by_slice(function(df_chunk) {
future({
by_row(df_chunk, function(df) {
fullPath <- normalizePath(df$dir)
runname <- basename(fullPath)
fortfile <- normalizePath(file.path(fullPath, "fort.50"))
if (!file.exists(fortfile)) {
message("no fort.50 file found for: ", runname)
return(FALSE)
}
data <- as_data_frame(data.table::fread(fortfile, header = FALSE, data.table = F))
names(data) <- c("ITERATION", "ID", "CL", "V")
output <- data %>%
thin_by(10) %>%
mutate(
chain = df$chain,
runno = df$runno,
nids = df$nids,
scenario = df$scenario
)
satchel$save(output, data_name = runname)
return(TRUE)
})
})
}, .to = "res")
# check if processed correctly for all values
map_lgl(processed_res$res, value) %>% all()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment