Last active
January 16, 2017 18:08
-
-
Save dpastoor/cdd38ee4341a6d0532ca5051296a0753 to your computer and use it in GitHub Desktop.
chunked processing to optimize the number of future threads spawned
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| library(stringr) | |
| library(tibble) | |
| library(tidyverse) | |
| # subdirectories in rundir_001 each contain a fort.50 file that should be read in, thinned, and saved to satchel with additional | |
| # information about the run appended to each chain's output | |
| dirs <- list.dirs("../modeling/rundir_001/", recursive = F) | |
| # match runs such as run008c1_est_01 as chain runs correspond to run<x>c<y> | |
| run_desc_regex <- "run(\\d+)_nids_(\\d+)_(.+)_c(\\d+)_est_(\\d+)" | |
| is_chain_dir <- grepl(pattern = run_desc_regex, dirs) | |
| chain_dirs <- dirs[is_chain_dir] | |
| chain_df <- data_frame(dir = chain_dirs) | |
| chain_info <- chain_df %>% bind_cols( | |
| map_df(str_match_all(chain_dirs, run_desc_regex), | |
| ~ data_frame( | |
| runno = as_numeric(.[,2]), | |
| nids = as_numeric(.[,3]), | |
| scenario = .[,4], | |
| chain = as_numeric(.[,5]), | |
| est = as_numeric(.[,6])) | |
| ) | |
| ) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| library(future) | |
| library(tidyverse) | |
| library(PKPDmisc) | |
| library(satchel) | |
| satchel <- Satchel$new("ex", "data/derived/satchel") | |
| plan(multiprocess) | |
| processed_res <- chain_info %>% chunk_df() %>% | |
| by_slice(function(df_chunk) { | |
| future({ | |
| by_row(df_chunk, function(df) { | |
| fullPath <- normalizePath(df$dir) | |
| runname <- basename(fullPath) | |
| fortfile <- normalizePath(file.path(fullPath, "fort.50")) | |
| if (!file.exists(fortfile)) { | |
| message("no fort.50 file found for: ", runname) | |
| return(FALSE) | |
| } | |
| data <- as_data_frame(data.table::fread(fortfile, header = FALSE, data.table = F)) | |
| names(data) <- c("ITERATION", "ID", "CL", "V") | |
| output <- data %>% | |
| thin_by(10) %>% | |
| mutate( | |
| chain = df$chain, | |
| runno = df$runno, | |
| nids = df$nids, | |
| scenario = df$scenario | |
| ) | |
| satchel$save(output, data_name = runname) | |
| return(TRUE) | |
| }) | |
| }) | |
| }, .to = "res") | |
| # check if processed correctly for all values | |
| map_lgl(processed_res$res, value) %>% all() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment