Skip to content

Instantly share code, notes, and snippets.

@chasemc
Created August 16, 2021 13:47
Show Gist options
  • Save chasemc/ee0ce2e6d843fc4ac384e293a9462ee6 to your computer and use it in GitHub Desktop.
Save chasemc/ee0ce2e6d843fc4ac384e293a9462ee6 to your computer and use it in GitHub Desktop.
Parse a Nextflow execution_trace file
library(ggplot2)
# Load the execution trace into the `execution_trace` data frame.
# The path gets its own variable so `execution_trace` only ever holds the
# parsed table, never the file name.
trace_path <- "/home/chase/Documents/pipeline_info/execution_trace_2021-08-14_09-52-45.txt"
if (!file.exists(trace_path)) {
  stop("Execution trace file not found: ", trace_path, call. = FALSE)
}
# Keep text columns as character: before R 4.0 read.delim() turned them into
# factors, which strsplit() (used on `name`, `duration`, `realtime`) rejects.
execution_trace <- read.delim(
  trace_path,
  sep = "\t",
  stringsAsFactors = FALSE
)
# Split the process part of each task name into colon-delimited components
# and append them to `execution_trace` as columns group_1, group_2, ...

# The process name is the first space-delimited token of `name`.
# vapply() guarantees a character vector; an empty name yields NA instead of
# a "subscript out of bounds" error.
process_names <- vapply(
  strsplit(as.character(execution_trace$name), " ", fixed = TRUE),
  function(tokens) {
    if (length(tokens) > 0L) tokens[[1L]] else NA_character_
  },
  character(1),
  USE.NAMES = FALSE
)

split_names <- strsplit(process_names, ":", fixed = TRUE)

# Widest name decides the number of group columns; the extra 0L avoids the
# -Inf result (and warning) max() gives for empty input.
max_len <- max(lengths(split_names), 0L)

# Pad every name to `max_len` components (indexing past the end gives NA) and
# lay them out one row per task.
padded <- lapply(split_names, function(parts) parts[seq_len(max_len)])
split_names <- matrix(
  as.character(unlist(padded)),
  nrow = length(padded),
  ncol = max_len,
  byrow = TRUE
)
colnames(split_names) <- paste0("group_", seq_len(ncol(split_names)))

execution_trace <- cbind.data.frame(execution_trace, split_names)
#' Convert Nextflow duration strings to seconds.
#'
#' Each value is a space-separated list of `<number><unit>` tokens, e.g.
#' "250ms", "1.5s", "3m 20s" or "1d 2h". Supported units are ms, s, m, h
#' and d.
#'
#' @param durations Character (or factor) vector of duration strings. A
#'   numeric vector is assumed to hold raw milliseconds, as written when the
#'   trace is produced with `trace.raw = true` (verify against your Nextflow
#'   configuration).
#' @return Numeric vector, same length as `durations`, in seconds. Values
#'   that are missing or do not match the expected format (e.g. "-") are
#'   `NA`.
parse_duration_seconds <- function(durations) {
  if (is.numeric(durations)) {
    return(durations / 1000)
  }

  seconds_per_unit <- c(ms = 1e-3, s = 1, m = 60, h = 3600, d = 86400)
  token_pattern <- "^([0-9]*\\.?[0-9]+)(ms|s|m|h|d)$"

  vapply(
    strsplit(trimws(as.character(durations)), "[[:space:]]+"),
    function(tokens) {
      # Anything that is not a clean list of <number><unit> tokens is
      # reported as NA rather than a partial (misleading) sum.
      if (length(tokens) == 0L || anyNA(tokens) ||
            !all(grepl(token_pattern, tokens))) {
        return(NA_real_)
      }
      amounts <- as.numeric(sub(token_pattern, "\\1", tokens))
      units <- sub(token_pattern, "\\2", tokens)
      sum(amounts * seconds_per_unit[units])
    },
    numeric(1),
    USE.NAMES = FALSE
  )
}

# Both columns hold true seconds, matching their names.
execution_trace$duration_seconds <-
  parse_duration_seconds(execution_trace$duration)
execution_trace$realtime_seconds <-
  parse_duration_seconds(execution_trace$realtime)
# Box plot of the `realtime_seconds` column for each `group_2` value.
# X-axis tick labels are rotated 90 degrees.
ggplot(data = execution_trace) +
  geom_boxplot(mapping = aes(x = group_2, y = realtime_seconds)) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  )
@chasemc
Copy link
Author

chasemc commented Aug 16, 2021

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment