Last active
January 16, 2016 07:54
-
-
Save gluc/3a6b63d7c1cb000475bf to your computer and use it in GitHub Desktop.
using data.tree to analyse Rprof files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Profile two equivalent summary-statistic implementations with Rprof.
# The samples go to a temporary log file that is parsed further down.
tmp <- tempfile(fileext = ".log")

# Shared workload: one million uniform random draws
x <- runif(1e6)

# Variant 1: mean, median and standard deviation via the high-level helpers
f <- function(x) {
  c(mean(x), median(x), sd(x))
}

# Variant 2: the same three statistics written out by hand. The expression
# is deliberately left un-factored, since its call pattern is what gets profiled.
f2 <- function(x) {
  c(
    sum(x) / length(x),
    median.default(x),
    sqrt(sum((x - sum(x) / length(x))^2L) / (length(x) - 1L))
  )
}

# Sample the call stack every 0.01 s while each variant runs 100 times
Rprof(tmp, interval = 0.01)
for (i in seq_len(100)) {
  f(x)
}
for (i in seq_len(100)) {
  f2(x)
}
Rprof(NULL)

# Top-level alias for the log path, named like parse_log()'s argument
filename <- tmp
#' Parse an Rprof output file into a table of call paths and sampled time.
#'
#' The first line of the file is read as the header; the number after its
#' "=" is the sampling interval in microseconds. Every remaining line that
#' does not start with "#" is one sampled call stack. Identical stacks are
#' counted, and each count is multiplied by the interval (in seconds).
#'
#' @param filename Path to the Rprof output file.
#' @return A data.frame with one row per distinct call stack and columns
#'   `pathString` ("calls/<outermost>/.../<innermost>"), `real` (seconds
#'   attributed to that exact stack) and `percent` (fraction of the total,
#'   between 0 and 1). The total time in seconds is attached as the
#'   attribute `total.time`.
#' @details Stops with an error when the file holds fewer than two lines,
#'   i.e. it is completely empty or contains only the header.
parse_log <- function(filename = "Rprof.out") {
  # One element per line; quote = "\"" makes scan() treat the double quotes
  # around function names as quoting characters rather than data.
  lines <- scan(filename, what = "character", quote = "\"", sep = "\n",
                strip.white = TRUE, multi.line = FALSE, quiet = TRUE)

  # A usable log needs the header plus at least one more line. Testing for
  # `== 1L` let a zero-line file through, which then produced an NA interval
  # and a silent 0-row result instead of this error.
  if (length(lines) < 2L) {
    stop(sprintf("'%s' file is empty.", filename))
  }

  # Header value is in microseconds; convert to seconds
  header_fields <- strsplit(lines[1L], split = "=", fixed = TRUE)[[1L]]
  interval <- as.numeric(header_fields[2L]) / 1e06

  # Drop the header and any "#"-prefixed annotation lines
  samples <- lines[-1L]
  samples <- samples[!grepl("^#", samples)]

  # Count how often each distinct stack was sampled
  stacks <- unique(samples)
  real.time <- tabulate(match(samples, stacks)) * interval
  total.time <- sum(real.time)
  pct.time <- real.time / total.time

  # Reverse each space-separated stack and prefix a common "calls" root so
  # the result can be used as a "/"-separated path
  frames <- lapply(strsplit(stacks, split = " ", fixed = TRUE), rev)
  paths <- vapply(
    frames,
    function(x) paste(c("calls", x), collapse = "/"),
    character(1L)
  )

  structure(
    data.frame(pathString = paths, real = real.time, percent = pct.time,
               stringsAsFactors = FALSE),
    total.time = total.time
  )
}
# To try the development build of data.tree instead of the installed one,
# uncomment and run these three lines first:
# detach("package:data.tree")
# remove.packages("data.tree")
# devtools::install_github("gluc/data.tree", ref = "dev")
library(data.tree)
packageVersion("data.tree")

# Turn the parsed profile (one row per call path) into a tree
tree <- FromDataFrameTable(parse_log(tmp))

# Keep each node's original `real` value so the aggregate can be checked later
tree$Set(realOrig = tree$Get("real"))

# For every non-leaf node, visited post-order: add the summed `real` of its
# children to the node's own `real`, treating a missing own value as 0
tree$Do(
  function(node) {
    own <- if (is.null(node$real)) 0 else node$real
    node$real <- own + Aggregate(node, "real", sum)
  },
  traversal = "post-order",
  filterFun = isNotLeaf
)

print(tree, "realOrig", "real")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
levelName realOrig real | |
1 calls NA 14.66 | |
2 °--source NA 14.66 | |
3 °--withVisible NA 14.66 | |
4 °--eval NA 14.66 | |
5 °--eval NA 14.66 | |
6 ¦--f NA 7.18 | |
7 ¦ ¦--median 0.01 6.12 | |
8 ¦ ¦ °--median.default NA 6.11 | |
9 ¦ ¦ ¦--mean NA 5.05 | |
10 ¦ ¦ ¦ °--sort NA 5.05 | |
11 ¦ ¦ ¦ °--sort.default NA 5.05 | |
12 ¦ ¦ ¦ °--sort.int 4.37 5.05 | |
13 ¦ ¦ ¦ ¦--is.na 0.20 0.40 | |
14 ¦ ¦ ¦ °--any 0.14 0.28 | |
15 ¦ ¦ ¦--is.na 0.43 0.86 | |
16 ¦ ¦ °--any 0.10 0.20 | |
17 ¦ ¦--sd NA 0.62 | |
18 ¦ ¦ °--var 0.60 0.62 | |
19 ¦ ¦ °--is.data.frame 0.01 0.02 | |
20 ¦ °--mean NA 0.44 | |
21 ¦ °--mean.default 0.22 0.44 | |
22 °--f2 NA 7.48 | |
23 ¦--median.default NA 6.00 | |
24 ¦ ¦--mean NA 5.12 | |
25 ¦ ¦ °--sort NA 5.12 | |
26 ¦ ¦ °--sort.default NA 5.12 | |
27 ¦ ¦ °--sort.int 4.34 5.12 | |
28 ¦ ¦ ¦--is.na 0.29 0.58 | |
29 ¦ ¦ °--any 0.10 0.20 | |
30 ¦ ¦--is.na 0.32 0.64 | |
31 ¦ °--any 0.12 0.24 | |
32 ¦--- 0.10 0.20 | |
33 ¦--^ 0.28 0.56 | |
34 °--sum 0.36 0.72 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment