Skip to content

Instantly share code, notes, and snippets.

@bhaskarvk
Last active August 25, 2016 21:18
Show Gist options
  • Save bhaskarvk/b288bbd6df2d4489e28dd1a914ae8a7e to your computer and use it in GitHub Desktop.
Save bhaskarvk/b288bbd6df2d4489e28dd1a914ae8a7e to your computer and use it in GitHub Desktop.
library(magrittr)
# You will also need
# stringr, dplyr, purrr, pryr, tidyr, magick
# Install magick with devtools::install_github('ropensci/magick'); the CRAN copy won't work.
# Shell code to generate some random images
# mkdir -p ./data/RandomNoise
# (cd ./data/RandomNoise && for i in `seq 512 64 4096`; do convert -size ${i}x${i} xc: +noise Random random-${i}_$i.png; done)
# Read and Prepare data ----
# Directory holding the generated noise images, and the number of timed
# passes to make over it.
d <- './data/RandomNoise'
num_iter <- 10 # Number of Iterations

# list.files() returns names relative to `d`, so the directory is prepended
# again wherever a usable path is needed.
files <- list.files(d)
if (length(files) == 0L) {
  # Fail here with a clear message rather than deep inside the read loop.
  stop("No files found in '", d, "'; generate the images first.",
       call. = FALSE)
}

# file.size() is vectorised, so no per-file mapping is required.
files.sizes <- file.size(file.path(d, files))

# One row per image: full path, bare file name and on-disk size in bytes.
files.df <- data.frame(
  path = file.path(d, files),
  name = files,
  size = files.sizes,
  stringsAsFactors = FALSE
)

# Order the files from smallest to largest on disk.
files.df <- dplyr::arrange(files.df, size)
# Utility Functions ----
#' In-memory size of an object produced by a reading function.
#'
#' For objects inheriting from `magick-image` the size of the first element
#' (`obj[[1]]`) is measured instead of the object itself.
#' NOTE(review): presumably the wrapper object is only a handle to the image
#' data, so its own size would be misleading; confirm against magick.
#'
#' @param obj Any R object.
#' @return The result of `pryr::object_size()` for the measured object.
getObjectSize <- function(obj) {
  # inherits() works for objects carrying several classes (e.g. a matrix is
  # c("matrix", "array")); comparing class(obj) with `==` would hand `if` a
  # condition of length > 1.
  if (inherits(obj, "magick-image")) {
    pryr::object_size(obj[[1]])
  } else {
    pryr::object_size(obj)
  }
}
#' Time one file read and record its memory footprint.
#'
#' @param file Path passed unchanged to `readFunc`.
#' @param readFunc Function of one argument that reads `file` and returns
#'   the loaded object.
#' @return A one-row data.frame with columns `reading_function` (the
#'   expression supplied as `readFunc`, as text), `User.Time`, `System.Time`,
#'   `Elapsed.Time` (from `system.time()`), `Object.Size` (from
#'   `getObjectSize()`) and `Mem.Increase` (change in `pryr::mem_used()`
#'   between just before the read and just after the object is sized).
getFileReadPerf <- function(file, readFunc) {
  # Label the run with the caller's expression. deparse() may split a long
  # expression over several strings; collapsing keeps the result at exactly
  # one row. Done before the measurement window opens so the label itself
  # does not count towards Mem.Increase.
  reading_function <- paste(deparse(substitute(readFunc)), collapse = " ")

  # Collect garbage first so earlier allocations do not skew the baseline.
  gc()
  mem_before <- pryr::mem_used()
  timing <- system.time(obj <- readFunc(file))
  obj_size <- getObjectSize(obj)
  mem_after <- pryr::mem_used()

  # Release the object before returning so that the next measurement starts
  # from a comparable state.
  rm(obj)
  gc()

  data.frame(
    reading_function = reading_function,
    User.Time = as.numeric(timing[["user.self"]]),
    System.Time = as.numeric(timing[["sys.self"]]),
    Elapsed.Time = as.numeric(timing[["elapsed"]]),
    Object.Size = as.numeric(obj_size),
    Mem.Increase = as.numeric(mem_after) - as.numeric(mem_before),
    stringsAsFactors = FALSE
  )
}
# calculate perf numbers ----
# Measure read performance for every file, repeating the complete pass over
# the file list `num_iter` times (all files for iteration 1, then all files
# for iteration 2, and so on).
# rep(times = ) tiles the paths into a plain character vector in that order;
# replicate() would return a matrix, a vector or a list depending on how many
# files there are.
magick_image_read.perf <- purrr::map(
  rep(files.df$path, times = num_iter),
  function(x) {
    getFileReadPerf(x, magick::image_read)
  }
)
# Combine the per-read results into a single data frame and add the
# iteration and file-size columns.
# Rows are ordered as whole passes over the file list, so file sizes are
# tiled `num_iter` times and each iteration number is repeated once per file.
# The iteration labels follow `num_iter` rather than a hard-coded 1:10.
magick_image_read.perf.df <- dplyr::bind_rows(magick_image_read.perf) %>%
  dplyr::mutate(
    File.Size = rep(files.df$size, times = num_iter),
    Iteration = rep(seq_len(num_iter), each = nrow(files.df))
  ) %>%
  dplyr::select(
    reading_function, Iteration,
    File.Size, Object.Size, Mem.Increase,
    User.Time, System.Time, Elapsed.Time
  )
# Mean of every metric per file size and reading function, taken over all
# iterations and ordered by file size.
# NOTE(review): summarise_each()/funs() are flagged as deprecated in newer
# dplyr releases; confirm against the installed version before upgrading.
magick_image_read.perf.mean.df <-
  dplyr::group_by(magick_image_read.perf.df, File.Size, reading_function) %>%
  dplyr::summarise_each(
    dplyr::funs(mean),
    Object.Size, Mem.Increase, User.Time, System.Time, Elapsed.Time
  ) %>%
  dplyr::ungroup() %>%
  dplyr::arrange(File.Size)
# Median of every metric per file size and reading function, taken over all
# iterations and ordered by file size.
# NOTE(review): summarise_each()/funs() are flagged as deprecated in newer
# dplyr releases; confirm against the installed version before upgrading.
magick_image_read.perf.median.df <-
  dplyr::group_by(magick_image_read.perf.df, File.Size, reading_function) %>%
  dplyr::summarise_each(
    dplyr::funs(median),
    Object.Size, Mem.Increase, User.Time, System.Time, Elapsed.Time
  ) %>%
  dplyr::ungroup() %>%
  dplyr::arrange(File.Size)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment