MichaelChirico · May 23, 2020 19:01
diff --git a/psum_benchmark.R b/psum_benchmark.R
 library(data.table)
 library(microbenchmark)

 get_reduce = function(FUN, ELEMENT) {
  function(..., na.rm=FALSE) {
    l = list(...)
    if (length(l) == 1L && is.list(l[[1L]])) l = l[[1L]]
    if (length(l) == 1L && (identical(FUN, `|`) || identical(FUN, `&`))) l[[1L]] = as.logical(l[[1L]])
    if (na.rm) {
      # TODO: nafill to support complex input, then use nafill here
      out = Reduce(FUN, lapply(l, function(x) {x[is.na(x)] = as(ELEMENT, typeof(x)); x}))
      is.na(out) = Reduce(`&`, lapply(l, is.na))
      return(out)
    }
    Reduce(FUN, l)
  }
 }
 reduce_psum = get_reduce(`+`, 0)
 reduce_pprod = get_reduce(`*`, 1)
 reduce_pany = get_reduce(`|`, 0)
 reduce_pall = get_reduce(`&`, 1)

 # hypers: N, J, % NA, input type, pure/impure input type
 out_types = c('logical', 'integer', 'numeric', 'complex')
 hypers = CJ(
  N = c(1e2, 1e5, 5e7), # rows
  J = c(1L, 5L, 20L),   # columns
  p = c(0, .3),         # % missing
  o = out_types         # output type
 )
 hypers = hypers[J > 1L | !(o != 'complex' | p == 0)] # redundant benchmarks
 hypers[J == 20L & N == 5e7, N := 1e7] # exhausting memory
 hypers[ , rep := fcase(N == 100, 1e4, N == 1e5, 1e3, N > 1e6, 1e2)]


 # from bench::mark()$times, get the scale, & scale
 scale_times = function(t) {
  max_time = max(sapply(t/1e9, max)) ## microbenchmark --> baseline times in ns
  scales = c(-Inf, -9, -6, -3, 0, 3, 6, 9, Inf)
  scale_abbr = c('ps', 'ns', 'µs', 'ms', 's', 'ks', 'Ms', 'Gs')
  scale_idx = findInterval(log10(max_time), scales)
  scale = scale_abbr[scale_idx]
  list(multiple = 10^(-scales[scale_idx]), label = scale)
 }

 timing_file = 'psum_timings.csv'

 pdf('psum_benchmarks.pdf')
 options(scipen = 5L)
 par(mfrow = c(2L, 2L), mar = c(0, 0, 0, 0), oma = c(5.1, 4.1, 4.1, 2.1))
 for (ii in seq_len(nrow(hypers))) {
  with(hypers[ii], {
    cat(sprintf('\rParameter set %d\tN=%d|J=%d|p=%.1f|o=%s        ', ii, N, J, p, o))
  })
  ll = with(hypers[ii], {
    lapply(1:J, function(j) {
      val = switch(o,
        logical = runif(N) > .5,
        integer = sample(-2:2, N, TRUE),
        numeric = rnorm(N),
        complex = complex(real = rnorm(N), imaginary = rnorm(N))
      )
      if (p > 0) is.na(val) <- sample(N, p*N)
      val
    })
  })
  times = rbindlist(lapply(
    setNames(nm = c('psum', 'pprod', 'pany', 'pall')), function(FUN) {
      target_fun = get(FUN, mode = 'function')
      reduce_fun = get(paste0('reduce_', FUN), mode = 'function')

      microbenchmark(
        times = hypers[ii, rep],
        reduce = eval(call('reduce_fun', ll)),
        target = eval(call('target_fun', ll))
      )
    }),
    idcol = 'test_fun'
  )
  fwrite(cbind(hypers[ii, !'rep'], times), timing_file, append = file.exists(timing_file))
  scale = scale_times(times$time)
  times[ , time := time * scale$multiple/1e9]
  # doesn't seem to line up with IQR

  xlim = times[ , quantile(time, c(1,3)/4), keyby = .(test_fun, expr)][ , c(.95, 1.05)*range(V1)]
  xlab = sprintf('Execution time (%s)', scale$label)

  times[ , by = test_fun, {
    ratio = .SD[ , median(time), keyby = expr][ , V1[1L]/V1[2L]]
    slower = ratio <= 1
    boxplot(
      time ~ expr, horizontal = TRUE, notch = TRUE, outline = FALSE,
      log = 'x', ylim = xlim, las = 1L, col = c('red', 'blue'),
      xaxt = if (.GRP > 2L) 's' else 'n', xpd = TRUE,
      yaxt = if (.GRP %% 2L == 1L) 's' else 'n',
      xlab = '', ylab = '', main = ''
    )
    main = sprintf(
      '%s: %.2fx %s',
      .BY$test_fun,
      if (slower) 1/ratio else ratio,
      if (slower) 'slower' else 'faster'
    )
    if (.GRP > 2L) mtext(side = 1L, xlab, line = 3)
    mtext(side = 3L, line = -1, main, col = if (slower) 'red' else 'black')
  }]
  param_label = with(hypers[ii], {
    sprintf('N=%d, J=%d, %%NA=%.0f, output=%s', N, J, 100*p, o)
  })
  title(outer = TRUE, param_label)
 }
 dev.off()
	library(data.table)
	library(microbenchmark)

	get_reduce = function(FUN, ELEMENT) {
	function(..., na.rm=FALSE) {
	l = list(...)
	if (length(l) == 1L && is.list(l[[1L]])) l = l[[1L]]
	if (length(l) == 1L && (identical(FUN, `\|`) \|\| identical(FUN, `&`))) l[[1L]] = as.logical(l[[1L]])
	if (na.rm) {
	# TODO: nafill to support complex input, then use nafill here
	out = Reduce(FUN, lapply(l, function(x) {x[is.na(x)] = as(ELEMENT, typeof(x)); x}))
	is.na(out) = Reduce(`&`, lapply(l, is.na))
	return(out)
	}
	Reduce(FUN, l)
	}
	}
	reduce_psum = get_reduce(`+`, 0)
	reduce_pprod = get_reduce(`*`, 1)
	reduce_pany = get_reduce(`\|`, 0)
	reduce_pall = get_reduce(`&`, 1)

	# hypers: N, J, % NA, input type, pure/impure input type
	out_types = c('logical', 'integer', 'numeric', 'complex')
	hypers = CJ(
	N = c(1e2, 1e5, 5e7), # rows
	J = c(1L, 5L, 20L), # columns
	p = c(0, .3), # % missing
	o = out_types # output type
	)
	hypers = hypers[J > 1L \| !(o != 'complex' \| p == 0)] # redundant benchmarks
	hypers[J == 20L & N == 5e7, N := 1e7] # exhausting memory
	hypers[ , rep := fcase(N == 100, 1e4, N == 1e5, 1e3, N > 1e6, 1e2)]


	# from bench::mark()$times, get the scale, & scale
	scale_times = function(t) {
	max_time = max(sapply(t/1e9, max)) ## microbenchmark --> baseline times in ns
	scales = c(-Inf, -9, -6, -3, 0, 3, 6, 9, Inf)
	scale_abbr = c('ps', 'ns', 'µs', 'ms', 's', 'ks', 'Ms', 'Gs')
	scale_idx = findInterval(log10(max_time), scales)
	scale = scale_abbr[scale_idx]
	list(multiple = 10^(-scales[scale_idx]), label = scale)
	}

	timing_file = 'psum_timings.csv'

	pdf('psum_benchmarks.pdf')
	options(scipen = 5L)
	par(mfrow = c(2L, 2L), mar = c(0, 0, 0, 0), oma = c(5.1, 4.1, 4.1, 2.1))
	for (ii in seq_len(nrow(hypers))) {
	with(hypers[ii], {
	cat(sprintf('\rParameter set %d\tN=%d\|J=%d\|p=%.1f\|o=%s ', ii, N, J, p, o))
	})
	ll = with(hypers[ii], {
	lapply(1:J, function(j) {
	val = switch(o,
	logical = runif(N) > .5,
	integer = sample(-2:2, N, TRUE),
	numeric = rnorm(N),
	complex = complex(real = rnorm(N), imaginary = rnorm(N))
	)
	if (p > 0) is.na(val) <- sample(N, p*N)
	val
	})
	})
	times = rbindlist(lapply(
	setNames(nm = c('psum', 'pprod', 'pany', 'pall')), function(FUN) {
	target_fun = get(FUN, mode = 'function')
	reduce_fun = get(paste0('reduce_', FUN), mode = 'function')

	microbenchmark(
	times = hypers[ii, rep],
	reduce = eval(call('reduce_fun', ll)),
	target = eval(call('target_fun', ll))
	)
	}),
	idcol = 'test_fun'
	)
	fwrite(cbind(hypers[ii, !'rep'], times), timing_file, append = file.exists(timing_file))
	scale = scale_times(times$time)
	times[ , time := time * scale$multiple/1e9]
	# doesn't seem to line up with IQR

	xlim = times[ , quantile(time, c(1,3)/4), keyby = .(test_fun, expr)][ , c(.95, 1.05)*range(V1)]
	xlab = sprintf('Execution time (%s)', scale$label)

	times[ , by = test_fun, {
	ratio = .SD[ , median(time), keyby = expr][ , V1[1L]/V1[2L]]
	slower = ratio <= 1
	boxplot(
	time ~ expr, horizontal = TRUE, notch = TRUE, outline = FALSE,
	log = 'x', ylim = xlim, las = 1L, col = c('red', 'blue'),
	xaxt = if (.GRP > 2L) 's' else 'n', xpd = TRUE,
	yaxt = if (.GRP %% 2L == 1L) 's' else 'n',
	xlab = '', ylab = '', main = ''
	)
	main = sprintf(
	'%s: %.2fx %s',
	.BY$test_fun,
	if (slower) 1/ratio else ratio,
	if (slower) 'slower' else 'faster'
	)
	if (.GRP > 2L) mtext(side = 1L, xlab, line = 3)
	mtext(side = 3L, line = -1, main, col = if (slower) 'red' else 'black')
	}]
	param_label = with(hypers[ii], {
	sprintf('N=%d, J=%d, %%NA=%.0f, output=%s', N, J, 100*p, o)
	})
	title(outer = TRUE, param_label)
	}
	dev.off()
No results found