muschellij2 · December 15, 2016 20:50
diff --git a/fast_fdr.R b/fast_fdr.R
 library(data.table)
 n = 2 * 10^9
 p = runif(n)
 big_bh_adjust = function(p) {
  n_over_k = NULL
  if (any(is.na(p))) {
    stop("p-values can't have NA in them for this function")
  }
  # Get the number of p-values (m)
  m = length(p)
  # get sequence of k values
  ks = seq(m)
  # make a data.table - for faster sorting
  DT = data.table(
    p = p, 
    o = ks, key = "p")
  # remove p for memory consumption
  rm(list = c("p")); gc()
  # sort based on p - increasing
  setorder(DT, p)
  # m/k * p <= alpha is what we're looking for
  DT$n_over_k = m / ks
  # remove stuff we don't need - memory
  rm(list = c("ks", "m")); gc()
  # generate new column of adjusted p-values
  DT[ , padj := pmin(n_over_k * p, 1)]
  # resort them in the order they were before
  setorder(DT, o)
  # get a vector back
  padj = DT$padj
  # remove data.table
  rm(list = "DT"); gc()
  # return adjusted p-values
  return(padj)
 }
	library(data.table)
	n = 2 * 10^9
	p = runif(n)
	big_bh_adjust = function(p) {
	n_over_k = NULL
	if (any(is.na(p))) {
	stop("p-values can't have NA in them for this function")
	}
	# Get the number of p-values (m)
	m = length(p)
	# get sequence of k values
	ks = seq(m)
	# make a data.table - for faster sorting
	DT = data.table(
	p = p,
	o = ks, key = "p")
	# remove p for memory consumption
	rm(list = c("p")); gc()
	# sort based on p - increasing
	setorder(DT, p)
	# m/k * p <= alpha is what we're looking for
	DT$n_over_k = m / ks
	# remove stuff we don't need - memory
	rm(list = c("ks", "m")); gc()
	# generate new column of adjusted p-values
	DT[ , padj := pmin(n_over_k * p, 1)]
	# resort them in the order they were before
	setorder(DT, o)
	# get a vector back
	padj = DT$padj
	# remove data.table
	rm(list = "DT"); gc()
	# return adjusted p-values
	return(padj)
	}