Skip to content

Instantly share code, notes, and snippets.

@muschellij2
Last active December 15, 2016 20:50
Show Gist options
  • Save muschellij2/c825923a131a730f1030970e368e9c4e to your computer and use it in GitHub Desktop.
Save muschellij2/c825923a131a730f1030970e368e9c4e to your computer and use it in GitHub Desktop.
library(data.table)
n = 2 * 10^9
p = runif(n)
big_bh_adjust = function(p) {
n_over_k = NULL
if (any(is.na(p))) {
stop("p-values can't have NA in them for this function")
}
# Get the number of p-values (m)
m = length(p)
# get sequence of k values
ks = seq(m)
# make a data.table - for faster sorting
DT = data.table(
p = p,
o = ks, key = "p")
# remove p for memory consumption
rm(list = c("p")); gc()
# sort based on p - increasing
setorder(DT, p)
# m/k * p <= alpha is what we're looking for
DT$n_over_k = m / ks
# remove stuff we don't need - memory
rm(list = c("ks", "m")); gc()
# generate new column of adjusted p-values
DT[ , padj := pmin(n_over_k * p, 1)]
# resort them in the order they were before
setorder(DT, o)
# get a vector back
padj = DT$padj
# remove data.table
rm(list = "DT"); gc()
# return adjusted p-values
return(padj)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment