Last active
December 15, 2016 20:50
-
-
Save muschellij2/c825923a131a730f1030970e368e9c4e to your computer and use it in GitHub Desktop.
Fast FDR for BHoc (see https://en.wikipedia.org/wiki/False_discovery_rate)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(data.table) | |
n = 2 * 10^9 | |
p = runif(n) | |
big_bh_adjust = function(p) { | |
n_over_k = NULL | |
if (any(is.na(p))) { | |
stop("p-values can't have NA in them for this function") | |
} | |
# Get the number of p-values (m) | |
m = length(p) | |
# get sequence of k values | |
ks = seq(m) | |
# make a data.table - for faster sorting | |
DT = data.table( | |
p = p, | |
o = ks, key = "p") | |
# remove p for memory consumption | |
rm(list = c("p")); gc() | |
# sort based on p - increasing | |
setorder(DT, p) | |
# m/k * p <= alpha is what we're looking for | |
DT$n_over_k = m / ks | |
# remove stuff we don't need - memory | |
rm(list = c("ks", "m")); gc() | |
# generate new column of adjusted p-values | |
DT[ , padj := pmin(n_over_k * p, 1)] | |
# resort them in the order they were before | |
setorder(DT, o) | |
# get a vector back | |
padj = DT$padj | |
# remove data.table | |
rm(list = "DT"); gc() | |
# return adjusted p-values | |
return(padj) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment