Last active
August 29, 2022 18:33
-
-
Save slowkow/1866791169bdee7e1661d0ee20d31391 to your computer and use it in GitHub Desktop.
Compute a hypergeometric p-value for a gene set of interest.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Try this with: | |
# - https://github.com/jefworks/genesets | |
# - https://github.com/slowkow/tftargets | |
#' Compute a hypergeometric p-value for your gene set of interest relative to | |
#' a universe of genes that you have defined. | |
#' | |
#' @param ids A vector with genes of interest. | |
#' @param universe A vector with all genes, including the genes of interest. | |
#' @param gene_sets A list of gene vectors. | |
#' @return A dataframe with one row for each gene set, ordered by p-value. | |
do_hyper <- function(ids, universe, gene_sets) { | |
retval <- as.data.frame(t(sapply(gene_sets, function(gene_set) { | |
x <- sum(ids %in% gene_set) | |
n <- length(ids) | |
X <- sum(universe %in% gene_set) | |
N <- length(universe) | |
if (x > 0 && X > 0) { | |
pval <- phyper(x - 1, X, N - X, n, lower.tail = FALSE) | |
retvec <- c( | |
"x" = x, "n" = n, "X" = X, "N" = N, | |
"enrichment" = (x / n) / (X / N), | |
"pval" = pval | |
) | |
} else { | |
retvec <- c( | |
"x" = x, "n" = n, "X" = X, "N" = N, | |
"enrichment" = 0, | |
"pval" = 1 | |
) | |
} | |
retvec | |
}))) | |
retval$Name <- names(gene_sets) | |
retval <- retval[order(retval$pval),] | |
retval$qval <- p.adjust(retval$pval, method = "fdr") | |
retval | |
} | |
#' Compute a p-value with Fisher's exact test for your gene set of interest. | |
#' | |
#' @param ids A vector with genes of interest. | |
#' @param universe A vector with all genes, including the genes of interest. | |
#' @param gene_sets A list of gene vectors. | |
#' @return A dataframe with one row for each gene set, ordered by p-value. | |
do_fisher <- function(ids, universe, gene_sets) { | |
retval <- as.data.frame(t(sapply(gene_sets, function(gene_set) { | |
x <- sum(ids %in% gene_set) | |
n <- length(ids) | |
X <- sum(universe %in% gene_set) | |
N <- length(universe) | |
if (x > 0 && X > 0) { | |
fish <- fisher.test( | |
x = matrix(c(x, n - x, X - x, N - X - (n - x)), 2), | |
alternative = "greater" | |
) | |
retvec <- c( | |
"x" = x, "n" = n, "X" = X, "N" = N, | |
"enrichment" = (x / n) / (X / N), | |
"orlow" = fish$conf.int[1], | |
"orhigh" = fish$conf.int[2], | |
"or" = unname(fish$estimate), | |
"pval" = fish$p.value | |
) | |
} else { | |
retvec <- c( | |
"x" = x, "n" = n, "X" = X, "N" = N, | |
"enrichment" = 1, | |
"orlow" = 1, | |
"orhigh" = 1, | |
"oddsratio" = 1, | |
"pval" = 1 | |
) | |
} | |
retvec | |
}))) | |
retval$Name <- names(gene_sets) | |
retval <- retval[order(retval$pval),] | |
retval$qval <- p.adjust(retval$pval, method = "fdr") | |
retval | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment