Last active
September 25, 2018 00:06
-
-
Save oganm/72c12f6bd358d3a9b5796f15b3ea0c7a to your computer and use it in GitHub Desktop.
gene set enrichment for marker genes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(magrittr) | |
library(dplyr) | |
library(homologene) | |
library(gemmaAPI) # github.com/pavlidisLab/gemmaAAPI.R | |
library(markerGeneProfile) # github.com/pavlidisLab/markerGeneProfile | |
data("mouseMarkerGenesCombined") | |
# get mouse homologues of the hitlist | |
hitlist = readxl::read_xlsx('gene of interest.xlsx',col_names = FALSE) %>% | |
unlist %>% human2mouse %$% mouseGene | |
# here I am defining the negative set as all human genes with mouse homologues. | |
# The correct way to do this would be to to use the genes that could have been | |
# selected from your platform and/or genes that were expressed | |
# as it stands this is a huge negative set with many entirely unrelated genes that | |
# probably could not be chosen. | |
data("homologeneData") | |
negativeSet = homologeneData %>% # get data included in the homologene package. | |
filter(Taxonomy %in% c('9606')) %$% | |
Gene.Symbol %>% | |
human2mouse %$% | |
mouseGene | |
# here I am subsetting the negative set by genes that could be selected for neuroexpresso. | |
# for cortex this limitation doesn't exist as RNA-seq data was used alongside | |
# microarray data | |
annotation = gemmaAPI::getAnnotation('GPL339') | |
negativeSet = intersect(negativeSet, annotation$GeneSymbols) | |
# here I am testing for enrichment for whole brain cell types. You may want to replace | |
# this with a region specific list | |
mouseMarkerGenesCombined$All %>% sapply(function(markerSet){ | |
# number of white balls drawn are genes from your hitlist which are markers | |
q = sum(hitlist %in% markerSet) | |
# number of "black balls" are the genes that are not markers of the cell type | |
n = sum(!negativeSet %in% x) | |
# number of "white balls" are genes that are markers of the cell type | |
m = sum(negativeSet %in% x) | |
# total number drawn is the size of your hitlist | |
k = length(hitlist) | |
phyper(q,m,n,k,lower.tail= FALSE) # a simple enrichment test | |
}) %>% | |
p.adjust(method = 'fdr') %>% # multiple testing correction | |
{.<0.05} %>% # significance threshold | |
which %>% names |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment