Skip to content

Instantly share code, notes, and snippets.

@pcantalupo
Last active June 13, 2021 15:55
Show Gist options
  • Save pcantalupo/08a4f9e72788ecb58e33f34879acc6d2 to your computer and use it in GitHub Desktop.
Save pcantalupo/08a4f9e72788ecb58e33f34879acc6d2 to your computer and use it in GitHub Desktop.
mapIds gene annotation symbol ensembl
library(org.Hs.eg.db)
# Annotation columns that org.Hs.eg.db can report; the outer parentheses make
# R print the value as well as assign it.
(fields <- columns(org.Hs.eg.db))
symbols <- c("MYC", "CCNE1", "TP53")

# Pull every available annotation for MYC (the first symbol), one column per
# lookup; the whole sweep takes 1 or 2 minutes to run.
# mapIds() is documented in the AnnotationDbi manual:
# https://bioconductor.org/packages/devel/bioc/manuals/AnnotationDbi/man/AnnotationDbi.pdf
geneinfo <- setNames(
  lapply(fields, function(column_name) {
    message(column_name) # progress: name of the column being queried
    mapIds(
      org.Hs.eg.db,
      keys = symbols[1],
      keytype = "SYMBOL",
      column = column_name,
      multiVals = "list"
    )
  }),
  fields
)
geneinfo # wow!
geneinfo$ENSEMBL[[1]] # the ENSEMBL ID for MYC
geneinfo$REFSEQ[[1]] # the RefSeq accessions for MYC
#' Convert human gene symbols to Ensembl gene IDs
#'
#' Looks each symbol up in the `org.Hs.eg.db` annotation database and returns
#' the first Ensembl gene ID mapped to it (`multiVals = "first"`).
#'
#' Symbols are screened against the database's `SYMBOL` keys first, and only
#' the recognised ones are handed to `mapIds()`; unrecognised symbols (and
#' `NA`) yield `NA` instead of being passed to the lookup.
#'
#' The database is accessed through its namespace, so calling this function
#' does not attach `org.Hs.eg.db` to the search path.
#'
#' @param symbol Character vector of gene symbols. A single symbol works as
#'   before; longer vectors (and zero-length input) are also accepted.
#' @return An unnamed character vector the same length as `symbol`, holding
#'   the Ensembl gene ID for each symbol, or `NA_character_` where the symbol
#'   is unknown or has no Ensembl mapping.
#' @examples
#' symbol2ensemblid("TP53")
#' symbol2ensemblid(c("GNAS", "not_a_gene", "MITF"))
symbol2ensemblid <- function(symbol = "TP53") {
  # Fail loudly when the annotation package is missing; require() would only
  # return FALSE and let the lookup below fail with a less helpful error.
  if (!requireNamespace("org.Hs.eg.db", quietly = TRUE)) {
    stop(
      "Package 'org.Hs.eg.db' is required but is not installed.",
      call. = FALSE
    )
  }
  db <- org.Hs.eg.db::org.Hs.eg.db

  symbol <- as.character(symbol)
  # Type-stable result: start with all-missing and fill in what is found.
  ensembl <- rep(NA_character_, length(symbol))

  # %in% never returns NA, so NA symbols are simply treated as unknown.
  known <- symbol %in% AnnotationDbi::keys(db, keytype = "SYMBOL")
  if (any(known)) {
    mapped <- AnnotationDbi::mapIds(
      db,
      keys = unique(symbol[known]),
      keytype = "SYMBOL",
      column = "ENSEMBL",
      multiVals = "first"
    )
    # Index by name so repeated input symbols each receive their ID.
    ensembl[known] <- as.vector(mapped[symbol[known]])
  }

  ensembl
}
# Example: translate a panel of gene symbols into Ensembl gene IDs.
genes <- c(
  "GNAS", "ADCY1", "ADCY2",
  "PRKCA", "CREB1", "MITF"
)
genes

# Single-symbol lookup.
symbol2ensemblid("GNAS")

# One lookup per symbol; sapply() names each result after its symbol.
myensemblids <- sapply(X = genes, FUN = symbol2ensemblid)
as.data.frame(myensemblids)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment