Last active
June 22, 2018 13:31
-
-
Save lwaldron/63b403803e91b3a3ce72592fa6e85f79 to your computer and use it in GitHub Desktop.
Add ranges to RNA-seq and microarray SummarizedExperiments where rownames are gene symbols in a MultiAssayExperiment
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.cMAE <- function(mae, x, name="newelement"){ | |
el <- ExperimentList(tmp=x) | |
names(el)[1] <- name | |
c(mae, el) | |
} | |
.hasSymbols <- function(x){ | |
mean(c(FALSE, grepl("^[A-Z0-9]{1,6}|^C[0-9]orf[0-9]{1,4}", rownames(x))), na.rm=TRUE) > 0.9 | |
} | |
.isSummarizedExperiment <- function(x){ | |
is(x, "SummarizedExperiment") & !is(x, "RangedSummarizedExperiment") | |
} | |
.makeListRanges <- function(x, gn){ | |
res <- list(unmapped=x[!x %in% names(gn)]) | |
x <- x[x %in% names(gn)] | |
gn <- gn[match(x, names(gn))] | |
res$mapped <- gn | |
return(res) | |
} | |
.getRangesOfSYMBOLS <- function(x){ | |
suppressPackageStartupMessages({ | |
library(TxDb.Hsapiens.UCSC.hg19.knownGene) | |
library(org.Hs.eg.db) | |
library(GenomeInfoDb) | |
}) | |
entrez <- mapIds(org.Hs.eg.db, x, keytype = "SYMBOL", column = "ENTREZID") | |
gn <- genes(TxDb.Hsapiens.UCSC.hg19.knownGene) | |
names(gn) <- mapIds(org.Hs.eg.db, names(gn), keytype="ENTREZID", column = "SYMBOL") | |
gn <- keepStandardChromosomes(granges(gn), pruning.mode="coarse") | |
seqlevelsStyle(gn) <- "NCBI" | |
return( .makeListRanges(x, gn) ) | |
## returns a list of length 2: "unmapped" is a character vector providing | |
## unmapped symbols, "mapped" is a GRanges object with ranges of mapped symbols. | |
} | |
symbolsToRanges <- function(obj, removeOriginals = TRUE){ | |
## obj: a MultiAssayExperiment | |
## any SummarizedExperiment elements with gene symbols as rows will have ranges added | |
## symbols where ranges can't be found are kept as another SummarizedExperiment | |
can.fix <- vapply(experiments(obj), function(y){ | |
.hasSymbols(y) & .isSummarizedExperiment(y) | |
}, TRUE) | |
## | |
for (i in which(can.fix)){ | |
lookup <- .getRangesOfSYMBOLS(rownames(obj[[i]])) | |
rse <- obj[[i]][names(lookup$mapped), ] | |
rowRanges(rse) <- lookup$mapped | |
obj <- .cMAE(obj, rse, name=paste0(names(obj)[i], "_ranged")) | |
if(length(lookup$unmapped > 0)){ | |
se <- obj[[i]][lookup$unmapped, ] | |
obj <- .cMAE(obj, se, name=paste0(names(obj)[i], "_unranged")) | |
} | |
} | |
if(removeOriginals & any(can.fix)) | |
obj <- obj[, , -which(can.fix)] | |
return(obj) | |
## example | |
## symbolsToRanges(miniACC) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment