Created
September 12, 2012 17:21
-
-
Save aaronwolen/3708334 to your computer and use it in GitHub Desktop.
Generate data.frame of feature annotations using Bioconductor
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Generate data.frame of feature annotations | |
#' | |
#' Use bioconductor annotation packages to create a data.frame of feature/probe | |
#' annotations. | |
#' | |
#' @param chip character string identifying chip model (e.g., "illuminaHumanv2") | |
#' @param features optional character vector of chip features (i.e., probeset ids) | |
#' @param vars character vector of desired annotations. These must match objects | |
#' provided by the annotation package (e.g., "CHR") | |
#' @param duplicate.values how should duplicate values be handled? The default | |
#' approach is to "remove" them, leaving only the first value. Setting this to | |
#' "keep" will cause duplicates to be returned as a "|" delimited string. | |
#' | |
#' @examples | |
#' fdata.exp <- annotate_chip(chip = "illuminaHumanv2", | |
#' vars = c("symbol", "chr", "chrloc", "chrlocend")) | |
annotate_chip <- function(chip, features, vars, duplicate.values = "remove") { | |
duplicate.values <- match.arg(duplicate.values, c("remove", "keep")) | |
# Check for annotation package | |
pkg <- paste(sub("\\.db$", "", chip), "db", sep = ".") | |
if(suppressWarnings(!require(pkg, quietly = TRUE, character.only = TRUE))) { | |
stop(paste(pkg, "annotation package must be installed."), call. = FALSE) | |
} | |
# Available annotation data | |
available <- ls(paste("package", pkg, sep = ":")) | |
available <- available[!available %in% c(chip, pkg) & !grepl("_db", available)] | |
# Error message if vars is unset or invalid | |
var_error <- function(available) { | |
rds <- tools::Rd_db(pkg) | |
names(rds) <- sub("\\.Rd$", "", basename(names(rds))) | |
rds <- rds[names(rds) %in% available] | |
names(rds) <- sub(chip, "", names(rds)) | |
details <- lapply(rds, tools:::.Rd_get_metadata, "title") | |
details <- unlist(lapply(details, paste, collapse = "\n\t\t")) | |
cat(paste("\n", names(rds), "\n\t\t", details, "\n", sep = ""), "\n") | |
stop("The vars argument requires a vector containing one or more of the ", | |
"variables listed above.\n", call. = FALSE) | |
} | |
if(missing(vars)) { | |
var_error(available) | |
} | |
if(missing(features)) { | |
features <- keys(eval(parse(text = pkg))) | |
} | |
# Format requests | |
requests <- paste(chip, toupper(sub(chip, "", vars)), sep = "") | |
# Check what's available | |
unavailable <- setdiff(requests, available) | |
requests <- setdiff(requests, unavailable) | |
# Nice labels | |
annot.labels <- tolower(gsub(chip, "", requests)) | |
names(annot.labels) <- requests | |
if(length(requests) == 0) { | |
var_error(available) | |
} | |
if(length(unavailable) > 0) { | |
warning("The following variables are unavailable in ", pkg, | |
paste(":\n", unavailable, collapse = "\n"), call. = FALSE) | |
} | |
annots <- sapply(requests, simplify = FALSE, function(req) | |
mget(features, eval(parse(text = req)), ifnotfound = NA)) | |
# Remove duplicate entries for each probe-set | |
if(duplicate.values == "remove") { | |
annots <- lapply(annots, function(y) | |
lapply(y, function(z) z[1])) | |
} else { | |
annots <- lapply(annots, function(y) | |
lapply(y, function(z) paste(z, collapse = " | "))) | |
} | |
# Covnert to data.frame | |
fdata <- data.frame(do.call("cbind", lapply(annots, unlist)), | |
stringsAsFactors = F) | |
names(fdata) <- annot.labels[names(fdata)] | |
# Convert columns to proper type | |
for(c in names(fdata)) { | |
fdata[, c] <- type.convert(fdata[, c]) | |
} | |
# Extrapolate chromosome strand from location | |
if( "chrloc" %in% names(fdata)) { | |
if( any(fdata$chrloc < 0) ) { | |
fdata$chrstrand <- ifelse(fdata$chrloc > 0, "+", "-") | |
fdata$chrloc <- abs(fdata$chrloc) | |
} | |
} | |
return(fdata) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment