Last active
June 10, 2020 09:31
-
-
Save al2na/c368ae8de776b7d95aec2ee9f9bb9777 to your computer and use it in GitHub Desktop.
new annotation functions for genomation,
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(data.table) | |
library(genomation) | |
library(ggplot2) | |
#' Annotate given ranges with genomic features | |
#' | |
#' The function annotates a target GRangesList or GRanges object as overlapping | |
#' or not with | |
#' the elements of named GRangesList. This is useful to annotate your regions | |
#' of interest with genomic features with arbitrary categories such as repeat | |
#' classes or families, or output from genome segmentation alogorithms such | |
#' as chromHMM. | |
#' | |
#' | |
#' @param target GRanges or GRangesList object storing chromosome locations to be | |
#' annotated (e.g. chipseq peaks) | |
#' @param features a named GRangesList object containing GRanges objects different set | |
#' of features. The function calculates percent overlaps with and without | |
#' precendence at the same time. The order of objects in GRangesList | |
#' defines their precedence. If a range in \code{target} overlaps with | |
#' a more precedent range in an element of \code{features}, the other | |
#' overlaps from other less precedent elments will be discarded. This is | |
#' useful for getting piecharts where percentages should add up to 100. | |
#' | |
#' @param strand if set to TRUE, annotation features and target features will | |
#' be overlapped based on strand (def:FALSE) | |
#' @param intersect.chr logical value, whether to select only chromosomes | |
#' that are common to feature and target. FALSE by default | |
#' @return returns an \code{AnnotationByFeature} object or if target is | |
#' a GRangesList, a list of \code{AnnotationByFeature} objects. | |
#' | |
#' @export | |
#' | |
#' @examples | |
#' | |
#' @seealso | |
#' | |
#' target=cage;features=gene.parts | |
#' | |
#' @docType methods | |
#' @rdname annotateWithFeatures-methods | |
setGeneric("annotateWithFeatures", | |
function(target,features,strand=FALSE, intersect.chr=FALSE) | |
standardGeneric("annotateWithFeatures") ) | |
#' @aliases annotateWithFeatures,GRanges,GRangesList-method | |
#' @rdname annotateWithFeatures-methods | |
setMethod("annotateWithFeatures", | |
signature(target = "GRanges", feature = "GRangesList"), | |
function(target, features, strand, intersect.chr){ | |
if(intersect.chr){ | |
message('intersecting chromosomes...') | |
chrs = intersect(unique(as.character(seqnames(target))), | |
unique(unlist( | |
lapply(feature, | |
function(x)as.character(seqnames(x)))))) | |
if(length(chrs) == 0) | |
stop('target and feature do not have intersecting chromosomes') | |
target=target[seqnames(target) %in% chrs] | |
feature = lapply(feature, function(x)x[seqnames(x) %in% chrs]) | |
} | |
x=unlist(features) | |
# get overlap matrix | |
overlaps <- as.matrix(GenomicRanges::findOverlaps(target ,x, | |
ignore.strand= !strand )) | |
# get a datable from overlaps and the annotation category names | |
dt=data.table(cbind(overlaps,annot=names(x)[overlaps[,2]])) | |
# order dt by order of feature gategories in features GRangesList | |
dt=dt[order(match(dt$annot,names(features))),] | |
# get the number of target ranges overlapping with feature categories | |
# and the number of features with target ranges | |
annot=queryHits=subjectHits=NULL | |
hits=dt[, list(targetHits=length(unique(queryHits )), | |
featureHits=length(unique(subjectHits ))) ,by=annot] | |
# get the number of target ranges overlapping with feature categories | |
# and the number of features with target ranges | |
# with precendence | |
p.hits=dt[!duplicated(dt$queryHits), list(targetHits=length(unique(queryHits )), | |
featureHits=length(unique(subjectHits ))) ,by=annot] | |
# get a matrix of overlaps for targets | |
# this matrix shows overlap status for each element in target | |
dmemb=dcast(dt,queryHits~annot, | |
fun.aggregate=function(x) ifelse(length(x)>0,1,0), | |
value.var="annot") | |
# order dmemb by order of feature categories in features GRangesList | |
col.order=c(colnames(dmemb)[1], | |
colnames(dmemb)[-1][order(match(colnames(dmemb)[-1], | |
names(features)))]) | |
dmemb=dmemb[,col.order,with=FALSE] | |
# get an empty matrix of zeros, not all target ranges will be overlapping | |
# with an annotation | |
memb=matrix(0,nrow=length(target),ncol=ncol(dmemb)-1) | |
# fill in the matrix of 0s with values from the overlap matrix | |
memb[as.numeric(dmemb$queryHits),]=as.matrix(dmemb[,2:ncol(dmemb),with=FALSE]) | |
colnames(memb)=colnames(dmemb)[-1] | |
# calculate number of overlaps etc before hand | |
# populate stats for features that didn't overlap with the target | |
no.of.OlapFeat=num.annotation=num.precedence=rep(0,length(features)) | |
names(no.of.OlapFeat)=names(num.precedence)=names(num.annotation)=names(features) | |
no.of.OlapFeat[match( hits$annot,names(features))]=hits$featureHits | |
num.annotation[match( hits$annot,names(features))]=hits$targetHits | |
num.precedence[match( p.hits$annot,names(features))]=p.hits$targetHits | |
# output the object | |
new("AnnotationByFeature", | |
members = memb, | |
annotation = 100*num.annotation/length(target), | |
precedence = 100*num.precedence/length(target), | |
num.annotation = num.annotation, | |
num.precedence = num.precedence, | |
no.of.OlapFeat = no.of.OlapFeat , | |
perc.of.OlapFeat= 100*no.of.OlapFeat/sapply(features,length) | |
) | |
}) | |
#' @aliases annotateWithFeatures,GRangesList,GRangesList-method | |
#' @rdname annotateWithFeatures-methods | |
setMethod("annotateWithFeatures", | |
signature(target = "GRangesList", features= "GRangesList"), | |
function(target, features, strand, intersect.chr){ | |
l = list() | |
if(is.null(names(target))) | |
names(target) = 1:length(target) | |
for(i in 1:length(target)){ | |
name = names(target)[i] | |
message(paste('Working on:',name)) | |
x = target[[name]] | |
l[[name]] = annotateWithFeatures(x, features, strand, intersect.chr) | |
} | |
return(l) | |
}) | |
# ---------------------------------------------------------------------------- # | |
#' Plots the percentage of overlapping ranges with genomic features in a heatmap | |
#' | |
#' This function plots a heatmap for percentage of overlapping ranges | |
#' with provided genomic features. The input object is a list of | |
#' \code{AnnotationByFeature} objects, which contains necessary information | |
#' about overlap statistics to make the plot. | |
#' | |
#' @param l a \code{list} of \code{AnnotationByFeature} objects | |
#' @param cluster TRUE/FALSE. If TRUE the heatmap is going to be clustered | |
#' using hierarchical clustering | |
#' @param col a vector of two colors that will be used for interpolation. | |
#' The first color is the lowest one, the second is the highest one | |
#' @param plot If FALSE, does not plot the heatmap just returns the matrix | |
#' used to make the heatmap | |
#' | |
#' | |
#' @return returns the matrix used to make the heatmap when plot FALSE, | |
#' otherwise returns ggplot2 object which can be modified further. | |
#' | |
#' | |
#' @examples | |
#' | |
#' library(GenomicRanges) | |
#' data(cage) | |
#' data(cpgi) | |
#' | |
#' cage$tpm = NULL | |
#' gl = GRangesList(cage=cage, cpgi=cpgi) | |
#' | |
#' bed.file = system.file("extdata/chr21.refseq.hg19.bed", package = "genomation") | |
#' gene.parts = readTranscriptFeatures(bed.file) | |
#' annot = annotateWithFeatures(gl, gene.parts, intersect.chr=TRUE) | |
#' | |
#' | |
#' \donttest{ | |
#' heatmapTargetAnnotation(annot) | |
#' } | |
#' | |
#' @export | |
#' @docType methods | |
#' @rdname heatmapTargetAnnotation-methods | |
#' @docType methods | |
heatmapTargetAnnotation<-function(l, cluster=FALSE, col=c("white","blue"), | |
precedence=FALSE,plot=TRUE){ | |
if(!all(unlist(lapply(l, class)) == "AnnotationByFeature")) | |
stop("All elements of the input list need to be AnnotationByFeature-class") | |
if(precedence){ | |
d = data.frame(do.call(rbind, lapply(l, function(x)x@precedence))) | |
}else{ | |
d = data.frame(do.call(rbind, lapply(l, function(x)x@annotation))) | |
} | |
if(!plot){ | |
return(as.matrix(d)) | |
} | |
d = data.frame('SampleName'=rownames(d), d) | |
ind = 1:nrow(d) | |
if(cluster == TRUE){ | |
h = hclust(dist(d[,-1])) | |
ind = h$order | |
} | |
m = reshape2::melt(d, id.vars='SampleName') | |
colnames(m)[2] = 'Position' | |
p = ggplot(m, aes(x=Position, y=SampleName, fill=value, colour="white")) + | |
scale_fill_gradient(low =col[1], high = col[2]) + | |
theme( | |
axis.title.x = element_text(colour='white'), | |
axis.text.x = element_text(colour='black', face='bold'), | |
axis.text.y = element_text(colour='black'), | |
axis.title.y = element_text(colour='white', face='bold')) + | |
ylim(as.character(d$SampleName[ind])) | |
p = p + geom_tile(color='white') + | |
theme(axis.text.x = element_text(angle = 90, hjust = 1)) | |
p | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment