Created
April 29, 2016 17:16
-
-
Save lpantano/c344593ff933f23648e2dde6af8be87b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require(TxDb.Hsapiens.UCSC.hg19.knownGene) | |
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene | |
library(ChIPseeker) | |
library(GenomicRanges) | |
# Hack chipseeker function to add annotaiton | |
getGenomicAnnoStat <- function(peakAnno) { | |
if ( class(peakAnno) == "GRanges" ) | |
peakAnno <- as.data.frame(peakAnno) | |
anno <- peakAnno$annotation | |
## anno <- sub(" \\(.+", "", anno) | |
anno[grep("exon 1 of", anno)] <- "1st Exon" | |
anno[grep("intron 1 of", anno)] <- "1st Intron" | |
anno[grep("Exon \\(", anno)] <- "Other Exon" | |
anno[grep("Intron \\(", anno)] <- "Other Intron" | |
anno[grep("Downstream", anno)] <- "Downstream (<=3kb)" | |
anno[grep("Promoter", anno)] <- "Promoter" | |
## count frequency | |
anno.table <- table(anno) | |
## calculate ratio | |
anno.ratio <- anno.table/ sum(anno.table) * 100 | |
anno.df <- as.data.frame(anno.ratio) | |
colnames(anno.df) <- c("Feature", "Frequency") | |
anno.df$Numbers <- anno.table | |
lvs <- c( | |
"Promoter", | |
"CpG", #here new class | |
"5' UTR", | |
"3' UTR", | |
"1st Exon", | |
"Other Exon", | |
"1st Intron", | |
"Other Intron", | |
"Downstream (<=3kb)", | |
"Distal Intergenic", | |
"Others") | |
anno.df$Feature <- factor(anno.df$Feature, levels=lvs[lvs %in% anno.df$Feature]) | |
anno.df <- anno.df[order(anno.df$Feature),] | |
return(anno.df) | |
} | |
# get CpG | |
cpg = readr::read_tsv("http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/cpgIslandExt.txt.gz", col_names = FALSE,progress = FALSE) | |
cpg_r = makeGRangesFromDataFrame(cpg, | |
keep.extra.columns = FALSE, | |
ignore.strand = TRUE, end.field = "X4", | |
start.field = "X3", seqnames.field = "X2") | |
# here we create the annotation | |
# df is a data frame with chrom and positions columns and anything else | |
df_r = makeGRangesFromDataFrame(df, | |
keep.extra.columns = TRUE, | |
ignore.strand = TRUE, end.field = "end", | |
start.field = "start", seqnames.field = "chr") | |
an = annotatePeak(df_r, TxDb = txdb, tssRegion = c(-1000,1000), | |
annoDb = "org.Hs.eg.db", verbose = FALSE) | |
idx = findOverlaps(an@anno, cpg_r) | |
an@detailGenomicAnnotation[,"cpg"] = FALSE | |
an@detailGenomicAnnotation[queryHits(idx),"cpg"] = TRUE | |
an@anno$annotation[queryHits(idx)] = "CpG" | |
.df = getGenomicAnnoStat(an@anno) | |
slot(an, "annoStat") <- .df | |
plotAnnoBar(an) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment