Forked from lpantano/hack chipseeker annotation with CpGI
Created
May 2, 2016 14:01
-
-
Save GuangchuangYu/2a0f6eace332b4f002427971dc0991ed to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require(TxDb.Hsapiens.UCSC.hg19.knownGene) | |
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene | |
library(ChIPseeker) | |
library(GenomicRanges) | |
# Hack chipseeker function to add annotaiton | |
getGenomicAnnoStat <- function(peakAnno) { | |
if ( class(peakAnno) == "GRanges" ) | |
peakAnno <- as.data.frame(peakAnno) | |
anno <- peakAnno$annotation | |
## anno <- sub(" \\(.+", "", anno) | |
anno[grep("exon 1 of", anno)] <- "1st Exon" | |
anno[grep("intron 1 of", anno)] <- "1st Intron" | |
anno[grep("Exon \\(", anno)] <- "Other Exon" | |
anno[grep("Intron \\(", anno)] <- "Other Intron" | |
anno[grep("Downstream", anno)] <- "Downstream (<=3kb)" | |
anno[grep("Promoter", anno)] <- "Promoter" | |
## count frequency | |
anno.table <- table(anno) | |
## calculate ratio | |
anno.ratio <- anno.table/ sum(anno.table) * 100 | |
anno.df <- as.data.frame(anno.ratio) | |
colnames(anno.df) <- c("Feature", "Frequency") | |
anno.df$Numbers <- anno.table | |
lvs <- c( | |
"Promoter", | |
"CpG", #here new class | |
"5' UTR", | |
"3' UTR", | |
"1st Exon", | |
"Other Exon", | |
"1st Intron", | |
"Other Intron", | |
"Downstream (<=3kb)", | |
"Distal Intergenic", | |
"Others") | |
anno.df$Feature <- factor(anno.df$Feature, levels=lvs[lvs %in% anno.df$Feature]) | |
anno.df <- anno.df[order(anno.df$Feature),] | |
return(anno.df) | |
} | |
# get CpG | |
cpg = readr::read_tsv("http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/cpgIslandExt.txt.gz", col_names = FALSE,progress = FALSE) | |
cpg_r = makeGRangesFromDataFrame(cpg, | |
keep.extra.columns = FALSE, | |
ignore.strand = TRUE, end.field = "X4", | |
start.field = "X3", seqnames.field = "X2") | |
# here we create the annotation | |
# df is a data frame with chrom and positions columns and anything else | |
df_r = makeGRangesFromDataFrame(df, | |
keep.extra.columns = TRUE, | |
ignore.strand = TRUE, end.field = "end", | |
start.field = "start", seqnames.field = "chr") | |
an = annotatePeak(df_r, TxDb = txdb, tssRegion = c(-1000,1000), | |
annoDb = "org.Hs.eg.db", verbose = FALSE) | |
idx = findOverlaps(an@anno, cpg_r) | |
an@detailGenomicAnnotation[,"cpg"] = FALSE | |
an@detailGenomicAnnotation[queryHits(idx),"cpg"] = TRUE | |
an@anno$annotation[queryHits(idx)] = "CpG" | |
.df = getGenomicAnnoStat(an@anno) | |
slot(an, "annoStat") <- .df | |
plotAnnoBar(an) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment