lpantano · April 29, 2016 17:16
diff --git a/hack chipseeker annotation with CpGI b/hack chipseeker annotation with CpGI
 require(TxDb.Hsapiens.UCSC.hg19.knownGene)
 txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
 library(ChIPseeker)
 library(GenomicRanges)
 # Hack chipseeker function to add annotaiton
 getGenomicAnnoStat <- function(peakAnno) {
    if ( class(peakAnno) == "GRanges" )
        peakAnno <- as.data.frame(peakAnno)
    anno <- peakAnno$annotation
    ## anno <- sub(" \\(.+", "", anno)

    anno[grep("exon 1 of", anno)] <- "1st Exon"
    anno[grep("intron 1 of", anno)] <- "1st Intron"
    anno[grep("Exon \\(", anno)] <- "Other Exon"
    anno[grep("Intron \\(", anno)] <- "Other Intron"
    anno[grep("Downstream", anno)] <- "Downstream (<=3kb)"
    anno[grep("Promoter", anno)] <- "Promoter"
    ## count frequency
    anno.table <- table(anno)
    
    ## calculate ratio
    anno.ratio <- anno.table/ sum(anno.table) * 100
    anno.df <- as.data.frame(anno.ratio)
    colnames(anno.df) <- c("Feature", "Frequency")
    anno.df$Numbers <- anno.table
    lvs <- c(
             "Promoter",
             "CpG", #here  new class
             "5' UTR",
             "3' UTR",
             "1st Exon",
             "Other Exon",
             "1st Intron",
             "Other Intron",
             "Downstream (<=3kb)",
             "Distal Intergenic",
             "Others")

    anno.df$Feature <- factor(anno.df$Feature, levels=lvs[lvs %in% anno.df$Feature])
    anno.df <- anno.df[order(anno.df$Feature),]
    return(anno.df)
 }

 # get CpG
 cpg = readr::read_tsv("http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/cpgIslandExt.txt.gz", col_names = FALSE,progress = FALSE)
 cpg_r = makeGRangesFromDataFrame(cpg, 
                                keep.extra.columns = FALSE, 
                                    ignore.strand = TRUE, end.field = "X4",
                                    start.field = "X3", seqnames.field = "X2")

 # here we create the annotation
 # df is a data frame with chrom and positions columns and anything else
    df_r = makeGRangesFromDataFrame(df, 
                                    keep.extra.columns = TRUE, 
                                    ignore.strand = TRUE, end.field = "end",
                                    start.field = "start", seqnames.field = "chr")
    
    an = annotatePeak(df_r, TxDb = txdb, tssRegion = c(-1000,1000), 
                      annoDb = "org.Hs.eg.db", verbose = FALSE)
    
    idx = findOverlaps(an@anno, cpg_r)
    an@detailGenomicAnnotation[,"cpg"] = FALSE
    an@detailGenomicAnnotation[queryHits(idx),"cpg"] = TRUE
    an@anno$annotation[queryHits(idx)] = "CpG"
    .df = getGenomicAnnoStat(an@anno)
    slot(an, "annoStat") <- .df
    plotAnnoBar(an)
	require(TxDb.Hsapiens.UCSC.hg19.knownGene)
	txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
	library(ChIPseeker)
	library(GenomicRanges)
	# Hack chipseeker function to add annotaiton
	getGenomicAnnoStat <- function(peakAnno) {
	if ( class(peakAnno) == "GRanges" )
	peakAnno <- as.data.frame(peakAnno)
	anno <- peakAnno$annotation
	## anno <- sub(" \\(.+", "", anno)

	anno[grep("exon 1 of", anno)] <- "1st Exon"
	anno[grep("intron 1 of", anno)] <- "1st Intron"
	anno[grep("Exon \\(", anno)] <- "Other Exon"
	anno[grep("Intron \\(", anno)] <- "Other Intron"
	anno[grep("Downstream", anno)] <- "Downstream (<=3kb)"
	anno[grep("Promoter", anno)] <- "Promoter"
	## count frequency
	anno.table <- table(anno)

	## calculate ratio
	anno.ratio <- anno.table/ sum(anno.table) * 100
	anno.df <- as.data.frame(anno.ratio)
	colnames(anno.df) <- c("Feature", "Frequency")
	anno.df$Numbers <- anno.table
	lvs <- c(
	"Promoter",
	"CpG", #here new class
	"5' UTR",
	"3' UTR",
	"1st Exon",
	"Other Exon",
	"1st Intron",
	"Other Intron",
	"Downstream (<=3kb)",
	"Distal Intergenic",
	"Others")

	anno.df$Feature <- factor(anno.df$Feature, levels=lvs[lvs %in% anno.df$Feature])
	anno.df <- anno.df[order(anno.df$Feature),]
	return(anno.df)
	}

	# get CpG
	cpg = readr::read_tsv("http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/cpgIslandExt.txt.gz", col_names = FALSE,progress = FALSE)
	cpg_r = makeGRangesFromDataFrame(cpg,
	keep.extra.columns = FALSE,
	ignore.strand = TRUE, end.field = "X4",
	start.field = "X3", seqnames.field = "X2")

	# here we create the annotation
	# df is a data frame with chrom and positions columns and anything else
	df_r = makeGRangesFromDataFrame(df,
	keep.extra.columns = TRUE,
	ignore.strand = TRUE, end.field = "end",
	start.field = "start", seqnames.field = "chr")

	an = annotatePeak(df_r, TxDb = txdb, tssRegion = c(-1000,1000),
	annoDb = "org.Hs.eg.db", verbose = FALSE)

	idx = findOverlaps(an@anno, cpg_r)
	an@detailGenomicAnnotation[,"cpg"] = FALSE
	an@detailGenomicAnnotation[queryHits(idx),"cpg"] = TRUE
	an@anno$annotation[queryHits(idx)] = "CpG"
	.df = getGenomicAnnoStat(an@anno)
	slot(an, "annoStat") <- .df
	plotAnnoBar(an)