Created
March 2, 2015 17:59
-
-
Save sahirbhatnagar/178619f32963b30ba8b2 to your computer and use it in GitHub Desktop.
Annotating Methylation Probes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Annotate methylation probe results with position, nearest gene and TSS
#'
#' Reads a five-column results file, attaches chromosome / base-pair position
#' and nearest-transcript / nearest-TSS annotation to every probe, flags
#' probes that pass the p-value and q-value thresholds, and returns the
#' annotated table together with the significant subsets.
#'
#' The function reads a free variable `hm450` that is not defined here; it is
#' indexed by probe name and passed to
#' `FDb.InfiniumMethylation.hg19::getNearestTSS()` /
#' `getNearestTranscript()`.
#' NOTE(review): presumably this is the GRanges object returned by
#' `FDb.InfiniumMethylation.hg19::get450k()` -- confirm against the caller.
#'
#' @param file Input passed straight to `data.table::fread()`. The table must
#'   come back with columns named `V1`..`V5`, which are renamed to `probe`,
#'   `pvalue`, `z`, `sd` and `effect` (in that order).
#' @param thresholdp Probes with `pvalue <= thresholdp` are flagged in
#'   `significant.p`.
#' @param thresholdq Probes with `qvalue <= thresholdq` are flagged in
#'   `significant.q`.
#' @param tissue Currently unused: the only code that stored it in the result
#'   is commented out below. Kept in the signature for compatibility.
#' @return A named list:
#'   \describe{
#'     \item{results}{the fully annotated `data.table`}
#'     \item{significantp}{selected columns of the rows with
#'       `significant.p == TRUE`}
#'     \item{significantq}{selected columns of the rows with
#'       `significant.q == TRUE`}
#'     \item{sigpGranges}{`hm450` subset by the probes in `significantp`}
#'     \item{sigqGranges}{`hm450` subset by the probes in `significantq`}
#'   }
methyl.annotate <- function(file, thresholdp=1e-3, thresholdq=1e-1, tissue){
  DT <- data.table::fread(file)
  data.table::setnames(DT,
                       c("V1", "V2", "V3", "V4", "V5"),
                       c("probe", "pvalue", "z", "sd", "effect"))

  # use this to get CHR and BP, then merge with betareg results
  probe.info <- hm450[DT[["probe"]]]
  f <- data.table::data.table(
    probe = names(probe.info),
    CHR = as.data.frame(probe.info@seqnames)$value,
    BP = as.numeric(probe.info@elementMetadata$probeStart)
  )
  data.table::setkey(f, probe)

  # get nearest Transcription start sites
  TSS <- data.table::data.table(
    FDb.InfiniumMethylation.hg19::getNearestTSS(probe.info),
    keep.rownames = TRUE
  )
  data.table::setkey(TSS, rn)
  # prefix the TSS columns so they do not clash with the transcript columns
  # of the same name once everything is merged into DT
  data.table::setnames(
    TSS,
    c("queryHits", "subjectHits", "distance",
      "nearestGeneSymbol", "nearestTranscript"),
    c("TSSqueryHits", "TSSsubjectHits", "TSSdistance",
      "TSSnearestGeneSymbol", "TSSnearestTranscript")
  )

  # get nearest genes
  Transcript <- data.table::data.table(
    FDb.InfiniumMethylation.hg19::getNearestTranscript(probe.info),
    keep.rownames = TRUE
  )
  data.table::setkey(Transcript, rn)

  # Earlier variant, kept for reference (it also stored `tissue`):
  # DT[,c("significant.p","lower95","upper95","tissue"):=
  #      list(pvalue<=thresholdp,effect+qnorm(0.025)*sd,effect+qnorm(0.975)*sd,tissue),]
  data.table::set(DT, i = NULL, j = "significant.p",
                  value = DT[["pvalue"]] <= thresholdp)
  # bounds computed as effect + qnorm(0.025 / 0.975) * sd
  data.table::set(DT, i = NULL, j = "lower95",
                  value = DT[["effect"]] + qnorm(0.025) * DT[["sd"]])
  data.table::set(DT, i = NULL, j = "upper95",
                  value = DT[["effect"]] + qnorm(0.975) * DT[["sd"]])
  data.table::setkey(DT, probe)

  # Merge all tables (keyed joins on the probe identifier)
  DT <- DT[f][Transcript][TSS]

  # Numeric chromosome: strips "chr" and coerces with as.numeric(), so any
  # label that is not a number after stripping becomes NA (with a warning).
  data.table::set(DT, i = NULL, j = "CHR.num",
                  value = as.numeric(sub("chr", "", DT[["CHR"]])))

  # columns reported for significant probes (shared by the p and q subsets)
  report.cols <- c("probe", "CHR.num", "BP", "effect", "sd",
                   "nearestGeneSymbol", "distance",
                   "TSSnearestGeneSymbol", "TSSdistance")

  # significant probes based on pvalue threshold
  probenames.p <- DT[significant.p == TRUE][, report.cols, with = FALSE]
  # GRanges object for significant hits based on p value
  probenames.p.granges <- hm450[probenames.p[["probe"]]]

  # q-value
  qobj <- qvalue::qvalue(DT[["pvalue"]])
  data.table::set(DT, i = NULL, j = "qvalue", value = qobj$qvalues)
  data.table::set(DT, i = NULL, j = "significant.q",
                  value = DT[["qvalue"]] <= thresholdq)

  # significant probes based on qvalue threshold
  probenames.q <- DT[significant.q == TRUE][, report.cols, with = FALSE]
  # GRanges object for significant hits based on q value
  probenames.q.granges <- hm450[probenames.q[["probe"]]]

  return(list(results = DT,
              significantp = probenames.p,
              significantq = probenames.q,
              sigpGranges = probenames.p.granges,
              sigqGranges = probenames.q.granges))
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment