Skip to content

Instantly share code, notes, and snippets.

@sahirbhatnagar
Created March 2, 2015 17:59
Show Gist options
  • Save sahirbhatnagar/178619f32963b30ba8b2 to your computer and use it in GitHub Desktop.
Save sahirbhatnagar/178619f32963b30ba8b2 to your computer and use it in GitHub Desktop.
Annotating Methylation Probes
methyl.annotate <- function(file, thresholdp=1e-3, thresholdq=1e-1, tissue){
DT <- fread(file)
setnames(DT, c("V1","V2","V3","V4","V5"), c("probe","pvalue","z","sd","effect"))
# use this to get CHR and BP, then merge with betareg results
probe.info <- hm450[DT[["probe"]]]
f <- data.table::data.table(probe=names(probe.info),CHR=as.data.frame(probe.info@seqnames)$value,
BP=as.numeric(probe.info@elementMetadata$probeStart))
data.table::setkey(f,probe)
# get nearest Transcription start sites
TSS <- data.table::data.table(FDb.InfiniumMethylation.hg19::getNearestTSS(probe.info), keep.rownames=TRUE)
data.table::setkey(TSS,rn)
# change names to merge in with DT
data.table::setnames(TSS, c("queryHits","subjectHits","distance","nearestGeneSymbol","nearestTranscript"),
c("TSSqueryHits","TSSsubjectHits","TSSdistance","TSSnearestGeneSymbol","TSSnearestTranscript"))
# get nearest genes
Transcript <- data.table::data.table(FDb.InfiniumMethylation.hg19::getNearestTranscript(probe.info), keep.rownames=TRUE)
data.table::setkey(Transcript,rn)
# DT[,c("significant.p","lower95","upper95","tissue"):=
# list(pvalue<=thresholdp,effect+qnorm(0.025)*sd,effect+qnorm(0.975)*sd,tissue),]
set(DT,i=NULL, j="significant.p", value=DT[["pvalue"]]<=thresholdp)
set(DT,i=NULL, j="lower95", value=DT[["effect"]]+qnorm(0.025)*DT[["sd"]])
set(DT,i=NULL, j="upper95", value=DT[["effect"]]+qnorm(0.975)*DT[["sd"]])
setkey(DT,probe)
# Merge all tables
DT <- DT[f][Transcript][TSS]
set(DT, i=NULL, j="CHR.num", value=as.numeric(sub("chr","", DT[["CHR"]])))
# significant probes based on pvalue threshold
probenames.p <- DT[significant.p==TRUE][,c("probe","CHR.num","BP","effect","sd","nearestGeneSymbol","distance","TSSnearestGeneSymbol","TSSdistance"),with=F]
# GRanges object for significant hits based on p value
probenames.p.granges <- hm450[probenames.p[["probe"]]]
# q-value
qobj <- qvalue::qvalue(DT[["pvalue"]])
set(DT, i=NULL, j="qvalue",value=qobj$qvalues)
DT[,"significant.q":=qvalue<=thresholdq]
# significant probes based on qvalue threshold
probenames.q <- DT[significant.q==TRUE][,c("probe","CHR.num","BP","effect","sd","nearestGeneSymbol","distance","TSSnearestGeneSymbol","TSSdistance"),with=F]
# GRanges object for significant hits based on q value
probenames.q.granges <- hm450[probenames.q[["probe"]]]
return(list(results=DT, significantp=probenames.p, significantq=probenames.q,
sigpGranges=probenames.p.granges,sigqGranges=probenames.q.granges))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment