Skip to content

Instantly share code, notes, and snippets.

@explodecomputer
Created March 13, 2017 11:30
Show Gist options
  • Save explodecomputer/ff89e9e6bd459b7e5c198d46565b3c01 to your computer and use it in GitHub Desktop.
Save explodecomputer/ff89e9e6bd459b7e5c198d46565b3c01 to your computer and use it in GitHub Desktop.
SNP IDs to KEGG and GO terms
# source("https://bioconductor.org/biocLite.R")
# biocLite("KEGGREST")
# biocLite("org.Hs.eg.db")
library(biomaRt)
library(KEGGREST)
library(org.Hs.eg.db)
# example snp list
snp <- c("rs1007648", "rs112275031", "rs1124639", "rs11731570", "rs11743154", "rs12439909")
# for every SNP get an entrez gene ID
# this requires getting ensembl gene id first, then finding entrez gene id that matches it
Mart <- useMart(host="grch37.ensembl.org", biomart="ENSEMBL_MART_SNP",dataset="hsapiens_snp")
snps <- getBM(attributes=c("refsnp_id","chr_name","chrom_start","allele","minor_allele","minor_allele_freq", "ensembl_gene_stable_id"), filters="snp_filter",values=snp,mart=Mart)
ensembl <- useDataset("hsapiens_gene_ensembl", mart=useMart("ensembl"))
gene <- getBM(attributes=c("ensembl_gene_id", "entrezgene"), filters="ensembl_gene_id", values=snps$ensembl_gene_stable_id, mart=ensembl)
# note - these are just biomart's gene annotations for particular SNPs.
# there are probably better ways to map SNPs to genes
# list of mapped entrzgene
entrezgene <- as.character(gene$entrezgene[!is.na(gene$entrezgene)])
# for every entrez gene id get a pathway from KEGG
# Get pathway for each entrez ID
pathway <- mget(entrezgene, envir=org.Hs.egPATH, ifnotfound=NA)
# for every entrez gene id get a GO term
goterm <- mget(entrezgene, envir=org.Hs.egGO, ifnotfound=NA)
goterm <- lapply(goterm, function(x)
{
x <- unlist(x)
i <- grep("GO:", x)
return(x[i])
})
pathway <- data.frame(entrezgene=rep(entrezgene, sapply(pathway, length)), pathway=unlist(pathway))
goterm <- data.frame(entrezgene=rep(entrezgene, sapply(goterm, length)), goterm=unlist(goterm))
pathway <- merge(gene, pathway, by="entrezgene")
goterm <- merge(gene, goterm, by="entrezgene")
pathway <- merge(snps, pathway, by.x='ensembl_gene_stable_id', by.y='ensembl_gene_id')
goterm <- merge(snps, goterm, by.x='ensembl_gene_stable_id', by.y='ensembl_gene_id')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment