Created
March 13, 2017 11:30
-
-
Save explodecomputer/ff89e9e6bd459b7e5c198d46565b3c01 to your computer and use it in GitHub Desktop.
SNP IDs to KEGG and GO terms
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# source("https://bioconductor.org/biocLite.R") | |
# biocLite("KEGGREST") | |
# biocLite("org.Hs.eg.db") | |
library(biomaRt) | |
library(KEGGREST) | |
library(org.Hs.eg.db) | |
# example snp list | |
snp <- c("rs1007648", "rs112275031", "rs1124639", "rs11731570", "rs11743154", "rs12439909") | |
# for every SNP get an entrez gene ID | |
# this requires getting ensembl gene id first, then finding entrez gene id that matches it | |
Mart <- useMart(host="grch37.ensembl.org", biomart="ENSEMBL_MART_SNP",dataset="hsapiens_snp") | |
snps <- getBM(attributes=c("refsnp_id","chr_name","chrom_start","allele","minor_allele","minor_allele_freq", "ensembl_gene_stable_id"), filters="snp_filter",values=snp,mart=Mart) | |
ensembl <- useDataset("hsapiens_gene_ensembl", mart=useMart("ensembl")) | |
gene <- getBM(attributes=c("ensembl_gene_id", "entrezgene"), filters="ensembl_gene_id", values=snps$ensembl_gene_stable_id, mart=ensembl) | |
# note - these are just biomart's gene annotations for particular SNPs. | |
# there are probably better ways to map SNPs to genes | |
# list of mapped entrzgene | |
entrezgene <- as.character(gene$entrezgene[!is.na(gene$entrezgene)]) | |
# for every entrez gene id get a pathway from KEGG | |
# Get pathway for each entrez ID | |
pathway <- mget(entrezgene, envir=org.Hs.egPATH, ifnotfound=NA) | |
# for every entrez gene id get a GO term | |
goterm <- mget(entrezgene, envir=org.Hs.egGO, ifnotfound=NA) | |
goterm <- lapply(goterm, function(x) | |
{ | |
x <- unlist(x) | |
i <- grep("GO:", x) | |
return(x[i]) | |
}) | |
pathway <- data.frame(entrezgene=rep(entrezgene, sapply(pathway, length)), pathway=unlist(pathway)) | |
goterm <- data.frame(entrezgene=rep(entrezgene, sapply(goterm, length)), goterm=unlist(goterm)) | |
pathway <- merge(gene, pathway, by="entrezgene") | |
goterm <- merge(gene, goterm, by="entrezgene") | |
pathway <- merge(snps, pathway, by.x='ensembl_gene_stable_id', by.y='ensembl_gene_id') | |
goterm <- merge(snps, goterm, by.x='ensembl_gene_stable_id', by.y='ensembl_gene_id') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment