Created
October 18, 2017 16:36
-
-
Save Dudemanguy/675568d6b8270fbc6eef094eb1a897e0 to your computer and use it in GitHub Desktop.
Script for quickly searching and fetching terms from GO database
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
suppressPackageStartupMessages(library(GO.db)) | |
suppressPackageStartupMessages(library(biomaRt)) | |
#script for quickly searching and fetching terms from GO database | |
query <- readline("Input your search query in regex. \n") | |
zz <- Ontology(GOTERM) | |
goterms <- unlist(Term(GOTERM)) | |
whmf <- grep(query, ignore.case=TRUE, goterms) | |
df <- data.frame(goterms[whmf]) | |
goterm_list <- rownames(df) | |
#get child terms | |
a <- as.list(GOBPOFFSPRING) | |
b <- as.list(GOCCOFFSPRING) | |
c <- as.list(GOMFOFFSPRING) | |
a <- a[goterm_list] | |
b <- b[goterm_list] | |
c <- c[goterm_list] | |
combined <- c(unlist(a), unlist(b), unlist(c), goterm_list) | |
unique_terms <- unique(combined) | |
#convert to refseq | |
ensembl <- useMart("ensembl") | |
ensembl <- useDataset("hsapiens_gene_ensembl", mart=ensembl) | |
mrna <- getBM(attributes=c("go_id", "refseq_mrna"), filters="go", values=c(unique_terms), mart=ensembl) | |
ncrna <- getBM(attributes=c("go_id", "refseq_ncrna"), filters="go", values=c(unique_terms), mart=ensembl) | |
mrna_char <- mrna[,2] | |
ncrna_char <- ncrna[,2] | |
transcript_id <- unique(c(mrna_char, ncrna_char)) | |
#import expressed transcript table and intersect with query results | |
x <- read.delim("input_table.txt") | |
y <- x[x$transcript_ID %in% transcript_id,] | |
rownames(y) <- y[,1] | |
y <- y[-(1)] | |
write.table(y, file="expressed_genes.txt", sep="\t", quote=FALSE) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment