Skip to content

Instantly share code, notes, and snippets.

@seandavi
Created September 26, 2011 15:02
Show Gist options
  • Select an option

  • Save seandavi/1242438 to your computer and use it in GitHub Desktop.

Select an option

Save seandavi/1242438 to your computer and use it in GitHub Desktop.
Testing the Neo4j database from Python by loading the Gene Ontology database
#!/usr/bin/env python
# install neo4j-embedded first
from neo4j import GraphDatabase
import neo4j
import csv
import shutil
import logging
import argparse
# Command-line interface: three positional paths, one per Gene Ontology
# dump file consumed further down in this script.
parser = argparse.ArgumentParser()
for _arg_name, _arg_help in (
        ("termfile",
         "The term.txt file as downloaded from the gene ontology site"),
        ("termdeffile",
         "The term_definition.txt file as downloaded from the gene ontology site"),
        ("term2termfile",
         "The term2term.txt file as downloaded from the gene ontology site"),
):
    parser.add_argument(_arg_name, help=_arg_help)
opts = parser.parse_args()

logging.basicConfig(level=logging.DEBUG)

# Start from an empty store on every run: remove whatever a previous run
# left behind.  An OSError from the removal (e.g. nothing to remove) is
# deliberately ignored.
DB_PATH = '/tmp/biodb'
try:
    shutil.rmtree(DB_PATH)
except OSError:
    pass
db = GraphDatabase(DB_PATH)

# Lookup tables filled in by create_go_term():
#   termaccmap      -- term accession (column 3 of a term row) -> node id
#   termidmap       -- first column of a term row -> node id
#   relationshipmap -- first column -> second column, only for term rows
#                      whose column 6 is '1'
termaccmap = dict()
termidmap = dict()
relationshipmap = dict()
def create_go_term(line):
    """Create a graph node for one row of the term file and index it.

    ``line`` is a sequence of column values.  Columns 1, 2 and 3 are stored
    on the new node as ``name``, ``term_type`` and ``acc``.  The node id is
    recorded in ``termaccmap`` (keyed by column 3) and in ``termidmap``
    (keyed by column 0).  When column 6 equals ``'1'`` the row is also
    remembered in ``relationshipmap`` as column 0 -> column 1
    (presumably the "is a relationship type" flag -- confirm against the
    GO dump schema).

    Returns the newly created node.
    """
    row_key = line[0]
    label = line[1]
    kind = line[2]
    accession = line[3]

    if line[6] == '1':
        relationshipmap[row_key] = label

    # Each term gets its own transaction.
    with db.transaction:
        node = db.node(acc=accession, term_type=kind, name=label)
        node_id = node.id
        termaccmap[accession] = node_id
        termidmap[row_key] = node_id
    return node
def get_term(acc):
    """Return the stored properties of the term with accession ``acc``.

    The node is located through ``termaccmap`` and its ``acc``,
    ``term_type``, ``name`` and ``definition`` properties are copied into a
    plain dict, which is returned.
    """
    node = db.node[termaccmap[acc]]
    wanted = ('acc', 'term_type', 'name', 'definition')
    return {key: node[key] for key in wanted}
# Load the Gene Ontology dump files into the graph, then read one term back
# as a smoke test.  Everything runs inside try/finally so the embedded
# database is shut down even when one of the load steps raises.
try:
    # Create an anchor node and call GO_TERM on the reference node with it
    # (in neo4j-embedded this presumably creates a GO_TERM relationship
    # from the reference node -- confirm against the library docs).
    with db.transaction:
        terms = db.node()
        db.reference_node.GO_TERM(terms)

    logging.info('creating terms')
    # create_go_term() opens its own transaction for every row.
    # Each input file is opened in a `with` block so its handle is closed
    # as soon as the file has been consumed.
    with open(opts.termfile) as term_fh:
        for row in csv.reader(term_fh, delimiter="\t"):
            create_go_term(row)

    logging.info('adding definitions')
    with open(opts.termdeffile) as def_fh:
        with db.transaction:
            for row in csv.reader(def_fh, delimiter="\t"):
                # row[0] is resolved to a node through termidmap;
                # row[1] is stored as that node's 'definition' property.
                term = db.node[termidmap[row[0]]]
                term['definition'] = row[1]

    logging.info('adding relationships')
    with open(opts.term2termfile) as rel_fh:
        with db.transaction:
            for j, row in enumerate(csv.reader(rel_fh, delimiter="\t"), 1):
                if j % 10000 == 0:
                    # Progress marker; arguments are passed lazily to logging.
                    logging.debug("%d relationships added", j)
                # row[2] and row[3] are resolved through termidmap; row[1]
                # is resolved through relationshipmap to the relationship
                # type name.
                parent = db.node[termidmap[row[2]]]
                child = db.node[termidmap[row[3]]]
                parent.relationships.create(relationshipmap[row[1]], child)

    # test retrieval
    # Parenthesised form prints the same on Python 2 and also parses on 3.
    print(get_term('GO:2001093'))
finally:
    db.shutdown()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment