Created
September 26, 2011 15:02
-
-
Save seandavi/1242438 to your computer and use it in GitHub Desktop.
Testing the Neo4j embedded database from Python by loading the Gene Ontology database
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| # install neo4j-embedded first | |
| from neo4j import GraphDatabase | |
| import neo4j | |
| import csv | |
| import shutil | |
| import logging | |
| import argparse | |
# Command-line interface: three positional arguments, one per input file.
parser = argparse.ArgumentParser()
for _arg_name, _arg_help in (
        ("termfile",
         "The term.txt file as downloaded from the gene ontology site"),
        ("termdeffile",
         "The term_definition.txt file as downloaded from the gene ontology site"),
        ("term2termfile",
         "The term2term.txt file as downloaded from the gene ontology site"),
):
    parser.add_argument(_arg_name, help=_arg_help)
opts = parser.parse_args()

logging.basicConfig(level=logging.DEBUG)

# Location of the embedded graph store; it is wiped and recreated on each run.
DB_PATH = '/tmp/biodb'
try:
    shutil.rmtree(DB_PATH)
except OSError:
    # Best effort: the directory not being removable (e.g. it does not exist
    # yet) is not treated as an error.
    pass
db = GraphDatabase(DB_PATH)

# Lookup tables filled in by create_go_term():
#   termaccmap      -- column 3 of a term row (stored as 'acc') -> node id
#   termidmap       -- column 0 of a term row                   -> node id
#   relationshipmap -- column 0 -> column 1 (name), only for rows whose
#                      column 6 is '1'
termaccmap = {}
termidmap = {}
relationshipmap = {}
def create_go_term(line):
    """Create a graph node for one row of the term file and index it.

    ``line`` is a sequence of column values. The columns read here are
    0 (row id), 1 (name), 2 (term type), 3 (accession) and 6 (a flag).
    When the flag column equals ``'1'`` the row's name is additionally
    recorded in ``relationshipmap`` under its row id.

    The new node's id is registered in ``termaccmap`` (keyed by accession)
    and ``termidmap`` (keyed by row id).

    Returns the newly created node.
    """
    row_id = line[0]
    name = line[1]
    term_type = line[2]
    acc = line[3]

    if line[6] == '1':
        relationshipmap[row_id] = name

    # Node creation and index bookkeeping happen inside one transaction.
    with db.transaction:
        node = db.node(acc=acc, term_type=term_type, name=name)
        node_id = node.id
        termaccmap[acc] = node_id
        termidmap[row_id] = node_id
    return node
def get_term(acc):
    """Return the properties of the term stored under accession ``acc``.

    The node is located through ``termaccmap``, so an accession that was
    never loaded raises ``KeyError``. The result is a plain dict with the
    keys ``'acc'``, ``'term_type'``, ``'name'`` and ``'definition'``.

    NOTE(review): a node that never received a 'definition' property
    presumably makes the property lookup fail -- confirm against the
    neo4j-embedded API.
    """
    node = db.node[termaccmap[acc]]
    wanted = ('acc', 'term_type', 'name', 'definition')
    return dict((key, node[key]) for key in wanted)
# Main load sequence. Everything runs inside try/finally so the embedded
# database is shut down even when one of the load steps raises.
terms = None
try:
    # Create one extra node and attach it to the reference node via the
    # GO_TERM attribute call (neo4j-embedded's relationship-creation syntax
    # -- TODO confirm; the node is not used again in this script).
    with db.transaction:
        terms = db.node()
        db.reference_node.GO_TERM(terms)

    # Step 1: one node per row of the term file. create_go_term() opens its
    # own transaction for every row.
    logging.info('creating terms')
    with open(opts.termfile) as handle:
        reader = csv.reader(handle, delimiter="\t")
        for row in reader:
            create_go_term(row)

    # Step 2: attach column 1 of each definition row as the 'definition'
    # property of the node whose term-file id is in column 0. All rows share
    # a single transaction.
    logging.info('adding definitions')
    with open(opts.termdeffile) as handle:
        reader = csv.reader(handle, delimiter="\t")
        with db.transaction:
            for row in reader:
                term = db.node[termidmap[row[0]]]
                term['definition'] = row[1]

    # Step 3: one relationship per row of the term2term file. Column 1
    # selects the relationship name via relationshipmap; columns 2 and 3 are
    # the term-file ids of the start and end nodes.
    logging.info('adding relationships')
    with open(opts.term2termfile) as handle:
        reader = csv.reader(handle, delimiter="\t")
        with db.transaction:
            for j, row in enumerate(reader, 1):
                parent = db.node[termidmap[row[2]]]
                child = db.node[termidmap[row[3]]]
                parent.relationships.create(relationshipmap[row[1]], child)
                # Progress report every 10000 rows, emitted after the
                # relationship exists so the count is accurate.
                if j % 10000 == 0:
                    logging.debug("%d relationships added", j)

    # test retrieval
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(get_term('GO:2001093'))
finally:
    db.shutdown()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment