Skip to content

Instantly share code, notes, and snippets.

@lawlesst
Created September 25, 2012 20:52
Show Gist options
  • Save lawlesst/3784363 to your computer and use it in GitHub Desktop.
Save lawlesst/3784363 to your computer and use it in GitHub Desktop.
Vivo utilities using Jena Java classes via jnius.
from jnius import autoclass
"""
You need to set the Java classpath before running this.
e.g.
export CLASSPATH=".:/work/javalib/*"
"""
# --- SDB connection settings ------------------------------------------------
# These three values are handed to SDBConnection in VivoConn.__init__.
# NOTE(review): they look like placeholders -- confirm that real credentials
# are supplied some other way before deploying.
DB_URL = "jdbc:mysql://school.edu/vivo"
DB_USER = "user"
DB_PASSWD = "pass"

# Database flavour label and VIVO core ontology namespace. Neither name is
# referenced by the code visible in this file; kept for external importers.
DB = "MySQL"
vivo = 'http://vivoweb.org/ontology/core#'

# --- Java classes loaded through jnius --------------------------------------
# Jena database / SDB store plumbing.
DBConnection = autoclass('com.hp.hpl.jena.db.DBConnection')
LayoutType = autoclass('com.hp.hpl.jena.sdb.store.LayoutType')
DatabaseType = autoclass('com.hp.hpl.jena.sdb.store.DatabaseType')
SDBConnection = autoclass('com.hp.hpl.jena.sdb.sql.SDBConnection')
SDBFactory = autoclass('com.hp.hpl.jena.sdb.SDBFactory')
StoreDesc = autoclass('com.hp.hpl.jena.sdb.StoreDesc')

# Jena model helpers.
FileManager = autoclass('com.hp.hpl.jena.util.FileManager')
SimpleSelector = autoclass('com.hp.hpl.jena.rdf.model.SimpleSelector')

# SPARQL support: QueryFactory, QueryExecutionFactory, ResultSetFormatter.
QueryFactory = autoclass('com.hp.hpl.jena.query.QueryFactory')
QueryExecutionFactory = autoclass('com.hp.hpl.jena.query.QueryExecutionFactory')
ResultSetFormatter = autoclass('com.hp.hpl.jena.query.ResultSetFormatter')

# java.lang.String, used to wrap Python query text before handing it to Jena.
String = autoclass('java.lang.String')
from vivo_ingest.settings import NAMESPACE
class VivoConn(object):
    """Handle on the VIVO knowledge base stored in a Jena SDB database.

    Instantiating the class opens an SDB connection using the module-level
    ``DB_URL``, ``DB_USER`` and ``DB_PASSWD`` settings and binds
    ``self.vivo_model`` to the ``vitro-kb-2`` named model. Call ``close()``
    when finished to release the model, the store and the connection.
    """

    def __init__(self):
        # Store description: hashed triple-nodes layout on MySQL.
        store_desc = StoreDesc(LayoutType.LayoutTripleNodesHash,
                               DatabaseType.MySQL)
        self.conn = SDBConnection(DB_URL, DB_USER, DB_PASSWD)
        self.store = SDBFactory.connectStore(self.conn, store_desc)
        self.dataset = SDBFactory.connectDataset(self.store)
        self.vivo_model = self.dataset.getNamedModel(
            'http://vitro.mannlib.cornell.edu/default/vitro-kb-2')
        # Prefix under which get_next_uri() mints new URIs.
        self.namespace = NAMESPACE

    @staticmethod
    def _escape_sparql_literal(value):
        """Escape ``value`` for use inside a double-quoted SPARQL string."""
        # Backslash must be handled first so the escapes added below are not
        # themselves escaped a second time.
        escaped = value.replace('\\', '\\\\')
        escaped = escaped.replace('"', '\\"')
        escaped = escaped.replace('\n', '\\n')
        escaped = escaped.replace('\r', '\\r')
        escaped = escaped.replace('\t', '\\t')
        return escaped

    def _has_statements(self, selector):
        """Return True if any statement in the model matches ``selector``."""
        statements = self.vivo_model.listStatements(selector)
        try:
            return statements.hasNext()
        finally:
            # Always release the iterator, whether or not it had results.
            statements.close()

    def get_next_uri(self):
        """Return a randomly generated URI that is unused in the model.

        Candidates have the form ``self.namespace + 'n' + <random int>`` with
        the integer drawn from 1..9999999. A candidate is rejected, and a new
        one drawn, while it appears as the subject or the object of any
        statement in ``self.vivo_model``.
        """
        import random
        model = self.vivo_model
        while True:
            candidate = self.namespace + 'n' + str(random.randint(1, 9999999))
            resource = model.createResource(candidate)
            # Check if this uri is the subject of any statements.
            if self._has_statements(SimpleSelector(resource, None, None)):
                continue
            # Check if this uri is the object of any statements.
            if self._has_statements(SimpleSelector(None, None, resource)):
                continue
            return candidate

    def sparql_select(self, q):
        """Run the SPARQL SELECT query ``q`` against the VIVO model.

        Returns the value of ``ResultSetFormatter.toList`` for the result
        set. The query execution is closed before returning, including when
        executing the query raises.
        """
        query = QueryFactory.create(String(q))
        qexec = QueryExecutionFactory.create(query, self.vivo_model)
        try:
            return ResultSetFormatter.toList(qexec.execSelect())
        finally:
            qexec.close()

    def get_or_create_information_resource(self, publication):
        """Look up a vivo:InformationResource by its rdfs:label.

        Returns a ``(uri, is_new)`` tuple. When a resource labelled
        ``publication`` exists, ``uri`` is that resource's URI and ``is_new``
        is False. Otherwise ``uri`` is a fresh URI from ``get_next_uri()``
        and ``is_new`` is True; nothing is written to the model here.

        Example of the data being matched:

            <http://dvivocit01.services.brown.edu/individual/n3982250>
                a vivo:InformationResource ;
                rdfs:label "Atmospheric Environment" .
        """
        # Escape the label so quotes or backslashes in a title cannot break
        # out of the string literal in the query below.
        label = self._escape_sparql_literal('%s' % publication)
        info_resource_query = """
PREFIX vivo: <http://vivoweb.org/ontology/core#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
select ?ir_uri
where {
?ir_uri a vivo:InformationResource.
?ir_uri rdfs:label ?label
FILTER (?label= "%s").
}
LIMIT 1
""" % label
        results = self.sparql_select(info_resource_query)
        result_iter = results.listIterator()
        # The query is LIMIT 1, so at most one solution is available.
        if result_iter.hasNext():
            solution = result_iter.next()
            return (solution.get('?ir_uri').toString(), False)
        # If we are here, that means it couldn't be found. Return next uri.
        return (self.get_next_uri(), True)

    def close(self):
        """Close the model, the store and the connection, in that order.

        Each handle is closed even if closing an earlier one raises.
        """
        try:
            self.vivo_model.close()
        finally:
            try:
                self.store.close()
            finally:
                self.conn.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment