Last active
December 19, 2015 18:28
-
-
Save n-kb/5998545 to your computer and use it in GitHub Desktop.
This simple script roughly converts an OWL file created by Protégé into a models.py file to be used with Neo4Django. Comments/improvements very welcome!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys

from lxml import etree
# Accumulates the text of the generated models.py file.
modelsContents = "from neo4django.db import models\n\n"

# Name of the OWL file to parse.
# Object properties in the file should always be named "has...": the part
# after "has" is used as the attribute name of the generated relationship.
owlFile = "ontology.owl"

# The ontology URI. Only needed for documentation purposes.
ontologyURI = "http://www.semanticweb.org/nkb/ontologies/2013/6/impact-investment#"

# Adds a comment in the models.py file pointing back at the ontology.
modelsContents += "# The ontology can be found in its entirety at " + ontologyURI + "\n"

# Prefix -> namespace URI map handed to etree's find()/findall().
namespaces = {
    'owl': 'http://www.w3.org/2002/07/owl#',
    'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    'rdfs': 'http://www.w3.org/2000/01/rdf-schema#',
}

# Correspondence between XSD datatype local names and neo4django property
# classes.
correspondanceTypes = {
    # NOTE(review): the original literal listed "string" twice
    # ("StringProperty" first, "StringArrayProperty" last). In a dict literal
    # the last duplicate wins, so "StringArrayProperty" was the value actually
    # used; it is kept here. Confirm that this is the intended mapping.
    "string": "StringArrayProperty",
    "anyURI": "URLProperty",
    "int": "IntegerProperty",
    "integer": "IntegerProperty",
    "nonNegativeInteger": "IntegerProperty",
    "nonPositiveInteger": "IntegerProperty",
    # XSD spells these with a lower-case initial; the capitalised keys below
    # never match a real xsd: datatype and are kept only for compatibility.
    "positiveInteger": "IntegerProperty",
    "negativeInteger": "IntegerProperty",
    "PositiveInteger": "IntegerProperty",
    "NegativeInteger": "IntegerProperty",
    "dateTimeStamp": "DateTimeProperty",
    "dateTime": "DateTimeProperty",
    "boolean": "BooleanProperty",
}
# Parses the file with etree
tree = etree.parse(owlFile)
root = tree.getroot()

# Clark-notation ({namespace}name) forms of the two RDF attributes read below.
RDF_ABOUT = "{" + namespaces["rdf"] + "}about"
RDF_RESOURCE = "{" + namespaces["rdf"] + "}resource"


def _local_name(uri):
    # Returns the part of the URI after "#". URIs without a fragment fall back
    # to their last path segment instead of raising IndexError.
    if "#" in uri:
        return uri.split("#")[1]
    return uri.rstrip("/").rsplit("/", 1)[-1]


def _warn(message):
    # Diagnostics go to stderr so that stdout stays a clean models.py file.
    sys.stderr.write("warning: " + message + "\n")


# Generated class definitions, joined once at the end.
generatedParts = []

# Finds all the Classes that are direct children of the document root
for ontologyClassElement in root.findall("owl:Class", namespaces):
    classURI = ontologyClassElement.get(RDF_ABOUT)
    if classURI is None:
        # A class without rdf:about has no name to generate a model from.
        continue
    className = _local_name(classURI)

    # By default, the class has no parent
    parentClass = "models.NodeModel"
    # Relationships and properties collected for this class
    relations = []
    properties = []

    for subClassElement in ontologyClassElement.findall("rdfs:subClassOf", namespaces):
        parentClassURI = subClassElement.get(RDF_RESOURCE)
        if parentClassURI is not None:
            # The Class is an extension of another named Class. If several
            # named parents are listed, the last one wins.
            parentClass = _local_name(parentClassURI)
            continue

        # Otherwise the subClassOf element wraps restrictions that describe
        # the relationships and data properties of the class.
        for restriction in subClassElement.findall("owl:Restriction", namespaces):
            onProperty = restriction.find("owl:onProperty", namespaces)
            propertyURI = None
            if onProperty is not None:
                propertyURI = onProperty.get(RDF_RESOURCE)
            if propertyURI is None:
                # Nothing to name the attribute after.
                continue
            propertyName = _local_name(propertyURI)

            # owl:onClass marks a relationship towards another class.
            relationClass = restriction.find("owl:onClass", namespaces)
            if relationClass is not None:
                targetURI = relationClass.get(RDF_RESOURCE)
                if targetURI is None:
                    _warn(className + "." + propertyName + ": relationship target is not a named class, skipped")
                    continue
                targetName = _local_name(targetURI)
                if targetName == className:
                    # Self-referencing relationship: emitted as the quoted
                    # literal 'self', since a bare `self` would be an
                    # undefined name inside the generated class body.
                    targetName = "'self'"
                # The attribute name is guessed from the relation name, which
                # must be "has<Something>"; other relations are ignored, and a
                # bare "has" would yield an empty attribute name.
                if propertyName.startswith("has") and len(propertyName) > 3:
                    relations.append({
                        "URI": targetURI,
                        "name": targetName,
                        "type": propertyName,
                        "destination": propertyName[3:].lower(),
                    })
                continue

            # owl:onDataRange (preferred) or owl:someValuesFrom marks a data
            # property.
            dataTypeElement = restriction.find("owl:onDataRange", namespaces)
            if dataTypeElement is None:
                dataTypeElement = restriction.find("owl:someValuesFrom", namespaces)
            if dataTypeElement is None:
                continue
            dataTypeURI = dataTypeElement.get(RDF_RESOURCE)
            dataTypeName = None
            if dataTypeURI is not None:
                dataTypeName = _local_name(dataTypeURI)
            if dataTypeName not in correspondanceTypes:
                # owl:someValuesFrom may also point at a class, or at a
                # datatype with no known mapping; skip it instead of crashing
                # with KeyError.
                _warn(className + "." + propertyName + ": unsupported data type " + repr(dataTypeURI) + ", skipped")
                continue
            properties.append({
                "name": propertyName,
                "type": correspondanceTypes[dataTypeName],
            })

    # Writes the class in models.py. There is no space before "class": the
    # statement has to start at column 0 of the generated file.
    generatedParts.append("\nclass " + className + "(" + parentClass + "):\n")
    # Writes the properties
    for prop in properties:
        generatedParts.append("\t" + prop["name"] + " = models." + prop["type"] + "()\n")
    # Writes the relationships
    for relation in relations:
        generatedParts.append("\t" + relation["destination"] + " = models.Relationship(" + relation["name"] + ",rel_type='" + relation["type"] + "')\n")
    # A class body cannot be empty in Python.
    if not properties and not relations:
        generatedParts.append("\tpass\n")

modelsContents += "".join(generatedParts)

# Parenthesised form prints the same text on Python 2 and Python 3.
print(modelsContents)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment