Skip to content

Instantly share code, notes, and snippets.

@shinaisan
Created October 23, 2016 06:06
Show Gist options
  • Save shinaisan/95b8d84095c8af818fa3af668a7d0310 to your computer and use it in GitHub Desktop.
Save shinaisan/95b8d84095c8af818fa3af668a7d0310 to your computer and use it in GitHub Desktop.
DBpedia Ontology Extraction
from rdflib import Graph
def ontology(file_name):
    """Extract DBpedia ontology term names used as subjects in an RDF file.

    The rdflib parser format is taken from the file extension, i.e. the text
    after the last ``.`` in the final path component (for example ``nt`` or
    ``xml``).

    Args:
        file_name: Path of the RDF file to parse.

    Returns:
        A lazy generator of strings, one per triple whose subject URI starts
        with ``http://dbpedia.org/ontology/`` and has something after that
        prefix; each string is the subject URI with the prefix removed.
        The same name is produced once per matching triple, so duplicates
        are possible. Returns ``None`` when ``file_name`` has no extension.
    """
    import os  # local import: keeps this function usable on its own

    # Only the last path component may supply the extension, so a dot in a
    # directory name (e.g. "data.v1/dump") is not mistaken for one.
    base_name = os.path.basename(file_name)
    dot = base_name.rfind('.')
    if dot < 0:
        return None
    rdf_format = base_name[dot + 1:]

    # Build and parse the graph only after the file name has been validated.
    g = Graph()
    g.parse(file_name, format=rdf_format)
    prefix = 'http://dbpedia.org/ontology/'
    # Convert each subject to a string once instead of three times per triple.
    subjects = (str(s) for s, _p, _o in g)
    return (uri[len(prefix):] for uri in subjects
            if uri.startswith(prefix) and len(uri) > len(prefix))
def write(src_file_name, trg_file_name):
    """Write the distinct ontology term names of an RDF file, one per line.

    Args:
        src_file_name: Path of the RDF file handed to ``ontology``.
        trg_file_name: Path of the text file to create or overwrite.

    Raises:
        ValueError: If ``ontology`` returns ``None`` for ``src_file_name``
            (it does so when the name has no extension to derive the RDF
            format from).
    """
    names = ontology(src_file_name)
    if names is None:
        # Fail with a clear message instead of the opaque TypeError that
        # set(None) would raise.
        raise ValueError(
            "cannot determine RDF format of %r: file name has no extension"
            % (src_file_name,))
    # De-duplicate, then sort so the output is identical from run to run.
    unique_names = sorted(set(names))
    # Explicit UTF-8 so non-ASCII names do not depend on the platform default.
    with open(trg_file_name, 'w', encoding='utf-8') as out:
        out.writelines(name + "\n" for name in unique_names)
def test():
    """Run ``write`` on the sample ``.nt`` and ``.xml`` DBpedia dumps.

    Prints a progress line before each file and writes the results to
    ``tmp1.out`` and ``tmp2.out`` respectively.
    """
    # Input files can be downloaded from DBpedia.
    jobs = (
        ('Processing nt.', 'dbpedia_2015-04.nt', 'tmp1.out'),
        ('Processing owl.', 'dbpedia_2015-04.xml', 'tmp2.out'),
    )
    for banner, source, target in jobs:
        print(banner)
        write(source, target)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment