Created
July 2, 2017 17:05
-
-
Save ettorerizza/9c6f3995b4d2ba8eb13e4bff87d4792f to your computer and use it in GitHub Desktop.
OpenRefine/Jython SPARQL query (find possible locations and persons in tokens)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys

# Add the Jython site-packages directory to the module search path
# (presumably so the third-party imports below can be resolved).
sys.path.append(r'D:\jython2.7.0\Lib\site-packages')

from SPARQLWrapper import SPARQLWrapper, JSON
from langdetect import detect

# Default DBpedia SPARQL endpoint.
dbpedia_version = "http://dbpedia.org/sparql"

# Hard-coded test token.
value = "comptoir"

# Detect the language of the token (author's note: useless with short tokens).
lang_query = detect(value)

# Language-specific DBpedia endpoints; every other detected language falls
# back to the main dbpedia.org endpoint.
_ENDPOINTS_BY_LANG = {
    "fr": "http://fr.dbpedia.org/sparql",
    "nl": "http://nl.dbpedia.org/sparql",
}
dbpedia = _ENDPOINTS_BY_LANG.get(lang_query, "http://dbpedia.org/sparql")
def get_sparql_label(value, dbpedia_version):
    """Query a SPARQL endpoint for places and agents whose label matches ``value``.

    The query selects up to 5 distinct ``?entity ?score1 ?type`` rows, ordered
    by descending full-text score, for entities that:

    * have an ``rdfs:label`` or ``skos:prefLabel`` matching ``value`` through
      ``bif:contains``,
    * have a ``dbo:abstract``, with both label and abstract in the language
      given by the module-level ``lang_query``,
    * are typed ``dbo:Place`` or ``dbo:Agent``.

    Args:
        value: Text to search for; it is interpolated into the query as-is.
        dbpedia_version: URL of the SPARQL endpoint to query.

    Returns:
        The converted JSON response of the endpoint, as returned by
        ``SPARQLWrapper.query().convert()``.
    """
    # Use the endpoint supplied by the caller. Previously this parameter was
    # overwritten with the module-level ``dbpedia``, so any other endpoint
    # passed in (e.g. a fallback one) was silently ignored.
    sparql = SPARQLWrapper(dbpedia_version)

    # NOTE(review): ``value`` is inserted into the query without escaping; a
    # token containing quote characters will produce an invalid query.
    # ``lang_query`` is read from module scope at call time.
    sparql.setQuery("""
        SELECT DISTINCT ?entity ?score1 ?type
        WHERE{
            ?entity ?p ?label.
            ?entity ?q ?abstract.
            Filter langMatches(lang(?label),"%s").
            Filter langMatches(lang(?abstract),"%s").
            ?label <bif:contains> "'%s'" OPTION(score ?score1).
            FILTER (?p=<http://www.w3.org/2000/01/rdf-schema#label> ||
            ?p=<http://www.w3.org/2004/02/skos/core#prefLabel>).
            FILTER (?q=<http://dbpedia.org/ontology/abstract>).
            ?entity a ?type.
            FILTER (?type IN (<http://dbpedia.org/ontology/Place>,
            <http://dbpedia.org/ontology/Agent>)).
            FILTER isIRI(?entity).
        } ORDER BY desc(?score1) LIMIT 5
        """ % (lang_query, lang_query, value))
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()
def _format_bindings(response):
    """Return one "type||entity" string per binding of a SPARQL JSON response."""
    return [
        row["type"]["value"] + "||" + row["entity"]["value"]
        for row in response["results"]["bindings"]
    ]


# First attempt: pass the endpoint selected from the detected language.
results = get_sparql_label(value, dbpedia)
liste = _format_bindings(results)

# Nothing found: retry once, passing the nl.dbpedia.org endpoint and
# switching the module-level language tag to "NL".
if not liste:
    dbpedia_version = "http://nl.dbpedia.org/sparql"
    lang_query = "NL"
    results = get_sparql_label(value, dbpedia_version)
    liste.extend(_format_bindings(results))

print(liste)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment