Created
October 9, 2021 12:24
-
-
Save duhaime/ac5625c71a6b329f58f6359d3b359942 to your computer and use it in GitHub Desktop.
DBPedia
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from SPARQLWrapper import SPARQLWrapper, JSON | |
import html, datetime, os, json, glob, time | |
def send_sparql_query(q, timeout=None, sleep=0):
    """Send a SPARQL query to the DBpedia endpoint, retrying with backoff.

    Each attempt first sleeps for the current delay, then issues the query
    and asks for the response to be converted from JSON. When an attempt
    raises, the error is printed and the delay grows (0 -> 1, then doubling)
    before the next attempt.

    Args:
        q: The SPARQL query text.
        timeout: Optional timeout handed to ``SPARQLWrapper.setTimeout``;
            falsy values leave the client's default in place.
        sleep: Seconds to wait before the first attempt.

    Returns:
        The converted query result, or ``None`` when an attempt made with a
        delay of 180 seconds or more also fails.
    """
    while True:
        time.sleep(sleep)
        try:
            sparql = SPARQLWrapper("http://dbpedia.org/sparql")
            sparql.setReturnFormat(JSON)
            if timeout:
                sparql.setTimeout(timeout)
            sparql.setQuery(q)
            return sparql.query().convert()
        except Exception as exc:
            # Deliberately broad: any failure (network, HTTP, parsing) is
            # treated as retryable so a long crawl survives transient errors.
            print(' * query failed', exc)
        if sleep >= 180:
            # Retries exhausted; callers must be prepared for None.
            return None
        sleep = sleep * 2 if sleep else 1
def get_bulk_dbpedia_metadata(limit=1000, offset=0):
    """Fetch one page of person metadata from DBpedia.

    The query selects ``dbo:Person`` resources that have an English
    ``dbp:name`` and optionally asks for birth date, death date, thumbnail
    and English abstract.

    Args:
        limit: Maximum number of rows to request (SPARQL ``LIMIT``).
        offset: Number of rows to skip (SPARQL ``OFFSET``).

    Returns:
        A list with one dict per result row, mapping each variable present
        in that row's binding to the binding's ``'value'`` field.

    Raises:
        RuntimeError: If ``send_sparql_query`` returned ``None`` instead of
            a result.
    """
    # int() keeps anything but whole numbers out of the query text.
    query = '''
      PREFIX dbo: <http://dbpedia.org/ontology/>
      PREFIX dbp: <http://dbpedia.org/property/>
      SELECT * WHERE {
        ?person a dbo:Person; dbp:name
          ?name FILTER (lang(?name) = 'en')
        OPTIONAL { ?person dbo:birthDate ?birth_date }
        OPTIONAL { ?person dbo:deathDate ?death_date }
        OPTIONAL { ?person dbo:thumbnail ?thumbnail }
        OPTIONAL { ?person dbo:abstract ?abstract FILTER (lang(?abstract) = 'en') }
      } LIMIT ''' + str(int(limit)) + ''' OFFSET ''' + str(int(offset))
    response = send_sparql_query(query)
    if response is None:
        # Fail with a clear message instead of a TypeError on subscripting None.
        raise RuntimeError(
            'DBpedia query returned no result (limit=%s, offset=%s)'
            % (limit, offset)
        )
    return [
        {variable: cell['value'] for variable, cell in binding.items()}
        for binding in response['results']['bindings']
    ]
# Page through DBpedia person records, accumulating every row in `l`.
# At most 2500 pages of `limit` rows are requested; paging stops early once
# the endpoint returns an empty page, since later offsets would be empty too.
l = []
limit = 1000
for i in range(2500):
    print(' * fetching page', i)
    page = get_bulk_dbpedia_metadata(limit=limit, offset=limit * i)
    if not page:
        break
    l += page
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment