Skip to content

Instantly share code, notes, and snippets.

@duhaime
Created October 9, 2021 12:24
Show Gist options
  • Save duhaime/ac5625c71a6b329f58f6359d3b359942 to your computer and use it in GitHub Desktop.
Save duhaime/ac5625c71a6b329f58f6359d3b359942 to your computer and use it in GitHub Desktop.
DBPedia
from SPARQLWrapper import SPARQLWrapper, JSON
import html, datetime, os, json, glob, time
def send_sparql_query(q, timeout=None, sleep=0):
    """Run a SPARQL query against the public DBpedia endpoint, retrying on failure.

    Args:
        q: The SPARQL query text.
        timeout: Optional timeout handed to ``SPARQLWrapper.setTimeout``;
            skipped when falsy.
        sleep: Seconds to wait before the first attempt. After each failed
            attempt the wait becomes 1 if it was 0, otherwise it doubles,
            and it stops growing once it is 180 or more (so starting from 0
            it goes 1, 2, 4, ... 128, 256 and then stays at 256).

    Returns:
        Whatever ``sparql.query().convert()`` yields for the JSON return
        format.

    Any ``Exception`` raised while building or running the query is printed
    and the query is retried; this function does not give up on its own.
    """
    # NOTE(review): the indentation of the original was lost, so it is
    # ambiguous whether the retry was meant to be unconditional or to stop
    # once the backoff reached its cap. This assumes an unconditional retry
    # with only the backoff growth capped -- confirm against the author's
    # intent.
    #
    # Retries are done in a loop rather than by self-recursion so that a long
    # outage cannot grow the call stack until RecursionError is raised.
    while True:
        time.sleep(sleep)
        try:
            sparql = SPARQLWrapper("http://dbpedia.org/sparql")
            sparql.setReturnFormat(JSON)
            if timeout:
                sparql.setTimeout(timeout)
            sparql.setQuery(q)
            return sparql.query().convert()
        except Exception as exc:
            print(' * query failed', exc)
            # Exponential backoff; growth stops once the wait is >= 180 s.
            if sleep < 180:
                sleep = sleep * 2 if sleep else 1
def get_bulk_dbpedia_metadata(limit=1000, offset=0):
    """Fetch one page of ``dbo:Person`` rows from DBpedia.

    The query selects every person that has an English-tagged ``dbp:name``
    and optionally binds birth date, death date, thumbnail and an
    English-tagged abstract.

    Args:
        limit: Value placed after ``LIMIT`` in the query.
        offset: Value placed after ``OFFSET`` in the query.

    Returns:
        A list with one dict per result binding, mapping each variable
        present in that binding to its ``'value'`` field.
    """
    # NOTE(review): the query has no ORDER BY, so successive LIMIT/OFFSET
    # pages are not guaranteed by SPARQL to be disjoint or exhaustive --
    # confirm this is acceptable for the caller.
    query_head = '''
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dbp: <http://dbpedia.org/property/>
SELECT * WHERE {
?person a dbo:Person; dbp:name
?name FILTER (lang(?name) = 'en')
OPTIONAL { ?person dbo:birthDate ?birth_date }
OPTIONAL { ?person dbo:deathDate ?death_date }
OPTIONAL { ?person dbo:thumbnail ?thumbnail }
OPTIONAL { ?person dbo:abstract ?abstract FILTER (lang(?abstract) = 'en') }
} LIMIT '''
    query = ''.join([query_head, str(limit), ''' OFFSET ''', str(offset)])
    response = send_sparql_query(query)

    # Flatten each binding from {var: {'value': ..., ...}} to {var: value}.
    rows = []
    for binding in response['results']['bindings']:
        rows.append({var: cell['value'] for var, cell in binding.items()})
    return rows
# Accumulate every fetched row into one flat list, page by page.
l = []
limit = 1000
for i in range(2500):
    print(' * fetching page', i)
    # Page i starts at row i * limit.
    l.extend(get_bulk_dbpedia_metadata(offset=i * limit, limit=limit))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment