Last active
January 9, 2022 10:38
-
-
Save putnik/7dcb92c0168392ec8b1c718093e36d1a to your computer and use it in GitHub Desktop.
Generate a sorted list of awards from Wikidata
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright 2018-2022 Sergey Leschina <[email protected]>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy | |
from collections import deque | |
from requests import post | |
from SPARQLWrapper import SPARQLWrapper, JSON, POSTDIRECTLY | |
from urllib.parse import quote | |
def append_value(graph, key, value):
    """Record the directed edge ``key -> value`` in ``graph``, skipping duplicates.

    ``graph`` is a dict mapping a node to the list of nodes it links to; it is
    modified in place.  Nothing is recorded when either endpoint is ``None``.

    Args:
        graph: adjacency dict to update.
        key: source node of the edge.
        value: target node of the edge.

    Raises:
        ValueError: if either endpoint is the item ``Q19798647``.  Per the
            error text, that item stands for 'No value' and was entered where
            the special ``novalue`` marker was expected; the message names the
            other endpoint so the offending statement can be located.
    """
    # The sentinel checks deliberately run before the None check, so a bad
    # statement is reported even when its other endpoint is missing.
    if key == 'Q19798647':
        raise ValueError("'No value' instead of novalue: %s" % value)
    if value == 'Q19798647':
        raise ValueError("'No value' instead of novalue: %s" % key)
    if key is None or value is None:
        return
    targets = graph.setdefault(key, [])
    # Plain membership test instead of catching the exception from
    # list.index(), which also swallowed unrelated errors.
    if value not in targets:
        targets.append(value)
def connected_components(graph):
    """Split ``graph`` into groups of nodes that are linked in either direction.

    Args:
        graph: dict mapping a node to the list of nodes it links to.

    Returns:
        A list of dicts, one per component.  Each dict maps every node of the
        component to its outgoing-link list taken from ``graph`` (the same
        list object, not a copy), or to a new empty list when the node has no
        entry of its own in ``graph``.
    """
    # Undirected view of the graph: a private copy plus the reverse of every
    # edge, so that traversal ignores edge direction.
    undirected = copy.deepcopy(graph)
    for source, targets in graph.items():
        for target in targets:
            append_value(undirected, target, source)

    visited = set()

    def collect(start):
        # Gather every node reachable from ``start`` in the undirected view.
        pending = {start}
        members = []
        while pending:
            node = pending.pop()
            visited.add(node)
            pending |= set(undirected.get(node, ())) - visited
            members.append(node)
        return members

    result = []
    for node in undirected:
        if node in visited:
            continue
        members = collect(node)
        result.append({member: graph.get(member, []) for member in members})
    return result
def error_message(v, path):
    """Build the report text for a cycle found while sorting.

    The report contains the cycle itself and a link to a Wikidata Query
    Service graph view of the P3730 / P3729 statements (those without a P582
    qualifier) of the items involved.  The link is shortened through the
    Wikimedia URL shortener when possible; otherwise the full query URL is
    used.

    Args:
        v: item id at which the cycle was detected.
        path: sequence of the other item ids on the cycle; may be empty when
            ``v`` links to itself.

    Returns:
        A two-line string: ``Cycle (<v>): <path>`` and ``Graph: <url>``.
    """
    # Local import: only this function needs the exception type.
    from requests.exceptions import RequestException

    # One "wd:<id>" token per item.  Joining complete tokens means an empty
    # ``path`` cannot leave a dangling "wd:" (invalid SPARQL) in the query.
    items = " ".join("wd:%s" % item for item in [v] + list(path))
    query = '''
#defaultView:Graph
SELECT ?item ?itemLabel ?linkTo ?linkToLabel WHERE {
{
VALUES ?item { %s }.
?item p:P3730 ?pHigher .
?pHigher ps:P3730 ?linkTo .
FILTER NOT EXISTS { ?pHigher pq:P582 ?end . }
} UNION {
VALUES ?linkTo { %s }.
?linkTo p:P3729 ?pLower .
?pLower ps:P3729 ?item .
FILTER NOT EXISTS { ?pLower pq:P582 ?end . }
}
SERVICE wikibase:label {
bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en".
}
}
'''.strip() % (items, items)
    sparql_url = 'https://query.wikidata.org/#%s' % quote(query)
    api_url = 'https://meta.wikimedia.org/w/api.php?action=shortenurl&format=json'

    # Shortening is best effort: any failure falls back to the long URL so
    # the cycle report itself is never lost.
    url = sparql_url
    try:
        # Explicit timeout so a stalled shortener cannot hang the script.
        response = post(api_url, {'url': sparql_url}, timeout=30)
        json_response = response.json()
    except (RequestException, ValueError) as err:
        # ValueError covers a response body that is not valid JSON.
        print("URL Shortener error: %s" % err)
    else:
        try:
            url = json_response['shortenurl']['shorturl']
        except (KeyError, TypeError):
            # The error payload may be missing or shaped differently too.
            try:
                info = json_response['error']['info']
            except (KeyError, TypeError):
                info = json_response
            print("URL Shortener error: %s" % info)
    return "Cycle (%s): %s\nGraph: %s" % (v, " -> ".join(path), url)
def topological_sort(graph):
    """Order the nodes of ``graph`` so each comes after the nodes it links to.

    The traversal is a depth-first search done with an explicit stack rather
    than recursion, so long chains cannot exceed the interpreter's recursion
    limit.

    Args:
        graph: dict mapping a node to an iterable of the nodes it links to.
            Nodes that only appear as link targets are handled as nodes
            without outgoing links.

    Returns:
        A ``collections.deque`` of all nodes in depth-first post-order: a
        node is emitted only after every node reachable from it.

    Raises:
        ValueError: if the graph contains a cycle; the message is produced by
            ``error_message``.
    """
    VISITING, FINISHED = 0, 1
    order = deque()
    state = {}

    for root in graph:
        if state.get(root) is not None:
            continue
        state[root] = VISITING
        # ``path`` is the chain of nodes currently being explored and
        # ``pending`` holds, for each of them, an iterator over the links
        # that have not been examined yet.
        path = [root]
        pending = [iter(graph.get(root, ()))]
        while path:
            v = path[-1]
            for u in pending[-1]:
                u_state = state.get(u)
                if u_state == VISITING:
                    # ``u`` is still on the current chain, so v -> u closes
                    # a cycle.  Report the chain from ``u`` up to, but not
                    # including, ``v`` (which is passed separately).
                    raise ValueError(error_message(v, path[path.index(u):-1]))
                if u_state is None:
                    # Descend into the unvisited node; the iterator of ``v``
                    # resumes where it stopped once ``u`` is finished.
                    state[u] = VISITING
                    path.append(u)
                    pending.append(iter(graph.get(u, ())))
                    break
            else:
                # Every link of ``v`` has been handled.
                state[v] = FINISHED
                order.append(v)
                path.pop()
                pending.pop()
    return order
def raw_value(value):
    """Return the bare identifier held in a SPARQL result binding.

    Args:
        value: a binding dict with a ``"value"`` entry, or ``None``.

    Returns:
        The ``"value"`` string with every occurrence of the Wikidata entity
        URI prefix removed, or ``None`` when ``value`` is ``None``.
    """
    if value is not None:
        uri = value["value"]
        return uri.replace("http://www.wikidata.org/entity/", "")
    return None
def load_data():
    """Fetch the rank links between awards from the Wikidata Query Service.

    The query selects items that are instances of (a subclass of) Q618779 or
    Q38033430 and reads their P3729 and P3730 statements, skipping statements
    that carry a P582 qualifier.  ``?country`` is selected and used for
    ordering only; it is not part of the returned rows.

    Returns:
        A list of dicts with the keys ``"item"``, ``"lower"`` and
        ``"higher"``.  Values are identifiers as produced by ``raw_value``;
        ``"lower"`` / ``"higher"`` are ``None`` when the result row has no
        such binding.
    """
    query = """
SELECT DISTINCT ?item ?lower ?higher ?country
WHERE {
{ ?item wdt:P31/wdt:P279* wd:Q618779 }
UNION
{ ?item wdt:P31/wdt:P279* wd:Q38033430 }
.
{
?item p:P3729 ?pLower .
?pLower ps:P3729 ?lower .
FILTER NOT EXISTS { ?pLower pq:P582 ?end . }
} UNION {
?item p:P3730 ?pHigher .
?pHigher ps:P3730 ?higher .
FILTER NOT EXISTS { ?pHigher pq:P582 ?end . }
}
.
OPTIONAL { ?item wdt:P17 ?country . }
}
ORDER BY ?country
"""
    endpoint = SPARQLWrapper("https://query.wikidata.org/sparql")
    endpoint.setQuery(query)
    endpoint.setRequestMethod(POSTDIRECTLY)
    endpoint.setReturnFormat(JSON)

    bindings = endpoint.query().convert()["results"]["bindings"]
    return [
        {
            "item": raw_value(binding['item']),
            "lower": raw_value(binding.get('lower')),
            "higher": raw_value(binding.get('higher')),
        }
        for binding in bindings
    ]
def construct_graph():
    """Load the rank links and assemble them into an adjacency dict.

    Every row from ``load_data`` contributes the edges ``lower -> item`` and
    ``item -> higher``; ``append_value`` ignores an edge whose endpoint is
    ``None`` and drops duplicates.

    Returns:
        A dict mapping an item id to the list of item ids it links to.
    """
    edges = {}
    for record in load_data():
        item = record["item"]
        append_value(edges, record["lower"], item)
        append_value(edges, item, record["higher"])
    return edges
# Script body: build the graph, sort each connected component and print the
# ids as a tab-indented "data" array with one single-element row per id.
# A ValueError (raised by the helpers for a 'No value' item or a cycle) is
# printed instead of the list.
try:
    graph = construct_graph()
    components = connected_components(graph)
    sorted_list = [
        item
        for component in components
        for item in topological_sort(component)
    ]
    # Closes one row and opens the next one.
    row_separator = '"\n\t\t],\n\t\t[\n\t\t\t"'
    body = row_separator.join(sorted_list)
    print("".join(('\t"data": [\n\t\t[\n\t\t\t"', body, '"\n\t\t]\n\t]')))
except ValueError as e:
    print(e)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment