-
-
Save davosian/cdda3f88b3307014e32b46d61236133d to your computer and use it in GitHub Desktop.
Rhasspy slot program that fetches German grocery terms from Wiktionary for use in Rhasspy
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/python3 | |
try: | |
# For Python 3.0 and later | |
from urllib.request import urlopen | |
from urllib.parse import quote | |
except ImportError: | |
# Fall back to Python 2's urllib2 | |
from urllib2 import urlopen | |
from urllib2 import quote | |
import json | |
def get_jsonparsed_data(url):
    """
    Fetch ``url`` and return its body parsed as JSON.

    The response body is decoded as UTF-8 before it is parsed.

    Parameters
    ----------
    url : str
        Address to request; passed unchanged to ``urlopen``.

    Returns
    -------
    object
        Whatever ``json.loads`` produces for the body (a dict when the
        body is a JSON object).
    """
    response = urlopen(url)
    try:
        data = response.read().decode("utf-8")
    finally:
        # Always release the connection, even if reading or decoding fails.
        # try/finally (rather than ``with``) keeps the urllib2 fallback usable.
        response.close()
    return json.loads(data)
# Wiktionary index pages whose outgoing links are collected as words.
# The titles are already percent-encoded because they are inserted into
# the request URL verbatim.
appendices = [
    "Verzeichnis:Deutsch/Essen_und_Trinken/Lebensmittel",
    "Verzeichnis:Deutsch/Essen_und_Trinken/Obst_und_Gem%C3%BCse",
    "Verzeichnis:Deutsch/Essen_und_Trinken/Speisen",
    "Verzeichnis:Deutsch/Essen_und_Trinken/Getr%C3%A4nke"
]

# Query for the links of one page, at most 500 links per response.
base_url = "https://de.wiktionary.org/w/api.php?format=json&action=query&titles={}&prop=links&formatversion=2&pllimit=500"
# Suffix that asks for the next batch of links.
plcont = "&plcontinue={}"

words = []
for appendix in appendices:
    url = base_url.format(appendix)
    url2 = url
    while True:
        res = get_jsonparsed_data(url2)
        for page in res["query"]["pages"]:
            # A page entry may carry no "links" key at all (e.g. a page
            # without links), so fall back to an empty list.
            # Titles containing ":" are skipped -- presumably namespaced
            # pages rather than plain words.
            words += [
                d["title"]
                for d in page.get("links", [])
                if ":" not in d["title"]
            ]
        if "continue" not in res:
            break
        # Encode to UTF-8 bytes before quoting: identical result on
        # Python 3, and avoids a failure in the Python 2 fallback when the
        # continuation token contains non-ASCII characters.
        token = res["continue"]["plcontinue"].encode("utf-8")
        url2 = url + plcont.format(quote(token))

# One word per line.
print("\n".join(words))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment