Created
August 14, 2020 22:29
-
-
Save bowmanjd/71da19ad4544e9dea0e8b165eda71c4b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Proof-of-concept asynchronous Wikipedia search tool.""" | |
import asyncio | |
import logging | |
import time | |
import httpx | |
# Contact identifier embedded in the User-Agent header below.
EMAIL = "your_email@provider"  # or Github URL or other identifier
USER_AGENT = {"user-agent": f"pypedia/0.1.0 ({EMAIL})"}

# Log INFO and above to asyncpedia.log; mode "w" truncates the file each
# time this module is loaded.
logging.basicConfig(
    level=logging.INFO,
    filemode="w",
    filename="asyncpedia.log",
)
# Messages from this module are emitted through the logger named "asyncio".
LOG = logging.getLogger("asyncio")
async def search(query, limit=100, client=None):
    """Search Wikipedia and return the HTTP response for the query.

    Args:
        query: Search string sent as the ``q`` request parameter.
        limit: Maximum number of pages to request (``limit`` parameter).
        client: Optional ``httpx.AsyncClient`` to reuse. When omitted, a
            temporary client carrying ``USER_AGENT`` is created for this
            call and closed before returning.

    Returns:
        The response object from ``client.get``; callers decode the body
        themselves (e.g. ``response.json()["pages"]``).
    """
    # Identity check: any caller-supplied client is reused, never replaced.
    close_client = client is None
    if close_client:
        # Send the same identifying header that list_articles() configures.
        client = httpx.AsyncClient(headers=USER_AGENT)
    LOG.info(f"Start query '{query}': {time.strftime('%X')}")
    url = "https://en.wikipedia.org/w/rest.php/v1/search/page"
    params = {"q": query, "limit": limit}
    try:
        response = await client.get(url, params=params)
    finally:
        # Release the temporary client even when the request raises.
        if close_client:
            await client.aclose()
    LOG.info(f"End query '{query}': {time.strftime('%X')}")
    return response
async def list_articles(queries):
    """Run all the given Wikipedia searches concurrently on one client.

    Returns a dict mapping each query to the ``"pages"`` entry of its
    decoded JSON response.
    """
    async with httpx.AsyncClient(headers=USER_AGENT) as session:
        # gather() returns the responses in the same order as the queries.
        replies = await asyncio.gather(
            *(search(term, client=session) for term in queries)
        )
    return {
        term: reply.json()["pages"]
        for term, reply in zip(queries, replies)
    }
def run():
    """Run a fixed set of searches and print each hit's title and excerpt."""
    queries = (
        "linksto:Python_(programming_language)",
        "incategory:Computer_programming",
        "incategory:Programming_languages",
        "incategory:Python_(programming_language)",
        "incategory:Python_web_frameworks",
        "incategory:Python_implementations",
        "incategory:Programming_languages_created_in_1991",
        "incategory:Computer_programming_stubs",
    )
    results = asyncio.run(list_articles(queries))
    for query in results:
        # One banner per query, followed by its matching articles.
        print(f"\n*** {query} ***")
        for article in results[query]:
            print(f"{article['title']}: {article['excerpt']}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment