Last active
December 12, 2024 20:30
-
-
Save odewahn/83265d5641ce9f469295fde3bce769d9 to your computer and use it in GitHub Desktop.
Grab content from ORM api
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import aiohttp | |
import sys | |
import asyncio | |
import os | |
# Before you start, get a content token from https://learning.oreilly.com/account/api-tokens/ | |
# You'll need the account manager role on your user membership. If you're not an account manager,
# you'll need to request one from the U&A team or from someone who can create tokens.
# Then create an environment variable called ORM_AUTH_TOKEN with the token as the value | |
async def async_fetch_url(session, url, format="json"):
    """Fetch *url* with the ORM auth token and return the decoded response body.

    Args:
        session: Client session exposing ``get(url, headers=...)`` as an async
            context manager (the caller in this file passes an
            ``aiohttp.ClientSession``).
        url: Address to request.
        format: ``"html"`` returns the body as text; any other value returns
            the body parsed as JSON. (The name shadows the builtin but is kept
            so existing keyword callers continue to work.)

    Returns:
        ``str`` when ``format == "html"``, otherwise the parsed JSON value.

    Raises:
        RuntimeError: If the ``ORM_AUTH_TOKEN`` environment variable is unset
            or empty.
        Exception: Whatever the response's ``raise_for_status()`` raises for
            an error status.
    """
    token = os.getenv("ORM_AUTH_TOKEN")
    if not token:
        # Without this check the request would go out as "Token None" and
        # fail far from the real cause.
        raise RuntimeError(
            "ORM_AUTH_TOKEN environment variable is not set; "
            "create a content token and export it before running."
        )
    headers = {"Authorization": f"Token {token}"}
    async with session.get(url, headers=headers) as r:
        # Fail here on 4xx/5xx instead of handing an error payload back to
        # the caller as if it were real data.
        r.raise_for_status()
        if format == "html":
            return await r.text()
        return await r.json()
# Grab the metadata for a book, then get all metadata for each chapter, then get the content of each chapter | |
async def fetch_book(work):
    """Download a book's metadata, per-chapter metadata, and chapter bodies.

    Args:
        work: Identifier interpolated into the book API URL
            ``https://learning.oreilly.com/api/v1/book/{work}/``.

    Returns:
        A 3-tuple ``(metadata, chapters_metadata, chapters_content)``.
        ``chapters_metadata`` holds one entry per URL in
        ``metadata["chapters"]``, and ``chapters_content`` holds the body
        fetched from each entry's ``"content"`` URL, in the same order.
    """
    book_url = f"https://learning.oreilly.com/api/v1/book/{work}/"

    async with aiohttp.ClientSession() as session:
        # Step 1: the book record, which lists the chapter metadata URLs.
        metadata = await async_fetch_url(session, book_url)

        # Step 2: all chapter metadata documents, requested concurrently.
        metadata_requests = (
            async_fetch_url(session, chapter_url)
            for chapter_url in metadata["chapters"]
        )
        chapters_metadata = await asyncio.gather(*metadata_requests)

        # Step 3: each chapter's body as HTML text. gather() keeps input
        # order, so the results line up 1:1 with chapters_metadata.
        content_requests = (
            async_fetch_url(session, entry["content"], "html")
            for entry in chapters_metadata
        )
        chapters_content = await asyncio.gather(*content_requests)

    return metadata, chapters_metadata, chapters_content
if __name__ == "__main__":
    # The book identifier is the single required command-line argument.
    # Exit with a usage message instead of an IndexError traceback when it
    # is missing.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <work>")

    metadata, chapters_metadata, chapters_content = asyncio.run(
        fetch_book(sys.argv[1])
    )
    # Print every chapter body to stdout, in chapter order.
    for chapter in chapters_content:
        print(chapter)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment