Skip to content

Instantly share code, notes, and snippets.

@bemitc
Last active March 14, 2026 13:15
Show Gist options
  • Select an option

  • Save bemitc/fcbfaad89c3518eddab992f69ff51ad4 to your computer and use it in GitHub Desktop.

Select an option

Save bemitc/fcbfaad89c3518eddab992f69ff51ad4 to your computer and use it in GitHub Desktop.
A simple downloader class for the Bloom Library.
import requests
import json
from urllib.parse import urlparse, urlunparse
import os
# Just a simple downloader
class BloomDownloader:
    """Fetch book metadata from the Bloom Library Parse API and download ePubs.

    Typical use: ``get_books(lang, level)`` to populate the instance, then
    ``download_books()`` to save every ePub, or ``get_book_urls()`` /
    ``dump_json()`` to inspect what was found.
    """

    def __init__(self):
        # Parse-server REST endpoint used by bloomlibrary.org itself.
        self._api_url = "https://server.bloomlibrary.org/parse/classes/books"
        # Headers mimic the bloomlibrary.org web client; the application id
        # is the public Parse app id the site sends with every request.
        self._headers = {
            "accept": "application/json, text/plain, */*",
            "content-type": "text/json",
            "origin": "https://bloomlibrary.org",
            "referer": "https://bloomlibrary.org/",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36",
            "x-parse-application-id": "R6qNTeumQXjJCMutAJYAwPtip1qBulkFyLefkCE5"
        }
        self._books = []  # raw metadata dicts from the last get_books() call
        self._level = 1   # reading level; used to build the save path
        self._lang = ""   # ISO language code; used for URLs and the save path

    def get_books(self, lang, level=1):
        """Query the API for all in-circulation books in *lang* at *level*.

        Pages through results 100 at a time, accumulating them on the
        instance. Stops on the first non-200 response or when a page is
        empty/short. Returns the list of book metadata dicts (also kept
        in ``self._books``).
        """
        self._lang = lang
        self._books = []
        self._level = level
        # A book matches if tagged with either tag form for this level...
        in_levels = [f"computedLevel:{level}", f"level:{level}"]
        # ...and is NOT explicitly tagged with any *other* level.
        nin_levels = ["level:1", "level:2", "level:3", "level:4"]
        if f'level:{level}' in nin_levels:
            nin_levels.remove(f'level:{level}')
        limit = 100
        skip = 0
        while True:
            payload = {
                "_method": "GET",
                "keys": "title,baseUrl,objectId,tags",
                "include": "langPointers",
                "limit": limit,
                "skip": skip,
                "where": {
                    "langPointers": {
                        "$inQuery": {
                            "where": {"isoCode": lang},
                            "className": "language"
                        }
                    },
                    "$and": [
                        {"tags": {"$in": in_levels}},
                        {"tags": {"$nin": nin_levels}},
                    ],
                    "inCirculation": True,
                    "draft": False,
                    "rebrand": False,
                    "baseUrl": {"$exists": True}
                },
                "order": "-createdAt"
            }
            # timeout so a stalled server cannot hang the loop forever
            response = requests.post(self._api_url, headers=self._headers,
                                     json=payload, timeout=30)
            if response.status_code != 200:
                break  # give up on HTTP errors; keep any partial results
            results = response.json().get("results", [])
            if not results:
                break
            self._books.extend(results)
            if len(results) < limit:
                break  # short page: no further pages exist
            skip += limit
        return self._books

    def get_book_urls(self):
        """Build download info for each fetched book.

        Returns a list of dicts with keys ``id``, ``topic``, ``title``,
        ``epub`` (direct ePub URL on the harvest bucket) and ``bloom_url``
        (the human-facing bloomlibrary.org page).
        """
        book_epubs = []
        for book in self._books:
            title = book.get("title")
            object_id = book.get("objectId")
            tags = book.get("tags", [])
            # First "topic:<name>" tag, if any, supplies the topic.
            topic_tag = next((t for t in tags if t.startswith("topic:")), "")
            topic = topic_tag.replace("topic:", "")
            # baseUrl arrives with percent-encoded slashes; decode both
            # cases, since percent-encoding is case-insensitive (RFC 3986).
            base_url = (book.get("baseUrl") or "").replace('%2f', '/').replace('%2F', '/')
            parsed = urlparse(base_url)
            # Rewrite .../BloomLibraryBooks/<uploader>/<id>/<title>/  to
            #         .../bloomharvest/<uploader>/<id>/epub/<title>.epub
            parts = parsed.path.split('/')
            parts.insert(-2, 'epub')         # epub/ folder before the title
            parts[-2] = parts[-2] + '.epub'  # title segment -> filename
            parts.pop()                      # drop trailing empty segment
            new_path = '/'.join(parts).replace('BloomLibraryBooks', 'bloomharvest')
            new_url_obj = parsed._replace(path=new_path)
            book_epubs.append(
                {
                    "id": object_id,
                    "topic": topic,
                    "title": title,
                    "epub": urlunparse(new_url_obj),
                    "bloom_url": f"https://bloomlibrary.org/language:{self._lang}/book/{object_id}?lang={self._lang}"
                }
            )
        return book_epubs

    def dump_json(self):
        """Return the raw fetched metadata as a pretty-printed JSON string."""
        return json.dumps(self._books, indent=4)

    def download_books(self, save_dir="bloom"):
        """Download every fetched book's ePub into ``save_dir/<lang>/<level>/``.

        Files are named ``<objectId>.epub``. Failures are printed and
        skipped so one bad book does not abort the batch.
        """
        save_dir = os.path.join(save_dir, self._lang, str(self._level))
        os.makedirs(save_dir, exist_ok=True)  # idempotent on re-runs
        for book in self.get_book_urls():
            url = book['epub']
            filepath = os.path.join(save_dir, f'{book["id"]}.epub')
            try:
                with requests.get(url, stream=True, timeout=20) as r:
                    if r.status_code == 200:
                        with open(filepath, 'wb') as f:
                            # stream in chunks to avoid loading whole ePubs
                            for chunk in r.iter_content(chunk_size=8192):
                                f.write(chunk)
                    else:
                        print(f" FAILED: Status {r.status_code} for {url}")
            except Exception as e:
                # best-effort: report and continue with the next book
                print(f" ERROR downloading {book['title']}: {e}")
# Guard the entry point so importing this module does not immediately
# hit the network and start downloading.
if __name__ == "__main__":
    bloom = BloomDownloader()
    bloom.get_books('ceb', 1)
    bloom.download_books()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment