Skip to content

Instantly share code, notes, and snippets.

@the-code-rider
Created December 8, 2025 07:55
Show Gist options
  • Select an option

  • Save the-code-rider/3d7b5e140cf416b5c5c6a94ff962c3c2 to your computer and use it in GitHub Desktop.

Select an option

Save the-code-rider/3d7b5e140cf416b5c5c6a94ff962c3c2 to your computer and use it in GitHub Desktop.
Scrape book names and covers from a Goodreads profile

install dependencies

pip install requests beautifulsoup4

run the script

python goodreads-list-scrape.py > books_data.js

important

Make sure you update GOODREADS_SHELF_URL in the script with your own Goodreads shelf URL before running it.

import time
import json
import sys
import requests
from bs4 import BeautifulSoup
# URL requested for every shelf page; the placeholder must be replaced with
# your own shelf URL before running the script.
GOODREADS_SHELF_URL = "https://www.goodreads.com/review/list/<<add-your-own-here>>"

# Sent as the "shelf" query parameter on each request.
SHELF_NAME = "read"

# Upper bound on the number of pages requested in one run.
MAX_PAGES = 20

# Seconds to pause between consecutive page requests.
REQUEST_DELAY = 1.0

# HTTP headers attached to every request.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (compatible; GoodreadsShelfScraper/1.0; +https://luckysingh.net)",
}
def is_book_cover(img):
    """Return True when an ``<img>`` element looks like a book cover.

    ``img`` only needs a mapping-style ``get`` method (as a parsed HTML tag
    provides). An image qualifies when its ``alt`` text is non-blank and its
    ``src`` contains both ``"gr-assets.com"`` and ``"/books/"``.
    """
    src = img.get("src") or ""
    alt_text = img.get("alt", "").strip()
    required_fragments = ("gr-assets.com", "/books/")
    return bool(alt_text) and all(fragment in src for fragment in required_fragments)
def normalize_cover_url(src):
    """Return the cover URL to record for a book.

    This is currently a pass-through: ``src`` comes back unchanged. It gives
    callers a single place to route cover URLs through.
    """
    cover_url = src
    return cover_url
def fetch_page(session, page):
    """Download one page of the configured shelf and return its HTML text.

    Args:
        session: Object exposing a requests-style ``get`` method.
        page: Page number, sent as the ``page`` query parameter.

    Returns:
        The ``text`` attribute of the response.

    Raises:
        Whatever ``session.get`` or the response's ``raise_for_status()``
        raises; nothing is caught here.
    """
    query = {"shelf": SHELF_NAME, "page": page}
    response = session.get(
        GOODREADS_SHELF_URL,
        params=query,
        headers=HEADERS,
        timeout=20,
    )
    response.raise_for_status()
    return response.text
def extract_books_from_html(html):
    """Extract the unique books referenced by cover images in ``html``.

    Every ``<img>`` accepted by ``is_book_cover`` contributes one
    ``{"title": ..., "cover": ...}`` dict, where the title is the stripped
    ``alt`` text and the cover is the stripped ``src`` passed through
    ``normalize_cover_url``. Repeated (title, cover) pairs are dropped,
    keeping the first occurrence in document order.
    """
    document = BeautifulSoup(html, "html.parser")
    already_seen = set()
    result = []
    for image in document.find_all("img"):
        if not is_book_cover(image):
            continue
        title = image.get("alt", "").strip()
        cover = normalize_cover_url(image.get("src", "").strip())
        if not (title and cover):
            continue
        identity = (title, cover)
        if identity in already_seen:
            continue
        already_seen.add(identity)
        result.append({"title": title, "cover": cover})
    return result
def scrape_all_books():
    """Collect the books from every page of the shelf, up to ``MAX_PAGES``.

    Pages are fetched in order starting at 1. Scraping stops early when a
    page yields no books, or when a page yields only books that were already
    collected.

    Returns:
        A list of ``{"title": ..., "cover": ...}`` dicts, unique by
        (title, cover), in the order they were first encountered.

    Raises:
        Any exception raised by ``fetch_page`` or ``extract_books_from_html``
        propagates to the caller.
    """
    all_books = []
    seen_keys = set()
    # The context manager closes the session (and its pooled connections)
    # even when a request or parse step raises.
    with requests.Session() as session:
        for page in range(1, MAX_PAGES + 1):
            books = extract_books_from_html(fetch_page(session, page))
            if not books:
                break
            new_count = 0
            for book in books:
                key = (book["title"], book["cover"])
                if key in seen_keys:
                    continue
                seen_keys.add(key)
                all_books.append(book)
                new_count += 1
            # A page that adds nothing new means further pages are not worth
            # requesting.
            if new_count == 0:
                break
            # Pause only when another request will actually follow.
            if page < MAX_PAGES:
                time.sleep(REQUEST_DELAY)
    return all_books
def print_books_js(books):
    """Print ``books`` to stdout as a JavaScript ``const BOOKS = [...];`` array.

    Each book dict must provide ``"title"`` and ``"cover"``; both values are
    encoded with ``json.dumps`` so they come out as quoted, escaped string
    literals.
    """
    print("const BOOKS = [")
    for entry in books:
        title_js, cover_js = (json.dumps(entry[field]) for field in ("title", "cover"))
        print(f" {{ title: {title_js}, cover: {cover_js} }},")
    print("];")
def main():
    """Scrape the shelf and print the result as JavaScript on stdout.

    If scraping raises, the error is reported on stderr and the process
    exits with status 1. Nothing is written to stdout in that case.
    """
    try:
        scraped = scrape_all_books()
    except Exception as exc:
        print(f"Error while scraping: {exc}", file=sys.stderr)
        sys.exit(1)
    else:
        print_books_js(scraped)
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment