Skip to content

Instantly share code, notes, and snippets.

@josemarcosrf
Last active March 15, 2023 16:29
Show Gist options
  • Save josemarcosrf/392b96686748f550a3fe3af8da3827c5 to your computer and use it in GitHub Desktop.
Save josemarcosrf/392b96686748f550a3fe3af8da3827c5 to your computer and use it in GitHub Desktop.
Export a Calibre library, look up each book's ISBN with the Google Books API, and write the results to a JSON or CSV file
import click
import json
import requests
import sys
import time
import pandas as pd
from tqdm.auto import tqdm
from rich.progress import track
from rich.console import Console
from rich.traceback import install
# Install rich's traceback handler (imported from rich.traceback above).
install()
# Shared rich console; the functions below log through console.log().
console = Console()
def call_books_api(title, authors):
    """Look up a book on the Google Books API and return selected metadata.

    Only the first volume returned for the query is used.

    Args:
        title: Book title, used as the free-text part of the query.
        authors: Author name(s): a single string, an iterable of strings,
            or ``None``. Every non-empty name is appended to the query as
            an ``inauthor:`` keyword term.

    Returns:
        A dict with the keys ``ISBN_10``, ``ISBN_13``, ``categories`` and
        ``publisher``; a value is ``None`` when the volume does not provide
        it. An empty dict is returned when the request fails or when the
        query matches no volume.
    """
    api_url = "https://www.googleapis.com/books/v1/volumes"
    info = {}

    # Normalise ``authors`` to a list of clean, non-empty names.
    if authors is None:
        names = []
    elif isinstance(authors, str):
        names = [authors]
    else:
        names = [a for a in authors if isinstance(a, str)]
    names = [name.strip() for name in names if name.strip()]

    # The API expects field filters such as ``inauthor:`` as keywords inside
    # ``q``; a separate ``inauthor`` query parameter is not part of the API.
    query_terms = [] if title is None else [str(title)]
    query_terms.extend(f"inauthor:{name}" for name in names)
    if not query_terms:
        console.log("[red]Error calling books API: empty query[/red]")
        return info

    try:
        # Without a timeout a stalled connection would block the whole run.
        resp = requests.get(
            api_url, params={"q": " ".join(query_terms)}, timeout=10
        )
        resp.raise_for_status()
        results = resp.json()
    except (requests.RequestException, ValueError) as e:
        console.log(f"[red]Error calling books API: {e}[/red]")
        return info

    items = results.get("items") if isinstance(results, dict) else None
    if not items:
        # A query with no matches has no "items" key; this is not an error.
        console.log(f"[yellow]No results found for: {title}[/yellow]")
        return info

    volume = items[0].get("volumeInfo") or {}

    # Some volumes carry no identifiers at all; treat that as "not found"
    # rather than failing and losing the remaining fields.
    identifiers = {
        entry.get("type"): entry.get("identifier")
        for entry in reversed(volume.get("industryIdentifiers") or [])
    }
    # Tuples (not sets) keep the key order, and thus CSV columns, stable.
    for key in ("ISBN_10", "ISBN_13"):
        info[key] = identifiers.get(key)

    for key in ("categories", "publisher"):
        value = volume.get(key)
        if isinstance(value, list):
            value = ", ".join(map(str, value))
        info[key] = value

    return info
@click.group()
def cli():
    """Fetch ISBN and publisher data for book lists via the Google Books API."""
@cli.command("calibre")
@click.argument(
    "calibre_json_file",
    # Calibre library JSON export file (click arguments take no help=).
)
def augment_calibre_library_data(calibre_json_file: str):
    """
    Retrieves the ISBN_10 & ISBN_13 for each book in a Calibre library dump.

    The JSON file is updated in place and a CSV copy is written next to it.

    To get the Calibre library dump:

    calibredb list -f title,authors,isbn,publisher,rating --for-machine \\
    > calibre-library-`date "+%Y-%m-%d"`.json
    """
    # Local import so this command does not rely on a file-level import.
    from pathlib import Path

    json_path = Path(calibre_json_file)
    with json_path.open("r", encoding="utf-8") as f:
        library = json.load(f)

    # Entries without a title sort first instead of raising KeyError.
    library = sorted(library, key=lambda book: book.get("title") or "")

    for book in library:
        title = book.get("title")
        authors = book.get("authors")
        console.log(f"Searching [bold]{title}[/bold] ({authors})")
        # Call the Google Books API to find the ISBN numbers
        book_info = call_books_api(title, authors)
        console.log(f"[magenta]{book_info}[/magenta]")
        book.update(book_info)
        # Pause between requests so the API is not hit in a tight loop.
        time.sleep(0.5)

    console.log("[green]Writing updated library file[/green]")
    with json_path.open("w", encoding="utf-8") as f:
        f.write(json.dumps(library, indent=2, ensure_ascii=True))

    # fillna returns a new frame; the result must be kept to take effect.
    df = pd.DataFrame(library).fillna("<unk>")
    # The Calibre "id" column is dropped from the CSV when present.
    df = df.drop(columns=["id"], errors="ignore")
    # with_suffix never yields the input path itself, unlike str.replace
    # on a file name that does not contain ".json".
    df.to_csv(json_path.with_suffix(".csv"), index=False)
@cli.command("books")
@click.argument(
    "libcsv",
    # CSV file with the book list. Must have 'Author' and 'Title' columns
)
@click.argument(
    "outfile",
    # output CSV file
)
def fetch_book_data(libcsv: str, outfile: str):
    """
    Retrieves the ISBN_10 & ISBN_13 for each book in a CSV book list.
    The CSV Must have two columns: Author and Title
    """
    _cols = ["Author", "Title"]

    # Read the CSV and make sure the required columns are there
    libdf = pd.read_csv(libcsv)
    if any(col not in libdf.columns for col in _cols):
        raise ValueError(f"CSV file must have the following columns: {_cols}")

    book_data = []
    for _, row in tqdm(libdf.iterrows(), total=len(libdf)):
        if isinstance(row.Author, str):
            # Comma-separated author list; drop padding and empty pieces.
            authors = [
                name.strip() for name in row.Author.split(",") if name.strip()
            ]
        else:
            # Missing author cell (NaN): search by title only instead of
            # reusing the previous row's authors.
            authors = []

        bdata = call_books_api(row.Title, authors)
        bdata.update(
            {"authors": row.Author, "title": row.Title, "completed": 0, "notes": ""}
        )
        book_data.append(bdata)

    wdf = pd.DataFrame(book_data)
    # No index column, matching the CSV written by the "calibre" command.
    wdf.to_csv(outfile, index=False)
# Run the click command group when the file is executed as a script.
if __name__ == "__main__":
    cli()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment