Last active
March 15, 2023 16:29
-
-
Save josemarcosrf/392b96686748f550a3fe3af8da3827c5 to your computer and use it in GitHub Desktop.
Export a Calibre library, look up each book's ISBN with the Google Books API, and write the result to a JSON or CSV file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import click | |
| import json | |
| import requests | |
| import sys | |
| import time | |
| import pandas as pd | |
| from tqdm.auto import tqdm | |
| from rich.progress import track | |
| from rich.console import Console | |
| from rich.traceback import install | |
# Install rich's traceback handler (rich.traceback.install).
install()
# Shared rich console; the functions in this module log through it.
console = Console()
def call_books_api(title, authors, timeout=10.0):
    """Look up a book in the Google Books API and return selected metadata.

    Sends one GET request to the volumes endpoint and reads only the first
    search result.

    Args:
        title: Book title, sent as the ``q`` query parameter.
        authors: Author name(s), sent as the ``inauthor`` query parameter.
            NOTE(review): the Books API documents ``inauthor:`` as a keyword
            inside ``q`` rather than as a separate parameter -- confirm that
            this parameter actually narrows the search.
        timeout: Seconds to wait for the HTTP response before giving up, so
            that a stalled connection cannot block the caller forever.

    Returns:
        A dict with the keys ``ISBN_10``, ``ISBN_13``, ``categories`` and
        ``publisher`` (a value is ``None`` when the first result lacks it),
        or an empty dict when the request fails or returns no results.
    """
    api_url = "https://www.googleapis.com/books/v1/volumes"

    try:
        resp = requests.get(
            api_url,
            params=[("q", title), ("inauthor", authors)],
            timeout=timeout,
        )
        resp.raise_for_status()
        results = resp.json()
    except (requests.RequestException, ValueError) as e:
        # Best effort: a failed lookup must not abort the caller's batch.
        console.log(f"[red]Error calling books API: {e}[/red]")
        return {}

    items = results.get("items") if isinstance(results, dict) else None
    if not items:
        # A search without matches has no "items"; that is not an API error.
        console.log("[yellow]Books API returned no results[/yellow]")
        return {}

    volume = items[0].get("volumeInfo") or {}

    # Keep the first identifier of each type, as listed by the API.
    identifiers = {}
    for entry in volume.get("industryIdentifiers") or []:
        identifiers.setdefault(entry.get("type"), entry.get("identifier"))

    # Tuples (not sets) keep the key order stable from run to run.
    info = {key: identifiers.get(key) for key in ("ISBN_10", "ISBN_13")}
    for key in ("categories", "publisher"):
        value = volume.get(key)
        if isinstance(value, list):
            value = ", ".join(map(str, value))
        info[key] = value
    return info
@click.group()
def cli():
    """Look up ISBNs and publisher data for book lists via the Google Books API."""
@cli.command("calibre")
@click.argument(
    "calibre_json_file",
    # Calibre library JSON export file (described in the docstring below).
)
def augment_calibre_library_data(calibre_json_file: str):
    """
    Retrieves the ISBN_10 & ISBN_13 for each book in a Calibre library dump.

    CALIBRE_JSON_FILE is the Calibre library JSON export. It is rewritten in
    place with the extra fields found for each book, and a CSV copy (without
    the "id" column) is written next to it with a ".csv" suffix.

    To get the Calibre library dump:

        calibredb list -f title,authors,isbn,publisher,rating --for-machine \
            > calibre-library-`date "+%Y-%m-%d"`.json
    """
    from pathlib import Path

    with open(calibre_json_file, "r", encoding="utf-8") as f:
        library = json.load(f)

    # Entries without a title sort first instead of raising KeyError.
    library = sorted(library, key=lambda book: book.get("title") or "")

    for book in library:
        title = book.get("title")
        authors = book.get("authors")
        console.log(f"Searching [bold]{title}[/bold] ({authors})")

        # Call the Google Books API to find the ISBN numbers
        book_info = call_books_api(title, authors)
        console.log(f"[magenta]{book_info}[/magenta]")
        book.update(book_info)

        # Pause between requests (presumably to stay under API rate limits).
        time.sleep(0.5)

    console.log("[green]Writing updated library file[/green]")
    with open(calibre_json_file, "w", encoding="utf-8") as f:
        f.write(json.dumps(library, indent=2, ensure_ascii=True))

    # fillna returns a new frame, so its result has to be kept.
    df = pd.DataFrame(library).fillna("<unk>")
    # Not every dump is guaranteed to carry an "id" column.
    df = df.drop(columns=["id"], errors="ignore")
    # with_suffix only swaps the extension, so the CSV never lands on the
    # JSON file when the name has no ".json" in it.
    df.to_csv(Path(calibre_json_file).with_suffix(".csv"), index=False)
@cli.command("books")
@click.argument(
    "libcsv",
    # CSV file with the book list. Must have 'Author' and 'Title' columns
)
@click.argument(
    "outfile",
    # output CSV file
)
def fetch_book_data(libcsv: str, outfile: str):
    """
    Retrieves the ISBN_10 & ISBN_13 for each book in a CSV book list.

    LIBCSV is the input CSV and must have two columns: Author and Title.
    OUTFILE is the CSV file the results are written to; besides the data
    found for each book it gets the columns "authors", "title",
    "completed" (set to 0) and "notes" (left empty).
    """
    _cols = ["Author", "Title"]

    # Read the CSV
    libdf = pd.read_csv(libcsv)
    missing = [col for col in _cols if col not in libdf.columns]
    if missing:
        raise ValueError(f"CSV file must have the following columns: {_cols}")

    book_data = []
    for _, row in tqdm(libdf.iterrows(), total=len(libdf)):
        author, title = row["Author"], row["Title"]

        # pandas reads an empty cell as NaN (a float), so build the author
        # list fresh for every row instead of reusing the previous row's.
        if isinstance(author, str):
            authors = [name.strip() for name in author.split(",") if name.strip()]
        else:
            authors = []

        # Without a title there is nothing to search for.
        bdata = {} if pd.isna(title) else call_books_api(title, authors)
        bdata.update(
            {"authors": author, "title": title, "completed": 0, "notes": ""}
        )
        book_data.append(bdata)

    wdf = pd.DataFrame(book_data)
    # NOTE(review): this also writes the DataFrame index as an unnamed first
    # column; pass index=False if that column is not wanted.
    wdf.to_csv(outfile)
# Run the click command group when the file is executed as a script.
if __name__ == "__main__":
    cli()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment