josemarcosrf · March 15, 2023 16:29
diff --git a/calibre_library.py b/calibre_library.py
 import click
 import json
 import requests
 import sys
 import time

 import pandas as pd
 from tqdm.auto import tqdm
 from rich.progress import track
 from rich.console import Console
 from rich.traceback import install

 # Install rich's traceback handler (imported from rich.traceback above).
 install()
 # Shared rich console used by the functions below for log output.
 console = Console()


 def call_books_api(title, authors, timeout=10):
    """Look up a book on the Google Books API and return selected metadata.

    Only the first volume of the search result is inspected.

    Args:
        title: Book title, sent as the ``q`` search parameter.
        authors: Author name (or list of names), sent as ``inauthor``.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A dict with the keys ``ISBN_10``, ``ISBN_13``, ``categories`` and
        ``publisher`` (any value may be ``None``) when a volume was found.
        An empty dict when the request fails or nothing matches; the problem
        is logged to the console rather than raised.
    """
    api_url = "https://www.googleapis.com/books/v1/volumes"
    # NOTE(review): the Google Books documentation describes ``inauthor`` as
    # a keyword used inside ``q`` rather than a standalone parameter --
    # confirm that the server actually applies this author filter.
    query = [("q", title), ("inauthor", authors)]

    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        resp = requests.get(api_url, params=query, timeout=timeout)
        resp.raise_for_status()

        # Use .get(): a response without "items" should be reported clearly
        # instead of surfacing as a bare KeyError message.
        items = resp.json().get("items") or []
        if not items:
            console.log("[red]Error calling books API: no matching volume[/red]")
            return {}
        volume = items[0].get("volumeInfo") or {}

        # The first identifier of each type wins. A volume without
        # "industryIdentifiers" still yields its publisher/categories.
        identifiers = {}
        for entry in volume.get("industryIdentifiers") or []:
            identifiers.setdefault(entry.get("type"), entry.get("identifier"))

        # Tuples (not sets) keep the key order -- and therefore the column
        # order of any table built from the result -- stable between runs.
        info = {k: identifiers.get(k) for k in ("ISBN_10", "ISBN_13")}
        for k in ("categories", "publisher"):
            v = volume.get(k)
            info[k] = ", ".join(map(str, v)) if isinstance(v, list) else v
        return info
    except Exception as e:
        # Best effort: one failed lookup must not abort a batch run.
        console.log(f"[red]Error calling books API: {e}[/red]")
        return {}


 @click.group()
 def cli():
    # Root command group: it does no work itself and only hosts the
    # sub-commands registered through ``@cli.command``. Kept free of a
    # docstring on purpose, since click would show it as help text.
    return None


 @cli.command("calibre")
 @click.argument(
    "calibre_json_file",
    # help="Calibre library JSON export file."
 )
 def augment_calibre_library_data(calibre_json_file: str):
    """
    Retrieves the ISBN_10 & ISBN_13 for each book in a Calibre library dump.
    To get the Calibre library dump:

        calibredb list -f title,authors,isbn,publisher,rating --for-machine \
            > calibre-library-`date "+%Y-%m-%d"`.json

    The JSON file is rewritten in place with the extra fields and a CSV copy
    is written next to it.
    """
    # Local import: only this command needs pathlib.
    from pathlib import Path

    # JSON is read and written with the same explicit encoding.
    with open(calibre_json_file, "r", encoding="utf-8") as f:
        library = json.load(f)

    # Entries without a title sort first instead of raising KeyError.
    library = sorted(library, key=lambda x: x.get("title") or "")

    for book in library:
        title = book.get("title")
        authors = book.get("authors")
        console.log(f"Searching [bold]{title}[/bold] ({authors})")

        # Call Google Book API to find ISBN numbers
        book_info = call_books_api(title, authors)
        console.log(f"[magenta]{book_info}[/magenta]")

        book.update(book_info)

        # Pause between consecutive API requests.
        time.sleep(0.5)

    console.log("[green]Writing updated library file[/green]")
    # The original referenced an undefined ``lib_json`` here; the updated
    # library goes back to the file it was loaded from.
    with open(calibre_json_file, "w", encoding="utf-8") as f:
        json.dump(library, f, indent=2, ensure_ascii=True)

    df = pd.DataFrame.from_dict(library)
    # fillna returns a new frame, so the result has to be kept.
    df = df.fillna("<unk>")
    # Tolerate dumps that carry no "id" column.
    df = df.drop(columns=["id"], errors="ignore")
    # with_suffix only touches the extension, so the CSV can never land on
    # top of the JSON file when the path has no ".json" in it.
    df.to_csv(Path(calibre_json_file).with_suffix(".csv"), index=False)


 @cli.command("books")
 @click.argument(
    "libcsv",
    # help="CSV file with the book list. Must have 'Author' and 'Title' columns",
 )
 @click.argument(
    "outfile",
    # help="output CSV file"
 )
 def fetch_book_data(libcsv: str, outfile: str):
    """
    Retrieves the ISBN_10 & ISBN_13 for each book in a CSV book list.
    The CSV Must have two columns: Author and Title
    """
    _cols = ["Author", "Title"]

    # Read the CSV and make sure the required columns are present.
    libdf = pd.read_csv(libcsv)
    if any(col not in libdf.columns for col in _cols):
        raise ValueError(f"CSV file must have the following columns: {_cols}")

    book_data = []
    for _, row in tqdm(libdf.iterrows(), total=len(libdf)):
        # Rebuild the author list for every row. An empty cell is read as a
        # non-string value, which previously left ``authors`` unbound on the
        # first row or silently reused the previous row's authors.
        if isinstance(row.Author, str):
            authors = [name.strip() for name in row.Author.split(",") if name.strip()]
        else:
            authors = []

        bdata = call_books_api(row.Title, authors)
        bdata.update(
            {"authors": row.Author, "title": row.Title, "completed": 0, "notes": ""}
        )
        book_data.append(bdata)

    # One output row per input row, in input order.
    wdf = pd.DataFrame.from_dict(book_data)
    wdf.to_csv(outfile)


 # Run the click command group when the file is executed as a script.
 if __name__ == "__main__":
    cli()
	import click
	import json
	import requests
	import sys
	import time

	import pandas as pd
	from tqdm.auto import tqdm
	from rich.progress import track
	from rich.console import Console
	from rich.traceback import install

	# Install rich's traceback handler (imported from rich.traceback above).
	install()
	# Shared rich console used by the functions below for log output.
	console = Console()


	def call_books_api(title, authors, timeout=10):
	    """Look up a book on the Google Books API and return selected metadata.

	    Only the first volume of the search result is inspected.

	    Args:
	        title: Book title, sent as the ``q`` search parameter.
	        authors: Author name (or list of names), sent as ``inauthor``.
	        timeout: Seconds to wait for the HTTP response before giving up.

	    Returns:
	        A dict with the keys ``ISBN_10``, ``ISBN_13``, ``categories`` and
	        ``publisher`` (any value may be ``None``) when a volume was found.
	        An empty dict when the request fails or nothing matches; the
	        problem is logged to the console rather than raised.
	    """
	    api_url = "https://www.googleapis.com/books/v1/volumes"
	    # NOTE(review): the Google Books documentation describes ``inauthor``
	    # as a keyword used inside ``q`` rather than a standalone parameter --
	    # confirm that the server actually applies this author filter.
	    query = [("q", title), ("inauthor", authors)]

	    try:
	        # A timeout keeps one stalled connection from hanging the run.
	        resp = requests.get(api_url, params=query, timeout=timeout)
	        resp.raise_for_status()

	        # Use .get(): a response without "items" should be reported
	        # clearly instead of surfacing as a bare KeyError message.
	        items = resp.json().get("items") or []
	        if not items:
	            console.log("[red]Error calling books API: no matching volume[/red]")
	            return {}
	        volume = items[0].get("volumeInfo") or {}

	        # The first identifier of each type wins. A volume without
	        # "industryIdentifiers" still yields its publisher/categories.
	        identifiers = {}
	        for entry in volume.get("industryIdentifiers") or []:
	            identifiers.setdefault(entry.get("type"), entry.get("identifier"))

	        # Tuples (not sets) keep the key order stable between runs.
	        info = {k: identifiers.get(k) for k in ("ISBN_10", "ISBN_13")}
	        for k in ("categories", "publisher"):
	            v = volume.get(k)
	            info[k] = ", ".join(map(str, v)) if isinstance(v, list) else v
	        return info
	    except Exception as e:
	        # Best effort: one failed lookup must not abort a batch run.
	        console.log(f"[red]Error calling books API: {e}[/red]")
	        return {}


	@click.group()
	def cli():
	    # Root command group: it does no work itself and only hosts the
	    # sub-commands registered through ``@cli.command``. Kept free of a
	    # docstring on purpose, since click would show it as help text.
	    pass


	@cli.command("calibre")
	@click.argument(
	    "calibre_json_file",
	    # help="Calibre library JSON export file."
	)
	def augment_calibre_library_data(calibre_json_file: str):
	    """
	    Retrieves the ISBN_10 & ISBN_13 for each book in a Calibre library dump.
	    To get the Calibre library dump:

	        calibredb list -f title,authors,isbn,publisher,rating --for-machine \
	            > calibre-library-`date "+%Y-%m-%d"`.json

	    The JSON file is rewritten in place with the extra fields and a CSV copy
	    is written next to it.
	    """
	    # Local import: only this command needs pathlib.
	    from pathlib import Path

	    # JSON is read and written with the same explicit encoding.
	    with open(calibre_json_file, "r", encoding="utf-8") as f:
	        library = json.load(f)

	    # Entries without a title sort first instead of raising KeyError.
	    library = sorted(library, key=lambda x: x.get("title") or "")

	    for book in library:
	        title = book.get("title")
	        authors = book.get("authors")
	        console.log(f"Searching [bold]{title}[/bold] ({authors})")

	        # Call Google Book API to find ISBN numbers
	        book_info = call_books_api(title, authors)
	        console.log(f"[magenta]{book_info}[/magenta]")

	        book.update(book_info)

	        # Pause between consecutive API requests.
	        time.sleep(0.5)

	    console.log("[green]Writing updated library file[/green]")
	    # The original referenced an undefined ``lib_json`` here; the updated
	    # library goes back to the file it was loaded from.
	    with open(calibre_json_file, "w", encoding="utf-8") as f:
	        json.dump(library, f, indent=2, ensure_ascii=True)

	    df = pd.DataFrame.from_dict(library)
	    # fillna returns a new frame, so the result has to be kept.
	    df = df.fillna("<unk>")
	    # Tolerate dumps that carry no "id" column.
	    df = df.drop(columns=["id"], errors="ignore")
	    # with_suffix only touches the extension, so the CSV can never land on
	    # top of the JSON file when the path has no ".json" in it.
	    df.to_csv(Path(calibre_json_file).with_suffix(".csv"), index=False)


	@cli.command("books")
	@click.argument(
	    "libcsv",
	    # help="CSV file with the book list. Must have 'Author' and 'Title' columns",
	)
	@click.argument(
	    "outfile",
	    # help="output CSV file"
	)
	def fetch_book_data(libcsv: str, outfile: str):
	    """
	    Retrieves the ISBN_10 & ISBN_13 for each book in a CSV book list.
	    The CSV Must have two columns: Author and Title
	    """
	    _cols = ["Author", "Title"]

	    # Read the CSV and make sure the required columns are present.
	    libdf = pd.read_csv(libcsv)
	    if any(col not in libdf.columns for col in _cols):
	        raise ValueError(f"CSV file must have the following columns: {_cols}")

	    book_data = []
	    for _, row in tqdm(libdf.iterrows(), total=len(libdf)):
	        # Rebuild the author list for every row. An empty cell is read as
	        # a non-string value, which previously left ``authors`` unbound on
	        # the first row or silently reused the previous row's authors.
	        if isinstance(row.Author, str):
	            authors = [name.strip() for name in row.Author.split(",") if name.strip()]
	        else:
	            authors = []

	        bdata = call_books_api(row.Title, authors)
	        bdata.update(
	            {"authors": row.Author, "title": row.Title, "completed": 0, "notes": ""}
	        )
	        book_data.append(bdata)

	    # One output row per input row, in input order.
	    wdf = pd.DataFrame.from_dict(book_data)
	    wdf.to_csv(outfile)


	# Run the click command group when the file is executed as a script.
	if __name__ == "__main__":
	    cli()