Created
January 12, 2025 00:55
-
-
Save eristoddle/5a8e7dd0597d09d00aa5de066788c303 to your computer and use it in GitHub Desktop.
Python exporter for highlights and notes from the macOS Books app. It works standalone to export highlights as Markdown files, or inside Obsidian via the Python Scripter plugin.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import glob | |
import sqlite3 | |
import logging | |
import sys | |
from typing import List, Tuple, NamedTuple | |
# Glob patterns for the two Apple Books SQLite stores. "~" is expanded and the
# wildcard resolved by get_db_path() at the point of use.
ANNOTATION_DB_PATTERN = (
    "~/Library/Containers/com.apple.iBooksX/Data/Documents/AEAnnotation/AEAnnotation*.sqlite"
)
LIBRARY_DB_PATTERN = (
    "~/Library/Containers/com.apple.iBooksX/Data/Documents/BKLibrary/BKLibrary*.sqlite"
)

# Only errors are reported through logging; progress messages use print().
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.ERROR,
)
class BookDetail(NamedTuple): | |
asset_id: str | |
title: str | |
author: str | None | |
description: str | None | |
epub_id: str | None | |
path: str | None | |
isbn: str | None | |
language: str | None | |
publisher: str | None | |
publication_date: str | None | |
rights: str | None | |
subjects: list[str] | None | |
cover: str | None | |
def sanitize_frontmatter(text: str) -> str: | |
if not text: | |
return "" | |
replacements = { | |
":": " -", | |
"[": "(", | |
"]": ")", | |
"{": "(", | |
"}": ")", | |
"#": "", | |
"|": "-", | |
">": "-", | |
"\\": "/", | |
"\n": " ", | |
"\r": " ", | |
} | |
result = str(text) | |
for char, replacement in replacements.items(): | |
result = result.replace(char, replacement) | |
result = " ".join(result.split()) | |
return result.strip() | |
def get_epub_metadata(epub_path: str):
    """Read extra metadata and the cover image from an EPUB via ebooklib.

    Args:
        epub_path: Location of the EPUB to read; may be empty or ``None``.

    Returns:
        A dict with the keys ``isbn``, ``language``, ``publisher``,
        ``publication_date``, ``rights``, ``subjects`` (a list) and ``cover``
        (base64 text or ``None``). Returns ``None`` when ebooklib is not
        installed, *epub_path* is empty, or the book cannot be read.
    """
    try:
        # ebooklib is an optional dependency; without it there is no extra
        # metadata to offer.
        import base64

        import ebooklib
        from ebooklib import epub
    except ImportError:
        return None

    if not epub_path:
        return None

    try:
        book = epub.read_epub(epub_path)
        metadata = {
            "isbn": _find_isbn(book.get_metadata("DC", "identifier")),
            "language": _first_dc_value(book, "language"),
            "publisher": _first_dc_value(book, "publisher"),
            "publication_date": _first_dc_value(book, "date"),
            "rights": _first_dc_value(book, "rights"),
            "subjects": [val[0] for val in book.get_metadata("DC", "subject")],
        }
        # Use the first item flagged as the cover, if there is one.
        metadata["cover"] = next(
            (
                base64.b64encode(item.get_content()).decode("utf-8")
                for item in book.get_items()
                if item.get_type() == ebooklib.ITEM_COVER
            ),
            None,
        )
        return metadata
    except Exception as e:
        # Deliberately best-effort: a broken EPUB must not stop the export.
        print(f"Error reading epub: {e}")
        return None


def _first_dc_value(book, name: str):
    """Return the value of the first Dublin Core *name* entry, or None."""
    return next((val[0] for val in book.get_metadata("DC", name)), None)


def _find_isbn(identifiers):
    """Return the first identifier that is marked as an ISBN, or None.

    Each entry is a ``(value, attributes)`` pair. An identifier counts as an
    ISBN when "isbn" appears in the value itself (e.g. ``urn:isbn:...``) or in
    one of its attribute values (e.g. a scheme attribute of ``ISBN``).
    """
    for value, attrs in identifiers:
        if not isinstance(value, str):
            continue
        hints = [value, *(str(attr) for attr in (attrs or {}).values())]
        if any("isbn" in hint.lower() for hint in hints):
            return value
    return None
def get_db_path(pattern: str) -> str:
    """Resolve a glob *pattern* (``~`` allowed) to the first matching path.

    Raises:
        FileNotFoundError: If nothing matches the pattern.
    """
    expanded = os.path.expanduser(pattern)
    for match in glob.glob(expanded):
        # The first match wins, exactly as glob reports it.
        return match
    raise FileNotFoundError(f"No database found matching pattern: {pattern}")
def get_book_details() -> List[BookDetail]:
    """Load one BookDetail per row of the library database.

    Only the columns stored in ZBKLIBRARYASSET are filled in; the
    EPUB-derived fields (isbn, language, ..., cover) are left as ``None``.

    Raises:
        FileNotFoundError: If no library database matches LIBRARY_DB_PATTERN.
        sqlite3.Error: On any database failure (logged, then re-raised).
    """
    try:
        conn = sqlite3.connect(get_db_path(LIBRARY_DB_PATTERN))
        try:
            cursor = conn.cursor()
            cursor.execute(
                """SELECT ZASSETID, ZSORTTITLE, ZSORTAUTHOR, ZBOOKDESCRIPTION, ZEPUBID, ZPATH
                   FROM ZBKLIBRARYASSET"""
            )
            rows = cursor.fetchall()
        finally:
            # "with sqlite3.connect(...)" only manages the transaction; the
            # connection has to be closed explicitly.
            conn.close()
    except sqlite3.Error as e:
        logging.error(f"Database error: {e}")
        raise

    return [
        BookDetail(
            asset_id=asset_id,
            title=title,
            author=author,
            description=description,
            epub_id=epub_id,
            path=path,
            isbn=None,
            language=None,
            publisher=None,
            publication_date=None,
            rights=None,
            subjects=None,
            cover=None,
        )
        for asset_id, title, author, description, epub_id, path in rows
    ]
def get_books_with_highlights() -> List[str]:
    """Return the asset ids of library books that have a non-empty highlight.

    Raises:
        FileNotFoundError: If a required database file cannot be found.
        sqlite3.Error: On any database failure (logged, then re-raised).
    """
    known_ids = {book.asset_id for book in get_book_details()}
    try:
        conn = sqlite3.connect(get_db_path(ANNOTATION_DB_PATTERN))
        try:
            cursor = conn.cursor()
            # Filtering against the library happens in Python rather than via
            # "IN (?, ?, ...)": one bound parameter per book can exceed
            # SQLite's host-parameter limit on large libraries. The empty
            # string is single-quoted so it is a real SQL string literal.
            cursor.execute(
                """SELECT DISTINCT ZANNOTATIONASSETID
                   FROM ZAEANNOTATION
                   WHERE ZANNOTATIONSELECTEDTEXT != '';"""
            )
            rows = cursor.fetchall()
        finally:
            # The connection context manager would not close the connection.
            conn.close()
    except sqlite3.Error as e:
        logging.error(f"Database error: {e}")
        raise

    return [entry[0] for entry in rows if entry[0] in known_ids]
def export_annotations(
    asset_id: str,
    book_details: BookDetail,
    file_path: str,
    extra_meta: dict | None,
) -> None:
    """Fetch the highlights for one book and write them out with create_file().

    Args:
        asset_id: Asset id whose annotations are exported.
        book_details: Details of that single book (passed on to create_file).
        file_path: Output directory; falsy means the current directory.
        extra_meta: Optional extra BookDetail fields, or ``None``.

    Raises:
        FileNotFoundError: If no annotation database can be found.
        sqlite3.Error: On any database failure (logged, then re-raised).
        IOError: If the Markdown file cannot be written.
    """
    try:
        conn = sqlite3.connect(get_db_path(ANNOTATION_DB_PATTERN))
        try:
            cursor = conn.cursor()
            # Rows without selected text are skipped; the empty string is
            # single-quoted so it is a real SQL string literal.
            cursor.execute(
                """SELECT ZANNOTATIONSELECTEDTEXT, ZANNOTATIONNOTE, ZANNOTATIONLOCATION
                   FROM ZAEANNOTATION
                   WHERE ZANNOTATIONASSETID = ? AND ZANNOTATIONSELECTEDTEXT != '';""",
                (asset_id,),
            )
            annotations = cursor.fetchall()
        finally:
            # The connection context manager would not close the connection.
            conn.close()
    except sqlite3.Error as e:
        logging.error(f"Database error: {e}")
        raise

    create_file(book_details, annotations, file_path, extra_meta)
def create_file(
    book_detail: BookDetail,
    annotations: List[Tuple[str, str, str]],
    file_path: str,
    extra_meta: dict | None,
) -> None:
    """Write one book's highlights and notes to a Markdown file.

    The file is named "<title> - <author>.md" (the author part is omitted
    when unknown) and is created in *file_path*, or in the current directory
    when *file_path* is falsy.

    Args:
        book_detail: The book being exported.
        annotations: ``(highlight, note, location)`` rows for the book.
        file_path: Output directory, or a falsy value for the current one.
        extra_meta: Optional extra BookDetail fields merged into
            *book_detail* before rendering.

    Raises:
        IOError: If the file cannot be written (logged, then re-raised).
    """
    if extra_meta:
        book_detail = book_detail._replace(**extra_meta)

    output_md = _render_markdown(book_detail, annotations)

    name_parts = [
        str(part) for part in (book_detail.title, book_detail.author) if part
    ]
    file_name = " - ".join(name_parts or ["Untitled"]) + ".md"
    # A path separator inside a title would otherwise point into another
    # (possibly missing) directory.
    for separator in filter(None, (os.sep, os.altsep)):
        file_name = file_name.replace(separator, "-")

    target = (
        os.path.abspath(os.path.join(file_path, file_name))
        if file_path
        else file_name
    )
    try:
        # Highlights routinely contain non-ASCII text, so the encoding is
        # pinned rather than left to the platform default.
        with open(target, "w", encoding="utf-8") as mdfile:
            mdfile.write(output_md)
    except IOError as e:
        logging.error(f"Error writing to file: {e}")
        raise


def _render_markdown(book_detail, annotations) -> str:
    """Build the Markdown document: frontmatter, title, metadata, annotations."""
    from urllib.parse import quote  # only needed for the file:// link

    def display(value):
        # Lists (e.g. subjects) read better comma-separated than as a repr.
        if isinstance(value, (list, tuple)):
            return ", ".join(str(item) for item in value)
        return value

    fields = book_detail._asdict()

    # Frontmatter
    parts = ["---\n"]
    for key, value in fields.items():
        if value and key != "cover":
            parts.append(f"{key}: {sanitize_frontmatter(display(value))}\n")
    parts.append("---\n\n")

    # Title
    heading = f"# {book_detail.title}"
    if book_detail.author:
        heading += f" by {book_detail.author}"
    parts.append(f"{heading}\n\n")

    # Cover image
    if book_detail.cover:
        parts.append(f"![Cover](data:image/jpeg;base64,{book_detail.cover})\n\n")

    # Metadata
    parts.append("## Metadata\n\n")
    for key, value in fields.items():
        if not value or key == "cover":
            continue
        if key == "path":
            # Percent-encode the target so spaces do not break the link.
            parts.append(f"- {key}: [{value}](file://{quote(str(value))})\n")
        else:
            parts.append(f"- {key}: {display(value)}\n")

    # Annotations
    parts.append("\n")
    parts.append("## Annotations\n\n")
    # TODO: See if the location can be turned into a link that opens the book.
    for highlight, note, _location in annotations:
        parts.append("\n".join(f"> {line}" for line in highlight.split("\n")))
        parts.append("\n\n")
        if note:
            parts.append(f"{note}\n\n")
        parts.append("---\n\n")

    return "".join(parts)
def main():
    """Export a Markdown file for every library book that has highlights.

    Command-line arguments:
        * none: files are written to the current directory.
        * one to three arguments: ``argv[1]`` is the output directory.
        * more than three: the output directory is ``argv[1]`` joined with
          ``argv[3]``; ``argv[2]`` is ignored.
          NOTE(review): presumably the vault-path / folder convention of the
          Obsidian plugin that launches this script - confirm.

    A failure on one book is reported and the remaining books are still
    processed.
    """
    try:
        file_path = None
        if len(sys.argv) > 1:
            file_path = sys.argv[1]
        if len(sys.argv) > 3:
            vault_path = sys.argv[1]
            folder = sys.argv[3]
            file_path = os.path.join(vault_path, folder)
        book_details = get_book_details()
        books_with_highlights = get_books_with_highlights()
    except (FileNotFoundError, sqlite3.Error) as e:
        logging.error(f"Error initializing: {e}")
        print("An error occurred accessing the Books database.")
        return

    # Index once instead of scanning the whole list for every book;
    # setdefault keeps the first entry should an asset id ever repeat.
    details_by_id = {}
    for detail in book_details:
        details_by_id.setdefault(detail.asset_id, detail)

    for book in books_with_highlights:
        try:
            book_detail = details_by_id.get(book)
            if book_detail is None:
                logging.error(f"Book details not found for asset_id: {book}")
                continue
            extra_meta = get_epub_metadata(book_detail.path)
            export_annotations(book, book_detail, file_path, extra_meta)
            print(f"Exported annotations for book: {book_detail.title}")
        except (ValueError, sqlite3.Error, IOError) as e:
            print(f"Error exporting annotations: {e}")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment