Skip to content

Instantly share code, notes, and snippets.

@eristoddle
Created January 12, 2025 00:55
Show Gist options
  • Save eristoddle/5a8e7dd0597d09d00aa5de066788c303 to your computer and use it in GitHub Desktop.
Save eristoddle/5a8e7dd0597d09d00aa5de066788c303 to your computer and use it in GitHub Desktop.
Python exporter for highlights and notes from the macOS Books app. It works standalone to export highlights as Markdown files, or inside Obsidian via the Python Scripter plugin.
import glob
import logging
import os
import sqlite3
import sys
from contextlib import closing
from typing import List, Tuple, NamedTuple
# Glob patterns locating the Books app SQLite stores under the user's home
# directory. The leading "~" is expanded by get_db_path before globbing.
ANNOTATION_DB_PATTERN = (
    "~/Library/Containers/com.apple.iBooksX/Data/Documents/AEAnnotation/AEAnnotation*.sqlite"
)
LIBRARY_DB_PATTERN = "~/Library/Containers/com.apple.iBooksX/Data/Documents/BKLibrary/BKLibrary*.sqlite"

# Only ERROR-level (and above) records are emitted, prefixed with a timestamp
# and the level name.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.ERROR,
)
class BookDetail(NamedTuple):
asset_id: str
title: str
author: str | None
description: str | None
epub_id: str | None
path: str | None
isbn: str | None
language: str | None
publisher: str | None
publication_date: str | None
rights: str | None
subjects: list[str] | None
cover: str | None
# Character-by-character substitutions applied to frontmatter values. None of
# the replacement strings contains a character that is itself replaced, so a
# single translate() pass gives the same result as chained replace() calls.
_FRONTMATTER_TRANSLATION = str.maketrans(
    {
        ":": " -",
        "[": "(",
        "]": ")",
        "{": "(",
        "}": ")",
        "#": "",
        "|": "-",
        ">": "-",
        "\\": "/",
        "\n": " ",
        "\r": " ",
    }
)


def sanitize_frontmatter(text: str) -> str:
    """Return *text* as a single line safe to use as a frontmatter value.

    The value is converted with ``str()``, the characters listed in
    ``_FRONTMATTER_TRANSLATION`` are substituted, and every run of whitespace
    is collapsed to one space with no leading or trailing whitespace.

    Args:
        text: The value to clean. Falsy values yield an empty string.

    Returns:
        The cleaned, single-line string.
    """
    if not text:
        return ""
    translated = str(text).translate(_FRONTMATTER_TRANSLATION)
    # split() without arguments drops leading/trailing whitespace and
    # collapses internal runs, so no further strip is required.
    return " ".join(translated.split())
def _find_isbn(identifiers):
    """Return the first identifier that looks like an ISBN, or ``None``.

    Each entry is expected to be a ``(value, attributes)`` pair. An entry
    matches when "isbn" appears (case-insensitively) in the value itself or
    in one of its attribute values (e.g. a scheme attribute).
    """
    for value, attrs in identifiers:
        if not isinstance(value, str):
            continue
        hints = [value]
        if isinstance(attrs, dict):
            hints.extend(str(attr_value) for attr_value in attrs.values())
        if any("isbn" in hint.lower() for hint in hints):
            return value
    return None


def _first_dc_value(book, name):
    """Return the value of the first Dublin Core *name* entry, or ``None``."""
    return next((entry[0] for entry in book.get_metadata("DC", name)), None)


def get_epub_metadata(epub_path: str):
    """Read Dublin Core metadata and the cover image from an EPUB file.

    This is best-effort: the optional ``ebooklib`` package is imported
    lazily, and any failure to read the file is reported on stdout rather
    than raised.

    Args:
        epub_path: Path of the EPUB to read. May be empty or ``None``.

    Returns:
        A dict with the keys ``isbn``, ``language``, ``publisher``,
        ``publication_date``, ``rights``, ``subjects`` (a list, possibly
        empty) and ``cover`` (base64 text or ``None``); or ``None`` when
        *epub_path* is empty, ``ebooklib`` is not installed, or the file
        cannot be read.
    """
    if not epub_path:
        return None
    try:
        import base64

        import ebooklib
        from ebooklib import epub
    except ImportError:
        # ebooklib is optional; without it no extra metadata is available.
        return None

    try:
        book = epub.read_epub(epub_path)
        metadata = {
            # The previous lookup unpacked each (value, attributes) pair in
            # the wrong order and tested the attributes dict, so it never
            # matched and isbn was always None.
            "isbn": _find_isbn(book.get_metadata("DC", "identifier")),
            "language": _first_dc_value(book, "language"),
            "publisher": _first_dc_value(book, "publisher"),
            "publication_date": _first_dc_value(book, "date"),
            "rights": _first_dc_value(book, "rights"),
            "subjects": [entry[0] for entry in book.get_metadata("DC", "subject")],
        }
        cover_base64 = None
        for item in book.get_items():
            if item.get_type() == ebooklib.ITEM_COVER:
                # Only the first cover item is used.
                cover_base64 = base64.b64encode(item.get_content()).decode("utf-8")
                break
        metadata["cover"] = cover_base64
        return metadata
    except Exception as e:
        # Deliberately broad: a damaged or unsupported EPUB must not abort
        # the export of the book's annotations.
        print(f"Error reading epub: {e}")
        return None
def get_db_path(pattern: str) -> str:
    """Resolve a glob *pattern* to the path of a database file.

    Args:
        pattern: Glob pattern; a leading ``~`` is expanded to the user's
            home directory before matching.

    Returns:
        The first path reported by ``glob.glob`` for the expanded pattern.

    Raises:
        FileNotFoundError: If nothing matches the pattern.
    """
    expanded = os.path.expanduser(pattern)
    first_match = next(iter(glob.glob(expanded)), None)
    if first_match is None:
        raise FileNotFoundError(f"No database found matching pattern: {pattern}")
    return first_match
def get_book_details() -> List[BookDetail]:
    """Load one ``BookDetail`` per row of the library database.

    Only the columns selected below are populated; the EPUB-derived fields
    (``isbn`` through ``cover``) are left as ``None``.

    Returns:
        A list of ``BookDetail`` records, one per ``ZBKLIBRARYASSET`` row.

    Raises:
        FileNotFoundError: If no library database matches
            ``LIBRARY_DB_PATTERN``.
        sqlite3.Error: If the query fails (logged before re-raising).
    """
    try:
        # A sqlite3 connection used directly as a context manager only
        # commits or rolls back; closing() makes sure it is also closed.
        with closing(sqlite3.connect(get_db_path(LIBRARY_DB_PATTERN))) as conn:
            cursor = conn.cursor()
            cursor.execute(
                """SELECT ZASSETID, ZSORTTITLE, ZSORTAUTHOR, ZBOOKDESCRIPTION, ZEPUBID, ZPATH
                FROM ZBKLIBRARYASSET"""
            )
            return [
                BookDetail(
                    asset_id=asset_id,
                    title=title,
                    author=author,
                    description=description,
                    epub_id=epub_id,
                    path=path,
                    isbn=None,
                    language=None,
                    publisher=None,
                    publication_date=None,
                    rights=None,
                    subjects=None,
                    cover=None,
                )
                for asset_id, title, author, description, epub_id, path in cursor.fetchall()
            ]
    except sqlite3.Error as e:
        logging.error(f"Database error: {e}")
        raise
def get_books_with_highlights() -> List[str]:
    """Return the asset ids of library books that have at least one highlight.

    A highlight is an annotation row whose selected text is non-empty.
    Annotations whose asset id does not belong to a book returned by
    ``get_book_details`` are ignored.

    Returns:
        The matching asset ids, without duplicates.

    Raises:
        FileNotFoundError: If a required database file cannot be located.
        sqlite3.Error: If a query fails (logged before re-raising).
    """
    known_ids = {book.asset_id for book in get_book_details()}
    try:
        # closing() is needed because a sqlite3 connection's own context
        # manager only commits or rolls back and leaves the connection open.
        with closing(sqlite3.connect(get_db_path(ANNOTATION_DB_PATTERN))) as conn:
            cursor = conn.cursor()
            # The library filter is applied in Python rather than with an
            # "IN (?, ?, ...)" clause: one bound parameter per book can exceed
            # SQLite's limit on host parameters for large libraries.
            # The empty string uses single quotes, the standard SQL string
            # literal; double quotes denote identifiers.
            cursor.execute(
                """SELECT DISTINCT ZANNOTATIONASSETID
                FROM ZAEANNOTATION
                WHERE ZANNOTATIONSELECTEDTEXT != '';"""
            )
            return [
                asset_id
                for (asset_id,) in cursor.fetchall()
                # A NULL asset id never matched the former IN clause either.
                if asset_id is not None and asset_id in known_ids
            ]
    except sqlite3.Error as e:
        logging.error(f"Database error: {e}")
        raise
def export_annotations(
    asset_id: str,
    book_details: BookDetail,
    file_path: str | None,
    extra_meta: dict | None,
) -> None:
    """Fetch one book's highlights and write them out via ``create_file``.

    Args:
        asset_id: Asset id whose annotations are exported.
        book_details: The single ``BookDetail`` record for that book (the
            name is plural for historical reasons; it is forwarded unchanged
            to ``create_file``).
        file_path: Output directory, or ``None`` to let ``create_file`` pick
            its default location.
        extra_meta: Optional extra ``BookDetail`` field values, forwarded to
            ``create_file``.

    Raises:
        FileNotFoundError: If no annotation database matches
            ``ANNOTATION_DB_PATTERN``.
        sqlite3.Error: If the query fails (logged before re-raising).
    """
    try:
        # closing() is needed because a sqlite3 connection's own context
        # manager only commits or rolls back and leaves the connection open.
        with closing(sqlite3.connect(get_db_path(ANNOTATION_DB_PATTERN))) as conn:
            cursor = conn.cursor()
            # Single quotes are the standard SQL string literal; double
            # quotes denote identifiers.
            cursor.execute(
                """SELECT ZANNOTATIONSELECTEDTEXT, ZANNOTATIONNOTE, ZANNOTATIONLOCATION
                FROM ZAEANNOTATION
                WHERE ZANNOTATIONASSETID = ? AND ZANNOTATIONSELECTEDTEXT != '';""",
                (asset_id,),
            )
            annotations = cursor.fetchall()
    except sqlite3.Error as e:
        logging.error(f"Database error: {e}")
        raise
    create_file(book_details, annotations, file_path, extra_meta)
def create_file(
    book_detail: BookDetail,
    annotations: List[Tuple[str, str, str]],
    file_path: str,
    extra_meta: dict,
) -> None:
    """Render a book's metadata and highlights to a Markdown file.

    The document consists of a frontmatter block, a title heading, an
    optional inline cover image, a metadata list and one block quote per
    highlight (followed by its note, if any). It is written as UTF-8 to
    ``"<title> - <author>.md"``.

    Args:
        book_detail: The book being exported.
        annotations: ``(highlight, note, location)`` rows; ``note`` may be
            empty or ``None``.
        file_path: Output directory. When falsy the file is written relative
            to the current working directory.
        extra_meta: Optional mapping of ``BookDetail`` field names to values
            that override those in *book_detail*. May be ``None``.

    Raises:
        IOError: If the file cannot be written (logged before re-raising).
    """
    if extra_meta:
        book_detail = book_detail._replace(**extra_meta)
    fields = book_detail._asdict()

    # Collect fragments and join once instead of repeated string "+=".
    parts = ["---\n"]
    # Frontmatter: every non-empty field except the (large) cover payload.
    for key, value in fields.items():
        if value and key != "cover":
            parts.append(f"{key}: {sanitize_frontmatter(value)}\n")
    parts.append("---\n\n")

    # Title
    parts.append(f"# {book_detail.title} by {book_detail.author}\n\n")

    # Cover image, embedded as a data URI.
    if extra_meta and extra_meta.get("cover"):
        parts.append(f"![Cover](data:image/jpeg;base64,{extra_meta['cover']})\n\n")

    # Metadata
    parts.append("## Metadata\n\n")
    for key, value in fields.items():
        # Skip empty values for every key, including "path": a missing path
        # used to be rendered as "[None](file://None)".
        if not value or key == "cover":
            continue
        if key == "path":
            parts.append(f"- {key}: [{value}](file://{value})\n")
        else:
            parts.append(f"- {key}: {value}\n")

    # Annotations
    parts.append("\n")
    parts.append("## Annotations\n\n")
    for highlight, note, _location in annotations:
        # TODO: See if an epubcfi link built from book_detail.path and the
        # annotation location can be used to open the highlight in Books.
        parts.append("\n".join(f"> {line}" for line in highlight.split("\n")))
        parts.append("\n\n")
        if note:
            parts.append(f"{note}\n\n")
        parts.append("---\n\n")
    output_md = "".join(parts)

    file_name = f"{book_detail.title} - {book_detail.author}.md"
    # A path separator inside the title or author would otherwise be read as
    # a directory boundary and make the open() below fail.
    for separator in (os.sep, os.altsep):
        if separator:
            file_name = file_name.replace(separator, "-")
    target = os.path.abspath(os.path.join(file_path, file_name)) if file_path else file_name

    try:
        # Explicit UTF-8: highlights routinely contain non-ASCII characters
        # and the platform default encoding is not guaranteed to handle them.
        with open(target, "w", encoding="utf-8") as mdfile:
            mdfile.write(output_md)
    except IOError as e:
        logging.error(f"Error writing to file: {e}")
        raise
def main():
    """Export the highlights of every library book to Markdown files.

    Command-line arguments:
        * none: files are written relative to the current directory;
        * one or two: ``argv[1]`` is the output directory;
        * three or more: ``argv[1]`` is treated as a base (vault) path and
          ``argv[3]`` as a folder inside it. ``argv[2]`` is ignored
          (presumably this matches how the Obsidian Python Scripter plugin
          invokes scripts; confirm against the plugin).

    Errors are reported on stdout or through logging; nothing is raised to
    the caller.
    """
    args = sys.argv[1:]
    if len(args) >= 3:
        file_path = os.path.join(args[0], args[2])
    elif args:
        file_path = args[0]
    else:
        file_path = None

    try:
        book_details = get_book_details()
        books_with_highlights = get_books_with_highlights()
    except (FileNotFoundError, sqlite3.Error) as e:
        logging.error(f"Error initializing: {e}")
        print("An error occurred accessing the Books database.")
        return

    # Index the details once instead of scanning the list for every book.
    # setdefault keeps the first record per asset id, matching a
    # first-match linear search.
    details_by_id = {}
    for detail in book_details:
        details_by_id.setdefault(detail.asset_id, detail)

    for book in books_with_highlights:
        book_detail = details_by_id.get(book)
        if book_detail is None:
            logging.error(f"Book details not found for asset_id: {book}")
            continue
        try:
            extra_meta = get_epub_metadata(book_detail.path)
            export_annotations(book, book_detail, file_path, extra_meta)
            print(f"Exported annotations for book: {book_detail.title}")
        except (ValueError, sqlite3.Error, IOError) as e:
            # One failing book must not stop the remaining exports.
            print(f"Error exporting annotations: {e}")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment