- Plug in your Kobo device and find the `KoboReader.sqlite` file under the hidden folder `.kobo/`.
- Create `extract_kobo_notes.py` (forked from https://unmesh.dev/post/kobo_highlights/):
import sqlite3
import os
import argparse
import re # Import the regular expression module
# Fallback locations used when --db / --output are not given on the command line.
DEFAULT_DB_PATH = "./KoboReader.sqlite"
DEFAULT_OUTPUT_DIR = "./kobo_highlights"
def connect_to_database(path):
    """Open the SQLite database at *path* and return the connection.

    ``sqlite3.connect`` creates a new, empty database when the target file
    does not exist. With a mistyped path that would leave a stray file behind
    and only fail later with a confusing "no such table" error, so the path
    is checked before connecting.

    Args:
        path: Filesystem path to an existing SQLite database file.

    Returns:
        An open ``sqlite3.Connection``, or ``None`` if the file is missing or
        the connection could not be established.
    """
    print(f"Attempting to connect to database: {path}")

    # Refuse to connect to a non-existent file instead of creating one.
    if not os.path.isfile(path):
        print(f"Error connecting to database: file not found: {path}")
        return None

    try:
        conn = sqlite3.connect(path)
    except sqlite3.Error as e:
        print(f"Error connecting to database: {e}")
        return None

    print("Database connection successful.")
    return conn
def get_books_with_highlights(conn):
    """Return the books that have at least one highlight with text.

    Bookmark rows whose ``Text`` is NULL or blank are ignored, so a book that
    only has such rows is not reported as having highlights.

    Args:
        conn: An open ``sqlite3.Connection`` to the Kobo database.

    Returns:
        A list of ``(ContentId, Title, Author, DateLastRead,
        TimeSpentReading)`` tuples ordered by title. An empty list is
        returned when nothing matches or the query fails.
    """
    query = """
        SELECT DISTINCT content.ContentId,
                        content.Title,
                        content.Attribution AS Author,
                        content.DateLastRead,
                        content.TimeSpentReading
        FROM Bookmark
        INNER JOIN content ON Bookmark.VolumeID = content.ContentID
        WHERE Bookmark.Text IS NOT NULL
          AND TRIM(Bookmark.Text) <> ''
        ORDER BY content.Title;
    """
    print("Fetching books with highlights...")

    cursor = None
    try:
        cursor = conn.cursor()
        cursor.execute(query)
        books = cursor.fetchall()
    except sqlite3.Error as e:
        print(f"Error fetching books: {e}")
        return []
    finally:
        # Release the cursor whether or not the query succeeded.
        if cursor is not None:
            cursor.close()

    print(f"Found {len(books)} book(s) with highlights.")
    return books
def get_highlights_for_book(conn, content_id):
    """Return the highlight texts stored for one book.

    Rows whose ``Text`` is NULL or contains only whitespace are dropped so
    callers never receive an empty highlight (which would otherwise be
    rendered as the literal string ``None``).

    Args:
        conn: An open ``sqlite3.Connection`` to the Kobo database.
        content_id: The book identifier matched against ``Bookmark.VolumeID``.

    Returns:
        A list of one-element tuples ``(text,)``. An empty list is returned
        when the book has no usable highlights or the query fails.
    """
    query = """
        SELECT Bookmark.Text
        FROM Bookmark
        WHERE Bookmark.VolumeID = ?
          AND Bookmark.Text IS NOT NULL
    """

    cursor = None
    try:
        cursor = conn.cursor()
        cursor.execute(query, (content_id,))
        rows = cursor.fetchall()
    except sqlite3.Error as e:
        print(f"Error fetching highlights for book ID {content_id}: {e}")
        return []
    finally:
        # Release the cursor whether or not the query succeeded.
        if cursor is not None:
            cursor.close()

    # SQL TRIM only strips spaces by default, so filter blank text
    # (newlines, tabs) on the Python side.
    return [row for row in rows if row[0] is not None and str(row[0]).strip()]
def sanitize_filename(title):
    """Build a safe ``.md`` filename from a book title.

    Path separators become underscores, every character other than word
    characters, whitespace and hyphens is removed, whitespace runs collapse
    to a single underscore, and the result is lower-cased. A missing title
    (``None``) or one that sanitizes to nothing yields ``untitled_book.md``.
    The name is truncated so that, including the extension, it stays within
    255 UTF-8 bytes, a common filesystem limit for a single filename.

    Args:
        title: The book title; ``None`` and non-string values are accepted.

    Returns:
        The sanitized filename, always ending in ``.md``.
    """
    max_filename_bytes = 255
    extension = ".md"

    stem = "" if title is None else str(title)
    # Replace path separators first so they survive as word boundaries.
    stem = stem.replace('/', '_').replace('\\', '_')
    # Keep only word characters, whitespace and hyphens.
    stem = re.sub(r'[^\w\s-]', '', stem)
    # Collapse whitespace runs into single underscores.
    stem = re.sub(r'\s+', '_', stem).strip('_')
    stem = stem.lower()

    # Truncate on a byte budget; errors="ignore" drops a multi-byte
    # character that would otherwise be cut in half.
    budget = max_filename_bytes - len(extension)
    encoded = stem.encode("utf-8", errors="ignore")
    if len(encoded) > budget:
        stem = encoded[:budget].decode("utf-8", errors="ignore").rstrip('_')

    if not stem:
        stem = "untitled_book"
    return stem + extension
def _format_blockquote(text):
    """Render *text* as a Markdown blockquote, prefixing every line with '>'."""
    lines = str(text).strip().splitlines()
    # rstrip() turns the prefix of an empty line into a bare '>' so that
    # paragraph breaks stay inside the same blockquote.
    return "\n".join(f"> {line}".rstrip() for line in lines)


def create_markdown_for_book(book, highlights, output_dir):
    """Write one Markdown file containing all highlights of a book.

    The file is named after the sanitized book title and placed in
    *output_dir*, which is created if necessary. Errors are reported on
    stdout and never raised, so one failing book does not abort the export
    of the others.

    Args:
        book: A row whose index 1 is the title and index 2 the author;
            either may be ``None``.
        highlights: An iterable of rows whose index 0 is the highlight text.
            Rows with ``None`` or blank text are skipped.
        output_dir: Directory that receives the Markdown file.
    """
    # A missing title would otherwise crash filename generation.
    title = str(book[1]) if book[1] else "Untitled"
    author = book[2]

    if not os.path.isdir(output_dir):
        print(f"Creating output directory: {output_dir}")
    try:
        # exist_ok avoids failing if the directory appears between the
        # check above and this call.
        os.makedirs(output_dir, exist_ok=True)
    except OSError as e:
        print(f"Error creating directory {output_dir}: {e}")
        return  # Skip creating file if directory creation failed

    # Drop empty highlights and quote every line of the remaining ones.
    quotes = [
        _format_blockquote(row[0])
        for row in highlights
        if row[0] is not None and str(row[0]).strip()
    ]

    filename = sanitize_filename(title)
    filepath = os.path.join(output_dir, filename)
    print(f" Creating markdown for '{title}' ({len(quotes)} highlights) -> {filepath}")

    try:
        with open(filepath, 'w', encoding='utf-8') as md_file:
            md_file.write(f"# {title}\n\n")
            md_file.write(f"## Author(s) - {author if author else 'N/A'}\n\n")
            if quotes:
                md_file.write("### Highlights\n\n")
                md_file.writelines(f"{quote}\n\n" for quote in quotes)
            else:
                md_file.write("### No highlights found for this book.\n\n")
        print(f" Successfully created {filename}")
    except OSError as e:
        print(f"Error writing markdown file {filepath}: {e}")
    except Exception as e:
        # Deliberate best-effort boundary: report and continue with other books.
        print(f"An unexpected error occurred while writing {filepath}: {e}")
def main():
    """Parse command-line options and export every book's highlights.

    Reads ``--db`` (database path) and ``--output`` (target directory),
    connects to the database, and writes one Markdown file per book that has
    highlights. The database connection is always closed, even if an
    unexpected error interrupts the export.
    """
    # --- Setup Argument Parser ---
    parser = argparse.ArgumentParser(
        description="Extract highlights from KoboReader.sqlite and save as Markdown."
    )
    parser.add_argument(
        '--db',
        type=str,
        default=DEFAULT_DB_PATH,
        help=f"Path to the KoboReader.sqlite database file (default: {DEFAULT_DB_PATH})",
    )
    parser.add_argument(
        '--output',
        type=str,
        default=DEFAULT_OUTPUT_DIR,
        help=f"Directory to save the markdown files (default: {DEFAULT_OUTPUT_DIR})",
    )
    args = parser.parse_args()

    # --- Main Logic ---
    conn = connect_to_database(args.db)
    if conn is None:
        print("Failed to connect to the database. Exiting.")
        return

    try:
        books = get_books_with_highlights(conn)
        if not books:
            print("No books with highlights found in the database.")
            return

        for book in books:
            highlights = get_highlights_for_book(conn, book[0])
            create_markdown_for_book(book, highlights, args.output)
        print("\nHighlight extraction complete.")
    finally:
        # Runs on every exit path, including the early return above.
        conn.close()
        print("Database connection closed.")
if __name__ == "__main__":
    main()

- Run `python3 extract_kobo_notes.py`, which reads `./KoboReader.sqlite` and exports highlights as Markdown files under `./kobo_highlights/`. Use `--db` to specify an alternative input file path, and `--output` to specify an alternative output folder path.