Skip to content

Instantly share code, notes, and snippets.

@janetkuo
Last active April 28, 2025 00:00
Show Gist options
  • Select an option

  • Save janetkuo/ac73a1b36e3ee65bb055b4dc67275fa7 to your computer and use it in GitHub Desktop.

Select an option

Save janetkuo/ac73a1b36e3ee65bb055b4dc67275fa7 to your computer and use it in GitHub Desktop.
  1. Plug in your Kobo device and locate the KoboReader.sqlite file in the hidden .kobo/ folder.
  2. Create extract_kobo_notes.py (forked from https://unmesh.dev/post/kobo_highlights/):
import sqlite3
import os
import argparse
import re # Import the regular expression module

# Default locations, used by main() when --db / --output are not given on the
# command line. Both are relative to the current working directory.
DEFAULT_DB_PATH = './KoboReader.sqlite'
DEFAULT_OUTPUT_DIR = './kobo_highlights'

def connect_to_database(path):
    """Open the SQLite database at *path* and return the connection.

    Args:
        path: Filesystem path to an existing SQLite database file.

    Returns:
        An open ``sqlite3.Connection``, or ``None`` if the file does not
        exist or the connection could not be established. Problems are
        reported on stdout rather than raised.
    """
    print(f"Attempting to connect to database: {path}")
    # sqlite3.connect() silently creates a new, empty database when the path
    # does not exist. That would hide a mistyped path behind a later
    # "no such table" error and leave a stray file behind, so check first.
    if not os.path.isfile(path):
        print(f"Error connecting to database: file not found: {path}")
        return None
    try:
        conn = sqlite3.connect(path)
    except sqlite3.Error as e:
        print(f"Error connecting to database: {e}")
        return None
    print("Database connection successful.")
    return conn

def get_books_with_highlights(conn):
    """Return one row per book that is referenced from the Bookmark table.

    Each row is a tuple of (ContentId, Title, Author, DateLastRead,
    TimeSpentReading), ordered by title. An empty list is returned when the
    query fails; the error is printed rather than raised.
    """
    print("Fetching books with highlights...")
    # Distinct book rows for every volume that has at least one Bookmark entry.
    sql = """
            SELECT DISTINCT content.ContentId,
                            content.Title,
                            content.Attribution AS Author,
                            content.DateLastRead,
                            content.TimeSpentReading
            FROM Bookmark
            INNER JOIN content ON Bookmark.VolumeID = content.ContentID
            ORDER BY content.Title;
        """
    try:
        rows = conn.cursor().execute(sql).fetchall()
    except sqlite3.Error as err:
        print(f"Error fetching books: {err}")
        return []
    print(f"Found {len(rows)} book(s) with highlights.")
    return rows

def get_highlights_for_book(conn, content_id):
    """Return the highlight rows stored for one book.

    Args:
        conn: Open ``sqlite3.Connection`` to the Kobo database.
        content_id: Value matched against ``Bookmark.VolumeID``.

    Returns:
        A list of 1-tuples ``(text,)``. Rows whose ``Text`` is NULL or only
        whitespace are dropped, since they carry nothing to export and would
        otherwise be rendered as "> None" (presumably these are plain
        bookmarks rather than highlights; verify against the device schema).
        An empty list is returned when the query fails; the error is printed
        rather than raised.
    """
    query = """
        SELECT Bookmark.Text
        FROM Bookmark
        WHERE Bookmark.VolumeID = ?
    """
    try:
        rows = conn.execute(query, (content_id,)).fetchall()
    except sqlite3.Error as e:
        print(f"Error fetching highlights for book ID {content_id}: {e}")
        return []
    # Keep only rows that actually contain highlighted text.
    return [row for row in rows if row[0] is not None and str(row[0]).strip()]

def sanitize_filename(title):
    """Generate a safe Markdown filename from a book title.

    Path separators become underscores, every character other than word
    characters, whitespace and hyphens is removed, runs of whitespace
    collapse to a single underscore, and the result is lower-cased.

    Args:
        title: The book title. ``None`` (e.g. a NULL title column) is treated
            as an empty title, and non-string values are converted with
            ``str()`` instead of raising ``AttributeError``.

    Returns:
        The sanitized name with a ``.md`` suffix, or ``"untitled_book.md"``
        when nothing usable remains.
    """
    raw = "" if title is None else str(title)
    # Path separators would otherwise be dropped and glue words together.
    stem = raw.replace('/', '_').replace('\\', '_')
    # Keep only word characters, whitespace and hyphens.
    stem = re.sub(r'[^\w\s-]', '', stem)
    # Collapse whitespace runs, then trim underscores left at either end.
    stem = re.sub(r'\s+', '_', stem).strip('_').lower()
    return (stem or "untitled_book") + ".md"


def _format_highlight(text):
    """Render one highlight as a Markdown blockquote ('' if it has no text)."""
    if text is None:
        return ""
    # Prefix every line, not just the first, so a multi-line highlight stays
    # inside a single blockquote instead of spilling into body text.
    return "\n".join(
        f"> {line}".rstrip() for line in str(text).strip().splitlines()
    )


def create_markdown_for_book(book, highlights, output_dir):
    """Write a Markdown file with the title, author and highlights of a book.

    Args:
        book: Row for the book; index 1 is read as the title and index 2 as
            the author. Either may be ``None``.
        highlights: Iterable of rows whose first element is the highlight
            text. Rows with ``None`` or blank text are skipped.
        output_dir: Directory for the file; created if it does not exist.

    Returns:
        None. Failures to create the directory or write the file are printed
        and the book is skipped, so one bad book does not stop the export.
    """
    title = book[1]
    author = book[2]
    display_title = title if title else "Untitled"

    if not os.path.exists(output_dir):
        print(f"Creating output directory: {output_dir}")
    try:
        # exist_ok avoids failing if the directory appears between the check
        # above and this call.
        os.makedirs(output_dir, exist_ok=True)
    except OSError as e:
        print(f"Error creating directory {output_dir}: {e}")
        return  # Skip creating file if directory creation failed

    # An empty string keeps a missing title from raising inside the helper.
    filename = sanitize_filename(title if title is not None else "")
    filepath = os.path.join(output_dir, filename)

    quotes = [
        quote
        for quote in (_format_highlight(row[0]) for row in highlights)
        if quote
    ]

    print(f"  Creating markdown for '{display_title}' ({len(quotes)} highlights) -> {filepath}")

    try:
        with open(filepath, 'w', encoding='utf-8') as md_file:
            md_file.write(f"# {display_title}\n\n")
            md_file.write(f"## Author(s) - {author if author else 'N/A'}\n\n")

            if quotes:
                md_file.write("### Highlights\n\n")
                md_file.writelines(f"{quote}\n\n" for quote in quotes)
            else:
                md_file.write("### No highlights found for this book.\n\n")

        print(f"  Successfully created {filename}")

    except OSError as e:
        print(f"Error writing markdown file {filepath}: {e}")
    except Exception as e:
        # Deliberate best-effort: report and move on to the next book.
        print(f"An unexpected error occurred while writing {filepath}: {e}")


def main():
    """Parse command-line options and export every book's highlights.

    Reads ``--db`` (database path) and ``--output`` (target directory), then
    writes one Markdown file per book returned by
    ``get_books_with_highlights``. Progress and errors go to stdout.
    """
    # --- Setup Argument Parser ---
    parser = argparse.ArgumentParser(description="Extract highlights from KoboReader.sqlite and save as Markdown.")
    parser.add_argument(
        '--db',
        type=str,
        default=DEFAULT_DB_PATH,
        help=f"Path to the KoboReader.sqlite database file (default: {DEFAULT_DB_PATH})"
    )
    parser.add_argument(
        '--output',
        type=str,
        default=DEFAULT_OUTPUT_DIR,
        help=f"Directory to save the markdown files (default: {DEFAULT_OUTPUT_DIR})"
    )
    args = parser.parse_args()

    # --- Main Logic ---
    conn = connect_to_database(args.db)
    if conn is None:
        print("Failed to connect to the database. Exiting.")
        return

    # try/finally guarantees the connection is released even if processing a
    # book raises unexpectedly.
    try:
        books = get_books_with_highlights(conn)
        if not books:
            print("No books with highlights found in the database.")
            return
        for book in books:
            highlights = get_highlights_for_book(conn, book[0])
            create_markdown_for_book(book, highlights, args.output)
        print("\nHighlight extraction complete.")
    finally:
        conn.close()
        print("Database connection closed.")


# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
  3. Run python3 extract_kobo_notes.py, which reads ./KoboReader.sqlite and exports the highlights as Markdown files under ./kobo_highlights/. Use --db to specify an alternative input file path, and --output to specify an alternative output folder path.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment