Skip to content

Instantly share code, notes, and snippets.

Created January 22, 2021 20:37
Show Gist options
  • Save jaiamo/e2e853174a8652231739c166f4b9fe4c to your computer and use it in GitHub Desktop.
Save jaiamo/e2e853174a8652231739c166f4b9fe4c to your computer and use it in GitHub Desktop.
Generate markdown notes from Kindle highlights
""" Generate markdown notes from Kindle highlights
import os
import sys
import re
import argparse
import requests
from bs4 import BeautifulSoup
# Address of the Kindle notebook page; per-book pages are the same address
# with asin/token/contentLimitState query parameters appended.
# NOTE(review): the URL literal was blank in the reviewed copy of this script;
# restored to the Kindle notebook address -- confirm for your marketplace.
BASE_URL = "https://read.amazon.com/notebook"

# <title> of the notebook page when the request headers were accepted.
EXPECTED_TITLE = "Kindle: Your Notes and Highlights"

# Where the unexpected response is dumped for inspection.
DEBUG_DUMP_PATH = "/tmp/kindle.html"

# Characters removed from a book's short title to build its file name.
# Raw string so the backslash really is part of the set.
UNSAFE_FILENAME_CHARS = re.compile(r'[~"#%&*:<>?/\\{|}]+')


def _build_parser():
    """Return the command-line parser (-l to list, -a / -n to save notes)."""
    parser = argparse.ArgumentParser(description="Generate markdown notes from Kindle highlights")
    parser.add_argument("-l", action="store_true", help="list all books")
    group = parser.add_mutually_exclusive_group()
    group.add_argument("-a", action="store_true", help="save notes from all books")
    group.add_argument("-n", metavar="book_num", type=int, help="save notes from specified book number")
    return parser


def _require_env(name):
    """Return the value of environment variable *name*, exiting if unset or empty."""
    return os.environ.get(name) or sys.exit(f"Need ${name} environment variable.")


def _open_session(user_agent, cookie):
    """Return a requests session that sends the given user-agent and cookie headers."""
    session = requests.Session()
    session.headers.update({
        "user-agent": user_agent,
        "cookie": cookie,
    })
    return session


def _fetch_library(session):
    """Fetch the notebook page and return the elements that describe each book.

    Exits with a message if the page title is not the expected one; the
    response body is first saved to DEBUG_DUMP_PATH so it can be inspected.
    """
    response = session.get(BASE_URL)
    soup = BeautifulSoup(response.text, "lxml")
    title_tag = soup.select_one("title")
    if title_tag is None or title_tag.get_text() != EXPECTED_TITLE:
        with open(DEBUG_DUMP_PATH, "w", encoding="utf-8") as dump:
            dump.write(response.text)
        sys.exit(f"Unexpected page. Check: {os.path.abspath(DEBUG_DUMP_PATH)}")
    # Select divs containing books
    return soup.select("#kp-notebook-library .kp-notebook-library-each-book")


def _describe_book(book):
    """Return (asin, title, author, short_title, file_name) for one book element."""
    asin = book["id"]
    title = book.find("h2").get_text().strip()
    # The author is whatever follows the last ":" in the book's first <p>.
    author = book.find("p").get_text().split(":")[-1].strip()
    # Drop the subtitle: keep only the text before the first ":".
    short_title = title.split(":")[0]
    file_name = UNSAFE_FILENAME_CHARS.sub("", short_title).lower()
    return asin, title, author, short_title, file_name


def _write_annotations(file, session, asin):
    """Append every highlight / note of book *asin* to the open text *file*.

    Annotations are split across several html pages; each page carries the
    token and content-limit state needed to request the next one.
    """
    content_limit_state = "&"
    token = ""
    while True:
        response = session.get(f"{BASE_URL}?asin={asin}&token={token}&contentLimitState={content_limit_state}")
        soup = BeautifulSoup(response.text, "lxml")
        for row in soup.select(".a-spacing-base .kp-notebook-row-separator"):
            location = row.select_one("#kp-annotation-location")["value"]
            highlight = row.select_one("#highlight")
            note_text = row.select_one("#note").get_text()
            # Blank line so entries do not run into the heading or each other.
            file.write("\n\n")
            # Some locations don't have #highlight divs, so select returns None
            if highlight is not None:
                file.write(highlight.get_text())
            # All locations have #note divs, but most empty
            if note_text:
                file.write(f"\n\nNote: {note_text}")
            file.write(f" - (Loc: [{location}](kindle://book?action=open&location=#{location}&asin={asin}))")
        # Get URL parameters for next page if it exists. A missing element or
        # an empty token both mean there is nothing more to fetch; continuing
        # with an empty token would request the first page again.
        next_start = soup.select_one(".kp-notebook-annotations-next-page-start")
        token = next_start.get("value") if next_start is not None else None
        if not token:
            break
        content_limit_state = soup.select_one(".kp-notebook-content-limit-state")["value"]


def _save_book(session, asin, title, author, file_name):
    """Write {file_name}.md: YAML front matter, title, metadata, then the notes."""
    # NOTE(review): in YAML, "#book" after the dash reads as a comment, so this
    # list item is empty; left as written -- confirm the intended tag format.
    yaml = "---\ntags:\n - #book\n---\n\n"
    metadata = (
        f"## Metadata\n\n"
        f"- Title: {title}\n"
        f"- Author: {author}\n"
        f"- ASIN: [{asin}](kindle://book?action=open&asin={asin})\n\n"
    )
    with open(f"{file_name}.md", "w", encoding="utf-8") as file:
        file.write(yaml)
        file.write(f"# {title} by {author}\n\n")
        file.write(metadata)
        file.write("## Notes")
        _write_annotations(file, session, asin)


def main(argv=None):
    """Run the command-line tool.

    argv: list of arguments without the program name; defaults to sys.argv[1:].

    Exits with status 1 after printing help when no arguments are given, and
    with an explanatory message when KINDLE_USER_AGENT or KINDLE_COOKIE is
    missing or the notebook page is not the expected one.
    """
    arg_list = sys.argv[1:] if argv is None else list(argv)
    parser = _build_parser()
    args = parser.parse_args(arg_list)
    if not arg_list:
        parser.print_help(sys.stderr)
        sys.exit(1)

    # Retrieve environment variables (set based on request headers of browser logged into Kindle notebook)
    user_agent = _require_env("KINDLE_USER_AGENT")
    cookie = _require_env("KINDLE_COOKIE")

    # Use headers to retrieve Kindle notebook page
    session = _open_session(user_agent, cookie)
    books = _fetch_library(session)

    for number, book in enumerate(books, start=1):
        asin, title, author, short_title, file_name = _describe_book(book)
        # List books if arguments have list flag
        if args.l:
            print(f"{number:2d}: {short_title} by {author}")
        # Write markdown to {short_title}.md for each book if arguments have save flags
        if args.a or args.n == number:
            _save_book(session, asin, title, author, file_name)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment