Skip to content

Instantly share code, notes, and snippets.

@jaiamo
Created January 22, 2021 20:37
Show Gist options
  • Save jaiamo/e2e853174a8652231739c166f4b9fe4c to your computer and use it in GitHub Desktop.
Save jaiamo/e2e853174a8652231739c166f4b9fe4c to your computer and use it in GitHub Desktop.
Generate markdown notes from Kindle highlights
"""
kindle.py: Generate markdown notes from Kindle highlights
"""
import os
import sys
import re
import argparse
import requests
from bs4 import BeautifulSoup
# Parse arguments
parser = argparse.ArgumentParser(description="Generate markdown notes from Kindle highlights")
parser.add_argument("-l", action="store_true", help="list all books")
# -a and -n are two alternative ways of choosing what to save, so argparse
# rejects a command line that supplies both.
group = parser.add_mutually_exclusive_group()
group.add_argument("-a", action="store_true", help="save notes from all books")
group.add_argument("-n", metavar="book_num", type=int, help="save notes from specified book number")
args = parser.parse_args()
# With no flags at all there is nothing to do: show usage and stop.
# print_help() writes the help text itself and returns None, so it must not be
# wrapped in print() -- doing so appends a stray "None" line to the output.
if len(sys.argv) == 1:
    parser.print_help()
    sys.exit()
# Retrieve environment variables (set based on request headers of browser logged into Kindle notebook)
# sys.exit() with a string prints it to stderr and exits with a non-zero status,
# so a missing variable aborts the script right here.
user_agent = os.environ.get("KINDLE_USER_AGENT") or sys.exit("Need $KINDLE_USER_AGENT environment variable.")
cookie = os.environ.get("KINDLE_COOKIE") or sys.exit("Need $KINDLE_COOKIE environment variable.")
# Use headers to retrieve Kindle notebook page
base_url = "https://read.amazon.com/notebook"
session = requests.Session()
session.headers.update({
    "user-agent": user_agent,
    "cookie": cookie,
})
response = session.get(base_url)
soup = BeautifulSoup(response.text, "lxml")
# Anything other than the expected notebook title is treated as a failure.
# The <title> lookup can come back as None (e.g. a non-HTML or empty response),
# which must also take the diagnostic path rather than raise AttributeError.
page_title = soup.select_one("title")
if page_title is None or page_title.get_text() != "Kindle: Your Notes and Highlights":
    # Dump what was received so the user can inspect it; write as UTF-8 so
    # non-ASCII page content cannot fail on the platform's default encoding.
    with open("/tmp/kindle.html", "w", encoding="utf-8") as file:
        file.write(soup.prettify())
    sys.exit(f"Unexpected page. Check: {os.path.abspath(file.name)}")
# Select divs containing books
books = soup.select("#kp-notebook-library .kp-notebook-library-each-book")
for i, book in enumerate(books):
    # The id attribute of each book div is used as the ASIN in all later URLs
    asin = book["id"]
    title = book.find("h2").get_text().strip()
    # Keep only the text after the last ":" in the first <p> of the book div
    author = book.find("p").get_text().split(":")[-1].strip()
    # Drop any subtitle: keep only the text before the first ":"
    short_title = title.split(":")[0]
    # Strip characters that are unsafe in file names. The pattern is a raw
    # string so that "\\" reaches the regex engine as an escaped backslash;
    # without it the backslash was consumed by string escaping and never
    # removed from file names (and "\/" triggered an invalid-escape warning).
    file_name = re.sub(r'[~"#%&*:<>?/\\{|}]+', "", short_title).lower()
    # List books if arguments have list flag
    if args.l:
        print(f"{i+1:2d}: {short_title} by {author}")
    # Write markdown to {short_title}.md for each book if arguments have save flags
    if args.n == i+1 or args.a:
        # Highlights routinely contain non-ASCII text, so write UTF-8 explicitly
        # instead of relying on the platform's default encoding.
        with open(f"{file_name}.md", "w", encoding="utf-8") as file:
            # Write YAML, title, and metadata
            yaml = "---\ntags:\n - #book\n---\n\n"
            metadata = (
                f"## Metadata\n\n"
                f"- Title: {title}\n"
                f"- Author: {author}\n"
                f"- ASIN: [{asin}](kindle://book?action=openasin={asin})\n\n"
            )
            file.write(yaml)
            file.write(f"# {title} by {author}\n\n")
            file.write(metadata)
            file.write("## Notes")
            # Retrieve highlights / notes split across multiple html pages.
            # The first request is sent with an empty token and "&" as the
            # content limit state; later requests reuse the values found in
            # the previous page.
            content_limit_state = "&"
            token = ""
            next_page = True
            while next_page:
                page_response = session.get(
                    f"{base_url}?asin={asin}&token={token}&contentLimitState={content_limit_state}"
                )
                page_soup = BeautifulSoup(page_response.text, "lxml")
                highlights = page_soup.select(".a-spacing-base .kp-notebook-row-separator")
                # Write highlights / notes to file
                for highlight in highlights:
                    location = highlight.select_one("#kp-annotation-location")["value"]
                    highlight_select = highlight.select_one("#highlight")
                    note_select = highlight.select_one("#note")
                    # Treat a missing #note element the same as an empty note
                    note_text = note_select.get_text() if note_select is not None else ""
                    file.write("\n\n---\n\n")
                    # Some locations don't have #highlight divs, so select returns None
                    if highlight_select is not None:
                        file.write(highlight_select.get_text())
                    # All locations have #note divs, but most empty
                    if note_text:
                        file.write(f"\n\nNote: {note_text}")
                    file.write(f" - (Loc: [{location}](kindle://book?action=open&location=#{location}&asin={asin}))")
                # Get URL parameters for next page if it exists. A missing
                # element or an empty token both mean "no further page";
                # otherwise the loop would crash or re-request page one forever.
                token_select = page_soup.select_one(".kp-notebook-annotations-next-page-start")
                next_page = (
                    token_select is not None
                    and token_select.has_attr("value")
                    and token_select["value"] != ""
                )
                if next_page:
                    token = token_select["value"]
                    content_limit_state = page_soup.select_one(".kp-notebook-content-limit-state")["value"]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment