Skip to content

Instantly share code, notes, and snippets.

@stared
Last active March 13, 2025 09:23
Show Gist options
  • Save stared/ce732ef27d97d559b34d7e294481f1b0 to your computer and use it in GitHub Desktop.
Save stared/ce732ef27d97d559b34d7e294481f1b0 to your computer and use it in GitHub Desktop.
Convert Kindle highlights and notes into Markdown - to be used in Obsidian or Static Site Generator
"""
kindle_highlights.py
This script provides functionality to parse and process Kindle highlights and notes.
It extracts data from a Kindle clippings file and organizes it into a structured format.
Usage:
from kindle_highlights import KindleClippingCollection
collection = KindleClippingCollection.extract_from_file("path/to/My Clippings.txt")
collection.save_as_files("output_directory")
"""
import logging
import os
import re
from datetime import datetime
from os import path
from typing import Optional, Literal, Dict, List

from pydantic import BaseModel, field_validator, ValidationError
class KindleClipping(BaseModel):
    """A single highlight, note or bookmark entry from a Kindle clippings file.

    ``date`` accepts the textual timestamp used in the clippings file
    (for example ``"Wednesday, 11 May 2016 21:05:40"``); any non-string
    value is handed to pydantic's regular ``datetime`` validation.
    """

    title: str
    author: str
    type: Literal["Highlight", "Note", "Bookmark"]
    # Page and end location are not present on every entry, so they
    # default to None instead of being required keys.
    page: Optional[int | str] = None
    location_start: int
    location_end: Optional[int] = None
    date: datetime
    content: str

    @field_validator("date", mode="before")
    @classmethod
    def parse_date(cls, value):
        """Parse the clippings-file timestamp into a ``datetime``.

        Only strings are parsed here; an already-built ``datetime`` (or any
        other non-string input) is returned unchanged so pydantic can
        validate it, rather than ``strptime`` failing with a ``TypeError``.
        A string in a different format raises ``ValueError``, which pydantic
        reports as a ``ValidationError``.
        """
        if isinstance(value, str):
            # NOTE: %A and %B match locale-dependent day/month names.
            return datetime.strptime(value, "%A, %d %B %Y %H:%M:%S")
        return value

    def to_location_range(self) -> str:
        """Return ``"start-end"`` when an end location is set, else ``"start"``."""
        if self.location_end:
            return f"{self.location_start}-{self.location_end}"
        return str(self.location_start)

    def to_position(self) -> str:
        """Return a human-readable position, prefixed by the page when known."""
        if self.page:
            return f"page {self.page}, loc. {self.to_location_range()}"
        return f"loc. {self.to_location_range()}"

    def to_markdown(self) -> str:
        """Render the clipping as a Markdown list item.

        Highlights and bookmarks become top-level ``*`` items; notes are
        rendered as a ``-`` item meant to follow the highlight they comment.

        Raises:
            ValueError: if ``type`` is not one of the three known kinds.
        """
        if self.type == "Highlight":
            return f"* {self.content}\n - {self.to_position()}"
        if self.type == "Note":
            return f" - **COMMENT**: {self.content}"
        if self.type == "Bookmark":
            return f"* **BOOKMARK**: {self.content} {self.to_position()}"
        raise ValueError(f"Unknown type: {self.type}")
class KindleClippingCollection:
    """Kindle clippings grouped by book title, with Markdown export helpers.

    ``books`` maps a book title to the list of clippings for that book, in
    the order in which they were supplied.
    """

    # One entry: a "<title> (<author>)" line, a "- Your <type> ..." metadata
    # line, one or more newlines, then the (possibly empty) content.
    _ENTRY_PATTERN = re.compile(
        r"^(?P<title>.*) \((?P<author>.*)\)\n"
        r"- Your (?P<type>Highlight|Note|Bookmark)"
        r"(?: on page (?P<page>[\w\d]+) \|)?"
        r"(?: at)? location (?P<location_start>\d+)(?:-(?P<location_end>\d+))?"
        r" \| Added on (?P<date>.*)\n+"
        r"(?P<content>(?:.|\n)*)",
        re.MULTILINE,
    )
    # Entries end with a line of ten "="; the last one may lack a newline.
    _ENTRY_SEPARATOR = re.compile(r"\n==========(?:\n|\Z)")

    def __init__(self, books: Dict[str, List["KindleClipping"]]):
        self.books = books

    @classmethod
    def group_by_book_title(
        cls, clippings: List["KindleClipping"]
    ) -> "KindleClippingCollection":
        """Build a collection by grouping ``clippings`` on their ``title``.

        Book order follows first appearance; clipping order is preserved.
        """
        books: Dict[str, List["KindleClipping"]] = {}
        for clipping in clippings:
            books.setdefault(clipping.title, []).append(clipping)
        return cls(books)

    @classmethod
    def extract_from_file(
        cls, file_path: str, clear_highlights: bool = True
    ) -> "KindleClippingCollection":
        """Parse a Kindle clippings file into a collection.

        Entries that do not match the expected layout, or that fail model
        validation, are logged as errors and skipped.

        Args:
            file_path: path to the clippings text file (UTF-8, optional BOM).
            clear_highlights: when true, superseded highlights are dropped
                via ``clear_highlights_all``.
        """
        with open(file_path, "r", encoding="utf-8-sig") as file:
            text = file.read()

        clippings: List["KindleClipping"] = []
        for raw_part in cls._ENTRY_SEPARATOR.split(text):
            # "utf-8-sig" only strips a BOM at the very start of the file.
            # NOTE(review): some Kindle files reportedly carry a BOM before
            # every entry, which would otherwise end up inside the title.
            part = raw_part.lstrip("\ufeff")
            if not part.strip():
                # The chunk after the final separator is empty; not an error.
                continue
            match = cls._ENTRY_PATTERN.match(part)
            if match is None:
                logging.error("Error parsing clipping (no match):\n%s", part)
                continue
            try:
                clippings.append(KindleClipping(**match.groupdict()))
            except ValidationError as e:
                logging.error("Error parsing clipping (%s):\n%s", e, part)

        res = cls.group_by_book_title(clippings)
        if clear_highlights:
            res = res.clear_highlights_all()
        return res

    @staticmethod
    def clear_highlights(
        clippings: List["KindleClipping"],
    ) -> List["KindleClipping"]:
        """Drop highlights that were superseded by the next highlight.

        When two consecutive highlights share the same ``location_start``,
        the earlier one is treated as an older version and replaced by the
        later one. Notes and bookmarks are never replaced. An empty input
        yields an empty list.
        """
        filtered: List["KindleClipping"] = []
        for clipping in clippings:
            previous = filtered[-1] if filtered else None
            if (
                previous is not None
                and previous.type == "Highlight"
                and clipping.type == "Highlight"
                and clipping.location_start == previous.location_start
            ):
                logging.info(
                    "Highlight removed as it seems to be updated.\nOLD:%s\nNEW:%s",
                    previous.content,
                    clipping.content,
                )
                filtered[-1] = clipping
            else:
                filtered.append(clipping)
        return filtered

    def clear_highlights_all(self) -> "KindleClippingCollection":
        """Return a new collection with ``clear_highlights`` applied per book."""
        return KindleClippingCollection(
            {
                title: self.clear_highlights(clippings)
                for title, clippings in self.books.items()
            }
        )

    @staticmethod
    def book_to_markdown(
        clippings: List["KindleClipping"], as_file: bool = True
    ) -> str:
        """Render one book's clippings as Markdown.

        Title and author are taken from the first clipping, so ``clippings``
        must be non-empty (an empty list raises ``IndexError``).

        Args:
            clippings: clippings of a single book.
            as_file: when true, emit YAML front matter followed by a
                "Quotations" section; otherwise emit a heading and date
                range suitable for embedding in a shared file.
        """
        quotations = "\n".join(clipping.to_markdown() for clipping in clippings)
        title = clippings[0].title
        author = clippings[0].author
        dates = [clipping.date for clipping in clippings]
        date_first = min(dates).strftime("%Y-%m-%d")
        date_last = max(dates).strftime("%Y-%m-%d")

        if as_file:
            # The title sits inside a double-quoted YAML scalar, so
            # backslashes and double quotes must be escaped to keep the
            # front matter parseable.
            yaml_title = title.replace("\\", "\\\\").replace('"', '\\"')
            return (
                "---\n"
                f'title: "{yaml_title}"\n'
                f"author: {author}\n"
                f"date_first: {date_first}\n"
                f"date_last: {date_last}\n"
                "---\n"
                "## Quotations\n"
                f"{quotations}"
            )
        return (
            f"## {title} by {author}\n"
            f"Highlights from {date_first} to {date_last}\n"
            f"{quotations}"
        )

    def save_as_files(
        self, output_dir: str, min_highlights_for_separate_file: int = 5
    ) -> None:
        """Write the collection to Markdown files in ``output_dir``.

        Books with at least ``min_highlights_for_separate_file`` clippings
        get their own ``<title>.md`` file (characters that are invalid in
        file names are removed from the title); all remaining books are
        concatenated into ``other.md``. The directory is created if missing.
        """
        os.makedirs(output_dir, exist_ok=True)
        others = []
        for title, clippings in self.books.items():
            if len(clippings) >= min_highlights_for_separate_file:
                filename = re.sub(r'[<>:"/\\|?*]', "", title)
                target = path.join(output_dir, f"{filename}.md")
                with open(target, "w", encoding="utf-8") as file:
                    file.write(self.book_to_markdown(clippings, as_file=True))
            else:
                others.append(self.book_to_markdown(clippings, as_file=False))
        with open(path.join(output_dir, "other.md"), "w", encoding="utf-8") as file:
            file.write("\n\n".join(others))
The Design of Everyday Things: Revised and Expanded Edition (Norman, Don)
- Your Highlight on page 101 | location 2210-2212 | Added on Wednesday, 11 May 2016 21:05:40
It is a profoundly erroneous truism, repeated by all copy-books and by eminent people when they are making speeches, that we should cultivate the habit of thinking of what we are doing. The precise opposite is the case. Civilization advances by extending the number of important operations which we can perform without thinking about them. (Alfred North Whitehead, 1911.)
==========
The Design of Everyday Things: Revised and Expanded Edition (Norman, Don)
- Your Highlight on page 111 | location 2395-2396 | Added on Saturday, 18 June 2016 19:33:53
unless it is triggered by some external event or unless we deliberately keep it in mind through constant repetition (which then prevents us from having other conscious thoughts).
==========
The Design of Everyday Things: Revised and Expanded Edition (Norman, Don)
- Your Highlight on page 118 | location 2558-2560 | Added on Saturday, 18 June 2016 21:36:50
In some cultures, time is represented mentally as if it were a road stretching out ahead of the person. As a person moves through time, the person moves forward along the time line. Other cultures use the same representation, except now it is the person who is fixed and it is time that moves: an event in the future moves toward the person.
==========
The Design of Everyday Things: Revised and Expanded Edition (Norman, Don)
- Your Highlight on page 132 | location 2796-2797 | Added on Wednesday, 29 June 2016 00:49:49
Violate conventions and you are marked as an outsider.
==========
The Design of Everyday Things: Revised and Expanded Edition (Norman, Don)
- Your Highlight on page 187 | location 3809-3810 | Added on Thursday, 7 July 2016 13:40:27
Never underestimate the power of social pressures on behavior, causing otherwise sensible people to do things they know are wrong and possibly dangerous.
==========
Quiet: The Power of Introverts in a World That Can't Stop Talking (Cain, Susan)
- Your Highlight on page 23 | location 391-392 | Added on Tuesday, 30 May 2017 14:38:08
“Try in every way to have a ready command of the manners which make people think ‘he’s a mighty likeable fellow,’”
==========
Quiet: The Power of Introverts in a World That Can't Stop Talking (Cain, Susan)
- Your Highlight on page 24 | location 416-418 | Added on Tuesday, 30 May 2017 19:39:44
It was no coincidence that in the 1920s and the 1930s, Americans became obsessed with movie stars. Who better than a matinee idol to model personal magnetism?
==========
Quiet: The Power of Introverts in a World That Can't Stop Talking (Cain, Susan)
- Your Highlight on page 25 | location 437-438 | Added on Tuesday, 30 May 2017 20:01:50
This portrayal of courtship as a high-stakes performance reflected the bold new mores of the Culture of Personality.
==========
Quiet: The Power of Introverts in a World That Can't Stop Talking (Cain, Susan)
- Your Highlight on page 31 | location 550-552 | Added on Friday, 2 June 2017 11:06:33
The number of Americans who considered themselves shy increased from 40 percent in the 1970s to 50 percent in the 1990s, probably because we measured ourselves against ever higher standards of fearless self-presentation.
==========
Quiet: The Power of Introverts in a World That Can't Stop Talking (Cain, Susan)
- Your Highlight on page 31 | location 554-555 | Added on Friday, 2 June 2017 11:07:48
(DSM-IV), the psychiatrist’s bible of mental disorders, considers the fear of public speaking to be a pathology—not an annoyance, not a disadvantage, but a disease—
==========
Quiet: The Power of Introverts in a World That Can't Stop Talking (Cain, Susan)
- Your Highlight on page 33 | location 587-587 | Added on Friday, 2 June 2017 11:24:15
Carnegie’s best-selling advice on how to get people to admire you and do your bidding.
==========
Driven to Distraction (Revised) (Hallowell, Edward M.)
- Your Highlight on page 307 | location 4636-4636 | Added on Wednesday, 23 October 2019 05:12:34
Don’t feel chained to conventional careers or conventional ways of coping.
==========
Driven to Distraction (Revised) (Hallowell, Edward M.)
- Your Highlight on page 307 | location 4636-4637 | Added on Wednesday, 23 October 2019 05:13:00
Don’t feel chained to conventional careers or conventional ways of coping. Give yourself permission to be yourself. Give up trying to be the person you always
==========
Driven to Distraction (Revised) (Hallowell, Edward M.)
- Your Highlight on page 307 | location 4636-4638 | Added on Wednesday, 23 October 2019 05:13:15
Don’t feel chained to conventional careers or conventional ways of coping. Give yourself permission to be yourself. Give up trying to be the person you always thought you should be—the model student or the organized executive, for example—and let yourself be who you are.
==========
Driven to Distraction (Revised) (Hallowell, Edward M.)
- Your Highlight on page 307 | location 4640-4641 | Added on Wednesday, 23 October 2019 05:14:57
Its cure is not to be found in the power of the will, nor in punishment, nor in sacrifice, nor in pain.
==========
Driven to Distraction (Revised) (Hallowell, Edward M.)
- Your Note on page 307 | location 4641 | Added on Wednesday, 23 October 2019 05:15:11
Catholicism
==========
Driven to Distraction (Revised) (Hallowell, Edward M.)
- Your Highlight on page 308 | location 4649-4650 | Added on Wednesday, 23 October 2019 05:17:34
If your organization system can be stimulating (imagine that!), instead of boring, then you will be more likely to follow it.
==========
Driven to Distraction (Revised) (Hallowell, Edward M.)
- Your Highlight on page 308 | location 4649-4651 | Added on Wednesday, 23 October 2019 05:17:44
If your organization system can be stimulating (imagine that!), instead of boring, then you will be more likely to follow it. For example, in setting things up, try color coding. Mentioned above, color coding deserves emphasis. Many people with ADD are visually oriented. Take
==========
Driven to Distraction (Revised) (Hallowell, Edward M.)
- Your Highlight on page 309 | location 4656-4657 | Added on Wednesday, 23 October 2019 05:19:04
For people with ADD, TO DO piles might just as well be called NEVER DONE piles. They
==========
Driven to Distraction (Revised) (Hallowell, Edward M.)
- Your Highlight on page 309 | location 4656-4657 | Added on Wednesday, 23 October 2019 05:19:08
For people with ADD, TO DO piles might just as well be called NEVER DONE piles.
==========
Driven to Distraction (Revised) (Hallowell, Edward M.)
- Your Highlight on page 309 | location 4656-4658 | Added on Wednesday, 23 October 2019 05:19:25
For people with ADD, TO DO piles might just as well be called NEVER DONE piles. They serve as little menaces around one’s desk or room, silently building guilt, anxiety, and resentment, as well as taking up a lot of space. Get in the habit of acting
==========
Driven to Distraction (Revised) (Hallowell, Edward M.)
- Your Highlight on page 309 | location 4656-4658 | Added on Wednesday, 23 October 2019 05:19:30
For people with ADD, TO DO piles might just as well be called NEVER DONE piles. They serve as little menaces around one’s desk or room, silently building guilt, anxiety, and resentment, as well as taking up a lot of space. Get in the habit
==========
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment