Skip to content

Instantly share code, notes, and snippets.

@spewil
Last active October 18, 2019 16:40
Show Gist options
  • Save spewil/f88e7f5e2e09bf8b055a947a8a76f001 to your computer and use it in GitHub Desktop.
Save spewil/f88e7f5e2e09bf8b055a947a8a76f001 to your computer and use it in GitHub Desktop.
import Kindle "My Clippings.txt" into separate markdown files per author
import re
import sys
"""
This is a helpful little script for importing your kindle underlines from books.
Because I read multiple books at once, my underlines get mixed up and out of order.
This script untangles my underlines and puts each book into a nice neat markdown file.
Things to be wary of:
- Author names should be adjusted to be 3 words or fewer (including hyphenations)
- Use Calibre to access your clippings file easily
"""
# pass in the "My Clippings.txt" filepath
clippings_path = sys.argv[1]
with open(clippings_path) as clipping_file:
lines = clipping_file.readlines()
for i, line in enumerate(lines):
# This regex finds the book's author name (Author Name)
results = re.findall(r"\(((?:.\w*){,3})\)", line)
# if there is a parenthetical candidate in this line
if len(results) == 1:
name = results[0]
parts = name.split(' ')
# if the author name is 3 words or less
if len(parts) <= 3:
# if the author name has at least 2 capital letters
if sum(1 for c in name if c.isupper()) >= 2:
# if the author name has at least 4 characters
if len(name) > 4:
# make the filename-- underscores and lowercase
name = name.replace(' ', '_').lower()
# this regex gets rid of any non-alphanumeric characters except underscores
filename = re.sub(r'([^_\w])+', '', name)
# this is Kindle clipping markup for the end of a quote
if "=====" in lines[i + 4]:
# grab the actual underline (they exist as a single line)
quote = lines[i + 3]
# append the author's file with the new quote
with open("output/" + filename + ".md",
'a') as output_file:
output_file.write('==========\n')
output_file.write(quote)
title = re.sub(r"(\((?:.\w*){,3}\))", '', line)
output_file.write(title)
output_file.write(lines[i + 1])
output_file.write('\n\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment