Last active
December 30, 2024 18:17
-
-
Save FedericoPonzi/0bdb4789b1f6f60204098129313156e1 to your computer and use it in GitHub Desktop.
Convert Kindle's "My Clippings.txt" highlights into Markdown tables.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Usage:
```
cat /tmp/My\ Clippings.txt | python3 quote-to-md.py
```
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re
import sys
from datetime import datetime
def parse_quotes_to_markdown(input_text: str) -> str:
    """Convert the text of a Kindle "My Clippings.txt" file to Markdown tables.

    Entries are separated by ``==========``. An entry is converted when it
    has a ``- Your Highlight on page N | Added on <date>`` (or
    ``at location N``) metadata line and a blank line followed by the
    highlighted text. Each converted entry becomes one table row; a new
    ``## <title>`` section with a table header is started whenever the title
    differs from the title of the previously converted entry.

    Args:
        input_text: Full contents of the clippings file. CRLF/CR line endings
            and U+FEFF byte-order marks are tolerated.

    Returns:
        The Markdown document as one string, or ``""`` when nothing could be
        converted.

    Entries that cannot be parsed, and dates that do not match
    ``%A, %d %B %Y %H:%M:%S``, are reported on stderr so that diagnostics
    never end up inside the generated Markdown. An unparseable date is kept
    verbatim in the table.
    """
    # Kindle writes CRLF line endings and byte-order marks; normalise both so
    # the "\n\n" metadata/content separator and the titles match reliably.
    normalized = (
        input_text.replace("\ufeff", "").replace("\r\n", "\n").replace("\r", "\n")
    )

    metadata_pattern = re.compile(
        r'^(.*?)\s+- Your Highlight (?:on page|at location) (\d+(?:-\d+)?) \| Added on (.*?)$',
        re.MULTILINE,
    )

    parts = []
    current_title = None

    for raw_quote in normalized.split("=========="):
        quote = raw_quote.strip()
        if not quote:
            continue

        metadata_match = metadata_pattern.search(quote)
        # The highlighted text is everything after the first blank line.
        _, separator, body = quote.partition("\n\n")
        if metadata_match is None or not separator:
            print(f"Warning: Could not parse quote:\n{quote}\n", file=sys.stderr)
            continue

        title = metadata_match.group(1).strip()
        location = metadata_match.group(2).strip()
        added_on = metadata_match.group(3).strip()
        content = body.strip()

        # If the title changes, add a new Markdown section with a table header.
        if title != current_title:
            current_title = title
            parts.append(f"\n## {title}\n")
            parts.append("Content|Loc|Added on|\n")
            parts.append(":-:|:-:|:-:|\n")

        # Convert the date to ISO format, falling back to the original text.
        try:
            added_on_dt = datetime.strptime(added_on, "%A, %d %B %Y %H:%M:%S")
        except ValueError as e:
            print(
                f"Date conversion error: {e}. Using original date format.",
                file=sys.stderr,
            )
            added_on_iso = added_on
        else:
            added_on_iso = added_on_dt.strftime("%Y-%m-%dT%H:%M:%S")

        # A table row must stay on one line and a literal "|" would start a
        # new cell, so join multi-line text with <br> and escape pipes.
        cell = "<br>".join(
            line.strip() for line in content.splitlines() if line.strip()
        ).replace("|", "\\|")

        parts.append(f"{cell}|{location}|{added_on_iso}|\n")

    return "".join(parts)
# Script entry point: read the clippings from stdin, write Markdown to stdout.
if __name__ == "__main__":
    import sys

    print(parse_quotes_to_markdown(sys.stdin.read()))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment