ChrisVilches · July 6, 2022 16:10
diff --git a/kindle_highlight_clean.py b/kindle_highlight_clean.py
 import sys
 import re

 # Command-line script that tidies the spacing around punctuation in a Kindle
 # highlight HTML export and rewrites the file in place when anything changed.
 # Usage: python kindle_highlight_clean.py <file.html>

 # Literal substitutions: every occurrence of a key is replaced by its value.
 # They are applied one after another, in the order written here.
 replacement_rules = {
     " .": ".",
     " ,": ",",
     "( ": "(",
     " )": ")",
     " :": ":",
     " ;": ";",
     " ?": "?",
     " !": "!",
     " - ": "-",
     " / ": "/",
     "“ ": "“",
     " ”": "”",
 }

 # Regular-expression substitutions, applied after the literal ones.
 replacement_rules_regex = {
     r"# ([0-9])": r"#\1",  # "# 4" -> "#4"
 }


 def looks_like_kindle_export(path, text):
     """Return True when *path* and *text* pass the Kindle-export heuristic.

     The file name must end in ``.html`` and the text must contain both the
     ``class='bookTitle'`` and ``class='noteHeading'`` markers.
     """
     return (
         path.endswith(".html")
         and "class='bookTitle'" in text
         and "class='noteHeading'" in text
     )


 def clean_text(text):
     """Return *text* with all literal and regex replacement rules applied."""
     for old, new in replacement_rules.items():
         text = text.replace(old, new)
     for pattern, repl in replacement_rules_regex.items():
         text = re.sub(pattern, repl, text)
     return text


 def main(argv=None):
     """Clean the highlight file named by the first command-line argument.

     Args:
         argv: Argument list without the program name; defaults to
             ``sys.argv[1:]``.

     Raises:
         SystemExit: If no file argument was given.
         ValueError: If the file does not look like a Kindle highlight export.
     """
     args = sys.argv[1:] if argv is None else argv
     if not args:
         sys.exit("usage: kindle_highlight_clean.py <file.html>")
     path = args[0]

     # Context managers close the handle even if read()/write() raises.
     with open(path, "r", encoding="utf8") as file:
         original_data = file.read()

     if not looks_like_kindle_export(path, original_data):
         raise ValueError("It seems this file is not a Kindle highlight HTML file.")

     data = clean_text(original_data)

     if original_data == data:
         print("Data didn't change.")
     else:
         print("Data changed.")
         # Only touch the file on disk when there is something new to write.
         with open(path, "w", encoding="utf8") as file:
             file.write(data)

     print("OK")


 if __name__ == "__main__":
     main()
	import sys
	import re

	# Command-line script that tidies the spacing around punctuation in a Kindle
	# highlight HTML export and rewrites the file in place when anything changed.
	# Usage: python kindle_highlight_clean.py <file.html>

	# Literal substitutions: every occurrence of a key is replaced by its value.
	# They are applied one after another, in the order written here.
	replacement_rules = {
		" .": ".",
		" ,": ",",
		"( ": "(",
		" )": ")",
		" :": ":",
		" ;": ";",
		" ?": "?",
		" !": "!",
		" - ": "-",
		" / ": "/",
		"“ ": "“",
		" ”": "”",
	}

	# Regular-expression substitutions, applied after the literal ones.
	replacement_rules_regex = {
		r"# ([0-9])": r"#\1",  # "# 4" -> "#4"
	}


	def looks_like_kindle_export(path, text):
		"""Return True when *path* and *text* pass the Kindle-export heuristic.

		The file name must end in ``.html`` and the text must contain both the
		``class='bookTitle'`` and ``class='noteHeading'`` markers.
		"""
		return (
			path.endswith(".html")
			and "class='bookTitle'" in text
			and "class='noteHeading'" in text
		)


	def clean_text(text):
		"""Return *text* with all literal and regex replacement rules applied."""
		for old, new in replacement_rules.items():
			text = text.replace(old, new)
		for pattern, repl in replacement_rules_regex.items():
			text = re.sub(pattern, repl, text)
		return text


	def main(argv=None):
		"""Clean the highlight file named by the first command-line argument.

		Args:
			argv: Argument list without the program name; defaults to
				``sys.argv[1:]``.

		Raises:
			SystemExit: If no file argument was given.
			ValueError: If the file does not look like a Kindle highlight export.
		"""
		args = sys.argv[1:] if argv is None else argv
		if not args:
			sys.exit("usage: kindle_highlight_clean.py <file.html>")
		path = args[0]

		# Context managers close the handle even if read()/write() raises.
		with open(path, "r", encoding="utf8") as file:
			original_data = file.read()

		if not looks_like_kindle_export(path, original_data):
			raise ValueError("It seems this file is not a Kindle highlight HTML file.")

		data = clean_text(original_data)

		if original_data == data:
			print("Data didn't change.")
		else:
			print("Data changed.")
			# Only touch the file on disk when there is something new to write.
			with open(path, "w", encoding="utf8") as file:
				file.write(data)

		print("OK")


	if __name__ == "__main__":
		main()