Last active
November 22, 2022 09:30
-
-
Save wbolster/7fead7a261b461a4dd7c8ae680d1452c to your computer and use it in GitHub Desktop.
shuffle dots: text manıṗulatıȯn tool to move dots above letters to another ṗosıṫıon
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Text manıṗulatıȯn tool to move dots above letters to another ṗosıṫıon.
"""
import operator | |
import random | |
import re | |
import unicodedata | |
import sys | |
# Replacement table used to strip dots: each key is a character that carries
# (or is) a dot above, each value is what remains once that dot is removed.
# ‘i’ and ‘j’ become their dotless forms; a bare combining dot disappears.
DOT_MAPPING = {"i": "ı", "j": "ȷ", "\N{COMBINING DOT ABOVE}": ""}
# Preference score per lower case letter for receiving a dot: letters with a
# higher score are chosen first, and any letter not listed here is treated by
# shuffle_dots() as having score 0.
LOWER_CASE_LETTERS_SCORES = {
    letter: score
    for letters, score in (
        ("aecıȷouy", 3),  # vowel-like
        ("mnrsvwxz", 3),  # no ascenders and no descenders
        ("gpq", 3),  # descenders
        ("bdfhklt", 1),  # ascenders
    )
    for letter in letters
}
def shuffle_dots(s: str) -> str:
    """Shuffle existing dots above letters to random locations.

    Every dot found in ``s`` (the dot of ‘i’ and ‘j’, and any
    ‘U+0307 ◌̇ COMBINING DOT ABOVE’) is removed, and the same number of dots
    is then placed on randomly chosen letters. Letters with a higher score
    in ``LOWER_CASE_LETTERS_SCORES`` are chosen first; unlisted letters
    score 0. If ``s`` has more dots than letters, the surplus dots are
    dropped.

    Args:
        s: The text whose dots are shuffled, typically a single word.

    Returns:
        The text with its dots relocated, in Unicode normalization form C.
    """
    dot = "\N{COMBINING DOT ABOVE}"

    # Canonical decomposition is enough to split precomposed letters such as
    # ‘ṗ’ into a base letter plus U+0307. The compatibility forms (NFKD/NFKC)
    # are avoided on purpose: they also rewrite characters that have nothing
    # to do with dots (for example ‘²’ becomes ‘2’).
    s = unicodedata.normalize("NFD", s)
    n_dots = sum(s.count(char) for char in DOT_MAPPING)

    # Remove existing dots
    for needle, replacement in DOT_MAPPING.items():
        s = s.replace(needle, replacement)

    # Determine new positions, preferring good spots if possible. Shuffling
    # first and then sorting (stable) by score randomizes the order among
    # letters that share the same score.
    candidates = [
        (pos, LOWER_CASE_LETTERS_SCORES.get(c, 0))
        for pos, c in enumerate(s)
        if unicodedata.category(c).startswith("L")
    ]
    random.shuffle(candidates)
    candidates.sort(key=operator.itemgetter(1), reverse=True)
    new_dot_positions = {pos for pos, _ in candidates[:n_dots]}

    # Add new dots
    s = "".join(
        c + dot if pos in new_dot_positions else c
        for pos, c in enumerate(s)
    )

    # Normalize: a dotless letter that received a dot is the plain letter
    # again; everything else is recomposed where a precomposed form exists.
    s = s.replace("ı" + dot, "i")
    s = s.replace("ȷ" + dot, "j")
    return unicodedata.normalize("NFC", s)
def main() -> int:
    """Shuffle the dots in the input text and print the result.

    The input is the command line arguments joined by single spaces, or the
    whole of standard input when no arguments are given. Dots are shuffled
    separately within each run of word characters.

    Returns:
        The process exit status, always 0.
    """
    # Use command line args as input, or stdin if none given.
    text = " ".join(sys.argv[1:])
    if not text:
        text = sys.stdin.read()

    # ``\w`` does not match combining marks, so in decomposed input a letter
    # followed by U+0307 would end the ‘word’ and that dot would never be
    # shuffled. Composing first turns such pairs into single letters
    # wherever a precomposed form exists.
    text = unicodedata.normalize("NFC", text)

    # Shuffle dots within each ‘word’
    result = re.sub(r"\w+", lambda m: shuffle_dots(m.group()), text)

    # Text read from stdin normally ends with a newline already; only add
    # one when it is missing so the output does not gain a blank line.
    print(result, end="" if result.endswith("\n") else "\n")
    return 0
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment