Skip to content

Instantly share code, notes, and snippets.

@okurka12
Last active July 21, 2024 01:07
Show Gist options
  • Save okurka12/755f2c92612b190c7058695dd52f8deb to your computer and use it in GitHub Desktop.
Save okurka12/755f2c92612b190c7058695dd52f8deb to your computer and use it in GitHub Desktop.
Replaces letters with diacritics in a Czech text with their plain ASCII equivalents
#
# Prints the text read from stdin with diacritics removed.
# Expects text in the Czech language.
#
# Author: okurka12
#
# Date: 2024-07-21
#
# reads from stdin and outputs on stdout
#
import sys
# Debug switch: when True, the script additionally prints every character of
# the result together with its code point, followed by a blank line, before
# printing the result itself.
PRINT_BLOAT = False
# The whole of standard input is read at once into a single string.
original_string = sys.stdin.read()
# Lowercase Czech letters with diacritics, each paired with the plain ASCII
# letter that stands in for it.
_LOWERCASE_PAIRS = (
    ("ě", "e"),
    ("š", "s"),
    ("č", "c"),
    ("ř", "r"),
    ("ž", "z"),
    ("ý", "y"),
    ("á", "a"),
    ("í", "i"),
    ("é", "e"),
    ("ů", "u"),
    ("ú", "u"),
    ("ď", "d"),
    ("ť", "t"),
    ("ň", "n"),
    ("ó", "o"),
)

# replacements for a Czech text: the lowercase letters, then the same letters
# in uppercase (same order), then the Czech typographic double quotes.
replacements = {
    **dict(_LOWERCASE_PAIRS),
    **{accented.upper(): plain.upper() for accented, plain in _LOWERCASE_PAIRS},
    "„": '"',
    "“": '"',
}
# perform the replacing
def _asciify_letter(letter):
    """Return the ASCII stand-in for one character of the input.

    A character listed in ``replacements`` yields its mapped value, any
    other character with a code point up to 0x7E is returned unchanged,
    and everything else becomes "?".
    """
    if letter in replacements:
        return replacements[letter]
    if ord(letter) <= 0x7E:
        return letter
    return "?"


# Assemble the result with a single join rather than growing a string with
# += once per character, which is only fast thanks to a CPython-specific
# optimisation.
asciified_string = "".join(map(_asciify_letter, original_string))
# check that the string is indeed ascii
for character in asciified_string:
    code_point = ord(character)
    if PRINT_BLOAT:
        # debug trace: one "<character> <code point>" line per character
        print(character, code_point)
    assert code_point <= 0x7E

# print
if PRINT_BLOAT:
    # blank line between the debug trace and the actual output
    print()
print(asciified_string)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment