Skip to content

Instantly share code, notes, and snippets.

@planetis-m
Last active September 25, 2024 17:16
Show Gist options
  • Save planetis-m/0b5df9b61de0ff972f1306cf2ab0de4c to your computer and use it in GitHub Desktop.
Save planetis-m/0b5df9b61de0ff972f1306cf2ab0de4c to your computer and use it in GitHub Desktop.
def lower_first_if_title(word):
    """Return *word* with its first character lower-cased if it is title-cased.

    A word for which ``str.isupper()`` is true is returned unchanged, and so
    is any word for which ``str.istitle()`` is false (this includes the empty
    string).
    """
    should_lower = word.istitle() and not word.isupper()
    if not should_lower:
        return word
    # istitle() is only true for non-empty strings, so word[0] is safe here.
    head, tail = word[0], word[1:]
    return head.lower() + tail
def count_greek_syllables(word):
    """Estimate the number of syllables in a Modern Greek word.

    The word is scanned left to right and every vowel group found counts as
    one syllable:

    * an unaccented ``ι``/``υ`` (or the digraphs ``οι``/``ει``) followed by a
      vowel digraph or by one of ``α ο ε ά ό έ`` is merged with that following
      vowel into a single syllable;
    * a two-letter sequence listed in ``diphthongs`` counts once;
    * any other single vowel counts once;
    * every other character is skipped.

    The input is case-folded first, because the lookup tables contain only
    lower-case letters; without that step upper-case vowels would be skipped
    and the count would come out too low (0 for an all-caps word).

    This is a spelling-based heuristic. For example ``ι`` followed by ``ω``
    is always treated as two syllables, because ``ω`` is not in the
    qualifying set.

    Args:
        word: The word to analyse (any letter case).

    Returns:
        The number of syllables detected; 0 for an empty string or a string
        without Greek vowels.
    """
    vowels = 'αάεέηήιίοόυύωώϊϋΐΰ'
    diphthongs = {
        'αι', 'ει', 'οι', 'υι', 'αυ', 'ευ', 'ου',
        'αί', 'εί', 'οί', 'υί', 'αύ', 'εύ', 'ού',
        'αη', 'αϊ', 'οη', 'όη', 'οϊ', 'άι', 'όι', 'εϊ'
    }
    spurious_diphthongs = 'ιυ'
    spurious_diphthongs_long = {'οι', 'ει'}
    vowel_digraphs = {
        'αι', 'ει', 'οι', 'ου', 'υι',
        'αί', 'εί', 'οί', 'ού', 'υί'
    }
    qualifying_vowels = 'αοεάόέ'

    # All tables above are lower-case, so normalise the case before scanning.
    word = word.lower()
    word_len = len(word)
    syllable_count = 0
    i = 0
    while i < word_len:
        # Short spurious diphthongs: 'ι' or 'υ' with at least one char after.
        if i < word_len - 1 and word[i] in spurious_diphthongs:
            if i < word_len - 2 and word[i+1:i+3] in vowel_digraphs:
                # ι/υ + vowel digraph form one syllable.
                i += 3
            elif word[i+1] in qualifying_vowels:
                # ι/υ + qualifying vowel form one syllable.
                i += 2
            else:
                # ι/υ stands on its own.
                i += 1
            syllable_count += 1
        # Long spurious diphthongs: 'οι' or 'ει' with at least one char after.
        elif i < word_len - 2 and word[i:i+2] in spurious_diphthongs_long:
            if i < word_len - 3 and word[i+2:i+4] in vowel_digraphs:
                i += 4
            elif word[i+2] in qualifying_vowels:
                i += 3
            else:
                i += 2
            syllable_count += 1
        # Standard diphthongs count as a single syllable.
        elif i < word_len - 1 and word[i:i+2] in diphthongs:
            syllable_count += 1
            i += 2
        # Any remaining single vowel is a syllable of its own.
        elif word[i] in vowels:
            syllable_count += 1
            i += 1
        # Consonants and non-Greek characters are skipped.
        else:
            i += 1
    return syllable_count
# Test the function
# Ad-hoc smoke test: prints the syllable count computed for every word below
# so the output can be checked by eye. Nothing is asserted. The trailing
# comments on the rows are the author's notes on which groups of words are
# counted as expected.
test_words = ['καλημέρα', 'άνθρωπος', 'ευχαριστώ', 'δίφθογγος', 'αίμα', 'είναι', 'ούτε', # start of special
              'Σάββατο', 'κόκκινος', 'βλέμμα', 'ιππότης', 'κόλλα', 'υπάλληλος', 'θάρρος',
              'σύννεφο', 'Σάββας', 'γράμμα', 'μαλλί', 'παππούς', 'συλλαβή', 'άρρωστος',
              'επίρρημα', 'κόμμα', 'εννοώ', 'λάκκος', 'άμμος', 'βύσσινο', 'γλώσσα', 'θάλασσα',
              'κρεμμύδι', 'κύτταρο', 'εννιά', 'μέλισσα', 'εκκλησία', 'ελάττωμα', 'γενναίος',
              'ήλιος', 'ελιές', 'παλιού', 'ποια', 'ποιοι', 'ποιες', 'ποιους', 'άδειες',
              'άδειοι', 'δίχτυα', 'διχτυού', 'Γιάννα', 'ποιος', 'άδειασε', 'γυάλα', 'αηδόνι',
              'βόηθα', 'αϊτός', 'γάιδαρος', 'κοροϊδεύω', 'ρόιδι', # up to this point ok
              'αδειάζω', 'αδειανός', 'άδειασμα', 'άδειος', 'μπάνιο', # ok
              'αγάπη', 'ποίημα', 'δουλειά', 'αδειάζω', 'κάποιος', 'κρυώνω', 'ησυχία',
              'ανθόκηπος', 'άλογο', 'τσουγκράνα', 'αυτοκίνητο', 'Ιωάννα', 'ευτυχία',
              'κόκκινος', 'λαχανόκηπος', 'κινδυνεύω', 'πίεσα', 'βόλεϊ',
              'Αττική', 'Υμηττός', 'Έλληνας', 'Άννα', 'Αλβανία', # ok
              'κορόιδο', 'κορόιδεμα', 'κοροϊδευτικός', 'κοροϊδεύω', 'κοροϊδία', 'κοροϊδίστικος', # ok
              'μία', 'δύο', 'βίος', 'ή', 'πού', 'πώς', # ok
              'µια', 'δυο', 'για', 'γεια', 'πια', 'ποιος', 'γιος', 'νιος', 'πιες', 'πιε', # one syllable
              'άδεια', 'αδειοδότηση', 'αδειοδοτικός', 'αδειοδοτώ', 'αδειοδωρόσημο', 'αδειούχος', # all wrong
              'διαβατήριο', 'Εύβοια', 'πιω'] # +3 wrong
for word in test_words:
    # A capitalised initial is lower-cased before counting; all-caps words
    # are passed through unchanged by lower_first_if_title.
    print(f"{word}: {count_greek_syllables(lower_first_if_title(word))} syllables")
# Misc
def is_diaeresis_correct(word):
    """Check whether the diaeresis in *word* passes a set of spelling rules.

    Returns ``False`` when any of the following holds:

    * the word contains more than one diaeresis letter (``ϊ ϋ ΐ ΰ``);
    * the letter right before the diaeresis is an accented vowel;
    * the two letters right before the diaeresis form an unaccented
      diphthong (``αι ει οι υι αυ ευ ου``);
    * the pair would not be a diphthong anyway: ``ϊ``/``ΐ`` after ``η`` or
      ``ω``, or ``ϋ``/``ΰ`` after ``η``, ``ι`` or ``ω``.

    Returns ``True`` otherwise, including for words with no diaeresis at all.
    """
    marked_letters = 'ϊϋΐΰ'
    stressed_vowels = 'άέήίόύώ'
    plain_diphthongs = {'αι', 'ει', 'οι', 'υι', 'αυ', 'ευ', 'ου'}

    already_seen = False
    for pos, letter in enumerate(word):
        if letter not in marked_letters:
            continue

        # A second diaeresis in the same word is rejected outright.
        if already_seen:
            return False
        already_seen = True

        # Every rule below looks at preceding letters; at position 0 there
        # are none, so no rule can fire.
        if pos == 0:
            continue
        before = word[pos - 1]

        # Rule 1: the preceding vowel carries the stress.
        if before in stressed_vowels:
            return False

        # Rule 2: an (unaccented) diphthong directly precedes the diaeresis.
        # Accented diphthongs are already caught by rule 1.
        if pos >= 2 and word[pos - 2:pos] in plain_diphthongs:
            return False

        # Rule 3: combinations ending in ι or υ that are not diphthongs
        # (ηι, ωι for iota; ηυ, ιυ, ωυ for upsilon).
        non_diphthong_leads = 'ηω' if letter in 'ϊΐ' else 'ηιω'
        if before in non_diphthong_leads:
            return False

    # No rule objected, so the diaeresis might be necessary.
    return True
def remove_greek_accents(text):
    """Strip accent marks from capital Greek letters, keeping the diaeresis.

    The text is decomposed (NFD), every combining mark other than the
    combining diaeresis (U+0308) that belongs to a capital letter in the
    range Α-Ω is dropped, and the result is recomposed (NFC). Marks on
    lower-case letters are left untouched.

    Args:
        text: The string to process.

    Returns:
        The NFC-normalised string with accents removed from capital letters.
    """
    # Local import keeps the function self-contained: the visible file has no
    # module-level import of unicodedata.
    import unicodedata

    combining_diaeresis = '\u0308'
    decomposed = unicodedata.normalize('NFD', text)

    result = []
    # Track the most recent non-combining character. Looking the mark up with
    # str.index() would return its FIRST occurrence in the string, and the
    # character right before a mark may itself be another combining mark.
    base_char = ''
    for c in decomposed:
        if not unicodedata.combining(c):
            base_char = c
        elif c != combining_diaeresis and '\u0391' <= base_char <= '\u03a9':
            continue  # Skip accent marks attached to a capital letter.
        result.append(c)

    # Reconstruct the string
    return unicodedata.normalize('NFC', ''.join(result))
def count_greek_accents(word):
    """Return how many characters of *word* are accented Greek vowels.

    A character counts when it is one of the precomposed letters listed in
    ``accented`` below: lower- and upper-case vowels with an accent, plus
    ``ΐ`` and ``ΰ``.
    """
    accented = 'άέήίόύώΐΰΆΈΉΊΌΎΏ'
    return sum(1 for letter in word if letter in accented)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment