Created
September 26, 2024 11:19
-
-
Save planetis-m/8a64db9e479ee227e3e76193b6d3243b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def greek_vowels_iterator(word):
    """Yield the vowel groups found in ``word``, scanning left to right.

    Each yielded item is a slice of ``word`` covering one vowel group.
    Characters that do not start any recognised group are skipped silently.
    Only the lowercase letters listed in the tables below are recognised.

    At every position the scan tries, in this order:

    1. a short "spurious diphthong" starter ('ι' or 'υ') that is not the
       last character, merged with a following vowel digraph or a following
       qualifying vowel when one is present;
    2. a long "spurious diphthong" starter ('οι' or 'ει') that is not at the
       very end of the word, merged in the same way;
    3. a standard two-letter diphthong;
    4. a single vowel.
    """
    single_vowels = 'αάεέηήιίοόυύωώϊϋΐΰ'
    standard_pairs = {
        'αι', 'ει', 'οι', 'υι', 'αυ', 'ευ', 'ου',
        'αί', 'εί', 'οί', 'υί', 'αύ', 'εύ', 'ού',
        'αη', 'αϊ', 'οη', 'όη', 'οϊ', 'άι', 'όι', 'εϊ'
    }
    short_starters = 'ιυ'
    long_starters = {'οι', 'ει'}
    mergeable_pairs = {
        'αι', 'ει', 'οι', 'ου', 'υι',
        'αί', 'εί', 'οί', 'ού', 'υί'
    }
    mergeable_singles = 'αοεάόέ'

    total = len(word)
    pos = 0
    while pos < total:
        left = total - pos  # characters remaining, current one included

        # Length of a spurious-diphthong starter at this position (0 if none).
        # A starter only counts when at least one character follows it.
        if left > 1 and word[pos] in short_starters:
            prefix = 1
        elif left > 2 and word[pos:pos + 2] in long_starters:
            prefix = 2
        else:
            prefix = 0

        if prefix:
            tail = pos + prefix
            if left > prefix + 1 and word[tail:tail + 2] in mergeable_pairs:
                span = prefix + 2
            elif word[tail] in mergeable_singles:
                span = prefix + 1
            else:
                span = prefix
        elif left > 1 and word[pos:pos + 2] in standard_pairs:
            span = 2
        elif word[pos] in single_vowels:
            span = 1
        else:
            # Not a vowel: move on without yielding anything.
            pos += 1
            continue

        yield word[pos:pos + span]
        pos += span
def validate_accented_word(word):
    """Return True when the accent marks of a Greek word look well-formed.

    The word is split into vowel groups with ``greek_vowels_iterator`` and
    each group is treated as one syllable. A word counts as a contraction
    when it starts or ends with an apostrophe (``'`` or ``’``).

    Rules applied:

    * An empty string is never valid.
    * A non-contracted word with exactly one vowel group must be unaccented,
      unless it is one of the listed accented exceptions, in which case it
      must be accented.
    * An accented 'ά' or 'έ' directly followed by a separate 'υ' group is
      rejected.
    * An accent earlier than the third group from the end is rejected (the
      second group from the end when the word ends with an apostrophe).
    * Three accents are rejected; so are two accents in a contraction, and
      two accents when one of them sits on the second-to-last group (the
      last group when the word ends with an apostrophe).
    * A non-contracted word with no accent at all is rejected.

    Args:
        word: The word to check, as a string.

    Returns:
        bool: True if none of the rejection rules fire, False otherwise.
    """
    accented_vowels = 'άέήίόύώΐΰ'
    apostrophes = "'’"

    # Without this guard the first/last character lookups below raise
    # IndexError for an empty string.
    if not word:
        return False

    def is_accented(vowel):
        """Tell whether any character of the vowel group carries an accent."""
        return any(ch in accented_vowels for ch in vowel)

    # Check if the word is a contraction by checking its beginning/ending
    is_contraction_end = word[-1] in apostrophes
    is_contraction = is_contraction_end or word[0] in apostrophes

    # Accented single-syllable words that are accepted. The mu is written as
    # an escape (U+03BC GREEK SMALL LETTER MU) so it cannot be confused with
    # the look-alike U+00B5 MICRO SIGN.
    exceptions = {
        'ή', 'πού', 'πώς', '\u03bcού', 'σού', 'τού', 'τήν',
        'τής', 'τόν', 'τό', '\u03bcάς', 'σάς', 'τούς', 'τά',
    }

    vowel_components = list(greek_vowels_iterator(word))
    num_syllables = len(vowel_components)

    # Single-syllable words: accented if and only if listed as an exception.
    if num_syllables == 1 and not is_contraction:
        # Fold MICRO SIGN to Greek mu so either spelling matches the set.
        lookup_key = word.replace('\u00b5', '\u03bc')
        return is_accented(vowel_components[0]) == (lookup_key in exceptions)

    accented_syllable_count = 0
    second_last_accent = False

    for i, vowel in enumerate(vowel_components):
        if not is_accented(vowel):
            continue
        accented_syllable_count += 1
        # An incorrect use of the compound vowels 'αυ', 'ευ': the accent was
        # placed on the first letter, which split the pair into two groups.
        if vowel in 'άέ' and i < num_syllables - 1 and vowel_components[i+1] == 'υ':
            return False
        # An accent before the last three syllables is never allowed; a
        # trailing apostrophe shifts that window by one.
        if i < num_syllables - 3 or (is_contraction_end and i < num_syllables - 2):
            return False
        if i == num_syllables - 2 or (is_contraction_end and i == num_syllables - 1):
            second_last_accent = True

    # Too many accents: three in any word, or two in a contraction.
    if accented_syllable_count == 3 or (is_contraction and accented_syllable_count == 2):
        return False
    # Two accents are rejected when one of them is on the second-to-last syllable.
    if accented_syllable_count == 2 and second_last_accent:
        return False
    # A full (non-contracted) word must carry at least one accent.
    if not is_contraction and accented_syllable_count == 0:
        return False
    return True
# Test data for validate_accented_word: (word, expected result) pairs.
# The single-letter case ("α", False) stays disabled, as in the original list.

# Words expected to validate; the last four are contractions.
_EXPECTED_VALID = (
    "Αθήνα",
    "παιδί",
    "άνθρωπος",
    "καλός",
    "λεξικό",
    "προγραμματισμός",
    "ευχαριστώ",
    "όμορφος",
    "αγάπη",
    "ελευθερία",
    "δημοκρατία",
    "γεια",
    "σας",
    "ευχαριστώ",
    "καλημέρα",
    "καλησπέρα",
    "καληνύχτα",
    "μήνυμα",
    "φίλος",
    "οικογένεια",
    "χώρα",
    "πόλη",
    "δρόμος",
    "αυτοκίνητο",
    "σπίτι",
    "τραπέζι",
    "καρέκλα",
    "βιβλίο",
    "μυαλό",
    "ψυχή",
    "χρόνος",
    "ζωή",
    "θάνατος",
    "αγάπη",
    "μίσος",
    "χαρά",
    "λύπη",
    "φόβος",
    "ελπίδα",
    "όνειρο",
    "πραγματικότητα",
    "αλήθεια",
    "ψέμα",
    "δίκαιο",
    "άδικο",
    "πολιτική",
    "οικονομία",
    "τέχνη",
    "μουσική",
    "κινηματογράφος",
    "θεατρικό",
    "λογοτεχνία",
    "ιστορία",
    "φιλοσοφία",
    "θρησκεία",
    "επιστήμη",
    "μαθηματικά",
    "φυσική",
    "χημεία",
    "βιολογία",
    "γλώσσα",
    "γραμματική",
    "λεξιλόγιο",
    "ορθογραφία",
    "συντακτικό",
    "μορφολογία",
    "σημασιολογία",
    "φωνητική",
    "φωνολογία",
    "γραμματολογία",
    "ιστορία",
    "πολιτισμός",
    "κοινωνία",
    "πολιτεία",
    "νόμος",
    "δικαιοσύνη",
    "εγκληματικότητα",
    "αστυνομία",
    "δικαστήριο",
    "φυλακή",
    "υγεία",
    "ιατρική",
    "νοσοκομείο",
    "φάρμακο",
    "παιδεία",
    "σχολείο",
    "δάσκαλος",
    "μαθητής",
    "πανεπιστήμιο",
    "εργασία",
    "επάγγελμα",
    "χρήμα",
    "οικονομία",
    "εμπόριο",
    "βιομηχανία",
    "τεχνολογία",
    "υπολογιστής",
    "διαδίκτυο",
    "κινητό",
    "τηλεόραση",
    "ραδιόφωνο",
    "μουσική",
    "ταινία",
    "θεατρική",
    "παράσταση",
    "βιβλίο",
    "περιοδικό",
    "εφημερίδα",
    "ταξίδι",
    "αεροπλάνο",
    "τρένο",
    "αυτοκίνητο",
    "λεωφορείο",
    "ποδήλατο",
    "φωτογραφία",
    "ζωγραφική",
    "γλυπτική",
    "αρχιτεκτονική",
    "σχεδιασμός",
    "μόδα",
    "τροφή",
    "φαγητό",
    "ποτό",
    "συνταγή",
    "σπιτικό",
    "αθλητισμός",
    "ποδόσφαιρο",
    "μπάσκετ",
    "βόλεϊ",
    "τένις",
    "κολύμβηση",
    "χορός",
    "μουσική",
    "θέατρο",
    "κινηματογράφος",
    "λογοτεχνία",
    "ιστορία",
    "φιλοσοφία",
    "θρησκεία",
    "επιστήμη",
    "μαθηματικά",
    "φυσική",
    "χημεία",
    "βιολογία",
    "γλώσσα",
    "γραμματική",
    "λεξιλόγιο",
    "ορθογραφία",
    "συντακτικό",
    "μορφολογία",
    "σημασιολογία",
    "φωνητική",
    "φωνολογία",
    "γραμματολογία",
    "πολιτική",
    "οικονομία",
    "κοινωνία",
    "δίκαιο",
    "δικαιοσύνη",
    "υγεία",
    "παιδεία",
    "εργασία",
    "χρήμα",
    "τεχνολογία",
    "ταξίδι",
    "τέχνη",
    "τροφή",
    "αθλητισμός",
    "πρόεδρός",
    "φέρετέ",
    "'λεγε",
    "δείξ'",
    "'ριξε",
    "'ρθω",
)

# Words with deliberately wrong accentuation, expected to be rejected.
_EXPECTED_INVALID = (
    'κάλός',
    'εργον',
    'πάραδειγμά',
    'θέάτρο',
    'ήλεκτρικος',
    'ξερο',
    'προβλήμάτός',
    'μάθήματα',
    'οίκονομία',
    'σχόλείο',
    'πατάτές',
    'φώτογράφός',
)

# Valid words first, then the invalid ones, each paired with its expectation.
test_cases = (
    [(entry, True) for entry in _EXPECTED_VALID]
    + [(entry, False) for entry in _EXPECTED_INVALID]
)
# Check every case and report only the words whose result differs from
# the expected value; a clean run prints nothing.
for candidate, should_be_valid in test_cases:
    if validate_accented_word(candidate) != should_be_valid:
        print(f"Word: {candidate}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment