# Created January 30, 2021 00:27
# Source: GitHub gist YuRen-tw/ae11933287d57012ed4979b52cc38678
# NOTE: This file contains hidden or bidirectional Unicode text that may be
# interpreted or compiled differently than it appears. To review it, open the
# file in an editor that reveals hidden Unicode characters.
import re
import unicodedata
def repl_map(xs, ys, none='', take=0):
    """Build a replacement callback for ``re.sub`` from two parallel sequences.

    The text captured by group ``take`` is looked up among ``xs`` and replaced
    by the element of ``ys`` found at the same position.

    Args:
        xs: Iterable of strings that may be matched.
        ys: Iterable of replacements, paired with ``xs`` by position
            (pairing stops at the shorter of the two).
        none: Replacement returned when the captured text is not in ``xs``.
        take: Group of the match object to look up; ``0`` is the whole match.

    Returns:
        A function that takes a match object and returns the replacement
        string for the whole match.
    """
    mapping = dict(zip(xs, ys))

    def f(matchobj):
        item = matchobj.group(take)
        return mapping.get(item, none)

    return f
def replace_pipe(script, replacements):
    """Apply a sequence of regex substitutions to ``script``, in order.

    Args:
        script: Text to transform.
        replacements: Iterable of ``(pattern, repl)`` pairs, each handed to
            ``re.sub``; ``repl`` may be a template string or a callable.
            Every substitution works on the output of the previous one, so
            the order of the pairs matters.

    Returns:
        The text after all substitutions have been applied.
    """
    for regex, repl in replacements:
        script = re.sub(regex, repl, script)
    return script
# Letters that can carry one of the diacritics listed in MARK_BO.
BO = 'aeiounm'

# Digits written in place of the six diacritics (presumably tone numbers);
# NUM[i] corresponds to position i of every list in MARK_BO.
NUM = '235789'

# For each letter of BO, its six diacritic-marked spellings, ordered like NUM.
# Some entries are a base letter followed by a combining character.
MARK_BO = {
    'a': ['á', 'à', 'â', 'ā', 'a̍', 'a̋'],
    'e': ['é', 'è', 'ê', 'ē', 'e̍', 'e̋'],
    'i': ['í', 'ì', 'î', 'ī', 'i̍', 'i̋'],
    'o': ['ó', 'ò', 'ô', 'ō', 'o̍', 'ő'],
    'u': ['ú', 'ù', 'û', 'ū', 'u̍', 'ű'],
    'n': ['ń', 'ǹ', 'n̂', 'n̄', 'n̍', 'n̋'],
    'm': ['ḿ', 'm̀', 'm̂', 'm̄', 'm̍', 'm̋'],
}

# Digit-suffixed counterpart of MARK_BO: NUM_BO['a'] == ['a2', 'a3', 'a5', ...].
NUM_BO = {bo: [f'{bo}{n}' for n in NUM] for bo in BO}
def _mark_rule(bo):
    """Return the ``(pattern, repl)`` rule turning marked ``bo`` into ``bo`` + digit.

    Each marked spelling from MARK_BO is accepted as written in the table and
    also in its NFC (composed) and NFD (decomposed) Unicode normal forms, so
    the rule matches however the input encodes its diacritics.
    """
    variants = {}
    for marked, numbered in zip(MARK_BO[bo], NUM_BO[bo]):
        for form in (marked,
                     unicodedata.normalize('NFC', marked),
                     unicodedata.normalize('NFD', marked)):
            variants[form] = numbered
    # No variant is a prefix of another (the bare letter is never listed),
    # so the order of the alternatives does not affect matching.
    return '|'.join(variants), repl_map(variants.keys(), variants.values())


# Ordered rules used by mark_to_num; each one sees the previous one's output.
mark_to_num_replacements = [
    # 1. Marked letter -> plain letter followed by its digit ('á' -> 'a2').
    *(_mark_rule(bo) for bo in BO),
    # 2. Move that digit to the next word boundary, never crossing a hyphen
    #    or whitespace ('a2ng' -> 'ang2').
    (r'(\d)([^-\s]*?)\b', r'\2\1'),
    # 3. A word still ending in p/t/k/h (i.e. without a digit) gets '4'.
    (r'(?<=[ptkh])\b', '4'),
    # 4. Any other word ending in a letter gets '1'.  The look-behind accepts
    #    letters only, so punctuation directly before a word (as in '(kah')
    #    does not receive a stray digit.
    (r'(?<=[^\W\d_])\b', '1'),
]
# Ordered rewrite rules used by TL_to_Yu on lower-cased, digit-suffixed text.
# Each rule sees the output of the previous one, so the order is significant.
# Upper-case letters serve as intermediate/output symbols; TL_to_Yu lower-cases
# its input first, so they cannot be confused with input letters.
TL_to_Yu_replacements = [
    # Hyphens and basic punctuation become spaces; whitespace runs collapse.
    (r'-|,|;|\.', ' '),
    (r'\s+', ' '),
    # Detach the digit so the letter before it sits at a word boundary,
    # which the rules ending in \b below rely on.
    (r'(\d)', r' \1'),
    # Consonant respellings: 'ng' -> 'q', 'l' -> 'd', 'j' -> 'z', 'ts' -> 'c'.
    (r'ng', 'q'),
    (r'l', 'd'),
    (r'j', 'z'),
    (r'ts', 'c'),
    # p/t/k/c followed by 'h' -> the upper-case letter ('ph' -> 'P').
    (r'(p|t|k|c)h', repl_map('ptkc', 'PTKC', take=1)),
    # 'oo' is parked as 'O' while a single 'o' (unless followed by nn, m, p,
    # q or k) becomes 'ø'; the parked 'O' is then written back as 'o'.
    (r'oo', 'O'),
    (r'o(?!nn|m|p|q|k)', 'ø'),
    (r'O', 'o'),
    # 'ai' -> 'I', 'au' -> 'U'.
    (r'a(i|u)', repl_map('iu', 'IU', take=1)),
    # 'nn' after a vowel group becomes 'N' placed before that group.
    (r'([iu]?[iuaoeIU])nn', r'N\1'),
    # Word-final 'h' is dropped.
    (r'h\b', ''),
    # Word-final p/t/k and m/n/q both become B/D/G.
    (r'(p|t|k)\b', repl_map('ptk', 'BDG', take=1)),
    (r'(m|n|q)\b', repl_map('mnq', 'BDG', take=1)),
    # 'i' directly before 'G' gets an inserted 'ø'.
    (r'iG', 'iøG'),
    # m/n directly before 'G' -> 'bG'/'dG'.
    (r'(m|n)G', repl_map('mn', ['bG', 'dG'], take=1)),
    # Every remaining m/n/q -> 'bN'/'dN'/'gN'.
    (r'(m|n|q)', repl_map('mnq', ['bN', 'dN', 'gN'], take=1)),
    # Disabled rules kept from the original; their purpose is not evident
    # from this file.
    #(r'ør', 'ø'),
    #(r'ir', 'y'),
    #(r'er', 'Ø'),
    #(r'ee', 'E'),
    # Re-attach the digit to its syllable.
    (r'\s(\d)', r'\1'),
]
def mark_to_num(script):
    """Rewrite diacritic-marked words in ``script`` as digit-suffixed words.

    Applies ``mark_to_num_replacements`` in order: a marked letter is replaced
    by the plain letter plus a digit, the digit is moved to the end of its
    word, and words left without a digit receive '4' (ending in p/t/k/h) or
    '1' (otherwise).  For example 'áng' becomes 'ang2' and 'ak' becomes 'ak4'.

    Args:
        script: Text to convert.

    Returns:
        The converted text.
    """
    return replace_pipe(script, mark_to_num_replacements)
def TL_to_Yu(TL):
    """Convert the text ``TL`` using the ``TL_to_Yu_replacements`` rules.

    The input is lower-cased, its diacritics are turned into trailing digits
    by ``mark_to_num``, the rewrite rules are applied in order, and leading
    and trailing whitespace is stripped from the result.

    Args:
        TL: Text to convert; words may be separated by spaces or hyphens.

    Returns:
        The converted text, e.g. 'Li̍p-Jîn Phann' gives 'diB8 ziD5 PNa1'.
    """
    numbered = mark_to_num(TL.lower())
    return replace_pipe(numbered, TL_to_Yu_replacements).strip()
if __name__ == '__main__':
    def test(TL):
        """Print ``TL``, then its conversion, then a blank line."""
        print('{}\n{}\n'.format(TL, TL_to_Yu(TL)))

    # Demo inputs; the comment after each call is the expected converted line.
    test('Li̍p-Jîn Phann')
    # diB8 ziD5 PNa1
    test('ang áng àng ak âng āng a̍k a̋ng')
    # aG1 aG2 aG3 aG4 aG5 aG7 aG8 aG9
    test('o io oo onn ionn om op ong ok')
    # ø1 iø1 o1 No1 Nio1 oB1 oB4 oG1 oG4
    test('ka kah kann kannh kam kap kan kat kang kak')
    # ka1 ka4 kNa1 kNa4 kaB1 kaB4 kaD1 kaD4 kaG1 kaG4
    test('ka kah kann kannh ga gah nga ngah')
    # ka1 ka4 kNa1 kNa4 ga1 ga4 gNa1 gNa4
    test('m mh ng ngh png mng nng hng')
    # B1 B4 G1 G4 pG1 bG1 dG1 hG1
    test('im ip in it ing ik')
    # iB1 iB4 iD1 iD4 iøG1 iøG4
# Sign up for free to join this conversation on GitHub.
# Already have an account? Sign in to comment.