Skip to content

Instantly share code, notes, and snippets.

@YuRen-tw
Created January 30, 2021 00:27
Show Gist options
  • Save YuRen-tw/ae11933287d57012ed4979b52cc38678 to your computer and use it in GitHub Desktop.
Save YuRen-tw/ae11933287d57012ed4979b52cc38678 to your computer and use it in GitHub Desktop.
import re
import unicodedata
def repl_map(xs, ys, none='', take=0):
    """Build a replacement callback for ``re.sub`` from two parallel sequences.

    Args:
        xs: Strings that may be captured by the pattern.
        ys: Replacement strings, paired with ``xs`` by position.
        none: Value returned when the captured text is not found in ``xs``.
        take: Group number handed to ``match.group`` to pick the lookup key
            (``0`` is the whole match).

    Returns:
        A one-argument function that accepts a match object and returns the
        replacement text for it.
    """
    table = {}
    for source, target in zip(xs, ys):
        table[source] = target

    def substitute(match):
        key = match.group(take)
        if key in table:
            return table[key]
        return none

    return substitute
def replace_pipe(script, replacements):
    """Run ``script`` through a sequence of regex substitutions, in order.

    Args:
        script: The text to transform.
        replacements: Iterable of ``(pattern, repl)`` pairs; ``repl`` is
            anything ``re.sub`` accepts (a template string or a callable).

    Returns:
        The text after every substitution has been applied; each rule sees
        the output of the rule before it.
    """
    result = script
    for pattern, replacement in replacements:
        result = re.compile(pattern).sub(replacement, result)
    return result
# Letters that have tone-marked forms listed in MARK_BO.
BO = 'aeiounm'

# Tone digits; position i here pairs with position i of each MARK_BO list.
NUM = '235789'

# For every letter in BO: its tone-marked spellings, ordered like NUM.
MARK_BO = dict(
    a=['á', 'à', 'â', 'ā', 'a̍', 'a̋'],
    e=['é', 'è', 'ê', 'ē', 'e̍', 'e̋'],
    i=['í', 'ì', 'î', 'ī', 'i̍', 'i̋'],
    o=['ó', 'ò', 'ô', 'ō', 'o̍', 'ő'],
    u=['ú', 'ù', 'û', 'ū', 'u̍', 'ű'],
    n=['ń', 'ǹ', 'n̂', 'n̄', 'n̍', 'n̋'],
    m=['ḿ', 'm̀', 'm̂', 'm̄', 'm̍', 'm̋'],
)

# For every letter in BO: the letter followed by each tone digit ('a2', 'a3', ...).
NUM_BO = {
    letter: [letter + digit for digit in NUM]
    for letter in BO
}
def _tone_mark_rule(bo):
    """Return the ``(pattern, repl)`` rule rewriting marked ``bo`` as ``bo`` + digit.

    Each entry of ``MARK_BO[bo]`` is accepted in the spelling used by the
    table and also in its NFC and NFD normalization forms, so input that
    spells a mark with a combining character (or with a precomposed one) is
    recognised either way. All spellings of a mark map to the same entry of
    ``NUM_BO[bo]``.
    """
    numbered_by_spelling = {}
    for mark, numbered in zip(MARK_BO[bo], NUM_BO[bo]):
        # The table spelling goes first; setdefault drops the normalized
        # forms whenever they are identical to a spelling already present.
        for spelling in (
            mark,
            unicodedata.normalize('NFC', mark),
            unicodedata.normalize('NFD', mark),
        ):
            numbered_by_spelling.setdefault(spelling, numbered)
    spellings = list(numbered_by_spelling)
    return '|'.join(spellings), repl_map(spellings, numbered_by_spelling.values())


# Ordered rules that turn tone diacritics into trailing tone digits.
mark_to_num_replacements = [
    # 1. Marked letter -> plain letter immediately followed by its tone digit.
    *(_tone_mark_rule(bo) for bo in BO),
    # 2. Move that digit to the end of its word (stops at '-', whitespace
    #    or the next word boundary).
    (r'(\d)([^-\s]*?)\b', r'\2\1'),
    # 3. A word that ends in p/t/k/h and so far carries no digit gets '4'.
    (r'(?<=[ptkh])\b', '4'),
    # 4. Any other word that ends in a letter gets '1'. The lookbehind must
    #    be a non-digit *word* character: excluding only digits, whitespace
    #    and '-' also matched punctuation sitting right before a word and
    #    inserted a stray '1' there (e.g. after an opening quote).
    (r'(?<=[^\W\d])\b', '1'),
]
# Stage 1 - spacing: '-', ',', ';' and '.' become spaces, whitespace runs
# collapse to one space, and every digit is pushed away from the text before
# it by a space (presumably so the later `\b` rules can fire between a
# letter and its tone digit - digits count as word characters).
_YU_SPACING = [
    (r'-|,|;|\.', ' '),
    (r'\s+', ' '),
    (r'(\d)', r' \1'),
]

# Stage 2 - consonant letters: ng -> q, l -> d, j -> z, ts -> c, and
# p/t/k/c followed by h -> the upper-case letter.
_YU_CONSONANTS = [
    (r'ng', 'q'),
    (r'l', 'd'),
    (r'j', 'z'),
    (r'ts', 'c'),
    (r'(p|t|k|c)h', repl_map('ptkc', 'PTKC', take=1)),
]

# Stage 3 - vowels: 'oo' is parked as 'O' so the single-'o' rule cannot
# touch it, then restored as 'o'; 'o' not followed by nn/m/p/q/k -> 'ø';
# ai/au -> I/U; a vowel group followed by 'nn' -> 'N' placed before the group.
_YU_VOWELS = [
    (r'oo', 'O'),
    (r'o(?!nn|m|p|q|k)', 'ø'),
    (r'O', 'o'),
    (r'a(i|u)', repl_map('iu', 'IU', take=1)),
    (r'([iu]?[iuaoeIU])nn', r'N\1'),
]

# Stage 4 - endings: word-final h is dropped, word-final p/t/k and m/n/q
# both map to B/D/G, 'iG' -> 'iøG', m/n directly before G -> b/d, and every
# m/n/q still left -> bN/dN/gN.
_YU_ENDINGS = [
    (r'h\b', ''),
    (r'(p|t|k)\b', repl_map('ptk', 'BDG', take=1)),
    (r'(m|n|q)\b', repl_map('mnq', 'BDG', take=1)),
    (r'iG', 'iøG'),
    (r'(m|n)G', repl_map('mn', ['bG', 'dG'], take=1)),
    (r'(m|n|q)', repl_map('mnq', ['bN', 'dN', 'gN'], take=1)),
    # Rules the author left disabled:
    # (r'ør', 'ø'),
    # (r'ir', 'y'),
    # (r'er', 'Ø'),
    # (r'ee', 'E'),
]

# Stage 5 - rejoin: glue each digit back onto the text before it.
_YU_REJOIN = [
    (r'\s(\d)', r'\1'),
]

# The complete ordered rule list consumed by TL_to_Yu.
TL_to_Yu_replacements = [
    *_YU_SPACING,
    *_YU_CONSONANTS,
    *_YU_VOWELS,
    *_YU_ENDINGS,
    *_YU_REJOIN,
]
def mark_to_num(script):
    """Return ``script`` with the ``mark_to_num_replacements`` rules applied.

    The rules are run in order through ``replace_pipe``; the input is used
    as given (no case folding happens here).
    """
    numbered = replace_pipe(script, mark_to_num_replacements)
    return numbered
def TL_to_Yu(TL):
    """Convert ``TL`` text to the spelling produced by ``TL_to_Yu_replacements``.

    Steps: lower-case the input, rewrite tone marks as digits with
    ``mark_to_num``, apply the ``TL_to_Yu_replacements`` rules, then strip
    leading and trailing whitespace.

    Args:
        TL: Source text (presumably Tâi-lô romanization, going by the
            name - confirm).

    Returns:
        The converted string.
    """
    numbered = mark_to_num(TL.lower())
    return replace_pipe(numbered, TL_to_Yu_replacements).strip()
if __name__ == '__main__':
    # Demo inputs. The trailing comment on each line is the output the
    # author recorded for it; nothing below checks it automatically.
    SAMPLES = (
        'Li̍p-Jîn Phann',
        # diB8 ziD5 PNa1
        'ang áng àng ak âng āng a̍k a̋ng',
        # aG1 aG2 aG3 aG4 aG5 aG7 aG8 aG9
        'o io oo onn ionn om op ong ok',
        # ø1 iø1 o1 No1 Nio1 oB1 oB4 oG1 oG4
        'ka kah kann kannh kam kap kan kat kang kak',
        # ka1 ka4 kNa1 kNa4 kaB1 kaB4 kaD1 kaD4 kaG1 kaG4
        'ka kah kann kannh ga gah nga ngah',
        # ka1 ka4 kNa1 kNa4 ga1 ga4 gNa1 gNa4
        'm mh ng ngh png mng nng hng',
        # B1 B4 G1 G4 pG1 bG1 dG1 hG1
        'im ip in it ing ik',
        # iB1 iB4 iD1 iD4 iøG1 iøG4
    )

    def test(TL):
        """Print the input, its conversion, and a blank separator line."""
        converted = TL_to_Yu(TL)
        print('{}\n{}\n'.format(TL, converted))

    for sample in SAMPLES:
        test(sample)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment