Last active
August 6, 2025 03:32
-
-
Save aperture147/cd9cf1a713f158bfea1c8e6877c5dade to your computer and use it in GitHub Desktop.
A simple Python function to convert integer numbers into Vietnamese pronunciation. Use this function to preprocess text before feeding it into a TTS model. It works best for numbers less than one trillion, as I’m unsure how numbers beyond that are pronounced in Vietnamese — and honestly, such large numbers rarely come up.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import itertools
# Units-digit words used after a tens word of "hai mươi" (20) or above, where
# 1, 4 and 5 take the special readings "mốt", "tư" and "lăm". Index 0 is empty
# because a trailing zero is never pronounced.
rule_1_pronunciation_list = ['', 'mốt', 'hai', 'ba', 'tư', 'lăm', 'sáu', 'bảy', 'tám', 'chín']
# Plain digit words, used for the hundreds and tens digits and for a units
# digit that is free-standing or follows "lẻ" / "mười".
rule_2_pronunciation_list = ['không', 'một', 'hai', 'ba', 'bốn', 'năm', 'sáu', 'bảy', 'tám', 'chín']

# Decimal numbers are not supported (the original author was unsure how to
# pronounce them in Vietnamese); only integers are handled.

# Sample input left over from manual testing; kept so the module namespace
# stays unchanged.
a = 123123213


def _pronounce_group(group: int, pad: bool) -> str:
    """Return the Vietnamese reading of one three-digit group (1..999).

    Args:
        group: The value of the group, between 1 and 999.
        pad: True when more significant groups precede this one. A missing
            hundreds digit is then read as "không trăm" so that the group
            keeps its three-digit shape.

    Returns:
        The words for the group, separated by single spaces, without any
        scale word.
    """
    hundreds, remainder = divmod(group, 100)
    tens, units = divmod(remainder, 10)

    words = []
    if hundreds or pad:
        words.append(f'{rule_2_pronunciation_list[hundreds]} trăm')

    if tens == 1:
        words.append('mười')
    elif tens:
        words.append(f'{rule_2_pronunciation_list[tens]} mươi')
    elif units and words:
        # A zero tens digit between a hundreds word and a units digit is
        # read as "lẻ" (e.g. 105 -> "một trăm lẻ năm").
        words.append('lẻ')

    if units:
        if tens >= 2:
            # After "... mươi" the digits 1, 4 and 5 change form.
            words.append(rule_1_pronunciation_list[units])
        elif tens == 1 and units == 5:
            # 15 is "mười lăm", but 11 and 14 keep "một" and "bốn".
            words.append(rule_1_pronunciation_list[5])
        else:
            words.append(rule_2_pronunciation_list[units])

    return ' '.join(words)


def pronounce_number(a: int) -> str:
    """Convert an integer into its Vietnamese pronunciation.

    The number is split into three-digit groups. Each non-zero group is read
    on its own and followed by its scale word ("nghìn", "triệu", "tỷ").
    Above one tỷ the scale words repeat on top of "tỷ", so 10**12 is read
    "một nghìn tỷ".

    Args:
        a: The integer to pronounce. Negative values are prefixed with "âm".

    Returns:
        The pronunciation as a single space-separated string; "không" for 0.
    """
    if a == 0:
        return 'không'

    sign = 'âm' if a < 0 else ''
    remaining = abs(a)

    # The lowest group has no scale word; above it the three scale words
    # repeat indefinitely.
    scales = itertools.chain([''], itertools.cycle(['nghìn', 'triệu', 'tỷ']))

    chunks = []  # least significant group first
    while remaining:
        remaining, group = divmod(remaining, 1000)
        scale = next(scales)
        if group:
            # `remaining` is non-zero exactly when higher groups exist.
            reading = _pronounce_group(group, pad=remaining > 0)
            chunks.append(f'{reading} {scale}'.strip())
        elif scale == 'tỷ':
            # "tỷ" is the base unit for every scale above it, so it must be
            # spoken even when its own group is zero (10**12 -> "một nghìn tỷ").
            chunks.append(scale)

    words = ' '.join(reversed(chunks))
    return f'{sign} {words}'.strip()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment