Last active
May 25, 2025 11:30
-
-
Save paiv/df2f38ed86a103471a49cfa8064d0d2e to your computer and use it in GitHub Desktop.
Monkeytype Ukrainian to Latin transliteration
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| import json | |
| import logging | |
| import re | |
| import uklatn | |
| from pathlib import Path | |
| logger = logging.getLogger(Path(__file__).name) | |
def uk2latn(s, table=None):
    """Transliterate ``s`` to Latin script using ``uklatn``.

    Args:
        s: Text to transliterate.
        table: Transliteration table forwarded to ``uklatn.encode``.

    Returns:
        The encoded text, with a ``ĵ`` at the very start of the string
        replaced by a plain ``j``.
    """
    encoded = uklatn.encode(s, table=table)
    # Only a leading 'ĵ' is rewritten; later occurrences stay as encoded.
    return re.sub(r'^ĵ', 'j', encoded)
def process(obj, table=None):
    """Convert a word-list object to its Latin-script variant, in place.

    Args:
        obj: Mapping with a ``'name'`` string and a ``'words'`` iterable.
        table: Transliteration table forwarded to ``uk2latn``.

    Returns:
        The same ``obj``, with ``'ukrainian'`` in its name replaced by
        ``'ukrainian_latynka'`` and its words transliterated.
    """
    obj['name'] = obj['name'].replace('ukrainian', 'ukrainian_latynka')
    transliterated = (uk2latn(word, table) for word in obj['words'])
    # Dict keys keep first-seen order, so duplicates produced by the
    # transliteration are dropped without reordering the list.
    obj['words'] = list(dict.fromkeys(transliterated))
    return obj
def main(args):
    """Transliterate every JSON word-list file named in ``args.file``.

    Each input file is parsed as JSON, passed through ``process`` and
    written next to the original, with ``'ukrainian'`` in the file stem
    replaced by ``'ukrainian_latynka'``.

    Args:
        args: Parsed command-line namespace providing ``table`` (name of a
            table attribute on ``uklatn``) and ``file`` (iterable of paths).
    """
    table = getattr(uklatn, args.table.upper())
    for fn in args.file:
        src = Path(fn)
        logger.debug(f'read {fn}')
        # Read as UTF-8 explicitly: the locale default encoding would
        # mis-decode Cyrillic input on non-UTF-8 systems.
        with src.open(encoding='utf-8') as fp:
            obj = json.load(fp)
        res = process(obj, table=table)
        # NOTE: when the stem does not contain 'ukrainian', the replace is a
        # no-op and the output path equals the input path (file overwritten).
        fo = src.with_stem(src.stem.replace('ukrainian', 'ukrainian_latynka'))
        logger.debug(f'write {fo}')
        # ensure_ascii=False emits raw non-ASCII characters, so the output
        # encoding must be able to represent them; force UTF-8.
        with fo.open('w', encoding='utf-8') as fp:
            json.dump(res, fp, indent=2, ensure_ascii=False)
            fp.write('\n')
if __name__ == '__main__':
    import argparse

    # Command-line interface: one or more JSON files, an optional
    # transliteration table and a verbosity switch.
    cli = argparse.ArgumentParser()
    cli.add_argument('file', nargs='+', help='json file to transliterate')
    cli.add_argument(
        '-t', '--table',
        choices=['DSTU_9112_A', 'DSTU_9112_B', 'KMU_55'],
        default='DSTU_9112_A',
        help='transliteration system (default: %(default)s)',
    )
    cli.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='verbose output',
    )
    args = cli.parse_args()

    # Debug messages are shown only when --verbose is given.
    if args.verbose:
        level = logging.DEBUG
    else:
        level = logging.WARNING
    logging.basicConfig(level=level)

    main(args)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment