Skip to content

Instantly share code, notes, and snippets.

@paiv
Last active May 25, 2025 11:30
Show Gist options
  • Select an option

  • Save paiv/df2f38ed86a103471a49cfa8064d0d2e to your computer and use it in GitHub Desktop.

Select an option

Save paiv/df2f38ed86a103471a49cfa8064d0d2e to your computer and use it in GitHub Desktop.
Monkeytype Ukrainian to Latin transliteration
#!/usr/bin/env python
import json
import logging
import re
import uklatn
from pathlib import Path
# Module-level logger, named after this script's file name (not __name__).
logger = logging.getLogger(Path(__file__).name)
def uk2latn(s, table=None):
    """Transliterate ``s`` via ``uklatn.encode`` and fix up a leading 'ĵ'.

    Args:
        s: Text to transliterate.
        table: Transliteration table forwarded to ``uklatn.encode``.

    Returns:
        The encoded string; if it begins with 'ĵ', that prefix is replaced
        by a plain 'j'. Occurrences elsewhere in the string are untouched.
    """
    encoded = uklatn.encode(s, table=table)
    marker = 'ĵ'
    # Only the very start of the string is rewritten.
    if encoded.startswith(marker):
        encoded = 'j' + encoded[len(marker):]
    return encoded
def process(obj, table=None):
    """Rename a word-list object and transliterate its words in place.

    Args:
        obj: Mapping with a ``'name'`` string and a ``'words'`` iterable of
            strings. It is mutated and also returned.
        table: Transliteration table passed through to ``uk2latn``.

    Returns:
        The same ``obj``, with ``'ukrainian'`` in its name replaced by
        ``'ukrainian_latynka'`` and ``'words'`` replaced by the list of
        transliterated words, duplicates removed, first-seen order kept.
    """
    obj['name'] = obj['name'].replace('ukrainian', 'ukrainian_latynka')
    # dict keys keep insertion order, giving an order-preserving dedup.
    transliterated = (uk2latn(word, table) for word in obj['words'])
    obj['words'] = list(dict.fromkeys(transliterated))
    return obj
def main(args):
    """Transliterate each JSON word-list file named in ``args.file``.

    For every input file the JSON is loaded, passed through ``process``,
    and written next to the input with ``'ukrainian'`` in the file stem
    replaced by ``'ukrainian_latynka'``.

    Args:
        args: Parsed command-line namespace providing ``file`` (iterable of
            paths) and ``table`` (name of a table attribute on ``uklatn``).

    Raises:
        AttributeError: If ``uklatn`` has no attribute named
            ``args.table.upper()``.
        OSError: If a file cannot be read or written.
        json.JSONDecodeError: If an input file is not valid JSON.
    """
    table = getattr(uklatn, args.table.upper())
    for fn in args.file:
        src = Path(fn)
        logger.debug('read %s', fn)
        # Read explicitly as UTF-8: the locale default encoding (e.g. cp1252
        # on Windows) would mis-decode or reject the Cyrillic input.
        with src.open(encoding='utf-8') as fp:
            obj = json.load(fp)
        res = process(obj, table=table)
        # NOTE: if the stem does not contain 'ukrainian', the output path
        # equals the input path and the file is rewritten in place.
        fo = src.with_stem(src.stem.replace('ukrainian', 'ukrainian_latynka'))
        logger.debug('write %s', fo)
        # ensure_ascii=False emits raw non-ASCII letters, so the output
        # stream must be UTF-8 as well or the write may fail.
        with fo.open('w', encoding='utf-8') as fp:
            json.dump(res, fp, indent=2, ensure_ascii=False)
            fp.write('\n')
if __name__ == '__main__':
    import argparse

    # Command-line interface: one or more JSON files plus optional switches.
    cli = argparse.ArgumentParser()
    cli.add_argument('file', nargs='+', help='json file to transliterate')
    cli.add_argument(
        '-t', '--table',
        choices=['DSTU_9112_A', 'DSTU_9112_B', 'KMU_55'],
        default='DSTU_9112_A',
        help='transliteration system (default: %(default)s)',
    )
    cli.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='verbose output',
    )
    args = cli.parse_args()

    # --verbose turns on debug logging; otherwise only warnings and above.
    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.WARNING)
    main(args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment