Skip to content

Instantly share code, notes, and snippets.

@flodolo
Created November 9, 2020 15:49
Show Gist options
  • Save flodolo/7f96a84a97d70b41f2c1537bb9d41301 to your computer and use it in GitHub Desktop.
Save flodolo/7f96a84a97d70b41f2c1537bb9d41301 to your computer and use it in GitHub Desktop.
#! /usr/bin/env python
#
# update_other_locales.py <base_l10n_folder> <xliff_filename> [optional list of locales]
#
# For each folder (locale) available in base_l10n_folder:
#
# 1. Read existing translations, store them in a dictionary: IDs use the
# structure file_name:string_id:source_hash. Using the hash of the source
# string prevents an existing translation from being kept when the ID
# doesn't change but the source string does.
#
# 2. Inject available translations in the reference XLIFF file, updating
# the target-language where available on file elements.
#
# 3. Store the updated content in existing locale files, without backup.
from copy import deepcopy
from glob import glob
from lxml import etree
import argparse
import os
import sys
# Prefix-to-URI map passed as `namespaces=` to every xpath() call below;
# the prefix 'x' stands for the XLIFF 1.2 namespace.
NS = {'x':'urn:oasis:names:tc:xliff:document:1.2'}
def indent(elem, level=0):
    """Rewrite whitespace-only text/tail values in place to prettify XML.

    Based on http://effbot.org/zone/element-lib.htm#prettyprint

    elem  -- element whose subtree is re-indented (modified in place)
    level -- nesting depth of `elem`; 0 for the root
    """
    def is_blank(value):
        # True for None, '' and whitespace-only strings.
        return not value or not value.strip()

    padding = '\n' + ' ' * level

    # Leaf element: only its tail may need fixing, and never at the root.
    if len(elem) == 0:
        if level and is_blank(elem.tail):
            elem.tail = padding
        return

    if is_blank(elem.text):
        elem.text = padding + ' '
    if is_blank(elem.tail):
        elem.tail = padding

    last_child = None
    for node in elem:
        indent(node, level + 1)
        last_child = node

    # The closing tag of `elem` follows the last child, so that child's
    # tail has to fall back to the parent's indentation level.
    if is_blank(last_child.tail):
        last_child.tail = padding
def _string_key(trans_node):
    """Return the lookup key for a <trans-unit>, or None without <source>.

    The key has the structure file_name:string_id:source_hash, so that a
    translation is dropped when the source text changes under the same ID.
    """
    source_nodes = trans_node.xpath('./x:source', namespaces=NS)
    if not source_nodes:
        return None
    # <trans-unit> -> <body> -> <file original="...">
    file_name = trans_node.getparent().getparent().get('original')
    return f"{file_name}:{trans_node.get('id')}:{hash(source_nodes[0].text)}"


def _collect_translations(locale_root):
    """Map string keys to the <target> text found in a localized tree."""
    translations = {}
    for trans_node in locale_root.xpath('//x:trans-unit', namespaces=NS):
        targets = trans_node.xpath('./x:target', namespaces=NS)
        if not targets:
            continue
        string_id = _string_key(trans_node)
        if string_id is None:
            continue
        # If a unit carries several targets, the last one wins.
        translations[string_id] = targets[-1].text
    return translations


def _inject_translations(root, translations):
    """Copy known translations into `root`, dropping stale <target> nodes."""
    xliff_ns = NS['x']
    target_tag = f'{{{xliff_ns}}}target'
    for trans_node in root.xpath('//x:trans-unit', namespaces=NS):
        string_id = _string_key(trans_node)
        translated = string_id is not None and string_id in translations
        targets = trans_node.xpath('./x:target', namespaces=NS)
        for target in targets:
            if translated:
                target.text = translations[string_id]
            else:
                # No translation available, remove the target
                trans_node.remove(target)

        if translated and not targets:
            # Translation is available, but reference has no target.
            # Create the target in the XLIFF namespace and insert it right
            # after <source>, wherever <source> sits among the children.
            source_node = trans_node.find('x:source', namespaces=NS)
            new_target = etree.Element(target_tag, nsmap={None: xliff_ns})
            new_target.text = translations[string_id]
            trans_node.insert(trans_node.index(source_node) + 1, new_target)


def main():
    """Update locale XLIFF files from the reference (en) XLIFF file.

    Command line: <base_folder> <xliff_filename> [locales ...]

    Each locale file is overwritten, without backup, by a copy of the
    reference file carrying the translations whose file, ID and source text
    still match. Exits with status 1 if the reference file is missing or
    cannot be parsed.
    """
    # Base parameters, there should be no need to change these unless
    # there are more locales to exclude.
    reference_locale = 'en'
    excluded_locales = [reference_locale]

    parser = argparse.ArgumentParser()
    parser.add_argument('base_folder', help='Path to folder including subfolders for all locales')
    parser.add_argument('xliff_filename', help='Name of the XLIFF file to process')
    parser.add_argument('locales', nargs='*', help='Locales to process')
    args = parser.parse_args()

    # Get a list of files to update (absolute paths)
    base_folder = os.path.realpath(args.base_folder)
    reference_file_path = os.path.join(base_folder, reference_locale, args.xliff_filename)
    if not os.path.isfile(reference_file_path):
        print(f"Requested reference file doesn't exist: {reference_file_path}")
        sys.exit(1)

    file_paths = []
    if not args.locales:
        pattern = os.path.join(base_folder, '*', args.xliff_filename)
        for xliff_path in glob(pattern):
            locale_folder = os.path.basename(os.path.dirname(xliff_path))
            if locale_folder not in excluded_locales:
                file_paths.append(xliff_path)
    else:
        for locale in args.locales:
            if locale in excluded_locales:
                print(f'Requested locale is in the list of excluded locales: {locale}')
                continue
            # Locale folders live in base_folder, not in the current
            # working directory.
            if os.path.isdir(os.path.join(base_folder, locale)):
                file_paths.append(os.path.join(base_folder, locale, args.xliff_filename))
            else:
                print(f"Requested locale doesn't exist: {locale}")

    if not file_paths:
        print('No locales updated.')
    file_paths.sort()

    # Read reference XML file
    try:
        reference_tree = etree.parse(reference_file_path)
    except (OSError, etree.LxmlError) as e:
        print(f"ERROR: Can't parse reference {reference_locale} file")
        print(e)
        sys.exit(1)

    # Remove all <context-group> elements
    reference_root = reference_tree.getroot()
    for context_group in reference_root.xpath('//x:context-group', namespaces=NS):
        context_group.getparent().remove(context_group)

    # Using locale folder as locale code for the target-language attribute.
    # This can be used to map a locale code to a different one.
    # Structure: "locale folder" -> "locale code"
    locale_mapping = {}

    for file_path in file_paths:
        print(f'Updating {file_path}')

        # Read localized XML file
        try:
            locale_tree = etree.parse(file_path)
        except (OSError, etree.LxmlError) as e:
            print(f"ERROR: Can't parse {file_path}")
            print(e)
            continue

        locale_folder = os.path.basename(os.path.dirname(file_path))
        locale_code = locale_mapping.get(locale_folder, locale_folder)

        # Store existing localizations
        translations = _collect_translations(locale_tree.getroot())

        # Work on a copy so the reference tree stays pristine for the
        # next locale.
        reference_tree_copy = deepcopy(reference_tree)
        reference_root_copy = reference_tree_copy.getroot()

        # Inject available translations in the reference XML
        _inject_translations(reference_root_copy, translations)

        # Update target-language where defined
        for file_node in reference_root_copy.xpath('//x:file', namespaces=NS):
            if file_node.get('target-language'):
                file_node.set('target-language', locale_code)

        # Fix indentations, then serialize before touching the file so a
        # serialization failure cannot leave a truncated locale file.
        indent(reference_root_copy)
        xliff_content = etree.tostring(
            reference_tree_copy,
            encoding='UTF-8',
            xml_declaration=True,
            pretty_print=True
        )

        # Replace the existing locale file with the new XML content.
        # tostring() already returned UTF-8 bytes matching the XML
        # declaration, so write them as-is instead of going through the
        # platform's default text encoding.
        with open(file_path, 'wb') as fp:
            fp.write(xliff_content)
# Run main() only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment