Created
November 9, 2020 15:49
-
-
Save flodolo/7f96a84a97d70b41f2c1537bb9d41301 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python
#
# update_other_locales.py <base_l10n_folder> <xliff_filename> [optional list of locales]
#
# For each folder (locale) available in base_l10n_folder:
#
# 1. Read existing translations and store them in a dictionary: IDs use the
#    structure file_name:string_id:source_hash. Using the hash of the source
#    string prevents keeping an existing translation if the ID doesn't
#    change but the source string does.
#
# 2. Inject available translations into the reference XLIFF file, updating
#    the target-language where available on file elements.
#
# 3. Store the updated content in existing locale files, without backup.
from copy import deepcopy | |
from glob import glob | |
from lxml import etree | |
import argparse | |
import os | |
import sys | |
# Prefix map passed to every xpath() call in this script: the 'x' prefix is
# bound to the XLIFF 1.2 namespace URI.
NS = {
    'x': 'urn:oasis:names:tc:xliff:document:1.2',
}
def indent(elem, level=0):
    """Prettify XML output by rewriting whitespace-only text and tails in place.

    Based on http://effbot.org/zone/element-lib.htm#prettyprint

    Args:
        elem: element to re-indent; its descendants are processed recursively.
        level: nesting depth of ``elem``, used to build the indentation.

    Text and tail values that contain non-whitespace content are left alone.
    Nothing is returned: the tree is modified in place.
    """
    def is_blank(value):
        # True for None, an empty string, or a whitespace-only string.
        return not value or not value.strip()

    pad = '\n' + level * ' '

    # Leaf element: only its tail may need adjusting, and never for the root.
    if not len(elem):
        if level and is_blank(elem.tail):
            elem.tail = pad
        return

    if is_blank(elem.text):
        elem.text = pad + ' '
    if is_blank(elem.tail):
        elem.tail = pad

    for last_child in elem:
        indent(last_child, level + 1)

    # The last child is followed by the parent's closing tag, so its tail
    # goes back to the parent's indentation level.
    if is_blank(last_child.tail):
        last_child.tail = pad
def main():
    """Update localized XLIFF files from the reference locale's file.

    Command-line arguments (parsed with argparse):
        base_folder: path to the folder containing one subfolder per locale.
        xliff_filename: name of the XLIFF file to process in each subfolder.
        locales: optional list of locale folders to process. When omitted,
            every subfolder of base_folder containing xliff_filename is
            processed, except the excluded locales.

    For each selected locale, the existing translations are read from the
    locale file, injected into a copy of the reference tree, and the result
    overwrites the locale file (no backup is made).

    Exits with status 1 if the reference file is missing or can't be parsed.
    A locale file that can't be parsed is reported and skipped.
    """
    # Base parameters, there should be no need to change these unless
    # there are more locales to exclude.
    reference_locale = 'en'
    excluded_locales = [reference_locale]

    # Using locale folder as locale code for the target-language attribute.
    # This can be used to map a locale code to a different one.
    # Structure: "locale folder" -> "locale code"
    locale_mapping = {}

    parser = argparse.ArgumentParser()
    parser.add_argument('base_folder', help='Path to folder including subfolders for all locales')
    parser.add_argument('xliff_filename', help='Name of the XLIFF file to process')
    parser.add_argument('locales', nargs='*', help='Locales to process')
    args = parser.parse_args()

    # Get a list of files to update (absolute paths)
    base_folder = os.path.realpath(args.base_folder)
    reference_file_path = os.path.join(base_folder, reference_locale, args.xliff_filename)
    if not os.path.isfile(reference_file_path):
        print(f"Requested reference file doesn't exist: {reference_file_path}")
        sys.exit(1)

    file_paths = []
    if not args.locales:
        # Build the pattern with os.path.join so the separators match the
        # ones used to extract the locale folder name below.
        pattern = os.path.join(base_folder, '*', args.xliff_filename)
        for xliff_path in glob(pattern):
            locale_folder = os.path.basename(os.path.dirname(xliff_path))
            if locale_folder not in excluded_locales:
                file_paths.append(xliff_path)
    else:
        for locale in args.locales:
            if locale in excluded_locales:
                print(f'Requested locale is in the list of excluded locales: {locale}')
                continue
            # Look for the locale folder inside base_folder, not relative to
            # the current working directory.
            if os.path.isdir(os.path.join(base_folder, locale)):
                file_paths.append(os.path.join(base_folder, locale, args.xliff_filename))
            else:
                print(f"Requested locale doesn't exist: {locale}")

    if not file_paths:
        print('No locales updated.')
    else:
        file_paths.sort()

    # Read reference XML file
    try:
        reference_tree = etree.parse(reference_file_path)
        reference_root = reference_tree.getroot()
        # Remove all <context-group> elements
        for context_group in reference_root.xpath('//x:context-group', namespaces=NS):
            context_group.getparent().remove(context_group)
    except Exception as e:
        print(f"ERROR: Can't parse reference {reference_locale} file")
        print(e)
        sys.exit(1)

    for file_path in file_paths:
        print(f'Updating {file_path}')

        # Make a copy of the reference tree and root, so that each locale
        # starts from the unmodified reference content.
        reference_tree_copy = deepcopy(reference_tree)
        reference_root_copy = reference_tree_copy.getroot()

        # Read localized XML file
        try:
            locale_tree = etree.parse(file_path)
            locale_root = locale_tree.getroot()
        except Exception as e:
            print(f"ERROR: Can't parse {file_path}")
            print(e)
            continue

        locale_folder = os.path.basename(os.path.dirname(file_path))
        locale_code = locale_mapping.get(locale_folder, locale_folder)

        # Store existing localizations, keyed by
        # file_name:string_id:source_hash.
        translations = {}
        for trans_node in locale_root.xpath('//x:trans-unit', namespaces=NS):
            for child in trans_node.xpath('./x:target', namespaces=NS):
                file_name = trans_node.getparent().getparent().get('original')
                source_string = trans_node.xpath('./x:source', namespaces=NS)[0].text
                string_id = f"{file_name}:{trans_node.get('id')}:{hash(source_string)}"
                translations[string_id] = child.text

        # Inject available translations in the reference XML
        for trans_node in reference_root_copy.xpath('//x:trans-unit', namespaces=NS):
            file_name = trans_node.getparent().getparent().get('original')
            source_string = trans_node.xpath('./x:source', namespaces=NS)[0].text
            original_id = trans_node.get('id')
            string_id = f"{file_name}:{original_id}:{hash(source_string)}"
            translated = string_id in translations

            # Tracks whether the reference unit already had a <target>.
            updated = False
            for child in trans_node.xpath('./x:target', namespaces=NS):
                if translated:
                    child.text = translations[string_id]
                else:
                    # No translation available, remove the target
                    child.getparent().remove(child)
                updated = True

            if translated and not updated:
                # Translation is available, but reference has no target.
                # Create a target node and insert it after source.
                child = etree.Element('target')
                child.text = translations[string_id]
                trans_node.insert(1, child)

        # Update target-language where defined
        for file_node in reference_root_copy.xpath('//x:file', namespaces=NS):
            if file_node.get('target-language'):
                file_node.set('target-language', locale_code)

        # Fix indentations and serialize before touching the file on disk, so
        # that a serialization failure doesn't leave a truncated locale file.
        indent(reference_root_copy)
        xliff_content = etree.tostring(
            reference_tree_copy,
            encoding='UTF-8',
            xml_declaration=True,
            pretty_print=True,
        )

        # Replace the existing locale file with the new XML content. The
        # encoding is explicit to match the XML declaration instead of
        # depending on the platform default.
        with open(file_path, 'w', encoding='utf-8') as fp:
            fp.write(xliff_content.decode('utf-8'))
# Run the update only when this file is executed as a script.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment