Last active
October 23, 2020 02:58
-
-
Save minorua/8515361 to your computer and use it in GitHub Desktop.
[QGIS][translation] A translation check script for Japanese (ja)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
rem Run check_translation.py (located next to this batch file) on the .ts file
rem given as the first argument and write the report to check.diff.
rem %~1 strips any surrounding quotes; the paths are re-quoted on use so that
rem directories and file names containing spaces are passed as one argument.
SET "TS_FILE=%~1"
C:\Python38\python "%~dp0check_translation.py" -ct -cand "%TS_FILE%" > check.diff
pause
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
# purpose : check translations | |
# begin : 2013-10-28 | |
import sys, os | |
import re | |
from xml.etree import ElementTree | |
# Command-line switches; all start disabled and are turned on by the
# "__main__" section (-all, -ct, -total, -cand respectively).
list_all = check_translations = only_total = list_candidates = False

# Column separator used for every line of the printed report.
sep = "\t"
fs1 = re.compile(r"%[0-9a-z]")  # "%" followed by a digit or lowercase letter (e.g. %1, %n)
fs2 = re.compile(r"{.*?}")  # brace-delimited placeholder (e.g. {0}, {name})
acc = re.compile(r"&[A-Za-z]")  # accelerator
asc = re.compile(r"^[\x20-\x7E]+$")  # string made only of printable ASCII characters
tags = re.compile(r"[a-zA-Z0-9]+,[a-zA-Z0-9]+,.+")  # algorithm tags

# Full-width characters are written as explicit escapes so that the patterns
# survive editors/tools that normalize them to their ASCII look-alikes.
_FULLWIDTH_SPACE = "\u3000"  # IDEOGRAPHIC SPACE
# A single full-width Latin letter (U+FF21-U+FF3A, U+FF41-U+FF5A) that follows
# a kanji/kana character and is followed by a hiragana character.
_fullwidth_alpha = re.compile("[一-龠ぁ-んァ-ヴ][\uFF21-\uFF3A\uFF41-\uFF5A][ぁ-ん]")


def checkTranslation(s, t):
    """Check translation *t* against its source string *s*.

    The checks are applied in a fixed order and the first one that fails
    decides the result.

    Args:
        s: source text. ``None`` is treated as an empty string.
        t: translated text, or ``None`` when there is no translation.

    Returns:
        A short description of the first problem found, or ``""`` when the
        translation passes every check.
    """
    if s is None:
        s = ""

    if t is None:
        return "None"

    if t.strip() == "":
        return "empty translation"

    # Trailing dots of the source must be kept as three ASCII dots.
    if s[-3:] == "..." and t[-3:] != "...":
        return "missing ... (3 dots)"

    # endswith() is used instead of s[-1] so an empty source does not raise.
    if s.endswith("…") and t[-3:] != "...":
        return "missing ... (must be 3 dots)"

    if "…" in t:
        return "ellipsis used"

    if "...." not in s and "……" not in s and "...." in t:
        return ".... (4 dots) used"

    # Placeholders may be reordered by the translator but not added or dropped.
    if sorted(fs1.findall(s)) != sorted(fs1.findall(t)):
        return "format specifiers unmatched (%)"

    if sorted(fs2.findall(s)) != sorted(fs2.findall(t)):
        return "format specifiers unmatched ({})"

    # Only the first accelerator of the source is looked for (case-insensitive).
    accelerator = acc.findall(s)
    if accelerator and accelerator[0].upper() not in t.upper():
        return "no accelerator"

    # Disabled check: full-width digits (U+FF10-U+FF19) -> "multi-byte number used"

    if _FULLWIDTH_SPACE in t:
        return "multi-byte space used"

    if _fullwidth_alpha.search(t):
        return "single multi-byte alphabet used"

    # An ASCII-only "translation" is accepted only when it equals the source,
    # after mapping the ellipsis to three dots and "/en/" to "/ja/".
    if asc.search(t) and s.replace("…", "...").replace("/en/", "/ja/") != t:
        return "translation with only ASCII chars, but different from source"

    # Comma-separated tag list without spaces: every source tag must be kept.
    if tags.search(s) and " " not in s:
        if not set(s.split(",")).issubset(t.split(",")):
            return "translation doesn't contain all tags of source"

    return ""
def countTranslations(filename):
    """Print per-context translation statistics for a Qt .ts file.

    For every ``<context>`` element the number of messages, unfinished
    translations and empty translations is counted. Depending on the
    module-level flags ``only_total``, ``list_all`` and ``check_translations``
    the per-context rows, the totals and the warnings produced by
    ``checkTranslation`` are printed to stdout, separated by ``sep``.

    Args:
        filename: path of the .ts (XML) file to parse.

    Returns:
        Always 0.
    """
    tree = ElementTree.parse(filename)
    ts = tree.getroot()

    if not only_total:
        # print column names
        print(sep.join(["context", "messages", "unfinished", "empty", "filename"]))

    warnings = []
    total = [0] * 3
    # Only <context> children are visited; other root children have no <name>.
    for context in ts.findall("context"):
        name = context.findtext("name") or ""
        messages = context.findall("message")
        unfinished = len(context.findall(".//translation[@type='unfinished']"))
        plugin_name = ""
        source_path = ""

        # count empty translations and check translations
        empty = 0
        for message in messages:
            # Keep looking at locations until one under /python/plugins/ is found.
            if not plugin_name:
                location = message.find("location")
                if location is not None:
                    location_path = location.get("filename")
                    if location_path:
                        source_path = location_path
                        if "/python/plugins/" in source_path:
                            plugin_name = source_path.replace("../python/plugins/", "").replace("/python-i18n.cpp", "")

            translation = message.find("translation")
            if translation is None:
                # A message without a <translation> element has nothing translated.
                empty += 1
                continue

            if translation.get("type") in ("vanished", "obsolete"):
                continue

            source = message.find("source")
            source_text = (source.text if source is not None else None) or ""

            # For numerus messages the first <numerusform> holds the text.
            numerusform = translation.find("numerusform")
            translation_text = translation.text if numerusform is None else numerusform.text
            if translation_text is None:
                empty += 1
                continue

            if check_translations:
                warning = checkTranslation(source_text, translation_text)
                if warning:
                    # Report the text that was actually checked: translation.text
                    # is None/whitespace when <numerusform> children are present.
                    warnings.append(sep.join([warning, name, escape(source_text[:50]), escape(translation_text[:50])]))

        # Contexts for which no plugin path was found are marked with "** ".
        if not plugin_name:
            name = "** " + name

        nums = [len(messages), unfinished, empty]
        # Build a real list each time; map() is lazy in Python 3 and would nest
        # one iterator per context.
        total = [a + b for a, b in zip(total, nums)]
        if not only_total and (list_all or unfinished > 0 or empty > 0):
            print(sep.join([name] + [str(n) for n in nums] + [source_path]))

    # print total
    if only_total:
        print(sep.join(str(n) for n in total))
    else:
        print(sep.join(["Total"] + [str(n) for n in total]))

    # print warnings
    if warnings:
        print("")
        print("Warnings")
        print(sep.join(["warning", "classname", "source", "translation"]))
        print("\n".join(warnings))

    return 0
#EXPERIMENTAL
def listCandidates(filename):
    """Print candidate translations for unfinished messages of a .ts file.

    A candidate is the (non-empty) translation of any message in the file
    that has the same source text. When several messages share a source text
    the last non-empty translation in document order is used.

    Args:
        filename: path of the .ts (XML) file to parse.

    Returns:
        Always 0.
    """
    tree = ElementTree.parse(filename)
    ts = tree.getroot()
    print("\nCandidates")

    messages = ts.findall(".//message")

    # Map source text -> translation text. Empty translations are skipped so
    # that an untranslated message cannot erase a candidate found elsewhere.
    dic = {}
    for message in messages:
        source = message.find("source")
        translation = message.find("translation")
        if source is None or translation is None:
            continue
        numerusform = translation.find("numerusform")
        text = translation.text if numerusform is None else numerusform.text
        if text and text.strip():
            dic[source.text] = text

    for message in messages:
        source = message.find("source")
        translation = message.find("translation")
        if source is None or translation is None:
            continue
        if translation.get("type") == "unfinished":
            cand = dic.get(source.text)
            if cand:
                print(source.text, cand)

    return 0
def escape(string):
    """Return *string* with tab and newline characters shown as \\t and \\n.

    Keeps a value on a single line and in a single column of the
    tab-separated report.
    """
    for raw, visible in (("\t", "\\t"), ("\n", "\\n")):
        string = string.replace(raw, visible)
    return string
if __name__ == "__main__":
    # Usage: <script> [-all] [-ct] [-total] [-cand] <ts_file>
    # Switches set the module-level flags read by countTranslations();
    # unrecognized arguments are ignored. The .ts file is the last argument.
    known_flags = ("-all", "-ct", "-total", "-cand")
    for arg in sys.argv[1:]:
        if arg == "-all":
            list_all = True
        elif arg == "-ct":
            check_translations = True
        elif arg == "-total":
            only_total = True
        elif arg == "-cand":
            list_candidates = True

    # Without a file argument sys.argv[-1] would be this script (or a switch),
    # which would then be parsed as XML; stop with a usage message instead.
    if len(sys.argv) < 2 or sys.argv[-1] in known_flags:
        sys.stderr.write("usage: %s [-all] [-ct] [-total] [-cand] <ts_file>\n" % os.path.basename(sys.argv[0]))
        sys.exit(1)

    countTranslations(sys.argv[-1])

    #EXPERIMENTAL:
    if list_candidates:
        listCandidates(sys.argv[-1])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment