minorua · October 23, 2020 02:58
diff --git a/check_translation.bat b/check_translation.bat
 REM Runs check_translation.py (expected next to this batch file) with the
 REM -ct and -cand options on the file given as the first argument and
 REM redirects the output to check.diff in the current directory.
 IF "%~1"=="" (
   ECHO Usage: check_translation.bat TS_FILE
   pause
   EXIT /B 1
 )

 REM %~1 removes surrounding quotes so the value can be re-quoted below.
 SET "TS_FILE=%~1"

 REM Both paths are quoted so that directories containing spaces work.
 C:\Python38\python "%~dp0check_translation.py" -ct -cand "%TS_FILE%" > check.diff

 pause
diff --git a/check_translation.py b/check_translation.py
 # -*- coding: utf-8 -*-
 # purpose : check translations
 # begin   : 2013-10-28

 import sys, os
 import re
 from xml.etree import ElementTree

 # Options; each is switched on by a command-line flag in the __main__ block.
 # -all: also print contexts that have no unfinished or empty translations
 list_all = False
 # -ct: run checkTranslation() on every non-empty translation
 check_translations = False
 # -total: print only the totals line
 only_total = False
 # -cand: run listCandidates() after the statistics
 list_candidates = False

 # column separator of every printed row
 sep = "\t"
 # "%" followed by one digit or lowercase letter
 fs1 = re.compile("%[0-9a-z]")
 # shortest "{...}" span
 fs2 = re.compile("{.*?}")
 acc = re.compile("&[A-Za-z]")   # accelerator: "&" followed by an ASCII letter
 asc = re.compile(r"^[\x20-\x7E]+$")   # whole string is printable ASCII (0x20-0x7E)
 tags = re.compile("[a-zA-Z0-9]+,[a-zA-Z0-9]+,.+")   # algorithm tags: two alphanumeric words, each followed by a comma, then more text

 def checkTranslation(s, t):
   """Check a translation against its source text.

   The checks run in a fixed order and the first one that fails decides the
   result.

   Args:
     s: source text. None is treated as an empty string.
     t: translated text, or None.

   Returns:
     "None" when t is None, otherwise a short description of the first
     problem found, or "" when every check passes.
   """
   if t is None:
     return "None"

   # Normalize a missing source so the string checks below cannot fail on None.
   if s is None:
     s = ""

   if t.strip() == "":
     return "empty translation"

   if s.endswith("...") and not t.endswith("..."):
     return "missing ... (3 dots)"

   # endswith() instead of s[-1]: indexing raises IndexError on an empty source.
   if s.endswith(u"…") and not t.endswith("..."):
     return "missing ... (must be 3 dots)"

   if u"…" in t:
     return "ellipsis used"

   if "...." not in s and u"……" not in s and "...." in t:
     return ".... (4 dots) used"

   # Order is ignored: the specifiers are compared as sorted lists.
   if sorted(fs1.findall(s)) != sorted(fs1.findall(t)):
     return "format specifiers unmatched (%)"

   if sorted(fs2.findall(s)) != sorted(fs2.findall(t)):
     return "format specifiers unmatched ({})"

   # Only the first accelerator of the source is looked for, ignoring case.
   accelerator = acc.findall(s)
   if accelerator and accelerator[0].upper() not in t.upper():
     return "no accelerator"

   #if re.search(u"[０-９]", t):
   #  return "multi-byte number used"

   if u"　" in t:
     return "multi-byte space used"

   if re.search(u"[一-龠ぁ-んァ-ヴ][Ａ-Ｚａ-ｚ][ぁ-ん]", t):
     return "single multi-byte alphabet used"

   # An all-ASCII translation is accepted only when it equals the source after
   # the ellipsis and "/en/" -> "/ja/" substitutions.
   if asc.search(t) and s.replace(u"…", "...").replace("/en/", "/ja/") != t:
     return "translation with only ASCII chars, but different from source"

   # Source is a comma-separated list without spaces: every item must reappear.
   if tags.search(s) and " " not in s:
     if not set(s.split(",")).issubset(t.split(",")):
       return "translation doesn't contain all tags of source"

   return ""

 def countTranslations(filename):
   """Print message statistics for each context of a translation file.

   For every child element of the XML root (a context) this counts its
   messages, its translations marked type="unfinished" and its translations
   without text. Messages whose translation type is "vanished" or "obsolete"
   are not inspected. The output is controlled by the module-level options
   only_total, list_all and check_translations; columns are joined with sep.

   Args:
     filename: path of the XML translation file to parse.

   Returns:
     Always 0.
   """
   tree = ElementTree.parse(filename)
   ts = tree.getroot()

   if not only_total:
     # print column names
     print(sep.join(["context", "messages", "unfinished", "empty", "filename"]))

   warnings = []
   total = [0] * 3
   for context in ts:
     name = context.find("name").text
     messages = context.findall("message")
     unfinished = len(context.findall(".//translation[@type='unfinished']"))
     plugin_name = ""
     source_path = ""

     # count empty translations and check translations
     empty = 0
     for message in messages:
       # Look at locations until one under /python/plugins/ has been found.
       if not plugin_name:
         location = message.find("location")
         if location is not None:
           source_path = location.get("filename")
           if "/python/plugins/" in source_path:
             plugin_name = source_path.replace("../python/plugins/", "").replace("/python-i18n.cpp", "")

       translation = message.find("translation")
       if translation.get("type") in ("vanished", "obsolete"):
         continue
       source = message.find("source")
       numerusform = translation.find("numerusform")
       translation_text = translation.text if numerusform is None else numerusform.text

       if translation_text is None:
         empty += 1
         continue

       if check_translations:
         warning = checkTranslation(source.text, translation_text)
         if warning:
           # Report the text that was actually checked. For numerus-form
           # messages translation.text is not that text and may be None.
           source_text = source.text or ""
           warnings.append(sep.join([warning, name, escape(source_text[:50]), escape(translation_text[:50])]))

     # Contexts without a /python/plugins/ location get a "** " prefix.
     if not plugin_name:
       name = "** " + name
       #name += " (" + plugin_name + ")"

     nums = [len(messages), unfinished, empty]
     # A real list: in Python 3 map() is lazy, so re-wrapping the total for
     # every context would build a chain of nested iterators.
     total = [x + y for x, y in zip(total, nums)]
     if not only_total and (list_all or unfinished > 0 or empty > 0):
       print(sep.join([name] + list(map(str, nums)) + [source_path]))

   # print total
   if only_total:
     print(sep.join(map(str, total)))
   else:
     print(sep.join(["Total"] + list(map(str, total))))

     # print warnings
     if len(warnings) > 0:
       print("")
       print("Warnings")
       print(sep.join(["warning", "classname", "source", "translation"]))
       print("\n".join(warnings))
   return 0

 #EXPERIMENTAL
 def listCandidates(filename):
   """Print translation candidates for unfinished messages (experimental).

   A candidate is a non-blank translation found on any message in the file
   that has the same source text as the unfinished message. When several
   messages share a source text, the last non-blank translation is used.

   Args:
     filename: path of the XML translation file to parse.

   Returns:
     Always 0.
   """
   ts = ElementTree.parse(filename).getroot()

   print("\nCandidates")

   messages = ts.findall(".//message")

   # source text -> translation text
   candidates = {}
   for message in messages:
     translation = message.find("translation")
     numerusform = translation.find("numerusform")
     text = translation.text if numerusform is None else numerusform.text
     # Skip blank translations so that an untranslated duplicate cannot
     # overwrite a candidate recorded earlier for the same source text.
     if text and text.strip():
       candidates[message.find("source").text] = text

   for message in messages:
     if message.find("translation").get("type") == "unfinished":
       source_text = message.find("source").text
       cand = candidates.get(source_text)
       if cand:
         print(source_text, cand)

   return 0

 def escape(string):
   """Return string with each tab shown as \\t and each newline as \\n."""
   tabs_visible = string.replace("\t", "\\t")
   return tabs_visible.replace("\n", "\\n")

 if __name__ == "__main__":
   # The last argument is the translation file. Without one, sys.argv[-1]
   # would be this script or a flag and would be handed to the XML parser,
   # so stop with a usage message instead.
   known_flags = ("-all", "-ct", "-total", "-cand")
   if len(sys.argv) < 2 or sys.argv[-1] in known_flags:
     sys.exit("usage: check_translation.py [-all] [-ct] [-total] [-cand] TS_FILE")

   # Each flag switches on the matching module-level option; any other
   # argument is ignored here.
   for arg in sys.argv[1:]:
     if arg == "-all":
       list_all = True
     elif arg == "-ct":
       check_translations = True
     elif arg == "-total":
       only_total = True
     elif arg == "-cand":
       list_candidates = True

   countTranslations(sys.argv[-1])

   #EXPERIMENTAL:
   if list_candidates:
     listCandidates(sys.argv[-1])
	REM Runs check_translation.py (expected next to this batch file) with the
	REM -ct and -cand options on the file given as the first argument and
	REM redirects the output to check.diff in the current directory.
	IF "%~1"=="" (
	  ECHO Usage: check_translation.bat TS_FILE
	  pause
	  EXIT /B 1
	)

	REM %~1 removes surrounding quotes so the value can be re-quoted below.
	SET "TS_FILE=%~1"

	REM Both paths are quoted so that directories containing spaces work.
	C:\Python38\python "%~dp0check_translation.py" -ct -cand "%TS_FILE%" > check.diff

	pause
	# -*- coding: utf-8 -*-
	# purpose : check translations
	# begin : 2013-10-28

	import sys, os
	import re
	from xml.etree import ElementTree

	# Options; each is switched on by a command-line flag in the __main__ block.
	# -all: also print contexts that have no unfinished or empty translations
	list_all = False
	# -ct: run checkTranslation() on every non-empty translation
	check_translations = False
	# -total: print only the totals line
	only_total = False
	# -cand: run listCandidates() after the statistics
	list_candidates = False

	# column separator of every printed row
	sep = "\t"
	# "%" followed by one digit or lowercase letter
	fs1 = re.compile("%[0-9a-z]")
	# shortest "{...}" span
	fs2 = re.compile("{.*?}")
	acc = re.compile("&[A-Za-z]") # accelerator: "&" followed by an ASCII letter
	asc = re.compile(r"^[\x20-\x7E]+$") # whole string is printable ASCII (0x20-0x7E)
	tags = re.compile("[a-zA-Z0-9]+,[a-zA-Z0-9]+,.+") # algorithm tags: two alphanumeric words, each followed by a comma, then more text

	def checkTranslation(s, t):
	  """Check a translation against its source text.

	  The checks run in a fixed order and the first one that fails decides the
	  result.

	  Args:
	    s: source text. None is treated as an empty string.
	    t: translated text, or None.

	  Returns:
	    "None" when t is None, otherwise a short description of the first
	    problem found, or "" when every check passes.
	  """
	  if t is None:
	    return "None"

	  # Normalize a missing source so the string checks below cannot fail on None.
	  if s is None:
	    s = ""

	  if t.strip() == "":
	    return "empty translation"

	  if s.endswith("...") and not t.endswith("..."):
	    return "missing ... (3 dots)"

	  # endswith() instead of s[-1]: indexing raises IndexError on an empty source.
	  if s.endswith(u"…") and not t.endswith("..."):
	    return "missing ... (must be 3 dots)"

	  if u"…" in t:
	    return "ellipsis used"

	  if "...." not in s and u"……" not in s and "...." in t:
	    return ".... (4 dots) used"

	  # Order is ignored: the specifiers are compared as sorted lists.
	  if sorted(fs1.findall(s)) != sorted(fs1.findall(t)):
	    return "format specifiers unmatched (%)"

	  if sorted(fs2.findall(s)) != sorted(fs2.findall(t)):
	    return "format specifiers unmatched ({})"

	  # Only the first accelerator of the source is looked for, ignoring case.
	  accelerator = acc.findall(s)
	  if accelerator and accelerator[0].upper() not in t.upper():
	    return "no accelerator"

	  #if re.search(u"[０-９]", t):
	  #  return "multi-byte number used"

	  if u"　" in t:
	    return "multi-byte space used"

	  if re.search(u"[一-龠ぁ-んァ-ヴ][Ａ-Ｚａ-ｚ][ぁ-ん]", t):
	    return "single multi-byte alphabet used"

	  # An all-ASCII translation is accepted only when it equals the source after
	  # the ellipsis and "/en/" -> "/ja/" substitutions.
	  if asc.search(t) and s.replace(u"…", "...").replace("/en/", "/ja/") != t:
	    return "translation with only ASCII chars, but different from source"

	  # Source is a comma-separated list without spaces: every item must reappear.
	  if tags.search(s) and " " not in s:
	    if not set(s.split(",")).issubset(t.split(",")):
	      return "translation doesn't contain all tags of source"

	  return ""

	def countTranslations(filename):
	  """Print message statistics for each context of a translation file.

	  For every child element of the XML root (a context) this counts its
	  messages, its translations marked type="unfinished" and its translations
	  without text. Messages whose translation type is "vanished" or "obsolete"
	  are not inspected. The output is controlled by the module-level options
	  only_total, list_all and check_translations; columns are joined with sep.

	  Args:
	    filename: path of the XML translation file to parse.

	  Returns:
	    Always 0.
	  """
	  tree = ElementTree.parse(filename)
	  ts = tree.getroot()

	  if not only_total:
	    # print column names
	    print(sep.join(["context", "messages", "unfinished", "empty", "filename"]))

	  warnings = []
	  total = [0] * 3
	  for context in ts:
	    name = context.find("name").text
	    messages = context.findall("message")
	    unfinished = len(context.findall(".//translation[@type='unfinished']"))
	    plugin_name = ""
	    source_path = ""

	    # count empty translations and check translations
	    empty = 0
	    for message in messages:
	      # Look at locations until one under /python/plugins/ has been found.
	      if not plugin_name:
	        location = message.find("location")
	        if location is not None:
	          source_path = location.get("filename")
	          if "/python/plugins/" in source_path:
	            plugin_name = source_path.replace("../python/plugins/", "").replace("/python-i18n.cpp", "")

	      translation = message.find("translation")
	      if translation.get("type") in ("vanished", "obsolete"):
	        continue
	      source = message.find("source")
	      numerusform = translation.find("numerusform")
	      translation_text = translation.text if numerusform is None else numerusform.text

	      if translation_text is None:
	        empty += 1
	        continue

	      if check_translations:
	        warning = checkTranslation(source.text, translation_text)
	        if warning:
	          # Report the text that was actually checked. For numerus-form
	          # messages translation.text is not that text and may be None.
	          source_text = source.text or ""
	          warnings.append(sep.join([warning, name, escape(source_text[:50]), escape(translation_text[:50])]))

	    # Contexts without a /python/plugins/ location get a "** " prefix.
	    if not plugin_name:
	      name = "** " + name
	      #name += " (" + plugin_name + ")"

	    nums = [len(messages), unfinished, empty]
	    # A real list: in Python 3 map() is lazy, so re-wrapping the total for
	    # every context would build a chain of nested iterators.
	    total = [x + y for x, y in zip(total, nums)]
	    if not only_total and (list_all or unfinished > 0 or empty > 0):
	      print(sep.join([name] + list(map(str, nums)) + [source_path]))

	  # print total
	  if only_total:
	    print(sep.join(map(str, total)))
	  else:
	    print(sep.join(["Total"] + list(map(str, total))))

	    # print warnings
	    if len(warnings) > 0:
	      print("")
	      print("Warnings")
	      print(sep.join(["warning", "classname", "source", "translation"]))
	      print("\n".join(warnings))
	  return 0

	#EXPERIMENTAL
	def listCandidates(filename):
	  """Print translation candidates for unfinished messages (experimental).

	  A candidate is a non-blank translation found on any message in the file
	  that has the same source text as the unfinished message. When several
	  messages share a source text, the last non-blank translation is used.

	  Args:
	    filename: path of the XML translation file to parse.

	  Returns:
	    Always 0.
	  """
	  ts = ElementTree.parse(filename).getroot()

	  print("\nCandidates")

	  messages = ts.findall(".//message")

	  # source text -> translation text
	  candidates = {}
	  for message in messages:
	    translation = message.find("translation")
	    numerusform = translation.find("numerusform")
	    text = translation.text if numerusform is None else numerusform.text
	    # Skip blank translations so that an untranslated duplicate cannot
	    # overwrite a candidate recorded earlier for the same source text.
	    if text and text.strip():
	      candidates[message.find("source").text] = text

	  for message in messages:
	    if message.find("translation").get("type") == "unfinished":
	      source_text = message.find("source").text
	      cand = candidates.get(source_text)
	      if cand:
	        print(source_text, cand)

	  return 0

	def escape(string):
	  """Return string with each tab shown as \\t and each newline as \\n."""
	  return string.replace("\t", "\\t").replace("\n", "\\n")

	if __name__ == "__main__":
	  # The last argument is the translation file. Without one, sys.argv[-1]
	  # would be this script or a flag and would be handed to the XML parser,
	  # so stop with a usage message instead.
	  known_flags = ("-all", "-ct", "-total", "-cand")
	  if len(sys.argv) < 2 or sys.argv[-1] in known_flags:
	    sys.exit("usage: check_translation.py [-all] [-ct] [-total] [-cand] TS_FILE")

	  # Each flag switches on the matching module-level option; any other
	  # argument is ignored here.
	  for arg in sys.argv[1:]:
	    if arg == "-all":
	      list_all = True
	    elif arg == "-ct":
	      check_translations = True
	    elif arg == "-total":
	      only_total = True
	    elif arg == "-cand":
	      list_candidates = True

	  countTranslations(sys.argv[-1])

	  #EXPERIMENTAL:
	  if list_candidates:
	    listCandidates(sys.argv[-1])