Last active
September 26, 2021 13:04
-
-
Save yosshy/2394c6e48bdf38dc5a6e3ace33767fc1 to your computer and use it in GitHub Desktop.
Japanese translation checker for minikube
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import codecs | |
import json | |
import re | |
import string | |
# A run of katakana (ア-ン, the long-vowel mark "ー" and the middle dot "・"
# are allowed inside the run), then one of the listed kana, then exactly one
# character outside ア-ン.  That last character may itself be "ー".
CHO_ON_PATTERN = re.compile(r"[ア-ンー・]+[アカガサザタダナハバパマヤラワァィゥェォ][^ア-ン]")

# Single characters that must not appear in a translation.
# The code points are written as escapes so the class cannot be silently
# altered by Unicode width normalisation, and so that no "]" can close the
# class early.
# NOTE(review): assumes the banned set is the full-width forms of the
# brackets, digits and punctuation below - confirm against the style guide.
ILLEGAL_CHARACTERS = re.compile(
    "["
    "\u3000"                          # ideographic (full-width) space
    "\uff08\uff5b\u3010\u300e\uff3b"  # opening brackets: full-width ( {, lenticular, white corner, full-width [
    "\uff09\uff5d\u3011\u300f\uff3d"  # the matching closing brackets
    "\u201d\u2019"                    # right double / single quotation marks
    "\uff0c\uff0e\u30fb"              # full-width comma, full-width full stop, katakana middle dot
    "\uff10-\uff19"                   # full-width digits 0-9
    "\uff1a\uff0f\uff0d"              # full-width colon, solidus, hyphen-minus
    "]"
)

# Spellings that are flagged wherever they occur in a translation.
ILLEGAL_KANJI_PATTERN = re.compile("(下さい|無し|為|する事)")

# Non-empty text enclosed in ASCII single quotes / ASCII double quotes.
QUOTED_PATTERN = re.compile(r"'[^']+'")
DOUBLE_QUOTED_PATTERN = re.compile(r'"[^"]+"')
# ASCII letters and punctuation: the characters treated as "half-width text"
# when looking for a missing space next to non-ASCII text.
LETTERS = string.ascii_letters + string.punctuation

# Both character sets contain regex metacharacters ("]", "\", "^", "-"), so
# they are escaped before being placed inside a character class.  Without the
# escaping, "\]" is read as an escaped "]" and the backslash silently drops
# out of both classes.
_LETTERS_CLASS = re.escape(LETTERS)
_PRINTABLE_CLASS = re.escape(string.printable)

# An ASCII letter/punctuation character immediately followed by a character
# outside string.printable (i.e. no space in between).
HANKAKU_SPACE_PATTERN1 = re.compile(f"[{_LETTERS_CLASS}][^{_PRINTABLE_CLASS}]")

# The mirror case: a character outside string.printable immediately followed
# by an ASCII letter/punctuation character.
HANKAKU_SPACE_PATTERN2 = re.compile(f"[^{_PRINTABLE_CLASS}][{_LETTERS_CLASS}]")
# Check for katakana words that are missing a trailing long-vowel mark.
def cho_on(source: str) -> list:
    """Return the katakana words in *source* that do not end with "ー".

    Every match of ``CHO_ON_PATTERN`` consists of a katakana word plus the
    single character that follows it.  A match whose last character is "ー"
    is a word that already carries the long-vowel mark and is skipped; for
    all other matches the following character is stripped and the word
    itself is reported.

    Args:
        source: Translated text to inspect.

    Returns:
        A list of the offending words (strings); empty when there are none.
    """
    # The pattern requires one character after the word, so a word sitting
    # at the very end of the text would never match.  Appending a
    # non-katakana sentinel makes that final word visible too; the sentinel
    # is removed again by the [:-1] slice below.
    matches = CHO_ON_PATTERN.findall(source + "\n")
    return [match[:-1] for match in matches if match[-1] != "ー"]
# Check for emphasised / quoted passages.
def quoted(source: str) -> list:
    """Return every quoted passage found in *source*.

    Single-quoted passages (``QUOTED_PATTERN``) are listed first, followed
    by double-quoted passages (``DOUBLE_QUOTED_PATTERN``).  Each passage is
    returned complete, including both of its quotation marks.

    Args:
        source: Translated text to inspect.

    Returns:
        A list of the quoted passages (strings); empty when there are none.
    """
    # findall() always returns a list, so the two results can simply be
    # concatenated; no None handling is needed.
    return QUOTED_PATTERN.findall(source) + DOUBLE_QUOTED_PATTERN.findall(source)
# Check whether forbidden characters are used.
def illegal_characters(source: str) -> list:
    """Return every character of *source* matched by ``ILLEGAL_CHARACTERS``.

    Args:
        source: Translated text to inspect.

    Returns:
        A list with one entry per match, in order of appearance.  The list
        is empty (and therefore falsy) when nothing was found, so callers
        can keep testing the result with a plain ``if result:``.
    """
    return ILLEGAL_CHARACTERS.findall(source)
# Check whether restricted kanji spellings are used.
def illegal_kanji(source: str) -> list:
    """Return every phrase of *source* matched by ``ILLEGAL_KANJI_PATTERN``.

    Args:
        source: Translated text to inspect.

    Returns:
        A list with one entry per match, in order of appearance.  The list
        is empty (and therefore falsy) when nothing was found, so callers
        can keep testing the result with a plain ``if result:``.
    """
    return ILLEGAL_KANJI_PATTERN.findall(source)
# Check that a half-width space separates half-width and full-width words.
def hankaku_space(source: str) -> list:
    """Return the character pairs in *source* where a space is missing.

    Two kinds of two-character pairs are collected:

    * ``HANKAKU_SPACE_PATTERN1`` matches, except those that start with "("
      or end with "」", "。" or "、";
    * ``HANKAKU_SPACE_PATTERN2`` matches, except those that start with "。",
      "、" or "「" or end with ":" or ")".

    Args:
        source: Translated text to inspect.

    Returns:
        A list of two-character strings, first-pattern pairs followed by
        second-pattern pairs; empty when there are none.
    """
    ascii_then_wide = [
        pair
        for pair in HANKAKU_SPACE_PATTERN1.findall(source)
        if pair[0] != "(" and pair[1] not in ("」", "。", "、")
    ]
    wide_then_ascii = [
        pair
        for pair in HANKAKU_SPACE_PATTERN2.findall(source)
        if pair[0] not in ("。", "、", "「") and pair[1] not in (":", ")")
    ]
    # Both directions are reported; returning only the first list would hide
    # every boundary where the non-ASCII character comes first.
    return ascii_then_wide + wide_then_ascii
# Load the translation table: a JSON object whose values are the translated
# strings that get checked below.
with open("ja.json", encoding="utf8") as f:
    data = json.load(f)

# Checks applied to every translation, in this order, each paired with the
# label printed in front of its findings.
_CHECKS = (
    (cho_on, "長音注意"),
    (quoted, "強調・引用注意"),
    (illegal_characters, "使用禁止文字あり"),
    (illegal_kanji, "使用禁止フレーズあり"),
    (hankaku_space, "半角スペース注意"),
)

# Entries are visited in the order json.load() produced them.
# The counter starts at 2 for the first entry.
# NOTE(review): presumably this is the entry's line number in ja.json, with
# line 1 being the opening brace - confirm.
for _count, japanese in enumerate(data.values(), start=2):
    # Turn backslash escape sequences that are still present in the text
    # into the characters they stand for before running the checks.
    # NOTE(review): codecs.escape_decode is not part of the documented
    # codecs API - confirm it is acceptable to rely on it here.
    japanese2 = codecs.escape_decode(bytes(japanese, "utf-8"))[0]\
        .decode("utf-8")
    for check, label in _CHECKS:
        result = check(japanese2)
        if result:
            # str(list)[1:-1] prints the findings without the surrounding
            # square brackets.
            print(f"{_count} {japanese}: {label} ({str(result)[1:-1]})")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment