Last active
January 4, 2024 23:35
-
-
Save Bluscream/05b7f953dc22c305c18e3143c7f17282 to your computer and use it in GitHub Desktop.
Translation Obfuscator/Scrambler
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import os | |
import deep_translator | |
from deep_translator.exceptions import RequestError, LanguageNotSupportedException | |
from deep_translator import ( | |
GoogleTranslator, | |
PonsTranslator, | |
LingueeTranslator, | |
MyMemoryTranslator | |
) | |
# Lookup tables: short language code used by this script -> the identifier
# passed to the matching deep_translator backend's constructor.
google_languages = {
    'en': 'en', 'de': 'de', 'fr': 'fr', 'es': 'es', 'it': 'it', 'pt': 'pt', 'ru': 'ru',
    'ja': 'ja', 'ko': 'ko', 'zh': 'zh-CN', 'ar': 'ar', 'tr': 'tr', 'pl': 'pl', 'nl': 'nl',
    'sv': 'sv', 'fi': 'fi',
}
linguee_languages = {
    'en': 'english', 'de': 'german', 'fr': 'french', 'es': 'spanish', 'it': 'italian', 'pt': 'portuguese', 'ru': 'russian',
    'ja': 'japanese', 'ko': 'korean', 'zh': 'chinese', 'ar': 'arabic', 'tr': 'turkish', 'pl': 'polish', 'nl': 'dutch',
    'sv': 'swedish', 'fi': 'finnish',
}
# Every code listed in `languages` must have an entry here, otherwise the
# MyMemory fallback fails with a KeyError before it can even be tried.
# NOTE(review): the region-qualified codes added for ja..fi follow the
# ll-CC pattern of the existing entries - confirm against MyMemory's list.
mymemory_languages = {
    'en': 'en-US', 'de': 'de-DE', 'fr': 'fr-FR', 'es': 'es-ES', 'it': 'it-IT', 'pt': 'pt-PT', 'ru': 'ru-RU',
    'ja': 'ja-JP', 'ko': 'ko-KR', 'zh': 'zh-CN', 'ar': 'ar-SA', 'tr': 'tr-TR', 'pl': 'pl-PL', 'nl': 'nl-NL',
    'sv': 'sv-SE', 'fi': 'fi-FI',
}
# Order of the translation chain: the text is translated into each language in
# turn, each step using the previous language as its source.
languages = ['de', 'fr', 'es', 'it', 'pt', 'ru', 'ja', 'ko', 'zh']
# translators = [GoogleTranslator, PonsTranslator, LingueeTranslator, MyMemoryTranslator]
# Pattern for ";A<name>" fields; not referenced by the code visible in this file.
credits_regex = r";A([\w\s-]+)"
# Directory that receives one CSV file per output language.
out_dir = 'translate/out'
def translate(translator, text, lang: str, source: str) -> str:
    """Translate *text* with *translator*, falling back to the input text.

    Args:
        translator: Object with a ``translate`` method that accepts the text
            plus ``src`` and ``dest`` keyword arguments.
        text: The text to translate.
        lang: Target language code, forwarded as ``dest``.
        source: Source language code, forwarded as ``src``.

    Returns:
        The translated string, or *text* unchanged when the backend returns
        nothing, only whitespace, or exactly the input.

    Raises:
        RequestError: Re-raised from the backend after being logged.
        LanguageNotSupportedException: Re-raised from the backend after being
            logged.
        ValueError: If the returned text contains the "invalid source
            language" marker instead of a real translation.
    """
    backend = translator.__class__.__name__
    print(f"translate({backend}, \"{text}\", {lang}, {source})")
    # Keep the try body down to the single call that can raise backend errors.
    try:
        translated = translator.translate(text, src=source, dest=lang)
    except RequestError as e:
        print(f"RequestError: {e}")
        raise
    except LanguageNotSupportedException as e:
        print(f"LanguageNotSupportedException: {e}")
        raise

    if not translated or not translated.strip() or translated == text:
        return text
    if ' IS AN INVALID SOURCE LANGUAGE .' in translated:
        # The error arrives as the "translation" itself, so turn it into a
        # real exception the caller's fallback logic can react to.
        message = (
            f"Got invalid source language error for {backend} "
            f"({source} -> {lang}): {translated}"
        )
        print(message)
        raise ValueError(message)
    return translated
def translate_text(text, language, source):  # ='auto'):
    """Translate *text* from *source* to *language*, trying several backends.

    The backends are tried in order: Google, Pons, Linguee, MyMemory. The
    first one that returns without raising wins. Each translator is built
    inside the ``try`` so that an unsupported language or a missing entry in
    a language table only skips that backend instead of aborting the chain.

    Args:
        text: The text to translate.
        language: Short target language code (a key of the language tables).
        source: Short source language code (a key of the language tables).

    Returns:
        The result of the first backend that succeeds, or *text* unchanged
        when every backend fails.
    """
    print(f"Translating \"{text}\" from {source} to {language}")
    # Factories are lazy so construction errors are handled per backend.
    backends = (
        ("google", lambda: GoogleTranslator(
            source=google_languages[source], target=google_languages[language])),
        ("pons", lambda: PonsTranslator(source=source, target=language)),
        ("linguee", lambda: LingueeTranslator(
            source=linguee_languages[source], target=linguee_languages[language])),
        ("mymemory", lambda: MyMemoryTranslator(
            source=mymemory_languages[source], target=mymemory_languages[language])),
    )
    for name, build in backends:
        try:
            return translate(build(), text, language, source)
        except Exception as e:  # deliberate best-effort: fall through to the next backend
            print(f"Failed to {name} translate \"{text}\" from {source} to {language}: {e.__class__.__name__} ({e})")
    print(f"Failed to translate \"{text}\" from {source} to {language}")
    return text
def scramble_text(text, target_languages=None) -> dict[str, str]:
    """Scramble *text* by translating it through a chain of languages.

    Starting from English, the text is translated into each language of the
    chain in turn (each step uses the previous language as its source) and
    finally back to English.

    Args:
        text: The English text to scramble.
        target_languages: Optional iterable of language codes to chain
            through. Defaults to the module-level ``languages`` list.

    Returns:
        A dict mapping each language code to the text as it looked after that
        step, plus the key ``"scrambled"`` holding the final English result.
        Texts shorter than three characters are not translated: every entry,
        including ``"scrambled"``, is the unchanged input.
    """
    chain = languages if target_languages is None else list(target_languages)
    if len(text) < 3:
        print(f"Skipping short text: {text}")
        ret = {lang: text for lang in chain}
        # Keep the result shape identical to the translated case so callers
        # can always read the "scrambled" entry.
        ret["scrambled"] = text
        return ret

    ret = {}
    scrambled = text
    source = 'en'
    for scramble_count, lang in enumerate(chain, start=1):
        print(f"Translating {scrambled} to {lang}")
        scrambled = translate_text(scrambled, lang, source)
        print(f"Translated to {scrambled} (scrambles: {scramble_count})")
        ret[lang] = scrambled
        # The next hop translates from the language we just produced.
        source = lang
    ret["scrambled"] = translate_text(scrambled, "en", source)
    return ret
def main():
    """Scramble every text in ``translate/input.csv`` and write the results.

    The input is a ``;``-delimited CSV whose first column is a key and whose
    second column is the text. The first line is consumed as the header by
    ``csv.DictReader``. For every processed row the text is run through
    ``scramble_text`` and each returned entry is stored under the row's key.
    One ``<lang>.csv`` file per output language (plus ``scrambled.csv``) is
    then written to ``out_dir`` as ``key;text`` rows.

    Rows with a blank key, rows whose key starts with ``INPUT`` or
    ``Credits``, and rows without a text column are skipped.
    """
    os.makedirs(out_dir, exist_ok=True)
    # newline='' is what the csv module expects; the input encoding is left
    # at the platform default, as before.
    with open('translate/input.csv', 'r', newline='') as csv_file:
        rows = list(csv.DictReader(csv_file, delimiter=';'))

    outputs = {'scrambled': {}}
    for lang in languages:
        outputs[lang] = {}

    for i, row in enumerate(rows):
        row_values = list(row.values())
        key = row_values[0]
        if not key or not key.strip():
            continue
        txt = row_values[1] if len(row_values) > 1 else None
        print(f"Processing rows[{i}] \"{key}\" = \"{txt}\"")
        if key.startswith('INPUT'):
            print(f"Ignoring input: {key}")
            continue
        if key.startswith('Credits'):
            # Credits rows (e.g. "Y11;TCharAni;A;APeter Day;ALisa Springett;AJason Smith;W3")
            # are intentionally left untranslated.
            print(f"Ignoring credits: {key}")
            continue
        if not isinstance(txt, str):
            # Short rows yield None (or a list of surplus fields) here.
            print(f"Skipping row without text: {key}")
            continue
        # Record every intermediate language and the final scrambled text.
        for lang, value in scramble_text(txt).items():
            outputs.setdefault(lang, {})[key] = value

    for lang, items in outputs.items():
        # Write mode plus UTF-8: the translations contain non-ASCII text
        # (Cyrillic, CJK) that a platform default codec may not encode.
        with open(f"{out_dir}/{lang}.csv", 'w', newline='', encoding='utf-8') as csv_file:
            writer = csv.writer(csv_file, delimiter=';')
            writer.writerows(items.items())
    print("Done")
if __name__ == "__main__":
    # main() prints its own "Done" message on completion, so it is not
    # repeated here.
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment