Last active
January 27, 2017 01:45
-
-
Save thundergolfer/49c2d41536dd24c13ed337334e4ca1ae to your computer and use it in GitHub Desktop.
Long translations break UIs, which is painful for UX designers and front-end developers alike. This script finds the longest translation of either an existing Zendesk translation item or a new string that has yet to be translated.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
# In[35]: | |
import argparse | |
import html | |
import json | |
import requests | |
from requests.utils import quote | |
import re | |
import sys | |
from bs4 import BeautifulSoup | |
# In[ ]: | |
# In[37]: | |
def get_subdomain_locales(subdomain):
    """Fetch the locale list reported by a Zendesk subdomain.

    Sends a GET request to ``https://<subdomain>.zendesk.com/api/v2/locales.json``,
    decodes the response body as UTF-8 JSON and returns whatever is stored
    under its ``'locales'`` key.

    Raises:
        KeyError: if the decoded response has no ``'locales'`` key.
    """
    endpoint = 'https://{}.zendesk.com/api/v2/locales.json'.format(subdomain)
    payload = requests.get(endpoint).content.decode('UTF-8')
    return json.loads(payload)['locales']
def zen_locale_to_googlelang(zen_locale, google_langs):
    """Map a Zendesk locale record onto a Google Translate language code.

    Matching is attempted in three steps, most specific first:

    1. the full locale code (e.g. ``'zh-CN'``) is itself a Google code;
    2. the part of the code before the first ``'-'`` (e.g. ``'de'`` from
       ``'de-AT'``) is a Google code;
    3. one of the ``' - '``-separated parts of ``presentation_name``
       equals, case-insensitively, a language name in ``google_langs``.

    Args:
        zen_locale: mapping with at least a ``'locale'`` key; the
            ``'presentation_name'`` key is only read when step 3 is reached.
        google_langs: mapping of language name -> Google language code.

    Returns:
        The matching Google language code, or ``None`` when nothing matches.
    """
    code = zen_locale['locale']
    google_codes = set(google_langs.values())

    # try match code
    if code in google_codes:
        return code

    # try match 'part-1' of code
    code_first_part = code.split('-')[0]
    if code_first_part in google_codes:
        return code_first_part

    # failed to match code, try matching against name
    # presumably presentation_name looks like "<English name> - <native name>";
    # every part is tried so the order of the two does not matter.
    name_parts = {
        part.strip().lower()
        for part in zen_locale['presentation_name'].split(' - ')
    }
    for lang_name, lang_code in google_langs.items():
        if lang_name.lower() in name_parts:
            return lang_code

    # failed to match
    return None
def get_zendesk_translations(subdomain, *trans_items):
    """Collect the translations of one Zendesk item across the official locales.

    Only the first entry of ``trans_items`` is looked up. For every locale
    code in ``zendesk_official_langs`` the subdomain's
    ``/api/v2/locales/<code>.json`` endpoint is queried with
    ``include=translations&strings=<item>``. Locales whose response lacks the
    item print "Missing Translation" and are skipped.

    Returns:
        A single-entry dict ``{item: [translation, ...]}``; the dict is also
        printed before being returned.
    """
    item = trans_items[0]  # only doing one atm
    collected = []
    zendesk_trans = {item: collected}

    for locale_code in zendesk_official_langs.values():
        url = 'https://{}.zendesk.com/api/v2/locales/{}.json?'.format(subdomain, locale_code)
        url += 'include=translations&strings={}'.format(item)
        response = requests.get(url)
        content = json.loads(response.content.decode('UTF-8'))
        # A missing key anywhere on the path is treated as "no translation".
        try:
            collected.append(content['locale']['translations'][item])
        except KeyError:
            print("Missing Translation")

    print(zendesk_trans)
    return zendesk_trans
def get_longest_translations(translations):
    """Pick the longest translation for every translation item.

    Args:
        translations: mapping of item -> list of translated strings.

    Returns:
        A dict mapping each item to its longest string (the first one wins
        on ties). An item with no translations at all maps to ``""`` rather
        than raising ``ValueError``, so one item missing everywhere does not
        abort the whole run.
    """
    return {
        item: max(candidates, key=len, default="")
        for item, candidates in translations.items()
    }
def display_longest_translations(longest):
    """Print a short report for each item in ``longest``.

    ``longest`` maps a translation item to its longest translated string.
    For every entry the item, the quoted string and its character count
    are written to stdout, followed by a blank line.
    """
    for item, text in longest.items():
        quoted = "'" + text + "'"
        print("The longest translation for: ")
        print(item)
        print("is", quoted)
        print("which is {} characters long.".format(len(text)))
        print()
# Browser-like User-Agent passed as request headers by g_translate.
# The value is assembled from adjacent string literals, one segment per line.
headers = {
    'User-Agent': (
        "Mozilla/4.0 ("
        "compatible;"
        "MSIE 6.0;"
        "Windows NT 5.1;"
        "SV1;"
        ".NET CLR 1.1.4322;"
        ".NET CLR 2.0.50727;"
        ".NET CLR 3.0.04506.30"
        ")"
    ),
}
# https://github.com/mouuff/mtranslate/blob/master/mtranslate/core.py | |
def g_translate(to_translate, to_language="auto", from_language="auto"):
    """Translate a string by scraping the mobile Google Translate page.

    Languages are given as short codes (French = fr, English = en,
    Spanish = es, etc...); both default to ``"auto"``.

    The text is URL-quoted, the page is fetched with the module-level
    ``headers``, and the first ``class="t0"`` element found in the HTML is
    returned with its HTML entities unescaped. An empty string is returned
    when the page contains no such element.

    Example:
        print(g_translate("salut tu vas bien?", "en"))
        hello you alright?
    """
    link = "http://translate.google.com/m?hl=%s&sl=%s&q=%s" % (
        to_language,
        from_language,
        quote(to_translate),
    )
    page = requests.get(link, headers=headers).content.decode("utf-8")
    matches = re.findall(r'class="t0">(.*?)<', page)
    if not matches:
        return ""
    return html.unescape(matches[0])
def get_google_translations(*strings, locales, google_langs):
    """Translate each string into every locale that Google can handle.

    Each Zendesk locale record is first mapped to a Google Translate
    language code with ``zen_locale_to_googlelang``. Locales that cannot be
    mapped are skipped, and locales that map to the same Google code are
    translated only once, so no redundant requests are made.

    Args:
        *strings: the source strings to translate.
        locales: iterable of Zendesk locale records (keyword-only).
        google_langs: mapping of language name -> Google language code
            (keyword-only).

    Returns:
        A dict mapping every input string to the list of its translations,
        one per distinct target language, as returned by ``g_translate``.
    """
    # map zendesk locales to google translate languages
    target_langs = []
    for zen_locale in locales:
        lang = zen_locale_to_googlelang(zen_locale, google_langs)
        if lang is not None and lang not in target_langs:
            target_langs.append(lang)

    return {
        text: [g_translate(text, lang) for lang in target_langs]
        for text in strings
    }
# In[13]: | |
# Language name -> Google Translate language code.
# Names are compared case-insensitively against Zendesk locale names, so the
# spelling of each key matters.
google_translate_langs = {
    'Afrikaans': 'af',
    'Albanian': 'sq',
    'Amharic': 'am',
    'Arabic': 'ar',
    'Armenian': 'hy',
    'Azerbaijani': 'az',
    'Basque': 'eu',
    'Belarusian': 'be',
    'Bengali': 'bn',
    'Bosnian': 'bs',
    'Bulgarian': 'bg',
    'Catalan': 'ca',
    'Cebuano': 'ceb',
    'Chichewa': 'ny',
    'Chinese (Simplified)': 'zh-CN',
    'Chinese (Traditional)': 'zh-TW',
    'Corsican': 'co',
    'Croatian': 'hr',
    'Czech': 'cs',
    'Danish': 'da',
    'Dutch': 'nl',
    'English': 'en',
    'Esperanto': 'eo',
    'Estonian': 'et',
    'Filipino': 'tl',
    'Finnish': 'fi',
    'French': 'fr',
    'Frisian': 'fy',
    'Galician': 'gl',
    'Georgian': 'ka',
    'German': 'de',
    'Greek': 'el',
    'Gujarati': 'gu',
    'Haitian Creole': 'ht',
    'Hausa': 'ha',
    'Hawaiian': 'haw',
    'Hebrew': 'iw',
    'Hindi': 'hi',
    'Hmong': 'hmn',
    'Hungarian': 'hu',
    'Icelandic': 'is',
    'Igbo': 'ig',
    'Indonesian': 'id',
    'Irish': 'ga',
    'Italian': 'it',
    'Japanese': 'ja',
    'Javanese': 'jw',
    'Kannada': 'kn',
    'Kazakh': 'kk',
    'Khmer': 'km',
    'Korean': 'ko',
    'Kurdish': 'ku',
    'Kyrgyz': 'ky',
    'Lao': 'lo',
    'Latin': 'la',
    'Latvian': 'lv',
    'Lithuanian': 'lt',
    'Luxembourgish': 'lb',
    'Macedonian': 'mk',
    'Malagasy': 'mg',
    'Malay': 'ms',
    'Malayalam': 'ml',
    'Maltese': 'mt',
    'Maori': 'mi',
    'Marathi': 'mr',
    'Mongolian': 'mn',
    'Burmese': 'my',
    'Nepali': 'ne',
    'Norwegian': 'no',
    'Pashto': 'ps',
    'Persian': 'fa',
    'Polish': 'pl',
    'Portuguese': 'pt',
    'Punjabi': 'pa',
    'Romanian': 'ro',
    'Russian': 'ru',
    'Samoan': 'sm',
    'Scots Gaelic': 'gd',
    'Serbian': 'sr',
    'Sesotho': 'st',
    'Shona': 'sn',
    'Sindhi': 'sd',
    'Sinhala': 'si',
    'Slovak': 'sk',
    'Slovenian': 'sl',
    'Somali': 'so',
    'Spanish': 'es',
    'Sundanese': 'su',
    'Swahili': 'sw',
    'Swedish': 'sv',
    'Tajik': 'tg',
    'Tamil': 'ta',
    'Telugu': 'te',
    'Thai': 'th',
    'Turkish': 'tr',
    'Ukrainian': 'uk',
    'Urdu': 'ur',
    'Uzbek': 'uz',
    'Vietnamese': 'vi',
    'Welsh': 'cy',
    'Xhosa': 'xh',
    'Yiddish': 'yi',
    'Yoruba': 'yo',
    'Zulu': 'zu',
}
# In[14]: | |
# Language name -> Zendesk locale code for the "official" language set.
# get_zendesk_translations iterates over these codes in the order listed.
zendesk_official_langs = {
    'Arabic': 'ar',
    'Bulgarian': 'bg',
    'Catalan': 'ca',
    'Czech': 'cs',
    'Danish': 'da',
    'German': 'de',
    'Greek': 'el',
    'English (Canada)': 'en-CA',
    'English (GB)': 'en-GB',
    'English': 'en-US',
    'Spanish': 'es',
    'Spanish (LATAM)': 'es-419',
    'Spanish (Spain)': 'es-ES',
    'Spanish (Mexico)': 'es-MX',
    'Estonian': 'et',
    'Persian': 'fa',
    'Finnish': 'fi',
    'Filipino': 'fil',
    'French': 'fr',
    'French (Canada)': 'fr-CA',
    'French (France)': 'fr-FR',
    'Hebrew': 'he',
    'Hindi': 'hi',
    'Croatian': 'hr',
    'Indonesian': 'id',
    'Icelandic': 'is',
    'Italian': 'it',
    'Japanese': 'ja',
    'Korean': 'ko',
    'Lithuanian': 'lt',
    'Latvian': 'lv',
    'Malay': 'ms',
    'Dutch': 'nl',
    'Norwegian': 'no',
    'Polish': 'pl',
    'Portuguese': 'pt',
    'Brazilian Portuguese': 'pt-BR',
    'Romanian': 'ro',
    'Russian': 'ru',
    'Slovakian': 'sk',
    'Slovenian': 'sl',
    'Serbian': 'sr',
    'Swedish': 'sv',
    'Thai': 'th',
    'Turkish': 'tr',
    'Ukrainian': 'uk',
    'Vietnamese': 'vi',
    'Simplified Chinese': 'zh-CN',
    'Traditional Chinese': 'zh-TW',
}
# In[15]: | |
# Language name -> Zendesk locale code for the "crowdsourced" language set.
zendesk_crowdsourced_langs = {
    'Afrikaans': 'af',
    'Azerbaijani': 'az',
    'Catalan': 'ca',
    'Estonian': 'et',
    'Basque': 'eu',
    'Croatian': 'hr',
    'Georgian': 'ka',
    'Hungarian': 'hu',
    'Icelandic': 'is',
    'Lithuanian': 'lt',
    'Latvian': 'lv',
    'Pashto': 'ps',
    'Slovakian': 'sk',
    'Slovenian': 'sl',
    'Albanian': 'sq',
    'Serbian': 'sr',
    'Serbian (Montenegro)': 'sr-ME',
    'Ukrainian': 'uk',
}
# Command-line entry point: exactly one of --existing / --new must be given.
parser = argparse.ArgumentParser()
parser.add_argument('-s', '--subdomain', nargs='?', help='The subdomain to which the translation is related. "Support" is default')
parser.add_argument('-e', '--existing', default=None, help='The name of an existing translation item')
parser.add_argument('-n', '--new', default=None, help='If a translation does not yet exist in the subdomain, enter a string.')
args = parser.parse_args()

# Fall back to the default subdomain when -s is absent or given without a value.
if args.subdomain is None:
    args.subdomain = 'support'
    print("Using default subdomain: 'support' (Z1)")

if args.existing and args.new:
    sys.exit("Cannot enter both an existing translation object AND a new translation string.")
elif args.new:
    # New string: machine-translate it into every locale the subdomain reports.
    locales = get_subdomain_locales(args.subdomain)
    longest = get_longest_translations(
        get_google_translations(
            args.new,
            locales=locales,
            google_langs=google_translate_langs,
        )
    )
    display_longest_translations(longest)
elif args.existing:
    # Existing item: the Zendesk lookup iterates its own locale table, so the
    # subdomain's locale list is not needed and is no longer fetched here.
    longest = get_longest_translations(
        get_zendesk_translations(
            args.subdomain,
            args.existing,
        )
    )
    display_longest_translations(longest)
else:
    sys.exit("Please enter either an existing translation object OR a new translation string")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment