Skip to content

Instantly share code, notes, and snippets.

@thundergolfer
Last active January 27, 2017 01:45
Show Gist options
  • Save thundergolfer/49c2d41536dd24c13ed337334e4ca1ae to your computer and use it in GitHub Desktop.
Save thundergolfer/49c2d41536dd24c13ed337334e4ca1ae to your computer and use it in GitHub Desktop.
Long translations break UIs, which is painful for UX designers and front-end developers alike. This script finds the longest translation of an existing Zendesk translation item, or of a new string that has not been translated yet.
# coding: utf-8
# In[35]:
import argparse
import html
import json
import requests
from requests.utils import quote
import re
import sys
from bs4 import BeautifulSoup
# In[ ]:
# In[37]:
def get_subdomain_locales(subdomain):
    """Return the locale entries reported by a Zendesk subdomain.

    Requests ``https://<subdomain>.zendesk.com/api/v2/locales.json``, decodes
    the response body as UTF-8 JSON and returns the value stored under its
    ``'locales'`` key.

    Raises:
        KeyError: if the decoded JSON has no ``'locales'`` key.
    """
    response = requests.get(
        'https://{}.zendesk.com/api/v2/locales.json'.format(subdomain)
    )
    payload = json.loads(response.content.decode('UTF-8'))
    return payload['locales']
def zen_locale_to_googlelang(zen_locale, google_langs):
    """Map a Zendesk locale entry to a Google Translate language code.

    Matching is attempted in three steps, first hit wins:

    1. the full Zendesk locale code (e.g. ``'zh-CN'``) is a Google code;
    2. the part of the code before the first ``'-'`` (e.g. ``'fr'`` from
       ``'fr-CA'``) is a Google code;
    3. the English language name taken from ``presentation_name`` equals,
       case-insensitively, one of the names in ``google_langs``.

    Args:
        zen_locale: dict with a ``'locale'`` code and, optionally, a
            ``'presentation_name'``.
        google_langs: dict mapping English language name -> Google code.

    Returns:
        The matching Google language code, or ``None`` when nothing matches.
    """
    code = zen_locale['locale']
    google_codes = set(google_langs.values())

    # 1) exact code match
    if code in google_codes:
        return code

    # 2) match on the primary language sub-tag only
    code_first_part = code.split('-')[0]
    if code_first_part in google_codes:
        return code_first_part

    # 3) fall back to the English name.
    # NOTE(review): assumes presentation_name looks like
    # "<English name> - <native name>" -- confirm against the Zendesk API.
    presentation_name = zen_locale.get('presentation_name') or ''
    english_locale_name = presentation_name.split(' - ')[0].strip().lower()
    for name, google_code in google_langs.items():
        if name.lower() == english_locale_name:
            return google_code

    # failed to match
    return None
def get_zendesk_translations(subdomain, *trans_items):
    """Collect one translation item's text across the Zendesk locales.

    For every locale code in the module-level ``zendesk_official_langs``
    mapping, the locale endpoint of ``subdomain`` is queried with
    ``include=translations`` and the requested item's string is collected.
    Locales whose response lacks the item print "Missing Translation" and
    are skipped.

    Only the first entry of ``trans_items`` is used; any further items are
    ignored for now.

    Returns:
        A dict ``{item: [translated strings]}``; the same dict is also
        printed before returning.

    Raises:
        IndexError: if no translation item is passed.
    """
    item = trans_items[0]  # only doing one at the moment
    found = []
    result = {item: found}

    url_template = 'https://{}.zendesk.com/api/v2/locales/{}.json?'
    query = 'include=translations&strings={}'.format(item)

    for locale_code in zendesk_official_langs.values():
        response = requests.get(url_template.format(subdomain, locale_code) + query)
        payload = json.loads(response.content.decode('UTF-8'))
        # A missing key anywhere along the path is treated as
        # "translation missing" for this locale.
        try:
            text = payload['locale']['translations'][item]
        except KeyError:
            print("Missing Translation")
            continue
        found.append(text)

    print(result)
    return result
def get_longest_translations(translations):
    """Pick the longest string for every translation item.

    Args:
        translations: dict mapping an item to a list of translated strings.

    Returns:
        A dict mapping each item to its longest translation (the first one
        encountered on ties). An item with no translations at all maps to
        the empty string instead of raising ``ValueError``, which otherwise
        happens when every locale was missing the item.
    """
    return {
        item: max(candidates, key=len, default="")
        for item, candidates in translations.items()
    }
def display_longest_translations(longest):
    """Print a short report for each item in ``longest``.

    Args:
        longest: dict mapping a translation item to its longest translated
            string.

    For every entry the item, the quoted string and its character count are
    written to standard output, followed by a blank line.
    """
    for item, text in longest.items():
        print("The longest translation for: ")
        print(item)
        print("is", "'" + text + "'")
        print("which is {} characters long.".format(len(text)))
        print()
# Request headers sent with the Google Translate request in g_translate().
# The User-Agent value is a single string literal continued across lines with
# trailing backslashes, so any whitespace at the start of a continuation line
# becomes part of the header value -- edit with care.
headers = {'User-Agent':
"Mozilla/4.0 (\
compatible;\
MSIE 6.0;\
Windows NT 5.1;\
SV1;\
.NET CLR 1.1.4322;\
.NET CLR 2.0.50727;\
.NET CLR 3.0.04506.30\
)"}
# Adapted from https://github.com/mouuff/mtranslate/blob/master/mtranslate/core.py
def g_translate(to_translate, to_language="auto", from_language="auto"):
    """Translate a string by scraping the Google Translate mobile page.

    Languages are given as short codes (French = fr, English = en,
    Spanish = es, ...). Both codes default to ``"auto"``.

    Args:
        to_translate: text to translate; it is URL-quoted before sending.
        to_language: code placed in the ``hl`` query parameter.
        from_language: code placed in the ``sl`` query parameter.

    Returns:
        The HTML-unescaped text of the first ``class="t0"`` element in the
        response, or ``""`` when the page contains no such element.

    Example:
        print(g_translate("salut tu vas bien?", "en"))
        hello you alright?
    """
    url = "http://translate.google.com/m?hl=%s&sl=%s&q=%s" % (
        to_language,
        from_language,
        quote(to_translate),
    )
    response = requests.get(url, headers=headers)
    page = response.content.decode("utf-8")

    # The translated text is the content of the first element with class "t0".
    matches = re.findall(r'class="t0">(.*?)<', page)
    if not matches:
        return ""
    return html.unescape(matches[0])
def get_google_translations(*strings, locales, google_langs):
    """Translate each string into the languages of the given Zendesk locales.

    Every locale is first mapped to a Google Translate language code with
    ``zen_locale_to_googlelang``; the mapped code (not the raw Zendesk locale
    code) is what gets sent to ``g_translate``. Locales that cannot be mapped
    are skipped, and locales that map to the same language are translated
    only once.

    Args:
        *strings: the strings to translate.
        locales: iterable of Zendesk locale dicts.
        google_langs: dict mapping English language name -> Google code.

    Returns:
        A dict mapping each input string to the list of its translations,
        one per distinct target language, in first-seen locale order.
    """
    # Resolve the target languages once; they are the same for every string.
    target_langs = []
    for zen_locale in locales:
        lang = zen_locale_to_googlelang(zen_locale, google_langs)
        if lang is not None and lang not in target_langs:
            target_langs.append(lang)

    return {
        s: [g_translate(s, lang) for lang in target_langs]
        for s in strings
    }
# In[13]:
# English language name -> Google Translate language code.
# Used by zen_locale_to_googlelang() for both code and name matching, so the
# keys must be spelled as plain English language names.
google_translate_langs = {
    'Afrikaans': 'af',
    'Albanian': 'sq',
    'Amharic': 'am',
    'Arabic': 'ar',
    'Armenian': 'hy',
    'Azerbaijani': 'az',  # was misspelled 'Azeerbaijani'
    'Basque': 'eu',
    'Belarusian': 'be',
    'Bengali': 'bn',
    'Bosnian': 'bs',
    'Bulgarian': 'bg',
    'Catalan': 'ca',
    'Cebuano': 'ceb',
    'Chichewa': 'ny',
    'Chinese (Simplified)': 'zh-CN',
    'Chinese (Traditional)': 'zh-TW',
    'Corsican': 'co',
    'Croatian': 'hr',
    'Czech': 'cs',
    'Danish': 'da',
    'Dutch': 'nl',
    'English': 'en',
    'Esperanto': 'eo',
    'Estonian': 'et',
    'Filipino': 'tl',
    'Finnish': 'fi',
    'French': 'fr',
    'Frisian': 'fy',
    'Galician': 'gl',
    'Georgian': 'ka',
    'German': 'de',
    'Greek': 'el',
    'Gujarati': 'gu',
    'Haitian Creole': 'ht',
    'Hausa': 'ha',
    'Hawaiian': 'haw',
    'Hebrew': 'iw',
    'Hindi': 'hi',
    'Hmong': 'hmn',
    'Hungarian': 'hu',
    'Icelandic': 'is',
    'Igbo': 'ig',
    'Indonesian': 'id',
    'Irish': 'ga',
    'Italian': 'it',
    'Japanese': 'ja',
    'Javanese': 'jw',
    'Kannada': 'kn',
    'Kazakh': 'kk',
    'Khmer': 'km',
    'Korean': 'ko',
    'Kurdish': 'ku',
    'Kyrgyz': 'ky',
    'Lao': 'lo',
    'Latin': 'la',
    'Latvian': 'lv',
    'Lithuanian': 'lt',
    'Luxembourgish': 'lb',
    'Macedonian': 'mk',
    'Malagasy': 'mg',
    'Malay': 'ms',
    'Malayalam': 'ml',
    'Maltese': 'mt',
    'Maori': 'mi',
    'Marathi': 'mr',
    'Mongolian': 'mn',
    'Burmese': 'my',
    'Nepali': 'ne',
    'Norwegian': 'no',
    'Pashto': 'ps',
    'Persian': 'fa',
    'Polish': 'pl',
    'Portuguese': 'pt',
    'Punjabi': 'pa',  # was 'ma', which is not the Punjabi language code
    'Romanian': 'ro',
    'Russian': 'ru',
    'Samoan': 'sm',
    'Scots Gaelic': 'gd',
    'Serbian': 'sr',
    'Sesotho': 'st',
    'Shona': 'sn',
    'Sindhi': 'sd',
    'Sinhala': 'si',
    'Slovak': 'sk',
    'Slovenian': 'sl',
    'Somali': 'so',
    'Spanish': 'es',
    'Sundanese': 'su',
    'Swahili': 'sw',
    'Swedish': 'sv',
    'Tajik': 'tg',
    'Tamil': 'ta',
    'Telugu': 'te',
    'Thai': 'th',
    'Turkish': 'tr',
    'Ukrainian': 'uk',
    'Urdu': 'ur',
    'Uzbek': 'uz',
    'Vietnamese': 'vi',
    'Welsh': 'cy',
    'Xhosa': 'xh',
    'Yiddish': 'yi',
    'Yoruba': 'yo',
    'Zulu': 'zu',
}
# In[14]:
# English language name -> Zendesk locale code.
# get_zendesk_translations() walks the codes in this mapping, in insertion
# order, and queries the locale endpoint for each one.
zendesk_official_langs = {
    'Arabic': 'ar',
    'Bulgarian': 'bg',
    'Catalan': 'ca',
    'Czech': 'cs',
    'Danish': 'da',
    'German': 'de',
    'Greek': 'el',
    'English (Canada)': 'en-CA',
    'English (GB)': 'en-GB',
    'English': 'en-US',
    'Spanish': 'es',
    'Spanish (LATAM)': 'es-419',
    'Spanish (Spain)': 'es-ES',
    'Spanish (Mexico)': 'es-MX',
    'Estonian': 'et',
    'Persian': 'fa',
    'Finnish': 'fi',
    'Filipino': 'fil',
    'French': 'fr',
    'French (Canada)': 'fr-CA',
    'French (France)': 'fr-FR',
    'Hebrew': 'he',
    'Hindi': 'hi',
    'Croatian': 'hr',
    'Indonesian': 'id',
    'Icelandic': 'is',
    'Italian': 'it',
    'Japanese': 'ja',
    'Korean': 'ko',
    'Lithuanian': 'lt',
    'Latvian': 'lv',
    'Malay': 'ms',
    'Dutch': 'nl',
    'Norwegian': 'no',
    'Polish': 'pl',
    'Portuguese': 'pt',
    'Brazilian Portuguese': 'pt-BR',
    'Romanian': 'ro',
    'Russian': 'ru',
    'Slovakian': 'sk',
    'Slovenian': 'sl',
    'Serbian': 'sr',
    'Swedish': 'sv',
    'Thai': 'th',
    'Turkish': 'tr',
    'Ukrainian': 'uk',
    'Vietnamese': 'vi',
    'Simplified Chinese': 'zh-CN',
    'Traditional Chinese': 'zh-TW',
}
# In[15]:
# English language name -> Zendesk locale code for the set this script names
# "crowdsourced". Nothing in the visible code reads this mapping yet.
zendesk_crowdsourced_langs = {
    'Afrikaans': 'af',
    'Azerbaijani': 'az',
    'Catalan': 'ca',
    'Estonian': 'et',
    'Basque': 'eu',
    'Croatian': 'hr',
    'Georgian': 'ka',
    'Hungarian': 'hu',
    'Icelandic': 'is',
    'Lithuanian': 'lt',
    'Latvian': 'lv',
    'Pashto': 'ps',
    'Slovakian': 'sk',
    'Slovenian': 'sl',
    'Albanian': 'sq',
    'Serbian': 'sr',
    'Serbian (Montenegro)': 'sr-ME',
    'Ukrainian': 'uk',
}
# Command-line entry point.
#   -s/--subdomain  Zendesk subdomain to query (falls back to 'support')
#   -e/--existing   name of an existing translation item to look up
#   -n/--new        a new string to machine-translate via Google Translate
# Exactly one of --existing / --new must be given.
parser = argparse.ArgumentParser()
parser.add_argument('-s', '--subdomain', nargs='?', help='The subdomain to which the translation is related. "Support" is default')
parser.add_argument('-e', '--existing', default=None, help='The name of an existing translation item')
parser.add_argument('-n', '--new', default=None, help='If a translation does not yet exist in the subdomain, enter a string.')
args = parser.parse_args()

# The option is None both when it is omitted and when "-s" is given without a
# value (nargs='?'), so the default is applied here rather than via argparse.
if args.subdomain is None:
    args.subdomain = 'support'
    print("Using default subdomain: 'support' (Z1)")

if args.existing and args.new:
    sys.exit("Cannot enter both an existing translation object AND a new translation string.")
elif args.new:
    # A new string is translated into every locale enabled on the subdomain.
    locales = get_subdomain_locales(args.subdomain)
    longest = get_longest_translations(
        get_google_translations(
            args.new,
            locales=locales,
            google_langs=google_translate_langs,
        )
    )
    display_longest_translations(longest)
elif args.existing:
    # Existing items are looked up against the fixed zendesk_official_langs
    # list inside get_zendesk_translations(), so the subdomain's own locale
    # list is not needed and is no longer fetched here.
    longest = get_longest_translations(
        get_zendesk_translations(args.subdomain, args.existing)
    )
    display_longest_translations(longest)
else:
    sys.exit("Please enter either an existing translation object OR a new translation string")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment