thundergolfer · January 27, 2017 01:45
diff --git a/longest_translation.py b/longest_translation.py

 # coding: utf-8

 # In[35]:

 import argparse
 import html
 import json
 import requests
 from requests.utils import quote
 import re
 import sys
 from bs4 import BeautifulSoup


 # In[ ]:
 # In[37]:

 def get_subdomain_locales(subdomain):
    """Fetch the locales listed by a Zendesk subdomain.

    Performs a GET against the subdomain's ``/api/v2/locales.json``
    endpoint, decodes the body as UTF-8 JSON and returns the value stored
    under its ``'locales'`` key.
    """
    endpoint = 'https://{}.zendesk.com/api/v2/locales.json'.format(subdomain)
    payload = requests.get(endpoint).content.decode('UTF-8')
    return json.loads(payload)['locales']


 def zen_locale_to_googlelang(zen_locale, google_langs):
    """Map a Zendesk locale record to a Google Translate language code.

    Matching is attempted in this order:
      1. the full Zendesk code (e.g. 'zh-CN') against the Google codes,
      2. the part of the code before the first '-' (e.g. 'en' from 'en-US'),
      3. the English language name against the Google language names,
         ignoring case.

    Args:
        zen_locale: dict with at least the keys 'locale' (the code) and
            'presentation_name'.
        google_langs: dict mapping English language name -> Google code.

    Returns:
        The matching Google code, or None when nothing matches.
    """
    code = zen_locale['locale']
    # Assumes presentation_name looks like '<English name> - <native name>';
    # only the part before ' - ' is used. TODO confirm against the API.
    english_locale_name = zen_locale['presentation_name'].split(' - ')[0]
    google_codes = set(google_langs.values())

    # try match code
    if code in google_codes:
        return code
    # try match 'part-1' of code
    code_first_part = code.split('-')[0]
    if code_first_part in google_codes:
        return code_first_part
    # failed to match code, try matching against name
    wanted = english_locale_name.strip().lower()
    for name, google_code in google_langs.items():
        if name.lower() == wanted:
            return google_code
    # failed to match
    return None

 def get_zendesk_translations(subdomain, *trans_items):
    """Collect the Zendesk translations of one translation key.

    Only the first entry of ``trans_items`` is looked up. For every locale
    code in the module-level ``zendesk_official_langs`` the locale endpoint
    is requested with ``include=translations`` and the key as ``strings``.
    Locales whose response lacks the key print "Missing Translation" and
    are skipped. The collected dict is printed before it is returned.

    Returns:
        dict mapping the translation key to the list of translated strings.
    """
    item = trans_items[0]  # only the first key is handled for now
    found = []
    zendesk_trans = {item: found}
    url_template = 'https://{}.zendesk.com/api/v2/locales/{}.json?'
    params = 'include=translations&strings={}'.format(item)

    for locale_code in zendesk_official_langs.values():
        response = requests.get(url_template.format(subdomain, locale_code) + params)
        body = json.loads(response.content.decode('UTF-8'))
        # A KeyError anywhere along the lookup is treated as "not translated".
        try:
            found.append(body['locale']['translations'][item])
        except KeyError:
            print("Missing Translation")

    print(zendesk_trans)
    return zendesk_trans

 def get_longest_translations(translations):
    """Pick the longest translated string for every translation key.

    Args:
        translations: dict mapping a key to a list of translated strings.

    Returns:
        dict mapping each key to its longest string (the first one on ties).
        A key with an empty list maps to '' rather than raising ValueError,
        so a key that is missing in every locale does not crash the caller.
    """
    return {
        key: max(candidates, key=len, default='')
        for key, candidates in translations.items()
    }

 def display_longest_translations(longest):
    """Print a short report for each entry of ``longest``.

    For every key the report shows the key, its longest translation wrapped
    in single quotes and that translation's length in characters. One blank
    line is printed after all entries.
    """
    for item, text in longest.items():
        quoted = "'" + text + "'"
        print("The longest translation for: ")
        print(item)
        print("is", quoted)
        print("which is {} characters long.".format(len(text)))
    print()

 # Request headers passed to requests.get() by g_translate().
 # The User-Agent value is a single string literal continued across lines
 # with backslashes, so any whitespace at the start of a continuation line
 # becomes part of the header value.
 headers = {'User-Agent':
 "Mozilla/4.0 (\
 compatible;\
 MSIE 6.0;\
 Windows NT 5.1;\
 SV1;\
 .NET CLR 1.1.4322;\
 .NET CLR 2.0.50727;\
 .NET CLR 3.0.04506.30\
 )"}

 # https://github.com/mouuff/mtranslate/blob/master/mtranslate/core.py
 def g_translate(to_translate, to_language="auto", from_language="auto"):
    """Translate a string by scraping the mobile Google Translate page.

    Language arguments are short codes (French = fr, English = en,
    Spanish = es, ...); both default to "auto".

    Example (illustrative):
        print(g_translate("salut tu vas bien?", "en"))
        hello you alright?

    Returns:
        The HTML-unescaped text of the first ``class="t0"`` element found
        in the response, or "" when the page contains none.
    """
    url = "http://translate.google.com/m?hl=%s&sl=%s&q=%s" % (
        to_language, from_language, quote(to_translate))
    page = requests.get(url, headers=headers).content.decode("utf-8")
    matches = re.findall(r'class="t0">(.*?)<', page)
    if not matches:
        return ""
    return html.unescape(matches[0])

 def get_google_translations(*strings, locales, google_langs):
    """Translate each string into every Google-supported locale language.

    Args:
        *strings: source strings to translate.
        locales: iterable of Zendesk locale dicts, as accepted by
            zen_locale_to_googlelang().
        google_langs: dict mapping English language name -> Google code.

    Returns:
        dict mapping each source string to the list of its translations,
        one per distinct target language.
    """
    # Map Zendesk locales to Google Translate languages. The mapped code must
    # be collected explicitly: rebinding the loop variable would discard it
    # and the raw Zendesk code would be sent to Google instead.
    target_langs = []
    for zen_locale in locales:
        lang = zen_locale_to_googlelang(zen_locale, google_langs)
        # Skip locales with no Google equivalent and languages already
        # queued (e.g. 'en-US' and 'en-GB' both map to 'en').
        if lang is not None and lang not in target_langs:
            target_langs.append(lang)

    return {
        s: [g_translate(s, lang) for lang in target_langs]
        for s in strings
    }


 # In[13]:

 # English language name -> language code used when calling Google Translate.
 # zen_locale_to_googlelang() compares both the codes and the (lower-cased)
 # names against Zendesk locale data, so the spelling of the keys matters.
 google_translate_langs = {
 'Afrikaans' : 'af',
 'Albanian' : 'sq',
 'Amharic' : 'am',
 'Arabic' : 'ar',
 'Armenian' : 'hy',
 'Azerbaijani' : 'az',
 'Basque' : 'eu',
 'Belarusian' : 'be',
 'Bengali' : 'bn',
 'Bosnian' : 'bs',
 'Bulgarian' : 'bg',
 'Catalan' : 'ca',
 'Cebuano' : 'ceb',
 'Chichewa' : 'ny',
 'Chinese (Simplified)' : 'zh-CN',
 'Chinese (Traditional)' : 'zh-TW',
 'Corsican' : 'co',
 'Croatian' : 'hr',
 'Czech' : 'cs',
 'Danish' : 'da',
 'Dutch' : 'nl',
 'English' : 'en',
 'Esperanto' : 'eo',
 'Estonian' : 'et',
 'Filipino' : 'tl',
 'Finnish' : 'fi',
 'French' : 'fr',
 'Frisian' : 'fy',
 'Galician' : 'gl',
 'Georgian' : 'ka',
 'German' : 'de',
 'Greek' : 'el',
 'Gujarati' : 'gu',
 'Haitian Creole' : 'ht',
 'Hausa' : 'ha',
 'Hawaiian' : 'haw',
 'Hebrew' : 'iw',
 'Hindi' : 'hi',
 'Hmong' : 'hmn',
 'Hungarian' : 'hu',
 'Icelandic' : 'is',
 'Igbo' : 'ig',
 'Indonesian' : 'id',
 'Irish' : 'ga',
 'Italian' : 'it',
 'Japanese' : 'ja',
 'Javanese' : 'jw',
 'Kannada' : 'kn',
 'Kazakh' : 'kk',
 'Khmer' : 'km',
 'Korean' : 'ko',
 'Kurdish' : 'ku',
 'Kyrgyz' : 'ky',
 'Lao' : 'lo',
 'Latin' : 'la',
 'Latvian' : 'lv',
 'Lithuanian' : 'lt',
 'Luxembourgish' : 'lb',
 'Macedonian' : 'mk',
 'Malagasy' : 'mg',
 'Malay' : 'ms',
 'Malayalam' : 'ml',
 'Maltese' : 'mt',
 'Maori' : 'mi',
 'Marathi' : 'mr',
 'Mongolian' : 'mn',
 'Burmese' : 'my',
 'Nepali' : 'ne',
 'Norwegian' : 'no',
 'Pashto' : 'ps',
 'Persian' : 'fa',
 'Polish' : 'pl',
 'Portuguese' : 'pt',
 'Punjabi' : 'pa',
 'Romanian' : 'ro',
 'Russian' : 'ru',
 'Samoan' : 'sm',
 'Scots Gaelic' : 'gd',
 'Serbian' : 'sr',
 'Sesotho' : 'st',
 'Shona' : 'sn',
 'Sindhi' : 'sd',
 'Sinhala' : 'si',
 'Slovak' : 'sk',
 'Slovenian' : 'sl',
 'Somali' : 'so',
 'Spanish' : 'es',
 'Sundanese' : 'su',
 'Swahili' : 'sw',
 'Swedish' : 'sv',
 'Tajik' : 'tg',
 'Tamil' : 'ta',
 'Telugu' : 'te',
 'Thai' : 'th',
 'Turkish' : 'tr',
 'Ukrainian' : 'uk',
 'Urdu' : 'ur',
 'Uzbek' : 'uz',
 'Vietnamese' : 'vi',
 'Welsh' : 'cy',
 'Xhosa' : 'xh',
 'Yiddish' : 'yi',
 'Yoruba' : 'yo',
 'Zulu' : 'zu',
 }


 # In[14]:

 # Language name -> Zendesk locale code; get_zendesk_translations() requests
 # one locale endpoint per code in this table.
 zendesk_official_langs = {
     'Arabic': 'ar',
     'Bulgarian': 'bg',
     'Catalan': 'ca',
     'Czech': 'cs',
     'Danish': 'da',
     'German': 'de',
     'Greek': 'el',
     'English (Canada)': 'en-CA',
     'English (GB)': 'en-GB',
     'English': 'en-US',
     'Spanish': 'es',
     'Spanish (LATAM)': 'es-419',
     'Spanish (Spain)': 'es-ES',
     'Spanish (Mexico)': 'es-MX',
     'Estonian': 'et',
     'Persian': 'fa',
     'Finnish': 'fi',
     'Filipino': 'fil',
     'French': 'fr',
     'French (Canada)': 'fr-CA',
     'French (France)': 'fr-FR',
     'Hebrew': 'he',
     'Hindi': 'hi',
     'Croatian': 'hr',
     'Indonesian': 'id',
     'Icelandic': 'is',
     'Italian': 'it',
     'Japanese': 'ja',
     'Korean': 'ko',
     'Lithuanian': 'lt',
     'Latvian': 'lv',
     'Malay': 'ms',
     'Dutch': 'nl',
     'Norwegian': 'no',
     'Polish': 'pl',
     'Portuguese': 'pt',
     'Brazilian Portuguese': 'pt-BR',
     'Romanian': 'ro',
     'Russian': 'ru',
     'Slovakian': 'sk',
     'Slovenian': 'sl',
     'Serbian': 'sr',
     'Swedish': 'sv',
     'Thai': 'th',
     'Turkish': 'tr',
     'Ukrainian': 'uk',
     'Vietnamese': 'vi',
     'Simplified Chinese': 'zh-CN',
     'Traditional Chinese': 'zh-TW',
 }


 # In[15]:

 # Language name -> Zendesk locale code for the crowdsourced languages.
 # Not referenced by the rest of the code shown here.
 zendesk_crowdsourced_langs = {
     'Afrikaans': 'af',
     'Azerbaijani': 'az',
     'Catalan': 'ca',
     'Estonian': 'et',
     'Basque': 'eu',
     'Croatian': 'hr',
     'Georgian': 'ka',
     'Hungarian': 'hu',
     'Icelandic': 'is',
     'Lithuanian': 'lt',
     'Latvian': 'lv',
     'Pashto': 'ps',
     'Slovakian': 'sk',
     'Slovenian': 'sl',
     'Albanian': 'sq',
     'Serbian': 'sr',
     'Serbian (Montenegro)': 'sr-ME',
     'Ukrainian': 'uk',
 }

 # Command-line entry point: exactly one of --existing / --new must be given.
 parser = argparse.ArgumentParser()
 parser.add_argument('-s', '--subdomain', nargs='?', help='The subdomain to which the translation is related. "Support" is default')
 parser.add_argument('-e', '--existing', default=None, help='The name of an existing translation item')
 parser.add_argument('-n', '--new', default=None, help='If a translation does not yet exist in the subdomain, enter a string.')
 args = parser.parse_args()

 # -s absent, or given without a value, leaves subdomain as None.
 if args.subdomain is None:
    args.subdomain = 'support'
    print("Using default subdomain: 'support' (Z1)")

 if args.existing and args.new:
    sys.exit("Cannot enter both an existing translation object AND a new translation string.")
 elif args.new:
    # New string: machine-translate it into the subdomain's locales.
    locales = get_subdomain_locales(args.subdomain)
    longest = get_longest_translations(
        get_google_translations(
            args.new,
            locales=locales,
            google_langs=google_translate_langs,
        )
    )
    display_longest_translations(longest)

 elif args.existing:
    # Existing key: read the translations Zendesk already has. The locale
    # list is not needed on this path, so it is not requested.
    longest = get_longest_translations(
        get_zendesk_translations(args.subdomain, args.existing)
    )
    display_longest_translations(longest)

 else:
    sys.exit("Please enter either an existing translation object OR a new translation string")

	# coding: utf-8

	# In[35]:

	import argparse
	import html
	import json
	import requests
	from requests.utils import quote
	import re
	import sys
	from bs4 import BeautifulSoup


	# In[ ]:
	# In[37]:

	def get_subdomain_locales(subdomain):
		"""Fetch the locales listed by a Zendesk subdomain.

		Performs a GET against the subdomain's ``/api/v2/locales.json``
		endpoint, decodes the body as UTF-8 JSON and returns the value
		stored under its ``'locales'`` key.
		"""
		locales_url = 'https://{}.zendesk.com/api/v2/locales.json'.format(subdomain)
		r = requests.get(locales_url)
		content = json.loads(r.content.decode('UTF-8'))

		return content['locales']


	def zen_locale_to_googlelang(zen_locale, google_langs):
		"""Map a Zendesk locale record to a Google Translate language code.

		Matching is attempted in this order:
		  1. the full Zendesk code (e.g. 'zh-CN') against the Google codes,
		  2. the part of the code before the first '-' ('en' from 'en-US'),
		  3. the English language name against the Google language names,
		     ignoring case.

		Args:
			zen_locale: dict with at least the keys 'locale' (the code)
				and 'presentation_name'.
			google_langs: dict mapping English language name -> Google code.

		Returns:
			The matching Google code, or None when nothing matches.
		"""
		code = zen_locale['locale']
		# Assumes presentation_name looks like '<English name> - <native name>';
		# only the part before ' - ' is used. TODO confirm against the API.
		english_locale_name = zen_locale['presentation_name'].split(' - ')[0]
		google_codes = set(google_langs.values())

		# try match code
		if code in google_codes:
			return code
		# try match 'part-1' of code
		code_first_part = code.split('-')[0]
		if code_first_part in google_codes:
			return code_first_part
		# failed to match code, try matching against name
		wanted = english_locale_name.strip().lower()
		for name, google_code in google_langs.items():
			if name.lower() == wanted:
				return google_code
		# failed to match
		return None

	def get_zendesk_translations(subdomain, *trans_items):
		"""Collect the Zendesk translations of one translation key.

		Only the first entry of ``trans_items`` is looked up. For every
		locale code in the module-level ``zendesk_official_langs`` the locale
		endpoint is requested with ``include=translations`` and the key as
		``strings``. Locales whose response lacks the key print
		"Missing Translation" and are skipped. The collected dict is printed
		before it is returned.

		Returns:
			dict mapping the translation key to the list of translated
			strings.
		"""
		zendesk_trans = {}
		trans_object = trans_items[0] # only doing one atm
		zendesk_trans[trans_object] = []
		for l in zendesk_official_langs.keys():
			base_endpoint = 'https://{}.zendesk.com/api/v2/locales/{}.json?'.format(subdomain, zendesk_official_langs[l])
			strings = trans_object # currently only passing one translation object
			query_params = 'include=translations&strings={}'.format(strings)

			r = requests.get(base_endpoint + query_params)
			content = json.loads(r.content.decode('UTF-8'))
			# A KeyError anywhere along the lookup is treated as
			# "translation missing" for this locale.
			try:
				trans_str = content['locale']['translations'][trans_object]
				zendesk_trans[trans_object].append(trans_str)
			except KeyError:
				print("Missing Translation")
				continue

		print(zendesk_trans)
		return zendesk_trans

	def get_longest_translations(translations):
		"""Pick the longest translated string for every translation key.

		Args:
			translations: dict mapping a key to a list of translated strings.

		Returns:
			dict mapping each key to its longest string (the first one on
			ties). A key with an empty list maps to '' rather than raising
			ValueError, so a key that is missing in every locale does not
			crash the caller.
		"""
		return {
			key: max(candidates, key=len, default='')
			for key, candidates in translations.items()
		}

	def display_longest_translations(longest):
		"""Print a short report for each entry of ``longest``.

		For every key the report shows the key, its longest translation
		wrapped in single quotes and that translation's length in
		characters. One blank line is printed after all entries.
		"""
		for key in longest.keys():
			print("The longest translation for: ")
			print(key)
			print("is", "'" + longest[key] + "'")
			print("which is {} characters long.".format(len(longest[key])))
		print()

	# Request headers passed to requests.get() by g_translate().
	# The User-Agent value is a single string literal continued across lines
	# with backslashes, so any whitespace at the start of a continuation line
	# becomes part of the header value.
	headers = {'User-Agent':
	"Mozilla/4.0 (\
	compatible;\
	MSIE 6.0;\
	Windows NT 5.1;\
	SV1;\
	.NET CLR 1.1.4322;\
	.NET CLR 2.0.50727;\
	.NET CLR 3.0.04506.30\
	)"}

	# https://github.com/mouuff/mtranslate/blob/master/mtranslate/core.py
	def g_translate(to_translate, to_language="auto", from_language="auto"):
		"""Translate a string by scraping the mobile Google Translate page.

		Language arguments are short codes (French = fr, English = en,
		Spanish = es, ...); both default to "auto".

		Example (illustrative):
			print(g_translate("salut tu vas bien?", "en"))
			hello you alright?

		Returns:
			The HTML-unescaped text of the first ``class="t0"`` element
			found in the response, or "" when the page contains none.
		"""
		base_link = "http://translate.google.com/m?hl=%s&sl=%s&q=%s"
		to_translate = quote(to_translate)
		link = base_link % (to_language, from_language, to_translate)
		r = requests.get(link, headers=headers)
		raw_data = r.content
		data = raw_data.decode("utf-8")
		expr = r'class="t0">(.*?)<'
		re_result = re.findall(expr, data)
		if not re_result:
			return ""
		return html.unescape(re_result[0])

	def get_google_translations(*strings, locales, google_langs):
		"""Translate each string into every Google-supported locale language.

		Args:
			*strings: source strings to translate.
			locales: iterable of Zendesk locale dicts, as accepted by
				zen_locale_to_googlelang().
			google_langs: dict mapping English language name -> Google code.

		Returns:
			dict mapping each source string to the list of its
			translations, one per distinct target language.
		"""
		# Map Zendesk locales to Google Translate languages. The mapped code
		# must be collected explicitly: rebinding the loop variable would
		# discard it and the raw Zendesk code would be sent to Google.
		target_langs = []
		for zen_locale in locales:
			lang = zen_locale_to_googlelang(zen_locale, google_langs)
			# Skip locales with no Google equivalent and languages already
			# queued (e.g. 'en-US' and 'en-GB' both map to 'en').
			if lang is not None and lang not in target_langs:
				target_langs.append(lang)

		return {
			s: [g_translate(s, lang) for lang in target_langs]
			for s in strings
		}


	# In[13]:

	# English language name -> language code used when calling Google Translate.
	# zen_locale_to_googlelang() compares both the codes and the (lower-cased)
	# names against Zendesk locale data, so the spelling of the keys matters.
	google_translate_langs = {
	'Afrikaans' : 'af',
	'Albanian' : 'sq',
	'Amharic' : 'am',
	'Arabic' : 'ar',
	'Armenian' : 'hy',
	'Azerbaijani' : 'az',
	'Basque' : 'eu',
	'Belarusian' : 'be',
	'Bengali' : 'bn',
	'Bosnian' : 'bs',
	'Bulgarian' : 'bg',
	'Catalan' : 'ca',
	'Cebuano' : 'ceb',
	'Chichewa' : 'ny',
	'Chinese (Simplified)' : 'zh-CN',
	'Chinese (Traditional)' : 'zh-TW',
	'Corsican' : 'co',
	'Croatian' : 'hr',
	'Czech' : 'cs',
	'Danish' : 'da',
	'Dutch' : 'nl',
	'English' : 'en',
	'Esperanto' : 'eo',
	'Estonian' : 'et',
	'Filipino' : 'tl',
	'Finnish' : 'fi',
	'French' : 'fr',
	'Frisian' : 'fy',
	'Galician' : 'gl',
	'Georgian' : 'ka',
	'German' : 'de',
	'Greek' : 'el',
	'Gujarati' : 'gu',
	'Haitian Creole' : 'ht',
	'Hausa' : 'ha',
	'Hawaiian' : 'haw',
	'Hebrew' : 'iw',
	'Hindi' : 'hi',
	'Hmong' : 'hmn',
	'Hungarian' : 'hu',
	'Icelandic' : 'is',
	'Igbo' : 'ig',
	'Indonesian' : 'id',
	'Irish' : 'ga',
	'Italian' : 'it',
	'Japanese' : 'ja',
	'Javanese' : 'jw',
	'Kannada' : 'kn',
	'Kazakh' : 'kk',
	'Khmer' : 'km',
	'Korean' : 'ko',
	'Kurdish' : 'ku',
	'Kyrgyz' : 'ky',
	'Lao' : 'lo',
	'Latin' : 'la',
	'Latvian' : 'lv',
	'Lithuanian' : 'lt',
	'Luxembourgish' : 'lb',
	'Macedonian' : 'mk',
	'Malagasy' : 'mg',
	'Malay' : 'ms',
	'Malayalam' : 'ml',
	'Maltese' : 'mt',
	'Maori' : 'mi',
	'Marathi' : 'mr',
	'Mongolian' : 'mn',
	'Burmese' : 'my',
	'Nepali' : 'ne',
	'Norwegian' : 'no',
	'Pashto' : 'ps',
	'Persian' : 'fa',
	'Polish' : 'pl',
	'Portuguese' : 'pt',
	'Punjabi' : 'pa',
	'Romanian' : 'ro',
	'Russian' : 'ru',
	'Samoan' : 'sm',
	'Scots Gaelic' : 'gd',
	'Serbian' : 'sr',
	'Sesotho' : 'st',
	'Shona' : 'sn',
	'Sindhi' : 'sd',
	'Sinhala' : 'si',
	'Slovak' : 'sk',
	'Slovenian' : 'sl',
	'Somali' : 'so',
	'Spanish' : 'es',
	'Sundanese' : 'su',
	'Swahili' : 'sw',
	'Swedish' : 'sv',
	'Tajik' : 'tg',
	'Tamil' : 'ta',
	'Telugu' : 'te',
	'Thai' : 'th',
	'Turkish' : 'tr',
	'Ukrainian' : 'uk',
	'Urdu' : 'ur',
	'Uzbek' : 'uz',
	'Vietnamese' : 'vi',
	'Welsh' : 'cy',
	'Xhosa' : 'xh',
	'Yiddish' : 'yi',
	'Yoruba' : 'yo',
	'Zulu' : 'zu',
	}


	# In[14]:

	# Language name -> Zendesk locale code; get_zendesk_translations() requests
	# one locale endpoint per code in this table.
	zendesk_official_langs = {
		'Arabic': 'ar',
		'Bulgarian': 'bg',
		'Catalan': 'ca',
		'Czech': 'cs',
		'Danish': 'da',
		'German': 'de',
		'Greek': 'el',
		'English (Canada)': 'en-CA',
		'English (GB)': 'en-GB',
		'English': 'en-US',
		'Spanish': 'es',
		'Spanish (LATAM)': 'es-419',
		'Spanish (Spain)': 'es-ES',
		'Spanish (Mexico)': 'es-MX',
		'Estonian': 'et',
		'Persian': 'fa',
		'Finnish': 'fi',
		'Filipino': 'fil',
		'French': 'fr',
		'French (Canada)': 'fr-CA',
		'French (France)': 'fr-FR',
		'Hebrew': 'he',
		'Hindi': 'hi',
		'Croatian': 'hr',
		'Indonesian': 'id',
		'Icelandic': 'is',
		'Italian': 'it',
		'Japanese': 'ja',
		'Korean': 'ko',
		'Lithuanian': 'lt',
		'Latvian': 'lv',
		'Malay': 'ms',
		'Dutch': 'nl',
		'Norwegian': 'no',
		'Polish': 'pl',
		'Portuguese': 'pt',
		'Brazilian Portuguese': 'pt-BR',
		'Romanian': 'ro',
		'Russian': 'ru',
		'Slovakian': 'sk',
		'Slovenian': 'sl',
		'Serbian': 'sr',
		'Swedish': 'sv',
		'Thai': 'th',
		'Turkish': 'tr',
		'Ukrainian': 'uk',
		'Vietnamese': 'vi',
		'Simplified Chinese': 'zh-CN',
		'Traditional Chinese': 'zh-TW',
	}


	# In[15]:

	# Language name -> Zendesk locale code for the crowdsourced languages.
	# Not referenced by the rest of the code shown here.
	zendesk_crowdsourced_langs = {
		'Afrikaans': 'af',
		'Azerbaijani': 'az',
		'Catalan': 'ca',
		'Estonian': 'et',
		'Basque': 'eu',
		'Croatian': 'hr',
		'Georgian': 'ka',
		'Hungarian': 'hu',
		'Icelandic': 'is',
		'Lithuanian': 'lt',
		'Latvian': 'lv',
		'Pashto': 'ps',
		'Slovakian': 'sk',
		'Slovenian': 'sl',
		'Albanian': 'sq',
		'Serbian': 'sr',
		'Serbian (Montenegro)': 'sr-ME',
		'Ukrainian': 'uk',
	}

	# Command-line entry point: exactly one of --existing / --new must be given.
	parser = argparse.ArgumentParser()
	parser.add_argument('-s', '--subdomain', nargs='?', help='The subdomain to which the translation is related. "Support" is default')
	parser.add_argument('-e', '--existing', default=None, help='The name of an existing translation item')
	parser.add_argument('-n', '--new', default=None, help='If a translation does not yet exist in the subdomain, enter a string.')
	args = parser.parse_args()

	# -s absent, or given without a value, leaves subdomain as None.
	if args.subdomain is None:
		args.subdomain = 'support'
		print("Using default subdomain: 'support' (Z1)")

	if args.existing and args.new:
		sys.exit("Cannot enter both an existing translation object AND a new translation string.")
	elif args.new:
		# New string: machine-translate it into the subdomain's locales.
		locales = get_subdomain_locales(args.subdomain)
		longest = get_longest_translations(
			get_google_translations(
				args.new,
				locales=locales,
				google_langs=google_translate_langs,
			)
		)
		display_longest_translations(longest)

	elif args.existing:
		# Existing key: read the translations Zendesk already has. The locale
		# list is not needed on this path, so it is not requested.
		longest = get_longest_translations(
			get_zendesk_translations(args.subdomain, args.existing)
		)
		display_longest_translations(longest)

	else:
		sys.exit("Please enter either an existing translation object OR a new translation string")
No results found