Script to translate the strings in a JSON file with the googletrans library in Python. Useful for web language localization.
import json
import os

from googletrans import Translator


def translateString(data, destLangCode):
    # Recursively translate every string value in a (possibly nested) dict.
    # Note: only dicts and strings are handled; lists or numeric values in
    # the JSON would need extra isinstance cases.
    global translator
    if isinstance(data, dict):
        return {k: translateString(v, destLangCode) for k, v in data.items()}
    else:
        return translator.translate(data, src='en', dest=destLangCode).text


# Main Code
all_languages = {
    'af': 'afrikaans', 'sq': 'albanian', 'am': 'amharic', 'ar': 'arabic', 'hy': 'armenian',
    'az': 'azerbaijani', 'eu': 'basque', 'be': 'belarusian', 'bn': 'bengali', 'bs': 'bosnian',
    'bg': 'bulgarian', 'ca': 'catalan', 'ceb': 'cebuano', 'ny': 'chichewa',
    'zh-cn': 'chinese (simplified)', 'zh-tw': 'chinese (traditional)', 'co': 'corsican',
    'hr': 'croatian', 'cs': 'czech', 'da': 'danish', 'nl': 'dutch', 'en': 'english',
    'eo': 'esperanto', 'et': 'estonian', 'tl': 'filipino', 'fi': 'finnish', 'fr': 'french',
    'fy': 'frisian', 'gl': 'galician', 'ka': 'georgian', 'de': 'german', 'el': 'greek',
    'gu': 'gujarati', 'ht': 'haitian creole', 'ha': 'hausa', 'haw': 'hawaiian',
    'iw': 'hebrew', 'he': 'hebrew', 'hi': 'hindi', 'hmn': 'hmong', 'hu': 'hungarian',
    'is': 'icelandic', 'ig': 'igbo', 'id': 'indonesian', 'ga': 'irish', 'it': 'italian',
    'ja': 'japanese', 'jw': 'javanese', 'kn': 'kannada', 'kk': 'kazakh', 'km': 'khmer',
    'ko': 'korean', 'ku': 'kurdish (kurmanji)', 'ky': 'kyrgyz', 'lo': 'lao', 'la': 'latin',
    'lv': 'latvian', 'lt': 'lithuanian', 'lb': 'luxembourgish', 'mk': 'macedonian',
    'mg': 'malagasy', 'ms': 'malay', 'ml': 'malayalam', 'mt': 'maltese', 'mi': 'maori',
    'mr': 'marathi', 'mn': 'mongolian', 'my': 'myanmar (burmese)', 'ne': 'nepali',
    'no': 'norwegian', 'or': 'odia', 'ps': 'pashto', 'fa': 'persian', 'pl': 'polish',
    'pt': 'portuguese', 'pa': 'punjabi', 'ro': 'romanian', 'ru': 'russian', 'sm': 'samoan',
    'gd': 'scots gaelic', 'sr': 'serbian', 'st': 'sesotho', 'sn': 'shona', 'sd': 'sindhi',
    'si': 'sinhala', 'sk': 'slovak', 'sl': 'slovenian', 'so': 'somali', 'es': 'spanish',
    'su': 'sundanese', 'sw': 'swahili', 'sv': 'swedish', 'tg': 'tajik', 'ta': 'tamil',
    'te': 'telugu', 'th': 'thai', 'tr': 'turkish', 'uk': 'ukrainian', 'ur': 'urdu',
    'ug': 'uyghur', 'uz': 'uzbek', 'vi': 'vietnamese', 'cy': 'welsh', 'xh': 'xhosa',
    'yi': 'yiddish', 'yo': 'yoruba', 'zu': 'zulu'
}

dirname = os.path.dirname(__file__)
src_filename = os.path.join(dirname, 'english.json')

# Target languages: Hindi, Kannada, Odia, Bengali, Gujarati, Punjabi,
# Malayalam, Tamil, Telugu
destLangCodeList = ['hi', 'kn', 'or', 'bn', 'gu', 'pa', 'ml', 'ta', 'te']

translator = Translator()
for destLangCode in destLangCodeList:
    print('Starting translation for {:} ... '.format(all_languages[destLangCode]), end="")
    with open(src_filename, 'r', encoding="utf-8") as fin:
        data = json.load(fin)
    translated_json = translateString(data, destLangCode)
    dest_filename = os.path.join(dirname, all_languages[destLangCode] + '.json')
    with open(dest_filename, 'w', encoding="utf-8") as fout:
        json_dumps_str = json.dumps(translated_json, indent=4, ensure_ascii=False)
        fout.write(json_dumps_str)
    print('done')
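
For a quick sanity check before pointing the script at a full localization file, the same recursion can be exercised on a small in-memory dict. A minimal sketch (the sample keys and the 'hi' target are illustrative, not part of the script above):

from googletrans import Translator

translator = Translator()

def translate_dict(data, dest):
    # Same idea as translateString above: recurse into dicts, translate string leaves.
    if isinstance(data, dict):
        return {k: translate_dict(v, dest) for k, v in data.items()}
    return translator.translate(data, src='en', dest=dest).text

sample = {"menu": {"home": "Home", "settings": "Settings"}, "greeting": "Welcome back!"}
print(translate_dict(sample, 'hi'))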
I am getting this error => AttributeError: 'NoneType' object has no attribute 'group'
I am getting this error => AttributeError: 'NoneType' object has no attribute 'group'
same here
@SNEHAASHISH, @myselfhimself In case you haven't solved this yet, uninstall googletrans:
pip uninstall googletrans
and then install the new release candidate with:
pip install googletrans==4.0.0rc1
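
After reinstalling, a one-liner like this should confirm the fix (any short string works):

from googletrans import Translator

# If this prints a French translation instead of raising
# AttributeError: 'NoneType' object has no attribute 'group',
# the 4.0.0rc1 install worked.
print(Translator().translate("hello", src='en', dest='fr').text)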
My code in scrapper.py:

import requests
import time
from bs4 import BeautifulSoup
from googletrans import Translator


class FastWebScrapingAPI:
    def scrape_data(self, categories):
        # Fetch the category page and pull out each article card.
        url_news = f"https://thehackernews.com/search/label/{categories}"
        res = requests.get(url_news)
        soup = BeautifulSoup(res.content, "html.parser")
        articles = soup.find_all("a", {"class": "story-link"})
        scrappedNews = []
        translator = Translator()
        for article in articles:
            news = {
                "title_en": article.find("h2", {"class": "home-title"}).text.strip(),
                "desc_en": article.find("div", {"class": "home-desc"}).text.strip(),
                "title_fr": "",
                "desc_fr": "",
                "timestamp": time.time(),
                "image_url": article.find("div", {"class": "img-ratio"}).img.get("data-src")
            }
            # translate_title = translator.translate(str(list(news.values())[0]), src='en', dest='fr')
            # translate_desc = translator.translate(list(news.values())[1], src='en', dest='fr')
            # print(translate_title.text)
            # print(translate_desc.text)
            str1 = "French title"
            str2 = "French description"
            # str1_translate = translator.translate(str1, src='en', dest='fr')
            # str2_translate = translator.translate(str2, src='en', dest='fr')
            # news.update({"title_fr": str1_translate.text})
            # news.update({"desc_fr": str2_translate.text})
            news.update({"title_fr": str1})
            news.update({"desc_fr": str2})
            scrappedNews.append(news)
        return scrappedNews
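
For reference, calling it from another module would look something like this (the "Vulnerability" label is just an example category from the site):

from scrapper import FastWebScrapingAPI

api = FastWebScrapingAPI()
for item in api.scrape_data("Vulnerability"):
    print(item["title_en"])
    print(item["image_url"])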