-
-
Save DollarAkshay/ba269dfd435d65d301ffa89910cfc933 to your computer and use it in GitHub Desktop.
import json | |
import googletrans | |
import os | |
from googletrans import Translator | |
def translateString(data, destLangCode):
    """Recursively translate every string in a JSON-like structure from English.

    Args:
        data: A string, or a dict/list (nested to any depth) whose leaves are
            JSON values. Dict keys are kept as-is; only values are translated.
        destLangCode: Target language code, passed to the translator as
            ``dest``.

    Returns:
        A new structure with the same shape as ``data`` in which every
        non-blank string leaf has been replaced by its translation. Non-string
        leaves (numbers, booleans, ``None``) and blank strings are returned
        unchanged.
    """
    if isinstance(data, dict):
        return {k: translateString(v, destLangCode) for k, v in data.items()}
    if isinstance(data, list):
        # JSON arrays are walked element by element so nested strings are
        # translated too, instead of handing the whole list to the translator.
        return [translateString(item, destLangCode) for item in data]
    if not isinstance(data, str) or not data.strip():
        # Nothing to translate: keep numbers/booleans/None and blank strings
        # untouched rather than sending them to the translation service.
        return data
    # `translator` is the module-level Translator instance; it is only read
    # here, so no `global` declaration is needed.
    return translator.translate(data, src='en', dest=destLangCode).text
# Main Code

# Language code -> English language name. The name is used below for the
# progress message and as the base name of each output file.
# NOTE(review): appears to mirror googletrans' own language table - confirm
# before replacing it with the library's constant.
all_languages = {
    'af': 'afrikaans', 'sq': 'albanian', 'am': 'amharic', 'ar': 'arabic',
    'hy': 'armenian', 'az': 'azerbaijani', 'eu': 'basque', 'be': 'belarusian',
    'bn': 'bengali', 'bs': 'bosnian', 'bg': 'bulgarian', 'ca': 'catalan',
    'ceb': 'cebuano', 'ny': 'chichewa', 'zh-cn': 'chinese (simplified)',
    'zh-tw': 'chinese (traditional)', 'co': 'corsican', 'hr': 'croatian',
    'cs': 'czech', 'da': 'danish', 'nl': 'dutch', 'en': 'english',
    'eo': 'esperanto', 'et': 'estonian', 'tl': 'filipino', 'fi': 'finnish',
    'fr': 'french', 'fy': 'frisian', 'gl': 'galician', 'ka': 'georgian',
    'de': 'german', 'el': 'greek', 'gu': 'gujarati', 'ht': 'haitian creole',
    'ha': 'hausa', 'haw': 'hawaiian', 'iw': 'hebrew', 'he': 'hebrew',
    'hi': 'hindi', 'hmn': 'hmong', 'hu': 'hungarian', 'is': 'icelandic',
    'ig': 'igbo', 'id': 'indonesian', 'ga': 'irish', 'it': 'italian',
    'ja': 'japanese', 'jw': 'javanese', 'kn': 'kannada', 'kk': 'kazakh',
    'km': 'khmer', 'ko': 'korean', 'ku': 'kurdish (kurmanji)', 'ky': 'kyrgyz',
    'lo': 'lao', 'la': 'latin', 'lv': 'latvian', 'lt': 'lithuanian',
    'lb': 'luxembourgish', 'mk': 'macedonian', 'mg': 'malagasy', 'ms': 'malay',
    'ml': 'malayalam', 'mt': 'maltese', 'mi': 'maori', 'mr': 'marathi',
    'mn': 'mongolian', 'my': 'myanmar (burmese)', 'ne': 'nepali',
    'no': 'norwegian', 'or': 'odia', 'ps': 'pashto', 'fa': 'persian',
    'pl': 'polish', 'pt': 'portuguese', 'pa': 'punjabi', 'ro': 'romanian',
    'ru': 'russian', 'sm': 'samoan', 'gd': 'scots gaelic', 'sr': 'serbian',
    'st': 'sesotho', 'sn': 'shona', 'sd': 'sindhi', 'si': 'sinhala',
    'sk': 'slovak', 'sl': 'slovenian', 'so': 'somali', 'es': 'spanish',
    'su': 'sundanese', 'sw': 'swahili', 'sv': 'swedish', 'tg': 'tajik',
    'ta': 'tamil', 'te': 'telugu', 'th': 'thai', 'tr': 'turkish',
    'uk': 'ukrainian', 'ur': 'urdu', 'ug': 'uyghur', 'uz': 'uzbek',
    'vi': 'vietnamese', 'cy': 'welsh', 'xh': 'xhosa', 'yi': 'yiddish',
    'yo': 'yoruba', 'zu': 'zulu',
}
# The English source file is read from, and every translated file is written
# to, the directory that contains this script.
dirname = os.path.dirname(__file__)
src_filename = os.path.join(dirname, 'english.json')

# Language codes to generate; each must be a key of `all_languages`.
destLangCodeList = [
    'hi',
    'kn',
    'or',
    'bn',
    'gu',
    'pa',
    'ml',
    'ta',
    'te',
]

translator = Translator()

# The source document is identical for every target language, so parse it once
# instead of re-reading the file on each loop iteration. translateString builds
# a new structure and never mutates `data`, so sharing it is safe.
with open(src_filename, 'r', encoding="utf-8") as fin:
    data = json.load(fin)

for destLangCode in destLangCodeList:
    language_name = all_languages[destLangCode]
    # flush=True so the progress message is visible while the (slow)
    # translation runs; without a trailing newline it may stay buffered.
    print(f'Starting translation for {language_name} ... ', end="", flush=True)

    translated_json = translateString(data, destLangCode)

    # Output file is named after the language, e.g. "hindi.json".
    dest_filename = os.path.join(dirname, language_name + '.json')
    with open(dest_filename, 'w', encoding="utf-8") as fout:
        # ensure_ascii=False keeps the translated text as readable UTF-8
        # rather than \uXXXX escapes.
        json.dump(translated_json, fout, indent=4, ensure_ascii=False)
    print('done')
My code in scrapper.py:
import requests
import time
from bs4 import BeautifulSoup
from googletrans import Translator
class FastWebScrapingAPI:
    """Scrape article summaries from thehackernews.com label pages."""

    def scrape_data(self, categories):
        """Fetch one label page and return its articles as a list of dicts.

        Args:
            categories: Label name interpolated into the
                ``/search/label/<categories>`` URL.

        Returns:
            A list with one dict per article, with the keys ``title_en``,
            ``desc_en``, ``title_fr``, ``desc_fr``, ``timestamp`` and
            ``image_url``. Articles lacking a title or description element
            are skipped; ``image_url`` is ``None`` when no image is found.
        """
        url_news = f"https://thehackernews.com/search/label/{categories}"
        # requests waits indefinitely by default; bound the wait so a stalled
        # server cannot hang the caller forever.
        res = requests.get(url_news, timeout=10)
        soup = BeautifulSoup(res.content, "html.parser")
        articles = soup.find_all("a", {"class": "story-link"})

        scrappedNews = []
        for article in articles:
            title_tag = article.find("h2", {"class": "home-title"})
            desc_tag = article.find("div", {"class": "home-desc"})
            if title_tag is None or desc_tag is None:
                # find() returns None when the element is absent; skip the
                # malformed card instead of aborting the whole scrape.
                continue

            image_box = article.find("div", {"class": "img-ratio"})
            image_tag = image_box.img if image_box is not None else None
            image_url = image_tag.get("data-src") if image_tag is not None else None

            # NOTE: the French fields are still placeholders. To translate,
            # replace them with
            #   translator.translate(text, src='en', dest='fr').text
            # using a googletrans Translator instance.
            scrappedNews.append({
                "title_en": title_tag.text.strip(),
                "desc_en": desc_tag.text.strip(),
                "title_fr": "French title",
                "desc_fr": "French description",
                "timestamp": time.time(),
                "image_url": image_url,
            })
        return scrappedNews
I am getting this error => AttributeError: 'NoneType' object has no attribute 'group'
I am getting this error => AttributeError: 'NoneType' object has no attribute 'group'
same here
@SNEHAASHISH, @myselfhimself In case you haven't solved this yet, uninstall googletrans:
pip uninstall googletrans
And then install the new version with
pip install googletrans==4.0.0rc1
How can I modify the code to update dict values in my FastAPI app?
For example, I want to take the value of "desc_en", translate it using your code's logic, and update the value of "desc_fr" with the translated string.