The code
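A batch translator for static HTML pages: the script walks a folder of .html files, machine-translates the visible text (title, meta description, and the article elements between the ARTICOL START / ARTICOL FINAL comment markers) with googletrans, rewrites the language segment of canonical and Open Graph URLs and of the lang attribute, and saves each result as <name>_<language>.html.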
import re
import os

from bs4 import BeautifulSoup
from bs4.formatter import HTMLFormatter
from googletrans import Translator

translator = Translator()

class UnsortedAttributes(HTMLFormatter):
    """Serializer that keeps tag attributes in their original order."""
    def attributes(self, tag):
        for k, v in tag.attrs.items():
            yield k, v

files_from_folder = r"c:\Folder3\1"  # folder holding the source HTML files
use_translate_folder = True          # write output to a 'translated' subfolder
destination_language = 'ru'
extension_file = ".html"
directory = os.fsencode(files_from_folder)
def recursively_translate(node):
    """Translate every non-empty text node under `node` in place."""
    for x in range(len(node.contents)):
        if isinstance(node.contents[x], str):
            if node.contents[x].strip() != '':
                if 'pastebin.com' not in node.contents[x]:  # leave pastebin snippets untouched
                    try:
                        node.contents[x].replace_with(translator.translate(node.contents[x], dest=destination_language).text)
                    except Exception:
                        pass  # keep the original text if the translation call fails
        elif node.contents[x] is not None:
            recursively_translate(node.contents[x])

def in_section(soup, element, begin_marker='<!-- ARTICOL START -->', end_marker='<!-- ARTICOL FINAL -->'):
    """True if `element` sits between the two comment markers in the page source."""
    html_text = str(soup)
    return html_text.index(begin_marker) < html_text.index(str(element)) < html_text.index(end_marker)
amount = 1
for file in os.listdir(directory):
    filename = os.fsdecode(file)
    print(filename)
    if filename == 'y_key_e479323ce281e459.html' or filename == 'directory.html':
        continue
    if filename.endswith(extension_file):
        with open(os.path.join(files_from_folder, filename), encoding='utf-8') as html:
            # Wrap the page in <pre> so html.parser preserves the original
            # whitespace; the wrapper is stripped again when the file is written.
            soup = BeautifulSoup('<pre>' + html.read() + '</pre>', 'html.parser')
        # Meta tags replacement
        meta_tags = soup.find_all('meta', {'http-equiv': 'Content-Language'}) + \
                    soup.find_all('meta', {'property': 'og:locale'}) + \
                    soup.find_all('script', {'type': 'application/ld+json'})
        for meta_tag in meta_tags:
            if meta_tag.has_attr('content'):
                # Language codes such as 'en' or 'en_US' become the destination code.
                meta_tag['content'] = re.sub(r'\b\w+\b', destination_language, meta_tag['content'])
            elif meta_tag.string:
                meta_tag.string = re.sub(r'https://neculaifantanaru.com/\b\w+\b/about.html', f'https://neculaifantanaru.com/{destination_language}/about.html', meta_tag.string)
        tags_to_update = soup.find_all('meta', {'http-equiv': 'Content-Language'}) + \
                         soup.find_all('meta', {'property': 'og:locale'}) + \
                         soup.find_all('meta', {'property': 'og:url'}) + \
                         soup.find_all('link', {'rel': 'canonical'}) + \
                         soup.find_all('html') + \
                         soup.find_all('script', {'type': 'application/ld+json'})
        for tag in tags_to_update:
            if tag.has_attr('content'):
                tag['content'] = re.sub(r'(?<=\.com/)\w+(?=/)', destination_language, tag['content'], count=1)
            if tag.has_attr('href'):
                tag['href'] = re.sub(r'(?<=\.com/)\w+(?=/)', destination_language, tag['href'], count=1)
            if tag.has_attr('lang'):
                tag['lang'] = destination_language
            if tag.string:
                tag.string = re.sub(r'(?<="url": "https://neculaifantanaru\.com/)\w+(?=/about\.html)', destination_language, tag.string, count=1)
        for title in soup.find_all('title'):
            recursively_translate(title)
        for meta in soup.find_all('meta', {'name': 'description'}):
            try:
                meta['content'] = translator.translate(meta['content'], dest=destination_language).text
            except Exception:
                pass  # keep the original description if the call fails
        # Translate the article body. Most elements only count when they sit
        # between the ARTICOL START / ARTICOL FINAL comment markers.
        for h1 in soup.find_all('h1', {'itemprop': 'name'}, class_='den_articol'):
            if in_section(soup, h1):
                recursively_translate(h1)
        for p in soup.find_all('p', class_='text_obisnuit'):
            if in_section(soup, p):
                recursively_translate(p)
        for p in soup.find_all('p', class_='text_obisnuit2'):
            recursively_translate(p)
        for p in soup.find_all('p', class_='NOU'):
            recursively_translate(p)
        for span in soup.find_all('span', class_='text_obisnuit2'):
            if in_section(soup, span):
                recursively_translate(span)
        for li in soup.find_all('li', class_='text_obisnuit'):
            if in_section(soup, li):
                recursively_translate(li)
        for a in soup.find_all('a', class_='linkMare'):
            if in_section(soup, a):
                recursively_translate(a)
        for h4 in soup.find_all('h4', class_='text_obisnuit2'):
            if in_section(soup, h4):
                recursively_translate(h4)
        for h5 in soup.find_all('h5', class_='text_obisnuit2'):
            if in_section(soup, h5):
                recursively_translate(h5)
        for h1 in soup.find_all('h1', {'itemprop': 'name'}, class_='den_webinar'):
            if in_section(soup, h1):
                recursively_translate(h1)
        for h3 in soup.find_all('h3', class_='font-weight-normal'):
            if in_section(soup, h3):
                recursively_translate(h3)
        # The same headings also appear in the blog list above the article.
        for h3 in soup.find_all('h3', class_='font-weight-normal'):
            if in_section(soup, h3, '<!-- Blog List Inner -->', '<!-- ARTICOL START -->'):
                recursively_translate(h3)
        for span in soup.find_all('span', class_='online'):
            if in_section(soup, span, '<!-- post -->', '<!-- ARTICOL START -->'):
                recursively_translate(span)
        for p in soup.find_all('p', class_='mb-40px'):
            if in_section(soup, p):
                recursively_translate(p)
        for p in soup.find_all('p', class_='mb-35px color-grey line-height-25px'):
            if in_section(soup, p):
                recursively_translate(p)
        print(f'{filename} translated ({amount})')
        amount += 1
        output = soup.encode(formatter=UnsortedAttributes()).decode('utf-8')
        new_filename = f'{filename.split(".")[0]}_{destination_language}.html'
        # output[5:-6] strips the <pre>...</pre> wrapper added at parse time.
        if use_translate_folder:
            os.makedirs(os.path.join(files_from_folder, 'translated'), exist_ok=True)
            with open(os.path.join(files_from_folder, 'translated', new_filename), 'w', encoding='utf-8') as new_html:
                new_html.write(output[5:-6])
        else:
            with open(os.path.join(files_from_folder, new_filename), 'w', encoding='utf-8') as html:
                html.write(output[5:-6])
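A minimal sketch of how to run it, assuming the standard PyPI package names (the gist itself pins no versions; the googletrans 4.0.0rc1 pre-release is a common choice because older releases break against the current Google endpoint, and translate_pages.py is a hypothetical name for this file):

    pip install beautifulsoup4 googletrans==4.0.0rc1
    python translate_pages.py

Set files_from_folder, destination_language, and use_translate_folder at the top of the script before running.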