Created
September 3, 2025 10:41
-
-
Save me-suzy/44c340bd17492348669053124992334a to your computer and use it in GitHub Desktop.
sterge ghilimelele 2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import re

import regex
# Characters removed from description text: low/left/right double quotes,
# the ASCII double quote, the ASCII apostrophe, left/right single quotes and
# the pipe. The typographic variants are written as escapes so they survive
# copy/paste and editors that normalise quotes.
_STRIPPED_CHARS = '\u201e\u201c\u201d"\'\u2018\u2019|'
_STRIP_TABLE = str.maketrans('', '', _STRIPPED_CHARS)

# <meta name="description" content="..."> (also matches the "/>" and " />"
# self-closing forms, so the lazy DOTALL match cannot run on into later tags).
_META_DESCRIPTION = re.compile(
    r'(<meta name="description" content=")(.*?)("\s*/?>)', re.DOTALL)
# <meta property="og:description" content="...">
_OG_DESCRIPTION = re.compile(
    r'(<meta property="og:description" content=")(.*?)("\s*/?>)', re.DOTALL)
# JSON "description": "...",
_JSON_DESCRIPTION = re.compile(r'("description": ")(.*?)(",)', re.DOTALL)
# A lone backslash that escapes a quote character inside a JSON string.
_ESCAPED_QUOTE_BACKSLASH = re.compile(r'(?<!\\)\\(?=["\'])')


def _strip_quotes(match):
    """Return the match with the stripped characters removed from group 2."""
    return match.group(1) + match.group(2).translate(_STRIP_TABLE) + match.group(3)


def _strip_json_quotes(match):
    """Like ``_strip_quotes``, but also drop the backslash of escaped quotes.

    Removing only the quote from an escaped quote would leave a dangling
    backslash, which is an invalid escape sequence in JSON.
    """
    text = _ESCAPED_QUOTE_BACKSLASH.sub('', match.group(2))
    return match.group(1) + text.translate(_STRIP_TABLE) + match.group(3)


def remove_quotes_from_meta_tags(html_content):
    """Strip quotes, apostrophes and '|' from description values.

    Three kinds of description are cleaned: the ``description`` meta tag,
    the ``og:description`` meta tag, and a JSON ``"description": "...",``
    entry. Only the text between the opening and closing delimiters is
    modified; the delimiters themselves and the rest of the document are
    left as they are.

    Args:
        html_content: Full text of an HTML document.

    Returns:
        The document text with the description values cleaned.
    """
    content = _META_DESCRIPTION.sub(_strip_quotes, html_content)
    content = _OG_DESCRIPTION.sub(_strip_quotes, content)
    return _JSON_DESCRIPTION.sub(_strip_json_quotes, content)
def process_html_files(folder_path):
    """Clean the description values of every ``.html`` file in a folder.

    Each regular file directly inside ``folder_path`` whose name ends in
    ``.html`` (case-insensitive) is read as UTF-8, passed through
    ``remove_quotes_from_meta_tags`` and written back in place. Files whose
    content does not change are not rewritten.

    Args:
        folder_path: Directory to scan; subdirectories are not descended into.
    """
    for filename in os.listdir(folder_path):
        if not filename.lower().endswith('.html'):
            continue
        file_path = os.path.join(folder_path, filename)
        # A directory can carry an ".html" name; open() would fail on it.
        if not os.path.isfile(file_path):
            continue
        print(f'Procesare: {file_path}')
        # newline='' disables newline translation so the file's original
        # line endings are written back untouched.
        with open(file_path, 'r', encoding='utf-8', newline='') as file:
            content = file.read()
        new_content = remove_quotes_from_meta_tags(content)
        if new_content != content:
            with open(file_path, 'w', encoding='utf-8', newline='') as file:
                file.write(new_content)
    print('Toate fișierele au fost modificate.')
# Path to the folder containing the HTML files
folder_path = r'e:\Carte\BB\17 - Site Leadership\alte\Ionel Balauta\Aryeht\Task 1 - Traduce tot site-ul\Doar Google Web\Andreea\Meditatii\2023\OANA\Folder-Oana\test'

# Process the files only when run as a script, so that importing this module
# does not rewrite files on disk.
if __name__ == '__main__':
    process_html_files(folder_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment