This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import regex | |
def remove_quotes_from_meta_tags(html_content): | |
# Pattern pentru <meta name="description" content="..."> | |
pattern1 = regex.compile(r'(<meta name="description" content=")(.*?)(">)', regex.DOTALL) | |
# Pattern pentru <meta property="og:description" content="..."> | |
pattern2 = regex.compile(r'(<meta property="og:description" content=")(.*?)(">)', regex.DOTALL) | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import regex | |
def remove_quotes_from_meta_tags(html_content): | |
# Pattern pentru <meta name="description" content="..."> | |
pattern1 = regex.compile(r'(<meta name="description" content=")(.*?)(">)', regex.DOTALL) | |
# Pattern pentru <meta property="og:description" content="..."> | |
pattern2 = regex.compile(r'(<meta property="og:description" content=")(.*?)(">)', regex.DOTALL) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import os | |
def replace_headers_in_html(file_path): | |
# Read the content of the HTML file | |
with open(file_path, 'r', encoding='utf-8') as file: | |
content = file.read() | |
# Store original content for comparison | |
original_content = content |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import os | |
def replace_headers_in_html(file_path): | |
# Read the content of the HTML file | |
with open(file_path, 'r', encoding='utf-8') as file: | |
content = file.read() | |
# Store original content for comparison | |
original_content = content |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import requests | |
from bs4 import BeautifulSoup, Comment | |
import time | |
def get_image_from_local_file(file_path): | |
"""Extrage imaginea din fișierul HTML local""" | |
try: | |
if not os.path.exists(file_path): | |
return None, None |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import requests | |
from bs4 import BeautifulSoup, Comment | |
import time | |
def get_image_from_article_page(url): | |
"""Extrage imaginea din pagina individuală""" | |
try: | |
headers = { | |
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import requests | |
from bs4 import BeautifulSoup, Comment | |
import time | |
import re | |
def get_image_from_article_page(url): | |
"""Extrage imaginea din pagina individuală a articolului""" | |
try: | |
headers = { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
json_data = [ | |
{ | |
"html_file": "lideri-si-atitudine.html", | |
"line_count": 37, | |
"title_links": 37, | |
"read_more_links": 37, | |
"repeated_links": {} | |
}, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Ai dreptate, se duplică conținutul. Iată soluția corectă - să ascund complet secțiunea desktop pe mobil: | |
## HTML simplificat: | |
```html | |
<article class="blog-box heading-space-half"> | |
<div class="blog-listing-inner news_item"> | |
<div class="article-card-new"> | |
<!-- Layout DESKTOP - ascuns pe mobil --> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
""" | |
This script sends the given command-line arguments as a query to the running | |
DocFetcher instance. The results returned by the latter are printed as filename- | |
filepath pairs on the standard output. | |
For more advanced processing of the results, call the search function below | |
directly. In principle, you can also reuse the code in the search function for |
NewerOlder