Created
October 26, 2024 10:10
-
-
Save 5shekel/b13f8af930dec6a3f61f63b4a2873f44 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re
import traceback
import xml.etree.ElementTree as ET
from datetime import datetime

import requests
from googletrans import Translator
def contains_hebrew(text):
    """Return True if *text* has at least one Hebrew character.

    Both the main Hebrew block (U+0590-U+05FF) and the Hebrew part of the
    Alphabetic Presentation Forms block (U+FB1D-U+FB4F) are recognised.
    """
    first_match = re.search(r'[\u0590-\u05FF\uFB1D-\uFB4F]', text)
    return first_match is not None
def contains_russian(text):
    """Return True if *text* has at least one Cyrillic character.

    The check covers the whole Cyrillic block (U+0400-U+04FF), so text in
    other Cyrillic-script languages is reported as Russian as well.
    """
    first_match = re.search(r'[\u0400-\u04FF]', text)
    return first_match is not None
def fetch_google_trends(timeout=10):
    """Print the current Google Trends daily trending searches for Israel.

    Downloads the daily-trends RSS feed for ``geo=IL``, prints a timestamped
    header and then one numbered entry per trending topic. A title that
    contains Hebrew or Cyrillic characters is additionally translated to
    English with ``googletrans``. News items attached to a topic are listed
    with their source and URL, and a blank line follows every topic.

    Args:
        timeout: Seconds to wait for the feed before giving up. ``requests``
            applies no timeout by default, so without one a stalled server
            would block this call indefinitely.

    Returns:
        None. Everything is written to stdout. Request failures, XML parse
        errors and any other exception are reported there instead of being
        raised to the caller.
    """
    # Initialize translator
    translator = Translator()

    # URL of the Google Trends RSS feed for Israel
    url = "https://trends.google.com/trends/trendingsearches/daily/rss?geo=IL"
    # Namespace of the feed's trends-specific elements, written in
    # ElementTree's "{uri}" prefix notation so it can be glued onto tag names.
    trends_ns = '{https://trends.google.com/trends/trendingsearches/daily}'

    try:
        # Fetch the RSS feed
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        # Registering a prefix only influences how the namespace is written
        # if the tree is serialized; the lookups below use the full URI and
        # do not depend on it.
        ET.register_namespace('ht', 'https://trends.google.com/trends/trendingsearches/daily')

        # Parse the XML content and collect every <item> of the channel
        root = ET.fromstring(response.content)
        items = root.findall('./channel/item')

        print(f"\nGoogle Trends in Israel - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        print("-" * 60)

        # Extract and print each trending topic
        for idx, item in enumerate(items, 1):
            # findtext() returns None for a missing <title> and '' for an
            # empty one; normalise both to '' so the script checks below
            # never receive None and one bad item cannot abort the listing.
            title = item.findtext('title') or ''

            if contains_hebrew(title):
                language = "Hebrew"
            elif contains_russian(title):
                language = "Russian"
            else:
                language = None

            if language is None:
                print(f"{idx}. {title}")
            else:
                # Translate before printing so a failed translation does not
                # leave a half-printed entry behind.
                translation = translator.translate(title, dest='en')
                print(f"{idx}. {title} ({language})")
                print(f" Translation: {translation.text}")

            # Print news items if available (findall yields an empty list
            # when there are none, so no separate emptiness check is needed).
            for i, news_item in enumerate(item.findall(f'.//{trends_ns}news_item'), 1):
                news_source = news_item.find(f'.//{trends_ns}news_item_source')
                news_url = news_item.find(f'.//{trends_ns}news_item_url')
                if news_source is not None:
                    print(f" News {i} Source: {news_source.text}")
                if news_url is not None:
                    print(f" News {i} URL: {news_url.text}")

            # Blank line between topics
            print()
    except requests.RequestException as e:
        print(f"Error fetching the RSS feed: {e}")
    except ET.ParseError as e:
        print(f"Error parsing the XML content: {e}")
    except Exception as e:
        # Top-level boundary of the script: report anything unforeseen
        # (e.g. a translation failure) together with its traceback.
        print(f"An unexpected error occurred: {e}")
        print(traceback.format_exc())
# Run the report only when this file is executed as a script, so importing
# the module does not trigger a network request.
if __name__ == "__main__":
    fetch_google_trends()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment