This Python script fetches the latest NEWS from The Guardian (plus several tech and NPR RSS feeds) and the latest WEATHER forecast from the National Weather Service, and saves it all to NEWS.md in your Obsidian vault, overwriting the file with the latest news every time you run it. A quick way to sanity-check your API key and coordinates is sketched just below; the full script follows after it.
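The script depends on the third-party packages it imports (requests, feedparser, and beautifulsoup4) plus a free Guardian API key. Before pointing it at your vault, you may want to verify the key and your latitude/longitude against the two APIs the script relies on. A minimal, standalone sketch, assuming the same endpoints the script uses (the coordinate values here are placeholders, not part of the gist):

```python
import requests

# Placeholder values -- substitute your own key and coordinates.
API_KEY = 'Place your Guardian API Key Here'
LATITUDE, LONGITUDE = '40.7128', '-74.0060'

# Query the Guardian Content API for a single result; a working key reports status "ok".
r = requests.get('https://content.guardianapis.com/search',
                 params={'api-key': API_KEY, 'page-size': 1})
print(r.status_code, r.json().get('response', {}).get('status'))

# Look up the NWS gridpoint for your coordinates; a valid point returns a 'forecast' URL.
r = requests.get(f'https://api.weather.gov/points/{LATITUDE},{LONGITUDE}')
print(r.status_code, r.json().get('properties', {}).get('forecast'))
```

If both requests return a 200 status (and the second prints a forecast URL), the main script below should run as-is.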
import requests
import feedparser
import os
from datetime import datetime
from bs4 import BeautifulSoup
import re
# **Configuration Section**
# -----------------------------------
# **Important:** Replace the placeholder values below with your actual information.
#
# This Python script grabs the latest NEWS from The Guardian (plus RSS and NPR feeds)
# and the latest WEATHER from the National Weather Service, and saves it all to NEWS.md
# in your Obsidian vault, overwriting it with the latest news every time you run it.

# Your API key from The Guardian
API_KEY = 'Place your Guardian API Key Here'

# Coordinates for YOUR LOCATION (replace with your actual latitude and longitude)
LATITUDE = 'Place your Latitude Here'    # Example: '40.7128'
LONGITUDE = 'Place your Longitude Here'  # Example: '-74.0060'

# Path to save the NEWS.md file in your Obsidian vault
VAULT_PATH = '/path/to/vault/notes/NEWS.md'
# -----------------------------------

NWS_API_URL = f"https://api.weather.gov/points/{LATITUDE},{LONGITUDE}"
GUARDIAN_API_URL = 'https://content.guardianapis.com/search'
# Categories to fetch with their corresponding URLs
categories = {
    'U.S.': ('us-news', 'https://www.theguardian.com/us-news'),
    'World': ('world', 'https://www.theguardian.com/world'),
    'Artificial Intelligence': (None, None),
    'Business': ('business', 'https://www.theguardian.com/business'),
    'Health': ('society', 'https://www.theguardian.com/society')
}

# RSS feeds for additional tech and science news
rss_feeds = {
    'TechCrunch': 'https://techcrunch.com/feed/',
    'Wired': 'https://www.wired.com/feed/rss',
    'Ars Technica': 'https://arstechnica.com/feed/',
    'The Verge': 'https://www.theverge.com/rss/index.xml'
}

# NPR RSS feeds
npr_feeds = {
    'NPR News': 'https://feeds.npr.org/1001/rss.xml',
    'NPR Politics': 'https://feeds.npr.org/1014/rss.xml',
    'NPR World News': 'https://feeds.npr.org/1004/rss.xml',
    'NPR Science': 'https://feeds.npr.org/1007/rss.xml',
}
# Function to fetch the NWS forecast
def fetch_nws_forecast():
    try:
        # Step 1: Get gridpoint from coordinates
        response = requests.get(NWS_API_URL)
        response.raise_for_status()
        grid_data = response.json()

        # Extract the forecast URL
        forecast_url = grid_data['properties']['forecast']

        # Step 2: Fetch the weather forecast
        response = requests.get(forecast_url)
        response.raise_for_status()
        forecast_data = response.json()

        # Extract the forecast periods (today and the next 5 days)
        periods = forecast_data['properties']['periods'][:6]  # Get today + next 5 days

        # Step 3: Create weather report in markdown format
        location = grid_data['properties']['relativeLocation']['properties']
        city = location.get('city', 'Unknown City')
        state = location.get('state', 'Unknown State')

        weather_report = f"### Weather Forecast for {city}, {state}\n\n"

        first_line = True
        for period in periods:
            name = period.get('name', 'Unknown Period')
            temperature = period.get('temperature', 'N/A')
            short_forecast = period.get('shortForecast', 'No forecast available')

            # Check if it's the first line and add four hashtags
            if first_line:
                weather_report += f"#### **{name}**: {temperature}°F, {short_forecast}\n"
                first_line = False
            # For 'Tonight' and '* Night' forecasts, italicize, add bullet, and double-indent (without bold)
            elif "Night" in name or name.lower() == "tonight":
                weather_report += f" - _{name}: {temperature}°F, {short_forecast}_\n"
            else:
                weather_report += f"- **{name}**: {temperature}°F, {short_forecast}\n"

        weather_report += "\n"
        return weather_report
    except requests.RequestException as e:
        return f"Error fetching weather forecast: {e}\n\n"
# Function to fetch news from The Guardian
def fetch_news(section, search_term=None):
    if section is None and search_term is None:
        return []

    params = {
        'api-key': API_KEY,
        'section': section,
        'page-size': 5,
        'order-by': 'newest',
        'edition': 'us',
        'show-fields': 'trailText,headline,bodyText'
    }

    if search_term:
        # Note: passing any search_term switches to a fixed AI-related query
        # against the 'technology' section; the term itself is not used verbatim.
        params['q'] = (
            '"artificial intelligence" OR "AI" OR "machine learning" OR "neural networks"'
        )
        params['section'] = 'technology'

    try:
        response = requests.get(GUARDIAN_API_URL, params=params)
        response.raise_for_status()
        return response.json().get('response', {}).get('results', [])
    except requests.RequestException as e:
        print(f"Error fetching news from The Guardian: {e}")
        return []
# Function to clean HTML content from the summary
def clean_html(text):
    soup = BeautifulSoup(text, "html.parser")
    clean_text = soup.get_text(separator=' ')
    return clean_text

# Function to remove lines starting with "©"
def remove_copyright_lines(text):
    lines = text.splitlines()  # Split the text into lines
    filtered_lines = [line for line in lines if not line.strip().startswith('©')]  # Filter out lines starting with "©"
    return ' '.join(filtered_lines)  # Join the remaining lines back together

# Function to extract a short summary (the first two sentences) with copyright lines removed
def get_summary(text):
    cleaned_text = clean_html(text)
    cleaned_text = remove_copyright_lines(cleaned_text)  # Remove lines starting with "©"
    sentences = re.split(r'(?<=\.)\s', cleaned_text)  # Split the text into sentences
    if len(sentences) >= 2:
        return ' '.join(sentences[:2])  # Return the first two sentences
    return cleaned_text  # If there are fewer than 2 sentences, return the whole text
# Function to fetch articles from RSS feeds and limit to 5 articles per source
def fetch_rss(feed_url):
    try:
        feed = feedparser.parse(feed_url)
        articles = []
        for entry in feed.entries[:5]:  # Limit to the top 5 articles
            title = entry.get('title', 'No Title')
            link = entry.get('link', '#')
            summary_text = entry.get('summary', entry.get('description', ''))
            summary = get_summary(summary_text)  # Trim the summary to at most two sentences
            articles.append({
                'title': title,
                'link': link,
                'summary': summary
            })
        return articles
    except Exception as e:
        print(f"Error fetching RSS feed from {feed_url}: {e}")
        return []
# Function to create the news markdown content
def create_news_md(guardian_articles, rss_articles, npr_articles):
    md_content = "#### **MapOfContent:** [[Scraps#News]]\n---\n\n"
    md_content += f"# Daily News Update - {datetime.now().strftime('%Y-%m-%d %H:%M')}\n\n"

    # Add weather report
    md_content += fetch_nws_forecast()  # Insert the weather report here

    # Add Guardian articles
    for category, (articles, url) in guardian_articles.items():
        if url:
            md_content += f"## [{category}]({url}):\n"
        else:
            md_content += f"## {category} (no link available):\n"
        for article in articles:
            fields = article.get('fields', {})
            title = fields.get('headline', 'No Title')
            trailText = fields.get('trailText', '')
            bodyText = fields.get('bodyText', '')
            summary = trailText if trailText else (bodyText[:200] + '...') if bodyText else 'No summary available.'
            article_url = article.get('webUrl', '#')
            md_content += f"- **[{title}]({article_url})**\n - {summary}\n"
        md_content += "\n"

    # Add RSS feed articles
    for source, articles in rss_articles.items():
        md_content += f"## {source}:\n"
        for article in articles:
            md_content += f"- **[{article['title']}]({article['link']})**\n - {article['summary']}...\n"
        md_content += "\n"

    # Add a separator before NPR feeds
    md_content += "---\n\n"

    # Add NPR feed articles
    md_content += "## NPR:\n"
    for source, articles in npr_articles.items():
        md_content += f"### {source}:\n"
        for article in articles:
            md_content += f"- **[{article['title']}]({article['link']})**\n - {article['summary']}...\n"
        md_content += "\n"

    return md_content
def main():
    # Fetch news for each Guardian category
    guardian_articles_by_category = {}
    for category_name, (section_id, section_url) in categories.items():
        if category_name == 'Artificial Intelligence':
            guardian_articles_by_category[category_name] = (fetch_news(None, search_term='Artificial Intelligence'), None)
        else:
            guardian_articles_by_category[category_name] = (fetch_news(section_id), section_url)

    # Fetch articles from regular RSS feeds
    rss_articles_by_source = {}
    for source_name, feed_url in rss_feeds.items():
        rss_articles_by_source[source_name] = fetch_rss(feed_url)

    # Fetch articles from NPR RSS feeds
    npr_articles_by_source = {}
    for source_name, feed_url in npr_feeds.items():
        npr_articles_by_source[source_name] = fetch_rss(feed_url)

    # Create the markdown content
    news_md = create_news_md(guardian_articles_by_category, rss_articles_by_source, npr_articles_by_source)

    # Save the markdown content to the Obsidian vault
    try:
        with open(VAULT_PATH, 'w', encoding='utf-8') as file:
            file.write(news_md)
        print("NEWS.md updated successfully.")
    except Exception as e:
        print(f"Error writing to {VAULT_PATH}: {e}")


if __name__ == "__main__":
    main()
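For reference, the note written to NEWS.md follows the structure built up in create_news_md. A rough sketch of the output shape (the timestamp, city, temperatures, headlines, and summaries below are illustrative placeholders, not real output):

```
#### **MapOfContent:** [[Scraps#News]]
---

# Daily News Update - 2024-09-24 09:00

### Weather Forecast for Your City, ST

#### **Today**: 72°F, Partly Sunny
 - _Tonight: 58°F, Mostly Clear_
- **Wednesday**: 75°F, Sunny

## [U.S.](https://www.theguardian.com/us-news):
- **[Example Guardian headline](https://www.theguardian.com/us-news/example)**
 - Example trail-text summary.

## TechCrunch:
- **[Example RSS headline](https://techcrunch.com/example)**
 - Example two-sentence summary....

---

## NPR:
### NPR News:
- **[Example NPR headline](https://www.npr.org/example)**
 - Example summary....
```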