This Python script grabs the latest news from The Guardian, several tech RSS feeds, and NPR, plus the current weather forecast from the National Weather Service, and saves it all to NEWS.md in your Obsidian vault, overwriting the file with the latest content every time you run it.
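Requires Python 3 with the requests, feedparser, and beautifulsoup4 packages (pip install requests feedparser beautifulsoup4) and a free API key from The Guardian's Open Platform (https://open-platform.theguardian.com/).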
import requests
import feedparser
import os
from datetime import datetime
from bs4 import BeautifulSoup
import re
# **Configuration Section**
# -----------------------------------
# **Important:** Replace the placeholder values below with your actual information.
# This script fetches the latest news from The Guardian, several RSS feeds, and NPR,
# plus the latest NWS weather forecast, and saves everything to NEWS.md in your
# Obsidian vault, overwriting it with the latest content every time you run it.
# Your API key from The Guardian
API_KEY = 'Place your Guardian API Key Here'
# Coordinates for YOUR LOCATION (replace with your actual latitude and longitude)
LATITUDE = 'Place your Latitude Here' # Example: '40.7128'
LONGITUDE = 'Place your Longitude Here' # Example: '-74.0060'
# Path to save the NEWS.md file in your Obsidian vault
VAULT_PATH = '/path/to/vault/notes/NEWS.md'
# -----------------------------------
NWS_API_URL = f"https://api.weather.gov/points/{LATITUDE},{LONGITUDE}"
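# The NWS points endpoint resolves a lat/lon pair to a forecast-office gridpoint;
# its JSON response carries the actual forecast URL under properties.forecast.
# Illustrative only (the gridpoint below is made up):
#   https://api.weather.gov/points/40.7128,-74.0060
#   -> properties.forecast = https://api.weather.gov/gridpoints/OKX/33,35/forecast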
GUARDIAN_API_URL = 'https://content.guardianapis.com/search'
# Categories to fetch with their corresponding URLs
categories = {
    'U.S.': ('us-news', 'https://www.theguardian.com/us-news'),
    'World': ('world', 'https://www.theguardian.com/world'),
    'Artificial Intelligence': (None, None),
    'Business': ('business', 'https://www.theguardian.com/business'),
    'Health': ('society', 'https://www.theguardian.com/society')
}
# RSS feeds for additional tech and science news
rss_feeds = {
    'TechCrunch': 'https://techcrunch.com/feed/',
    'Wired': 'https://www.wired.com/feed/rss',
    'Ars Technica': 'https://arstechnica.com/feed/',
    'The Verge': 'https://www.theverge.com/rss/index.xml'
}
# NPR RSS feeds
npr_feeds = {
    'NPR News': 'https://feeds.npr.org/1001/rss.xml',
    'NPR Politics': 'https://feeds.npr.org/1014/rss.xml',
    'NPR World News': 'https://feeds.npr.org/1004/rss.xml',
    'NPR Science': 'https://feeds.npr.org/1007/rss.xml',
}
# Function to fetch the NWS forecast
def fetch_nws_forecast():
    try:
        # Step 1: Resolve the coordinates to a gridpoint
        response = requests.get(NWS_API_URL)
        response.raise_for_status()
        grid_data = response.json()
        # Extract the forecast URL
        forecast_url = grid_data['properties']['forecast']
        # Step 2: Fetch the weather forecast
        response = requests.get(forecast_url)
        response.raise_for_status()
        forecast_data = response.json()
        # Extract the forecast periods (today + the next 5 days)
        periods = forecast_data['properties']['periods'][:6]
        # Step 3: Build the weather report in markdown format
        location = grid_data['properties']['relativeLocation']['properties']
        city = location.get('city', 'Unknown City')
        state = location.get('state', 'Unknown State')
        weather_report = f"### Weather Forecast for {city}, {state}\n\n"
        first_line = True
        for period in periods:
            name = period.get('name', 'Unknown Period')
            temperature = period.get('temperature', 'N/A')
            short_forecast = period.get('shortForecast', 'No forecast available')
            # The first period gets a level-4 heading
            if first_line:
                weather_report += f"#### **{name}**: {temperature}°F, {short_forecast}\n"
                first_line = False
            # 'Tonight' and '* Night' periods: italicized, bulleted, and double-indented (no bold)
            elif "Night" in name or name.lower() == "tonight":
                weather_report += f"    - _{name}: {temperature}°F, {short_forecast}_\n"
            else:
                weather_report += f"- **{name}**: {temperature}°F, {short_forecast}\n"
        weather_report += "\n"
        return weather_report
    except requests.RequestException as e:
        return f"Error fetching weather forecast: {e}\n\n"
# Function to fetch news from The Guardian
def fetch_news(section, search_term=None):
    if section is None and search_term is None:
        return []
    params = {
        'api-key': API_KEY,
        'section': section,
        'page-size': 5,
        'order-by': 'newest',
        'edition': 'us',
        'show-fields': 'trailText,headline,bodyText'
    }
    if search_term:
        # Note: the AI category uses this fixed query against the technology
        # section, regardless of the search_term value passed in
        params['q'] = (
            '"artificial intelligence" OR "AI" OR "machine learning" OR "neural networks"'
        )
        params['section'] = 'technology'
    try:
        response = requests.get(GUARDIAN_API_URL, params=params)
        response.raise_for_status()
        return response.json().get('response', {}).get('results', [])
    except requests.RequestException as e:
        print(f"Error fetching news from The Guardian: {e}")
        return []
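# Each result is a dict shaped roughly like this (abridged; the 'fields' keys
# follow the 'show-fields' request above, other keys assumed from the Content API):
# {
#     'webUrl': 'https://www.theguardian.com/...',
#     'fields': {'headline': '...', 'trailText': '...', 'bodyText': '...'}
# }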
# Function to clean HTML content from the summary
def clean_html(text):
    soup = BeautifulSoup(text, "html.parser")
    clean_text = soup.get_text(separator=' ')
    return clean_text
# Function to remove lines starting with "©"
def remove_copyright_lines(text):
    lines = text.splitlines()  # Split the text into lines
    filtered_lines = [line for line in lines if not line.strip().startswith('©')]  # Drop lines starting with "©"
    return ' '.join(filtered_lines)  # Join the remaining lines back together
# Function to extract a short summary (the first two sentences) with copyright lines removed
def get_summary(text):
    cleaned_text = clean_html(text)
    cleaned_text = remove_copyright_lines(cleaned_text)  # Remove lines starting with "©"
    sentences = re.split(r'(?<=\.)\s', cleaned_text)  # Split the text into sentences
    if len(sentences) >= 2:
        return ' '.join(sentences[:2])  # Return the first two sentences
    return cleaned_text  # Fewer than two sentences: return the whole text
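# For example (hypothetical input):
#   get_summary('<p>Rates rose today. Markets fell. Analysts shrugged.</p>')
#   -> 'Rates rose today. Markets fell.'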
# Function to fetch articles from RSS feeds and limit to 5 articles per source
def fetch_rss(feed_url):
    try:
        feed = feedparser.parse(feed_url)
        articles = []
        for entry in feed.entries[:5]:  # Limit to the top 5 articles
            title = entry.get('title', 'No Title')
            link = entry.get('link', '#')
            summary_text = entry.get('summary', entry.get('description', ''))
            summary = get_summary(summary_text)  # Trim the summary to two sentences
            articles.append({
                'title': title,
                'link': link,
                'summary': summary
            })
        return articles
    except Exception as e:
        print(f"Error fetching RSS feed from {feed_url}: {e}")
        return []
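# A quick standalone check (feed URL taken from rss_feeds above):
#   for article in fetch_rss('https://techcrunch.com/feed/'):
#       print(article['title'], article['link'])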
# Function to create the news markdown content
def create_news_md(guardian_articles, rss_articles, npr_articles):
    md_content = "#### **MapOfContent:** [[Scraps#News]]\n---\n\n"
    md_content += f"# Daily News Update - {datetime.now().strftime('%Y-%m-%d %H:%M')}\n\n"
    # Add the weather report
    md_content += fetch_nws_forecast()
    # Add Guardian articles
    for category, (articles, url) in guardian_articles.items():
        if url:
            md_content += f"## [{category}]({url}):\n"
        else:
            md_content += f"## {category} (no link available):\n"
        for article in articles:
            fields = article.get('fields', {})
            title = fields.get('headline', 'No Title')
            trailText = fields.get('trailText', '')
            bodyText = fields.get('bodyText', '')
            summary = trailText if trailText else (bodyText[:200] + '...') if bodyText else 'No summary available.'
            article_url = article.get('webUrl', '#')
            md_content += f"- **[{title}]({article_url})**\n    - {summary}\n"
        md_content += "\n"
    # Add RSS feed articles
    for source, articles in rss_articles.items():
        md_content += f"## {source}:\n"
        for article in articles:
            md_content += f"- **[{article['title']}]({article['link']})**\n    - {article['summary']}...\n"
        md_content += "\n"
    # Add a separator before the NPR feeds
    md_content += "---\n\n"
    # Add NPR feed articles
    md_content += "## NPR:\n"
    for source, articles in npr_articles.items():
        md_content += f"### {source}:\n"
        for article in articles:
            md_content += f"- **[{article['title']}]({article['link']})**\n    - {article['summary']}...\n"
        md_content += "\n"
    return md_content
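# The resulting NEWS.md is laid out roughly as follows (headings taken from the
# format strings above; <...> values filled in at runtime):
#   #### **MapOfContent:** [[Scraps#News]]
#   # Daily News Update - <timestamp>
#   ### Weather Forecast for <city>, <state>
#   ## [U.S.](...):  ...  ## TechCrunch:  ...  ---  ## NPR:  ### NPR News: ...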
def main():
    # Fetch news for each Guardian category
    guardian_articles_by_category = {}
    for category_name, (section_id, section_url) in categories.items():
        if category_name == 'Artificial Intelligence':
            guardian_articles_by_category[category_name] = (fetch_news(None, search_term='Artificial Intelligence'), None)
        else:
            guardian_articles_by_category[category_name] = (fetch_news(section_id), section_url)
    # Fetch articles from the regular RSS feeds
    rss_articles_by_source = {}
    for source_name, feed_url in rss_feeds.items():
        rss_articles_by_source[source_name] = fetch_rss(feed_url)
    # Fetch articles from the NPR RSS feeds
    npr_articles_by_source = {}
    for source_name, feed_url in npr_feeds.items():
        npr_articles_by_source[source_name] = fetch_rss(feed_url)
    # Create the markdown content
    news_md = create_news_md(guardian_articles_by_category, rss_articles_by_source, npr_articles_by_source)
    # Save the markdown content to the Obsidian vault
    try:
        with open(VAULT_PATH, 'w', encoding='utf-8') as file:
            file.write(news_md)
        print("NEWS.md updated successfully.")
    except Exception as e:
        print(f"Error writing to {VAULT_PATH}: {e}")

if __name__ == "__main__":
    main()
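# A minimal sketch for scheduling, assuming the script is saved as news.py
# (adjust the interpreter and path for your system). Crontab entry to refresh
# the vault note every hour:
#   0 * * * * /usr/bin/python3 /path/to/news.py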