This Python script grabs the latest news from The Guardian, several tech RSS feeds, and NPR, plus the current weather forecast from the National Weather Service, and saves it all to NEWS.md in your Obsidian vault, overwriting the file with the latest content every time you run it.
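Requires Python 3 with the requests, feedparser, and beautifulsoup4 packages (pip install requests feedparser beautifulsoup4) and a free API key from The Guardian's Open Platform (https://open-platform.theguardian.com/).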
import requests
import feedparser
import os
from datetime import datetime
from bs4 import BeautifulSoup
import re
# **Configuration Section**
# -----------------------------------
# **Important:** Replace the placeholder values below with your actual information.
# This script fetches the latest news from The Guardian, several RSS feeds, and NPR,
# plus the latest NWS weather forecast, and saves everything to NEWS.md in your
# Obsidian vault, overwriting it with the latest content every time you run it.
# Your API key from The Guardian
API_KEY = 'Place your Guardian API Key Here'
# Coordinates for YOUR LOCATION (replace with your actual latitude and longitude)
LATITUDE = 'Place your Latitude Here' # Example: '40.7128'
LONGITUDE = 'Place your Longitude Here' # Example: '-74.0060'
# Path to save the NEWS.md file in your Obsidian vault
VAULT_PATH = '/path/to/vault/notes/NEWS.md'
# -----------------------------------
NWS_API_URL = f"https://api.weather.gov/points/{LATITUDE},{LONGITUDE}"
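# The NWS points endpoint resolves a lat/lon pair to a forecast-office gridpoint;
# its JSON response carries the actual forecast URL under properties.forecast.
# Illustrative only (the gridpoint below is made up):
#   https://api.weather.gov/points/40.7128,-74.0060
#   -> properties.forecast = https://api.weather.gov/gridpoints/OKX/33,35/forecast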
GUARDIAN_API_URL = 'https://content.guardianapis.com/search'
# Categories to fetch with their corresponding URLs
categories = {
    'U.S.': ('us-news', 'https://www.theguardian.com/us-news'),
    'World': ('world', 'https://www.theguardian.com/world'),
    'Artificial Intelligence': (None, None),
    'Business': ('business', 'https://www.theguardian.com/business'),
    'Health': ('society', 'https://www.theguardian.com/society')
}
# RSS feeds for additional tech and science news
rss_feeds = {
    'TechCrunch': 'https://techcrunch.com/feed/',
    'Wired': 'https://www.wired.com/feed/rss',
    'Ars Technica': 'https://arstechnica.com/feed/',
    'The Verge': 'https://www.theverge.com/rss/index.xml'
}
# NPR RSS feeds
npr_feeds = {
    'NPR News': 'https://feeds.npr.org/1001/rss.xml',
    'NPR Politics': 'https://feeds.npr.org/1014/rss.xml',
    'NPR World News': 'https://feeds.npr.org/1004/rss.xml',
    'NPR Science': 'https://feeds.npr.org/1007/rss.xml',
}
# Function to fetch the NWS forecast
def fetch_nws_forecast():
    try:
        # Step 1: Resolve the coordinates to a gridpoint
        response = requests.get(NWS_API_URL)
        response.raise_for_status()
        grid_data = response.json()
        # Extract the forecast URL
        forecast_url = grid_data['properties']['forecast']
        # Step 2: Fetch the weather forecast
        response = requests.get(forecast_url)
        response.raise_for_status()
        forecast_data = response.json()
        # Extract the forecast periods (today + the next 5 days)
        periods = forecast_data['properties']['periods'][:6]
        # Step 3: Build the weather report in markdown format
        location = grid_data['properties']['relativeLocation']['properties']
        city = location.get('city', 'Unknown City')
        state = location.get('state', 'Unknown State')
        weather_report = f"### Weather Forecast for {city}, {state}\n\n"
        first_line = True
        for period in periods:
            name = period.get('name', 'Unknown Period')
            temperature = period.get('temperature', 'N/A')
            short_forecast = period.get('shortForecast', 'No forecast available')
            # The first period gets a level-4 heading
            if first_line:
                weather_report += f"#### **{name}**: {temperature}°F, {short_forecast}\n"
                first_line = False
            # 'Tonight' and '* Night' periods: italicized, bulleted, and double-indented (no bold)
            elif "Night" in name or name.lower() == "tonight":
                weather_report += f"    - _{name}: {temperature}°F, {short_forecast}_\n"
            else:
                weather_report += f"- **{name}**: {temperature}°F, {short_forecast}\n"
        weather_report += "\n"
        return weather_report
    except requests.RequestException as e:
        return f"Error fetching weather forecast: {e}\n\n"
# Function to fetch news from The Guardian
def fetch_news(section, search_term=None):
    if section is None and search_term is None:
        return []
    params = {
        'api-key': API_KEY,
        'section': section,
        'page-size': 5,
        'order-by': 'newest',
        'edition': 'us',
        'show-fields': 'trailText,headline,bodyText'
    }
    if search_term:
        # Note: the AI category uses this fixed query against the technology
        # section, regardless of the search_term value passed in
        params['q'] = (
            '"artificial intelligence" OR "AI" OR "machine learning" OR "neural networks"'
        )
        params['section'] = 'technology'
    try:
        response = requests.get(GUARDIAN_API_URL, params=params)
        response.raise_for_status()
        return response.json().get('response', {}).get('results', [])
    except requests.RequestException as e:
        print(f"Error fetching news from The Guardian: {e}")
        return []
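# Each result is a dict shaped roughly like this (abridged; the 'fields' keys
# follow the 'show-fields' request above, other keys assumed from the Content API):
# {
#     'webUrl': 'https://www.theguardian.com/...',
#     'fields': {'headline': '...', 'trailText': '...', 'bodyText': '...'}
# }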
# Function to clean HTML content from the summary
def clean_html(text):
    soup = BeautifulSoup(text, "html.parser")
    clean_text = soup.get_text(separator=' ')
    return clean_text
# Function to remove lines starting with "©"
def remove_copyright_lines(text):
    lines = text.splitlines()  # Split the text into lines
    filtered_lines = [line for line in lines if not line.strip().startswith('©')]  # Drop lines starting with "©"
    return ' '.join(filtered_lines)  # Join the remaining lines back together
# Function to extract a short summary (the first two sentences) with copyright lines removed
def get_summary(text):
    cleaned_text = clean_html(text)
    cleaned_text = remove_copyright_lines(cleaned_text)  # Remove lines starting with "©"
    sentences = re.split(r'(?<=\.)\s', cleaned_text)  # Split the text into sentences
    if len(sentences) >= 2:
        return ' '.join(sentences[:2])  # Return the first two sentences
    return cleaned_text  # Fewer than two sentences: return the whole text
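# For example (hypothetical input):
#   get_summary('<p>Rates rose today. Markets fell. Analysts shrugged.</p>')
#   -> 'Rates rose today. Markets fell.'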
# Function to fetch articles from RSS feeds and limit to 5 articles per source
def fetch_rss(feed_url):
    try:
        feed = feedparser.parse(feed_url)
        articles = []
        for entry in feed.entries[:5]:  # Limit to the top 5 articles
            title = entry.get('title', 'No Title')
            link = entry.get('link', '#')
            summary_text = entry.get('summary', entry.get('description', ''))
            summary = get_summary(summary_text)  # Trim the summary to two sentences
            articles.append({
                'title': title,
                'link': link,
                'summary': summary
            })
        return articles
    except Exception as e:
        print(f"Error fetching RSS feed from {feed_url}: {e}")
        return []
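# A quick standalone check (feed URL taken from rss_feeds above):
#   for article in fetch_rss('https://techcrunch.com/feed/'):
#       print(article['title'], article['link'])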
# Function to create the news markdown content
def create_news_md(guardian_articles, rss_articles, npr_articles):
    md_content = "#### **MapOfContent:** [[Scraps#News]]\n---\n\n"
    md_content += f"# Daily News Update - {datetime.now().strftime('%Y-%m-%d %H:%M')}\n\n"
    # Add the weather report
    md_content += fetch_nws_forecast()
    # Add Guardian articles
    for category, (articles, url) in guardian_articles.items():
        if url:
            md_content += f"## [{category}]({url}):\n"
        else:
            md_content += f"## {category} (no link available):\n"
        for article in articles:
            fields = article.get('fields', {})
            title = fields.get('headline', 'No Title')
            trailText = fields.get('trailText', '')
            bodyText = fields.get('bodyText', '')
            summary = trailText if trailText else (bodyText[:200] + '...') if bodyText else 'No summary available.'
            article_url = article.get('webUrl', '#')
            md_content += f"- **[{title}]({article_url})**\n    - {summary}\n"
        md_content += "\n"
    # Add RSS feed articles
    for source, articles in rss_articles.items():
        md_content += f"## {source}:\n"
        for article in articles:
            md_content += f"- **[{article['title']}]({article['link']})**\n    - {article['summary']}...\n"
        md_content += "\n"
    # Add a separator before the NPR feeds
    md_content += "---\n\n"
    # Add NPR feed articles
    md_content += "## NPR:\n"
    for source, articles in npr_articles.items():
        md_content += f"### {source}:\n"
        for article in articles:
            md_content += f"- **[{article['title']}]({article['link']})**\n    - {article['summary']}...\n"
        md_content += "\n"
    return md_content
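# The resulting NEWS.md is laid out roughly as follows (headings taken from the
# format strings above; <...> values filled in at runtime):
#   #### **MapOfContent:** [[Scraps#News]]
#   # Daily News Update - <timestamp>
#   ### Weather Forecast for <city>, <state>
#   ## [U.S.](...):  ...  ## TechCrunch:  ...  ---  ## NPR:  ### NPR News: ...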
def main():
    # Fetch news for each Guardian category
    guardian_articles_by_category = {}
    for category_name, (section_id, section_url) in categories.items():
        if category_name == 'Artificial Intelligence':
            guardian_articles_by_category[category_name] = (fetch_news(None, search_term='Artificial Intelligence'), None)
        else:
            guardian_articles_by_category[category_name] = (fetch_news(section_id), section_url)
    # Fetch articles from the regular RSS feeds
    rss_articles_by_source = {}
    for source_name, feed_url in rss_feeds.items():
        rss_articles_by_source[source_name] = fetch_rss(feed_url)
    # Fetch articles from the NPR RSS feeds
    npr_articles_by_source = {}
    for source_name, feed_url in npr_feeds.items():
        npr_articles_by_source[source_name] = fetch_rss(feed_url)
    # Create the markdown content
    news_md = create_news_md(guardian_articles_by_category, rss_articles_by_source, npr_articles_by_source)
    # Save the markdown content to the Obsidian vault
    try:
        with open(VAULT_PATH, 'w', encoding='utf-8') as file:
            file.write(news_md)
        print("NEWS.md updated successfully.")
    except Exception as e:
        print(f"Error writing to {VAULT_PATH}: {e}")

if __name__ == "__main__":
    main()
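# A minimal sketch for scheduling, assuming the script is saved as news.py
# (adjust the interpreter and path for your system). Crontab entry to refresh
# the vault note every hour:
#   0 * * * * /usr/bin/python3 /path/to/news.py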