Python Daily National Weather Report Generator
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import pytz
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
from reportlab.lib.units import inch
from reportlab.platypus import Paragraph, Frame
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from io import BytesIO
from PIL import Image
import logging
from reportlab.lib.utils import ImageReader
import os
import re

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Suppress InsecureRequestWarning
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
def scrape_weather_gov():
    """Scrape weather.gov for the top-news headline and summary, with an Eastern-time timestamp."""
    url = "https://www.weather.gov/"
    response = requests.get(url, verify=False)
    soup = BeautifulSoup(response.text, 'html.parser')

    eastern = pytz.timezone('US/Eastern')
    date = datetime.now(eastern)
    formatted_date = date.strftime("%Y-%m-%d %H:%M:%S")

    topnews = soup.find('div', id='topnews')
    if topnews:
        headline = topnews.find('h1')
        paragraph = topnews.find('p')
        if headline and paragraph:
            headline_text = headline.text.strip()
            paragraph_text = paragraph.text.strip()
            return formatted_date, headline_text, paragraph_text
        else:
            return formatted_date, "Headline not found", "Paragraph not found"
    else:
        return formatted_date, "Top news section not found", "Top news section not found"
def get_map_urls():
    """Return URLs for the WPC surface analysis and the 00Z 500mb upper-air map."""
    surface_map_url = "https://www.wpc.ncep.noaa.gov/sfc/usfntsfcwbg.gif"
    # Calculate the 500mb map URL based on the current date
    current_date = datetime.now().strftime("%Y%m%d")
    mb500_map_url = f"https://weather.uwyo.edu/upperair/maps/{current_date}00.500a.naconf.gif"
    return surface_map_url, mb500_map_url
def download_image(url):
    """Download an image from the given URL and return it as a PIL Image, or None on failure."""
    try:
        response = requests.get(url, verify=False)
        response.raise_for_status()  # Raise an exception for bad status codes
        image = Image.open(BytesIO(response.content))
        return image
    except requests.RequestException as e:
        logging.error(f"Error downloading image from {url}: {str(e)}")
        return None
    except IOError as e:
        logging.error(f"Error opening image from {url}: {str(e)}")
        return None
def scrape_discussion():
    """Scrape the WPC short range forecast discussion and return it as cleaned paragraphs."""
    url = "https://www.wpc.ncep.noaa.gov/discussions/hpcdiscussions.php?disc=pmdspd"
    response = requests.get(url, verify=False)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the pre tag containing the discussion
    discussion_tag = soup.find('pre')
    if discussion_tag:
        # Extract the text and remove any leading/trailing whitespace
        discussion_text = discussion_tag.text.strip()
        # Split the text into paragraphs
        paragraphs = discussion_text.split('\n\n')
        # Remove line breaks within each paragraph
        formatted_paragraphs = [re.sub(r'\s+', ' ', p.strip()) for p in paragraphs]
        # Join the paragraphs back together with double newlines
        formatted_discussion = '\n\n'.join(formatted_paragraphs)
        return formatted_discussion
    else:
        return "Discussion not found"
def create_weather_pdf(filename, date, headline, paragraph, surface_map_url, mb500_map_url, discussion):
    """Compose a one-page PDF with the headline, both maps, and the forecast discussion."""
    logging.info(f"Creating PDF with headline: {headline}")
    c = canvas.Canvas(filename, pagesize=letter)
    width, height = letter

    # Set up styles
    styles = getSampleStyleSheet()
    title_style = ParagraphStyle(
        'Title',
        parent=styles['Heading1'],
        fontSize=14,
        leading=16,
        spaceAfter=2
    )
    body_style = ParagraphStyle(
        'Body',
        parent=styles['BodyText'],
        fontSize=10,
        leading=12,
        spaceAfter=2
    )
    discussion_style = ParagraphStyle(
        'Discussion',
        parent=styles['BodyText'],
        fontSize=9,
        leading=12,
        spaceAfter=3
    )

    # Add title and date
    c.setFont("Helvetica-Bold", 16)
    c.drawString(0.5 * inch, height - 0.5 * inch, "Daily Weather Report")
    c.setFont("Helvetica", 10)
    c.drawString(0.5 * inch, height - 0.75 * inch, f"Date: {date}")

    # Add headline to canvas
    c.setFont("Helvetica-Bold", 12)
    c.drawString(0.5 * inch, height - inch, headline)

    # Add headline and paragraph
    content_width = width - inch
    # headline_frame = Frame(0.5 * inch, height - 1.5 * inch, content_width, 0.5 * inch, showBoundary=0)
    # headline_paragraph = Paragraph(headline, title_style)
    # headline_frame.addFromList([headline_paragraph], c)
    paragraph_frame = Frame(0.5 * inch, height - 1.75 * inch, content_width, 0.75 * inch, showBoundary=0)
    body_paragraph = Paragraph(paragraph, body_style)
    paragraph_frame.addFromList([body_paragraph], c)

    # Download and add images
    surface_map = download_image(surface_map_url)
    mb500_map = download_image(mb500_map_url)

    # Calculate image sizes
    img_width = (width - 1.5 * inch) / 2
    img_height = 2.75 * inch

    if surface_map:
        c.drawImage(ImageReader(surface_map), 0.5 * inch, height - 4.75 * inch, width=img_width, height=img_height,
                    preserveAspectRatio=True)
        c.setFont("Helvetica-Bold", 10)
        c.drawString(0.5 * inch, height - 1.95 * inch, "Surface Map")
    else:
        c.setFont("Helvetica", 10)
        c.drawString(0.5 * inch, height - 3.5 * inch, "Error loading Surface Map")

    if mb500_map:
        c.drawImage(ImageReader(mb500_map), 0.75 * inch + img_width, height - 4.75 * inch, width=img_width,
                    height=img_height, preserveAspectRatio=True)
        c.setFont("Helvetica-Bold", 10)
        c.drawString(0.75 * inch + img_width, height - 1.95 * inch, "500mb Map")
    else:
        c.setFont("Helvetica", 10)
        c.drawString(0.75 * inch + img_width, height - 3.5 * inch, "Error loading 500mb Map")

    # Add discussion
    c.setFont("Helvetica-Bold", 12)
    c.drawString(0.5 * inch, height - 5.25 * inch, "Weather Discussion")

    discussion_frame = Frame(
        0.5 * inch,
        0.5 * inch,
        content_width,
        5 * inch,  # Reduced height to accommodate the space after the headline
        leftPadding=0,
        bottomPadding=0,
        rightPadding=0,
        topPadding=0
    )
    discussion_paragraphs = [Paragraph(p.strip(), discussion_style) for p in discussion.split('\n\n') if p.strip()]
    discussion_frame.addFromList(discussion_paragraphs, c)

    # Add footer
    c.setFont("Helvetica", 10)
    c.drawString(0.5 * inch, 0.25 * inch, f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    c.save()
if __name__ == "__main__":
    # Scrape weather data
    date, headline, paragraph = scrape_weather_gov()

    # Get map URLs
    surface_map_url, mb500_map_url = get_map_urls()

    # Scrape discussion
    discussion = scrape_discussion()

    # Create reports directory if it doesn't exist
    reports_dir = "reports"
    os.makedirs(reports_dir, exist_ok=True)

    # Generate filename
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = os.path.join(reports_dir, f"weather_report_{timestamp}.pdf")

    # Create PDF
    create_weather_pdf(filename, date, headline, paragraph, surface_map_url, mb500_map_url, discussion)

    print(f"Weather report PDF created: {filename}")
    print(f"Date: {date}")
    print(f"Headline: {headline}")
    print(f"Paragraph: {paragraph}")
    print(f"Surface Map URL: {surface_map_url}")
    print(f"500mb Map URL: {mb500_map_url}")
    print(f"Discussion length: {len(discussion)} characters")