Python Daily National Weather Report Generator
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import pytz
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
from reportlab.lib.units import inch
from reportlab.platypus import Paragraph, Frame
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from io import BytesIO
from PIL import Image
import logging
from reportlab.lib.utils import ImageReader
import os
import re

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Suppress InsecureRequestWarning
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
def scrape_weather_gov():
    """Scrape weather.gov for the top-news headline and summary, with an Eastern-time timestamp."""
    url = "https://www.weather.gov/"
    response = requests.get(url, verify=False)
    soup = BeautifulSoup(response.text, 'html.parser')

    eastern = pytz.timezone('US/Eastern')
    date = datetime.now(eastern)
    formatted_date = date.strftime("%Y-%m-%d %H:%M:%S")

    topnews = soup.find('div', id='topnews')
    if topnews:
        headline = topnews.find('h1')
        paragraph = topnews.find('p')
        if headline and paragraph:
            headline_text = headline.text.strip()
            paragraph_text = paragraph.text.strip()
            return formatted_date, headline_text, paragraph_text
        else:
            return formatted_date, "Headline not found", "Paragraph not found"
    else:
        return formatted_date, "Top news section not found", "Top news section not found"
def get_map_urls():
    """Return URLs for the WPC surface analysis and the 00Z 500mb upper-air map."""
    surface_map_url = "https://www.wpc.ncep.noaa.gov/sfc/usfntsfcwbg.gif"
    # Calculate the 500mb map URL based on the current date
    current_date = datetime.now().strftime("%Y%m%d")
    mb500_map_url = f"https://weather.uwyo.edu/upperair/maps/{current_date}00.500a.naconf.gif"
    return surface_map_url, mb500_map_url
def download_image(url):
    """Download an image from the given URL and return it as a PIL Image, or None on failure."""
    try:
        response = requests.get(url, verify=False)
        response.raise_for_status()  # Raise an exception for bad status codes
        image = Image.open(BytesIO(response.content))
        return image
    except requests.RequestException as e:
        logging.error(f"Error downloading image from {url}: {str(e)}")
        return None
    except IOError as e:
        logging.error(f"Error opening image from {url}: {str(e)}")
        return None
def scrape_discussion():
    """Scrape the WPC short range forecast discussion and return it as cleaned paragraphs."""
    url = "https://www.wpc.ncep.noaa.gov/discussions/hpcdiscussions.php?disc=pmdspd"
    response = requests.get(url, verify=False)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the pre tag containing the discussion
    discussion_tag = soup.find('pre')
    if discussion_tag:
        # Extract the text and remove any leading/trailing whitespace
        discussion_text = discussion_tag.text.strip()
        # Split the text into paragraphs
        paragraphs = discussion_text.split('\n\n')
        # Remove line breaks within each paragraph
        formatted_paragraphs = [re.sub(r'\s+', ' ', p.strip()) for p in paragraphs]
        # Join the paragraphs back together with double newlines
        formatted_discussion = '\n\n'.join(formatted_paragraphs)
        return formatted_discussion
    else:
        return "Discussion not found"
def create_weather_pdf(filename, date, headline, paragraph, surface_map_url, mb500_map_url, discussion):
    """Compose a one-page PDF with the headline, both maps, and the forecast discussion."""
    logging.info(f"Creating PDF with headline: {headline}")
    c = canvas.Canvas(filename, pagesize=letter)
    width, height = letter

    # Set up styles
    styles = getSampleStyleSheet()
    title_style = ParagraphStyle(
        'Title',
        parent=styles['Heading1'],
        fontSize=14,
        leading=16,
        spaceAfter=2
    )
    body_style = ParagraphStyle(
        'Body',
        parent=styles['BodyText'],
        fontSize=10,
        leading=12,
        spaceAfter=2
    )
    discussion_style = ParagraphStyle(
        'Discussion',
        parent=styles['BodyText'],
        fontSize=9,
        leading=12,
        spaceAfter=3
    )

    # Add title and date
    c.setFont("Helvetica-Bold", 16)
    c.drawString(0.5 * inch, height - 0.5 * inch, "Daily Weather Report")
    c.setFont("Helvetica", 10)
    c.drawString(0.5 * inch, height - 0.75 * inch, f"Date: {date}")

    # Add headline to canvas
    c.setFont("Helvetica-Bold", 12)
    c.drawString(0.5 * inch, height - inch, headline)

    # Add headline and paragraph
    content_width = width - inch
    # headline_frame = Frame(0.5 * inch, height - 1.5 * inch, content_width, 0.5 * inch, showBoundary=0)
    # headline_paragraph = Paragraph(headline, title_style)
    # headline_frame.addFromList([headline_paragraph], c)
    paragraph_frame = Frame(0.5 * inch, height - 1.75 * inch, content_width, 0.75 * inch, showBoundary=0)
    body_paragraph = Paragraph(paragraph, body_style)
    paragraph_frame.addFromList([body_paragraph], c)

    # Download and add images
    surface_map = download_image(surface_map_url)
    mb500_map = download_image(mb500_map_url)

    # Calculate image sizes
    img_width = (width - 1.5 * inch) / 2
    img_height = 2.75 * inch

    if surface_map:
        c.drawImage(ImageReader(surface_map), 0.5 * inch, height - 4.75 * inch, width=img_width, height=img_height,
                    preserveAspectRatio=True)
        c.setFont("Helvetica-Bold", 10)
        c.drawString(0.5 * inch, height - 1.95 * inch, "Surface Map")
    else:
        c.setFont("Helvetica", 10)
        c.drawString(0.5 * inch, height - 3.5 * inch, "Error loading Surface Map")

    if mb500_map:
        c.drawImage(ImageReader(mb500_map), 0.75 * inch + img_width, height - 4.75 * inch, width=img_width,
                    height=img_height, preserveAspectRatio=True)
        c.setFont("Helvetica-Bold", 10)
        c.drawString(0.75 * inch + img_width, height - 1.95 * inch, "500mb Map")
    else:
        c.setFont("Helvetica", 10)
        c.drawString(0.75 * inch + img_width, height - 3.5 * inch, "Error loading 500mb Map")

    # Add discussion
    c.setFont("Helvetica-Bold", 12)
    c.drawString(0.5 * inch, height - 5.25 * inch, "Weather Discussion")

    discussion_frame = Frame(
        0.5 * inch,
        0.5 * inch,
        content_width,
        5 * inch,  # Reduced height to accommodate the space after the headline
        leftPadding=0,
        bottomPadding=0,
        rightPadding=0,
        topPadding=0
    )
    discussion_paragraphs = [Paragraph(p.strip(), discussion_style) for p in discussion.split('\n\n') if p.strip()]
    discussion_frame.addFromList(discussion_paragraphs, c)

    # Add footer
    c.setFont("Helvetica", 10)
    c.drawString(0.5 * inch, 0.25 * inch, f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    c.save()
if __name__ == "__main__":
    # Scrape weather data
    date, headline, paragraph = scrape_weather_gov()

    # Get map URLs
    surface_map_url, mb500_map_url = get_map_urls()

    # Scrape discussion
    discussion = scrape_discussion()

    # Create reports directory if it doesn't exist
    reports_dir = "reports"
    os.makedirs(reports_dir, exist_ok=True)

    # Generate filename
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = os.path.join(reports_dir, f"weather_report_{timestamp}.pdf")

    # Create PDF
    create_weather_pdf(filename, date, headline, paragraph, surface_map_url, mb500_map_url, discussion)

    print(f"Weather report PDF created: {filename}")
    print(f"Date: {date}")
    print(f"Headline: {headline}")
    print(f"Paragraph: {paragraph}")
    print(f"Surface Map URL: {surface_map_url}")
    print(f"500mb Map URL: {mb500_map_url}")
    print(f"Discussion length: {len(discussion)} characters")