gkhays · May 13, 2025 19:46
diff --git a/rhsa_cve_scraper.py b/rhsa_cve_scraper.py
 #!/usr/bin/env python3
 """
 Red Hat Security Advisory CVE Scraper

 This script fetches CVE numbers associated with a Red Hat Security Advisory (RHSA) ID
 by scraping the Red Hat Security Portal.

 Usage:
    python rhsa_cve_scraper.py RHSA-YYYY-NNNN
    
 Example:
    python rhsa_cve_scraper.py RHSA-2024-10379
 """

 import sys
 import re
 import requests
 import logging
 from bs4 import BeautifulSoup

 # Configure the root logger once at import time: INFO and above, timestamped.
 logging.basicConfig(
     format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
     level=logging.INFO,
 )
 # Named logger used by the functions in this script.
 LOGGER = logging.getLogger("py-rhsa")

 def get_cve_from_rhsa(rhsa_id):
     """
     Scrape the Red Hat errata page for an advisory and return its CVE IDs.

     The ID is accepted either in the canonical ``RHSA-YYYY:NNNN`` form or in
     the dash form ``RHSA-YYYY-NNNN``; the dash form is rewritten to the
     canonical form before the errata URL is built.

     Args:
         rhsa_id (str): The Red Hat Security Advisory ID (e.g., "RHSA-2024-10379")

     Returns:
         list: Unique CVE IDs in first-seen order (links in the ``cves``
             section, then table cells, then the full page text). An empty
             list is returned when the ID is malformed, the HTTP status is
             not 200, or any exception is raised while fetching or parsing.
     """
     cve_pattern = re.compile(r'CVE-\d{4}-\d+')

     # fullmatch, unlike match() with a trailing "$", also rejects an ID that
     # is followed by a newline, so nothing unexpected ends up in the URL.
     id_match = re.fullmatch(r'RHSA-(\d{4})([:-])(\d+)', rhsa_id)
     if id_match is None:
         LOGGER.warning("Warning: Invalid RHSA ID format. Expected format: RHSA-YYYY:NNNN")
         return []
     if id_match.group(2) == "-":
         # The dash form is supported input, so convert it without warning.
         rhsa_id = f"RHSA-{id_match.group(1)}:{id_match.group(3)}"
         LOGGER.info("Converted RHSA ID to valid format: %s", rhsa_id)

     url = f"https://access.redhat.com/errata/{rhsa_id}"

     try:
         LOGGER.info("Fetching data from %s...", url)
         response = requests.get(url, timeout=10)

         if response.status_code != 200:
             LOGGER.error("Error: Received status code %s", response.status_code)
             return []

         soup = BeautifulSoup(response.content, 'html.parser')
         # dict keys keep insertion order and give constant-time duplicate checks.
         found = {}

         # Pass 1: links inside the dedicated CVEs section.
         cve_section = soup.find('section', {'id': 'cves'})
         if cve_section:
             LOGGER.info("Found CVE section in the page.")
             for link in cve_section.find_all('a', href=cve_pattern):
                 # Links are selected by href, so fall back to the href when
                 # the visible link text does not carry the ID itself.
                 hit = cve_pattern.search(link.text) or cve_pattern.search(link['href'])
                 if hit:
                     found.setdefault(hit.group(0), None)

         # Pass 2: table cells whose string contains a CVE ID.
         for cell in soup.find_all('td', string=cve_pattern):
             hit = cve_pattern.search(cell.text)
             if hit:
                 found.setdefault(hit.group(0), None)

         # Pass 3: any remaining CVE ID mentioned anywhere in the page text.
         for cve in cve_pattern.findall(soup.get_text()):
             found.setdefault(cve, None)

         return list(found)

     except requests.exceptions.RequestException as e:
         LOGGER.error("Error during request: %s", e)
         return []
     except Exception as e:
         # Best-effort boundary: keep returning [] but record the traceback.
         LOGGER.exception("Unexpected error: %s", e)
         return []


 def main():
     """Command-line entry point: look up one RHSA ID and print its CVEs.

     Expects exactly one argument after the script name; otherwise prints the
     usage text and exits with status 1.
     """
     args = sys.argv[1:]
     if len(args) != 1:
         print("Usage: python rhsa_cve_scraper.py RHSA-YYYY-NNNN")
         print("Example: python rhsa_cve_scraper.py RHSA-2024-10379")
         sys.exit(1)

     # The ID is upper-cased before lookup, so lower-case input is accepted.
     rhsa_id = args[0].upper()
     cves = get_cve_from_rhsa(rhsa_id)

     if not cves:
         print(f"No CVEs found for {rhsa_id}")
         return

     LOGGER.info(f"Found {len(cves)} CVE(s) for {rhsa_id}: {cves}")
     for cve in sorted(cves):
         print(f"- {cve}")


 # Run the CLI only when this file is executed directly, not when it is imported.
 if __name__ == "__main__":
    main()
	#!/usr/bin/env python3
	"""
	Red Hat Security Advisory CVE Scraper

	This script fetches CVE numbers associated with a Red Hat Security Advisory (RHSA) ID
	by scraping the Red Hat Security Portal.

	Usage:
	python rhsa_cve_scraper.py RHSA-YYYY-NNNN

	Example:
	python rhsa_cve_scraper.py RHSA-2024-10379
	"""

	import sys
	import re
	import requests
	import logging
	from bs4 import BeautifulSoup

	# Configure the root logger once at import time: INFO and above, timestamped.
	logging.basicConfig(
	    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
	    level=logging.INFO,
	)
	# Named logger used by the functions in this script.
	LOGGER = logging.getLogger("py-rhsa")

	def get_cve_from_rhsa(rhsa_id):
	    """
	    Scrape the Red Hat errata page for an advisory and return its CVE IDs.

	    The ID is accepted either in the canonical ``RHSA-YYYY:NNNN`` form or in
	    the dash form ``RHSA-YYYY-NNNN``; the dash form is rewritten to the
	    canonical form before the errata URL is built.

	    Args:
	        rhsa_id (str): The Red Hat Security Advisory ID (e.g., "RHSA-2024-10379")

	    Returns:
	        list: Unique CVE IDs in first-seen order (links in the ``cves``
	            section, then table cells, then the full page text). An empty
	            list is returned when the ID is malformed, the HTTP status is
	            not 200, or any exception is raised while fetching or parsing.
	    """
	    cve_pattern = re.compile(r'CVE-\d{4}-\d+')

	    # fullmatch, unlike match() with a trailing "$", also rejects an ID that
	    # is followed by a newline, so nothing unexpected ends up in the URL.
	    id_match = re.fullmatch(r'RHSA-(\d{4})([:-])(\d+)', rhsa_id)
	    if id_match is None:
	        LOGGER.warning("Warning: Invalid RHSA ID format. Expected format: RHSA-YYYY:NNNN")
	        return []
	    if id_match.group(2) == "-":
	        # The dash form is supported input, so convert it without warning.
	        rhsa_id = f"RHSA-{id_match.group(1)}:{id_match.group(3)}"
	        LOGGER.info("Converted RHSA ID to valid format: %s", rhsa_id)

	    url = f"https://access.redhat.com/errata/{rhsa_id}"

	    try:
	        LOGGER.info("Fetching data from %s...", url)
	        response = requests.get(url, timeout=10)

	        if response.status_code != 200:
	            LOGGER.error("Error: Received status code %s", response.status_code)
	            return []

	        soup = BeautifulSoup(response.content, 'html.parser')
	        # dict keys keep insertion order and give constant-time duplicate checks.
	        found = {}

	        # Pass 1: links inside the dedicated CVEs section.
	        cve_section = soup.find('section', {'id': 'cves'})
	        if cve_section:
	            LOGGER.info("Found CVE section in the page.")
	            for link in cve_section.find_all('a', href=cve_pattern):
	                # Links are selected by href, so fall back to the href when
	                # the visible link text does not carry the ID itself.
	                hit = cve_pattern.search(link.text) or cve_pattern.search(link['href'])
	                if hit:
	                    found.setdefault(hit.group(0), None)

	        # Pass 2: table cells whose string contains a CVE ID.
	        for cell in soup.find_all('td', string=cve_pattern):
	            hit = cve_pattern.search(cell.text)
	            if hit:
	                found.setdefault(hit.group(0), None)

	        # Pass 3: any remaining CVE ID mentioned anywhere in the page text.
	        for cve in cve_pattern.findall(soup.get_text()):
	            found.setdefault(cve, None)

	        return list(found)

	    except requests.exceptions.RequestException as e:
	        LOGGER.error("Error during request: %s", e)
	        return []
	    except Exception as e:
	        # Best-effort boundary: keep returning [] but record the traceback.
	        LOGGER.exception("Unexpected error: %s", e)
	        return []


	def main():
	    """Command-line entry point: look up one RHSA ID and print its CVEs.

	    Expects exactly one argument after the script name; otherwise prints the
	    usage text and exits with status 1.
	    """
	    if len(sys.argv) != 2:
	        print("Usage: python rhsa_cve_scraper.py RHSA-YYYY-NNNN")
	        print("Example: python rhsa_cve_scraper.py RHSA-2024-10379")
	        sys.exit(1)

	    # The ID is upper-cased before lookup, so lower-case input is accepted.
	    rhsa_id = sys.argv[1].upper()
	    cve_list = get_cve_from_rhsa(rhsa_id)

	    if cve_list:
	        LOGGER.info("Found %d CVE(s) for %s: %s", len(cve_list), rhsa_id, cve_list)
	        for cve in sorted(cve_list):
	            print(f"- {cve}")
	    else:
	        print(f"No CVEs found for {rhsa_id}")


	# Run the CLI only when this file is executed directly, not when it is imported.
	if __name__ == "__main__":
	    main()