Skip to content

Instantly share code, notes, and snippets.

@gkhays
Last active May 5, 2025 13:09
Show Gist options
  • Save gkhays/40d1432e9c863a7e5040b9c695317baa to your computer and use it in GitHub Desktop.
Save gkhays/40d1432e9c863a7e5040b9c695317baa to your computer and use it in GitHub Desktop.
Retrieves CVE numbers from a Red Hat Security Advisory
[project]
name = "py-rhsa"
version = "0.1.0"
description = "Retrieves CVE numbers from a Red Hat Security Advisory"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"bs4>=0.0.2",
"requests>=2.32.3",
]
[project]
name = "py-rhsa"
version = "0.1.0"
description = "Retrieves CVE numbers from a Red Hat Security Advisory"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"bs4>=0.0.2",
"requests>=2.32.3",
]
#!/usr/bin/env python3
"""
Red Hat Security Advisory CVE Scraper
This script fetches CVE numbers associated with a Red Hat Security Advisory (RHSA) ID
by scraping the Red Hat Security Portal.
Usage:
python rhsa_cve_scraper.py RHSA-YYYY-NNNN
Example:
python rhsa_cve_scraper.py RHSA-2024-10379
"""
import sys
import re
import requests
import logging
from bs4 import BeautifulSoup
# Configure logging at import time: INFO and above, with each record laid out
# as "timestamp - logger name - level - message". Then bind the named logger
# that the rest of this script writes to.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
LOGGER = logging.getLogger("py-rhsa")
# Patterns are compiled once at import time instead of on every call.
_RHSA_CANONICAL_RE = re.compile(r'RHSA-\d{4}:\d+')
_RHSA_DASHED_RE = re.compile(r'(RHSA-\d{4})-(\d+)')
_CVE_RE = re.compile(r'CVE-\d{4}-\d+')


def _normalize_rhsa_id(rhsa_id):
    """Return rhsa_id as RHSA-YYYY:NNNN, or None if it is not a recognizable RHSA ID."""
    if not isinstance(rhsa_id, str):
        return None
    # fullmatch (rather than match with '$') also rejects a trailing newline,
    # which would otherwise end up inside the request URL.
    if _RHSA_CANONICAL_RE.fullmatch(rhsa_id):
        return rhsa_id
    dashed = _RHSA_DASHED_RE.fullmatch(rhsa_id)
    if dashed:
        # RHSA-YYYY-NNNN -> RHSA-YYYY:NNNN
        return f"{dashed.group(1)}:{dashed.group(2)}"
    return None


def _collect_cves(soup):
    """Return the unique CVE IDs found in a parsed advisory page, in first-seen order."""
    # A dict is used as an insertion-ordered set so de-duplication is O(1) per ID.
    found = {}

    # 1. Links inside the dedicated CVEs section, when the page has one.
    cve_section = soup.find('section', {'id': 'cves'})
    if cve_section:
        LOGGER.info("Found CVE section in the page.")
        for link in cve_section.find_all('a', href=_CVE_RE):
            match = _CVE_RE.search(link.text)
            if match:
                found.setdefault(match.group(0))

    # 2. Table cells whose text contains a CVE ID.
    for cell in soup.find_all('td', string=_CVE_RE):
        match = _CVE_RE.search(cell.text)
        if match:
            found.setdefault(match.group(0))

    # 3. Any remaining CVE IDs mentioned anywhere in the page text.
    for cve in _CVE_RE.findall(soup.get_text()):
        found.setdefault(cve)

    return list(found)


def get_cve_from_rhsa(rhsa_id):
    """
    Scrape the Red Hat Security Portal to extract CVE numbers for a given RHSA ID.

    Both "RHSA-YYYY:NNNN" and the dashed "RHSA-YYYY-NNNN" spelling are accepted;
    the dashed form is converted before the advisory page is requested.

    Args:
        rhsa_id (str): The Red Hat Security Advisory ID
            (e.g., "RHSA-2024:10379" or "RHSA-2024-10379")

    Returns:
        list: Unique CVE IDs associated with the RHSA, in the order they were
        first seen on the page. An empty list is returned when the ID is
        invalid, the page does not answer with HTTP 200, or any error occurs
        while fetching or parsing (errors are logged, not raised).
    """
    canonical_id = _normalize_rhsa_id(rhsa_id)
    if canonical_id is None:
        LOGGER.warning("Warning: Invalid RHSA ID format. Expected format: RHSA-YYYY:NNNN")
        return []
    if canonical_id != rhsa_id:
        # The dashed form is a supported input, so this is informational only.
        LOGGER.info("Converted RHSA ID to valid format: %s", canonical_id)

    url = f"https://access.redhat.com/errata/{canonical_id}"
    try:
        LOGGER.info("Fetching data from %s...", url)
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            LOGGER.error("Error: Received status code %s", response.status_code)
            return []
        soup = BeautifulSoup(response.content, 'html.parser')
        return _collect_cves(soup)
    except requests.exceptions.RequestException as e:
        LOGGER.error("Error during request: %s", e)
        return []
    except Exception as e:
        # Best-effort scraper: keep returning [] to the caller, but record the
        # traceback so parsing failures can be diagnosed.
        LOGGER.exception("Unexpected error: %s", e)
        return []
def main():
    """Command-line entry point.

    Expects exactly one argument, the RHSA ID. The ID is upper-cased, looked
    up via get_cve_from_rhsa, and the resulting CVE IDs are printed in sorted
    order, one per line. With any other argument count, usage help is printed
    and the process exits with status 1.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        usage_lines = (
            "Usage: python rhsa_cve_scraper.py RHSA-YYYY-NNNN",
            "Example: python rhsa_cve_scraper.py RHSA-2024-10379",
        )
        for usage_line in usage_lines:
            print(usage_line)
        sys.exit(1)

    rhsa_id = cli_args[0].upper()
    cve_list = get_cve_from_rhsa(rhsa_id)

    if not cve_list:
        print(f"No CVEs found for {rhsa_id}")
        return

    LOGGER.info(f"Found {len(cve_list)} CVE(s) for {rhsa_id}: {cve_list}")
    for cve in sorted(cve_list):
        print(f"- {cve}")


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment