Created
January 19, 2024 00:39
-
-
Save enriched/8d76abd68f3dd3d8875591b73152f01b to your computer and use it in GitHub Desktop.
Get RPM metadata files for a repository
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
from urllib.parse import urljoin | |
import urllib | |
import gzip | |
from pathlib import Path, PurePath | |
import requests | |
from bs4 import BeautifulSoup | |
# All downloaded metadata is stored below the directory the notebook/script
# is started from.
NOTEBOOK_DIR = os.getcwd()
REPODATA_DIR = Path(NOTEBOOK_DIR) / "repodata"
REPOCACHE_DIR = REPODATA_DIR / "cache"

# exist_ok=True replaces the check-then-create sequence (no race between the
# existence test and the mkdir, and re-running the script is a no-op);
# parents=True lets the first call succeed even if an intermediate directory
# is missing.
REPODATA_DIR.mkdir(parents=True, exist_ok=True)
REPOCACHE_DIR.mkdir(exist_ok=True)

# Repository coordinates: CentOS 7 base OS tree for the chosen architecture.
# NOTE(review): CentOS 7 is end-of-life; confirm this mirror still serves the
# 7/os tree (archived releases are typically hosted on vault.centos.org).
BASEARCH = "x86_64"
CENTOS_MIRROR_URL = "http://mirror.centos.org/centos/"
# Both pieces end with "/" so later urljoin() calls resolve relative hrefs
# underneath this directory instead of replacing its last path segment.
CENTOS7_BASEOS_URL = urljoin(CENTOS_MIRROR_URL, f"7/os/{BASEARCH}/")
# Get repomd.xml: the repository index that lists every other metadata file
# (primary, filelists, ...) together with its relative location.
# CENTOS7_BASEOS_URL already ends with "/", so join the relative path with
# urljoin() instead of inserting another "/" (which produced ".../x86_64//repodata/...").
repomd_request = requests.get(
    urljoin(CENTOS7_BASEOS_URL, "repodata/repomd.xml"),
    timeout=30,  # without a timeout requests may block forever on a dead mirror
)
# Fail here on 4xx/5xx rather than saving and parsing an HTML error page.
repomd_request.raise_for_status()
repomd_content = repomd_request.content

# Keep a local copy of the index next to the files it references.
repomd_path = REPODATA_DIR / "repomd.xml"
repomd_path.write_bytes(repomd_content)

# Parsed index; get_metadata_file() looks up <data type="..."> entries in it.
repomd = BeautifulSoup(repomd_content, "xml")
def get_metadata_file(type: str, *, timeout: float = 30.0) -> Path:
    """Download one metadata file referenced by repomd.xml into REPODATA_DIR.

    The ``<data type=...>`` entry is looked up in the module-level ``repomd``
    document, its ``<location href=...>`` is resolved against
    ``CENTOS7_BASEOS_URL`` and the response body is written to
    ``REPODATA_DIR`` under the href's base name. When that name ends in
    ``.gz`` a decompressed copy is written next to it with the ``.gz``
    suffix removed.

    Args:
        type: Value of the ``type`` attribute of the wanted ``<data>``
            element, e.g. ``"primary"`` or ``"filelists"``.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.

    Returns:
        Path of the file as downloaded (the compressed file when the remote
        file is gzip-compressed, not the decompressed copy).

    Raises:
        IndexError: repomd.xml contains no ``<data>`` element of this type.
        requests.HTTPError: the server answered with a 4xx/5xx status.
    """
    entries = repomd.find_all("data", type=type)
    if not entries:
        # Same exception type the bare ``[0]`` lookup raised, now with context.
        raise IndexError(f"repomd.xml has no <data type={type!r}> entry")
    metadata_file_href = entries[0].location["href"]

    # Only the base name of the remote href is used locally, so the file is
    # always written directly inside REPODATA_DIR.
    metadata_filename: str = os.path.basename(metadata_file_href)
    metadata_file_url = urljoin(CENTOS7_BASEOS_URL, metadata_file_href)

    request = requests.get(metadata_file_url, timeout=timeout)
    # Stop before an error page is saved to disk or fed to gzip.decompress().
    request.raise_for_status()

    metadata_file_path = REPODATA_DIR / metadata_filename
    metadata_file_path.write_bytes(request.content)

    if metadata_file_path.suffix == ".gz":
        # with_suffix("") strips only the final ".gz" (foo.xml.gz -> foo.xml).
        metadata_file_decomp_path = metadata_file_path.with_suffix("")
        metadata_file_decomp_path.write_bytes(gzip.decompress(request.content))

    return metadata_file_path
# Fetch the "primary" metadata file listed in repomd.xml.
primary_file_path = get_metadata_file(type="primary")

# Fetch the "filelists" metadata file listed in repomd.xml.
filelists_file_path = get_metadata_file(type="filelists")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment