Skip to content

Instantly share code, notes, and snippets.

@thimslugga
Created October 30, 2025 21:19
Show Gist options
  • Save thimslugga/a0061a4377295c8707fcd83c3870440b to your computer and use it in GitHub Desktop.
Save thimslugga/a0061a4377295c8707fcd83c3870440b to your computer and use it in GitHub Desktop.
Download individual RPM package files from AL2023 repositories
#!/bin/bash
set -e
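# Download an individual RPM package file from the Amazon Linux 2023 repositories.
#
# Prerequisite (only used when a repository publishes its primary metadata as SQLite):
#   sudo yum install -y sqlite
#
# Usage: ./download-al2023-rpm.sh <package-name> [options]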
# Globals: the CDN root and the AL2023 tree that lives beneath it
AL_MIRROR_BASE='https://cdn.amazonlinux.com'
AL2023_MIRROR_BASE="${AL_MIRROR_BASE}/al2023"

# Defaults for every setting the command line can override
PACKAGE_NAME=''
REPO='all'
ARCH='x86_64'
VERSION='latest'
OUTPUT_DIR='.'

# ANSI color escapes, stored unexpanded; they are rendered by `echo -e`
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color (reset)
# usage
#   Prints the command synopsis, the option list and three examples to
#   stdout, then terminates the script with exit status 1.
usage() {
  # One printf argument per output line; "" yields an empty line.
  printf '%s\n' \
    "Usage: $0 <package-name> [options]" \
    "" \
    "Options:" \
    " -r, --repo <repo> Repository name e.g. core, extras. Default: all" \
    " -a, --arch <arch> Architecture e.g. x86_64 or aarch64. Default: x86_64" \
    " -v, --version <version> AL2023 version e.g. 20251020.01. Default: latest" \
    " -o, --output <dir> Output directory. Default: current directory" \
    " -h, --help Display help message." \
    "" \
    "Example:" \
    " $0 nginx" \
    " $0 nginx -a aarch64 -o /tmp/rpms" \
    " $0 nginx -r extras"
  exit 1
}
# Parse arguments
#
# parse_args ARG...
#   Fills PACKAGE_NAME, REPO, ARCH, VERSION and OUTPUT_DIR from the given
#   command-line words. The first positional word becomes the package name;
#   any further positional word, any unrecognized "-..." word, and any
#   value-taking option that is missing its value prints an error followed
#   by the usage text (usage terminates the script).
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -r|--repo|-a|--arch|-v|--version|-o|--output)
        # Without this check `shift 2` fails when the value is missing, and
        # under `set -e` the script would end without any message.
        if [[ $# -lt 2 ]]; then
          echo -e "${RED}ERROR: Option $1 requires a value.${NC}"
          usage
        fi
        case "$1" in
          -r|--repo)    REPO="$2" ;;
          -a|--arch)    ARCH="$2" ;;
          -v|--version) VERSION="$2" ;;
          -o|--output)  OUTPUT_DIR="$2" ;;
        esac
        shift 2
        ;;
      -h|--help)
        usage
        ;;
      -*)
        # An unrecognized flag must not be mistaken for the package name.
        echo -e "${RED}ERROR: Unknown option $1${NC}"
        usage
        ;;
      *)
        if [[ -z "$PACKAGE_NAME" ]]; then
          PACKAGE_NAME="$1"
        else
          echo -e "${RED}ERROR: Unknown option $1${NC}"
          usage
        fi
        shift
        ;;
    esac
  done
}
parse_args "$@"
# A package name is mandatory: without one, report the error and show usage
[[ -n "$PACKAGE_NAME" ]] || {
  echo -e "${RED}ERROR: Package name required.${NC}"
  usage
}
# Make sure the output directory exists before anything is downloaded
[[ -d "$OUTPUT_DIR" ]] || mkdir -p "$OUTPUT_DIR"
# Banner summarizing the effective settings, followed by a blank line
echo -e "${GREEN}=== AL2023 RPM Package Downloader ===${NC}"
for summary_line in \
  "Package Name: ${YELLOW}${PACKAGE_NAME}${NC}" \
  "Package Architecture: ${YELLOW}${ARCH}${NC}" \
  "Repository: ${YELLOW}${REPO}${NC}" \
  "Output Directory: ${YELLOW}${OUTPUT_DIR}${NC}"; do
  echo -e "$summary_line"
done
echo ""
# version_gt A [B...]
#   Succeeds (status 0) when A is NOT the lowest of the given versions
#   according to `sort -V`; for two arguments that means A is newer than B.
#   Equal versions yield a non-zero status.
version_gt() {
  local lowest
  lowest=$(printf '%s\n' "$@" | sort -V | head -n 1)
  [[ "$lowest" != "$1" ]]
}
# Search and download package from a specific repo

# _primary_xml_lookup NAME
#   Reads a primary.xml document on stdin and prints one line,
#   "<location href>|<ver>|<rel>", for the first <package> whose <name> is
#   exactly NAME. Prints nothing when no package matches.
#   Matching is line based, i.e. it expects <name>, <version> and <location>
#   to sit on separate lines as the previous grep-based lookup also assumed.
_primary_xml_lookup() {
  # NAME is handed over via the environment and compared with index(), so it
  # is never treated as a regular expression or an awk escape sequence.
  PKG_LOOKUP_NAME="$1" awk '
    function attr(line, key,    pos, rest) {
      pos = index(line, key "=\"")
      if (pos == 0) return ""
      rest = substr(line, pos + length(key) + 2)
      return substr(rest, 1, index(rest, "\"") - 1)
    }
    BEGIN               { want = "<name>" ENVIRON["PKG_LOOKUP_NAME"] "</name>" }
    /<package[ >]/      { hit = 0; ver = ""; rel = "" }  # new package: reset
    index($0, want)     { hit = 1 }
    hit && /<version /  { ver = attr($0, "ver"); rel = attr($0, "rel") }
    hit && /<location / { print attr($0, "href") "|" ver "|" rel; exit }
  '
}

# search_and_download REPO_NAME
#   Looks PACKAGE_NAME up in a single AL2023 repository and downloads the
#   first matching RPM into OUTPUT_DIR. Progress is written to stdout.
#   Globals read: AL2023_MIRROR_BASE, VERSION, ARCH, PACKAGE_NAME,
#                 OUTPUT_DIR and the color variables.
#   Returns: 0 when the RPM was downloaded, 1 on any failure (mirror list or
#            metadata unavailable, package not found, download error).
#   NOTE: the first matching entry of the metadata is used; no attempt is
#         made to pick the newest of several versions.
function search_and_download() {
  local repo_name=$1
  # Everything is local so that a failed attempt on one repository cannot
  # leak a stale location or stale metadata into the next call.
  local mirrorlist_url mirrors mirror repomd_url repomd_xml
  local primary_location primary_url temp_db sql_quote sql_name
  local match="" rpm_location="" package_version="" package_release=""
  local rpm_url rpm_filename output_path file_size

  echo -e "${GREEN}[*] Check repository: ${repo_name} ${NC}"
  # Get the mirrorlist:
  #
  # Amazon 2 (x86_64) Core https://cdn.amazonlinux.com/2/core/2.0/x86_64/mirror.list
  # Amazon 2 (aarch64) core https://cdn.amazonlinux.com/2/core/2.0/aarch64/mirror.list
  # Amazon Linux 2023 (x86_64) core https://cdn.amazonlinux.com/al2023/core/mirrors/latest/x86_64/mirror.list
  # Amazon Linux 2023 (aarch64) core https://cdn.amazonlinux.com/al2023/core/mirrors/latest/aarch64/mirror.list
  mirrorlist_url="${AL2023_MIRROR_BASE}/${repo_name}/mirrors/${VERSION}/${ARCH}/mirror.list"
  echo "Mirrorlist URL: $mirrorlist_url"
  mirrors=$(curl -s -f -L "$mirrorlist_url" 2>/dev/null) || mirrors=""
  if [[ -z "$mirrors" ]]; then
    echo -e "${YELLOW}WARN: Unable to fetch mirrorlist for repo: ${repo_name}${NC}"
    return 1
  fi

  # Use the first mirror; normalize it to end in exactly one "/" because
  # every URL below is built by plain concatenation.
  mirror=${mirrors%%$'\n'*}
  mirror=${mirror%$'\r'}
  mirror="${mirror%/}/"
  echo -e "Selected Mirror: ${YELLOW}${mirror}${NC}"

  # Download repodata/repomd.xml to get primary database location
  echo -e "${GREEN}[*] Fetch repository metadata...${NC}"
  repomd_url="${mirror}repodata/repomd.xml"
  repomd_xml=$(curl -s -f -L "$repomd_url" 2>/dev/null) || repomd_xml=""
  if [[ -z "$repomd_xml" ]]; then
    echo -e "${YELLOW}WARN: Unable to fetch repomd.xml for repo: ${repo_name}${NC}"
    return 1
  fi

  # Extract the primary metadata location (primary.xml.gz or primary.sqlite.bz2)
  primary_location=$(printf '%s\n' "$repomd_xml" \
    | grep -o '<location href="[^"]*"' \
    | grep -E '(primary\.xml\.gz|primary\.sqlite\.bz2)' \
    | head -n 1 \
    | sed 's/.*href="\([^"]*\)".*/\1/') || primary_location=""
  if [[ -z "$primary_location" ]]; then
    echo -e "${YELLOW}WARN: Unable to find primary database location for repo: ${repo_name}${NC}"
    return 1
  fi
  primary_url="${mirror}${primary_location}"
  echo "Database URL: $primary_url"

  # Download and search primary database for the package
  echo -e "${GREEN}[*] Search for package...${NC}"
  case "$primary_location" in
    *.xml.gz)
      # Stream the metadata straight into the lookup instead of holding the
      # whole document in a shell variable.
      match=$(curl -s -f -L "$primary_url" 2>/dev/null \
        | gunzip 2>/dev/null \
        | _primary_xml_lookup "$PACKAGE_NAME") || match=""
      ;;
    *.sqlite.bz2)
      echo -e "${YELLOW}SQLite file detected, extract data...${NC}"
      if ! command -v sqlite3 >/dev/null 2>&1; then
        echo -e "${YELLOW}WARN: sqlite3 is required to read ${primary_location}${NC}"
        return 1
      fi
      temp_db=$(mktemp) || return 1
      # Fetch the database once and read all three columns in one query.
      if curl -s -f -L "$primary_url" 2>/dev/null | bunzip2 > "$temp_db" 2>/dev/null; then
        # Double any single quote so the name stays inside the SQL literal.
        sql_quote="'"
        sql_name=${PACKAGE_NAME//$sql_quote/$sql_quote$sql_quote}
        match=$(sqlite3 -separator '|' "$temp_db" \
          "SELECT location_href, version, release FROM packages WHERE name='${sql_name}' LIMIT 1" \
          2>/dev/null) || match=""
      fi
      rm -f -- "$temp_db"
      ;;
    *)
      # Uncompressed primary metadata
      match=$(curl -s -f -L "$primary_url" 2>/dev/null \
        | _primary_xml_lookup "$PACKAGE_NAME") || match=""
      ;;
  esac

  if [[ -n "$match" ]]; then
    IFS='|' read -r rpm_location package_version package_release <<< "$match"
  fi
  if [[ -z "$rpm_location" ]]; then
    echo -e "${YELLOW}WARN: Unable to find package '${PACKAGE_NAME}' in repo: ${repo_name}${NC}"
    return 1
  fi
  echo -e "Found Package: ${YELLOW}${PACKAGE_NAME}-${package_version}-${package_release}${NC}"
  echo "RPM Package Location: $rpm_location"
  echo ""

  # Download the RPM
  echo -e "${GREEN}[*] Download RPM...${NC}"
  rpm_url="${mirror}${rpm_location}"
  rpm_filename=$(basename -- "$rpm_location")
  output_path="${OUTPUT_DIR}/${rpm_filename}"
  echo "Download URL: $rpm_url"
  echo "Save RPM package: $output_path"
  # Write to a ".part" file first so an interrupted transfer never leaves a
  # truncated file under the final name or clobbers an earlier good copy.
  if curl -L -f -o "${output_path}.part" "$rpm_url" 2>/dev/null \
    && mv -f -- "${output_path}.part" "$output_path"; then
    echo -e "${GREEN}✓ Successfully downloaded RPM package: ${output_path}${NC}"
    file_size=$(du -h "$output_path" | cut -f1)
    echo -e "RPM package size: ${YELLOW}${file_size}${NC}"
    echo ""
    echo -e "${GREEN}=== Download Complete ===${NC}"
    return 0
  fi
  rm -f -- "${output_path}.part"
  echo -e "${RED}✗ Failed to download RPM package${NC}"
  return 1
}
# Pick the repositories to try: "all" expands to core followed by extras,
# anything else is taken as a single repository name
case "$REPO" in
  all) REPOS=("core" "extras") ;;
  *) REPOS=("$REPO") ;;
esac
# Try each repository in order and stop at the first successful download;
# a blank line separates the output of unsuccessful attempts
FOUND=false
for repo in "${REPOS[@]}"; do
  if ! search_and_download "$repo"; then
    echo ""
    continue
  fi
  FOUND=true
  break
done
# Nothing was downloaded from any repository: report it and fail
if [[ "$FOUND" != true ]]; then
  echo -e "${RED}ERROR: Unable to find package ${PACKAGE_NAME} in ANY repositories.${NC}"
  exit 1
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment