Usage:
```
$ mkdir RHEL9Doc
$ cd RHEL9Doc
$ fetchdoc.sh https://docs.redhat.com/en/documentation/red_hat_enterprise_linux/9
```
Prerequisites: curl, GNU Parallel, and GNU grep (the script uses `grep -P`)
#!/bin/bash
#
# fetchdoc.sh - download the PDF edition of every document linked from a
# product documentation index page into the current directory.
#
# Usage:    fetchdoc.sh <URL>
#   URL     product document index page; every href on it that contains
#           "/html/" is treated as a document landing page.
# Requires: curl, GNU Parallel, grep with -P (PCRE) support.
# Exit:     1 on usage or setup errors; otherwise the exit status of
#           parallel (non-zero when at least one page or PDF failed).

set -u

# Print the usage text to stderr.
usage() {
  echo "Usage: $0 <URL>" >&2
  echo "URL: product document index page, e.g. https://docs.redhat.com/en/documentation/red_hat_enterprise_linux/9" >&2
}

# Print an error message to stderr and abort the script.
die() {
  printf 'fetchdoc: %s\n' "$*" >&2
  exit 1
}

# Read HTML on stdin and print the value of every href="..." attribute,
# one per line.
extract_hrefs() {
  grep -oP '(?<=href=")[^"]*'
}

# Turn a link found in a page into a full URL using the exported BASE_URL.
# Arguments: $1 - link as it appears in the href attribute
absolute_url() {
  case "$1" in
    http://* | https://*) printf '%s\n' "$1" ;;
    # Protocol-relative link: reuse the scheme of BASE_URL.
    //*) printf '%s%s\n' "${BASE_URL%%//*}" "$1" ;;
    /*) printf '%s%s\n' "$BASE_URL" "$1" ;;
    # Anything else is treated as relative to the site root.
    *) printf '%s/%s\n' "$BASE_URL" "$1" ;;
  esac
}

# Fetch one document landing page and download every PDF it links to.
# Runs inside the worker shells started by parallel, so it relies only on
# exported functions and the exported BASE_URL.
# Arguments: $1 - href of the document page taken from the index page
# Returns:   0 when all downloads succeeded (or the page links no PDF),
#            1 when the page or any PDF could not be fetched
fetch_pdf() {
  local page_url page_html pdf_link pdf_url filename status=0

  page_url=$(absolute_url "$1")
  page_url="${page_url%/}/index"

  if ! page_html=$(curl -fsSL "$page_url"); then
    printf 'fetchdoc: cannot fetch %s\n' "$page_url" >&2
    return 1
  fi

  # A page may link the same PDF several times or link several PDFs;
  # handle each distinct link separately.
  while IFS= read -r pdf_link; do
    pdf_url=$(absolute_url "$pdf_link")
    filename=${pdf_link##*/}
    # Download to a temporary name so an interrupted or failed transfer
    # never leaves a truncated file that looks like a complete PDF.
    if curl -fsSL -o "${filename}.part" "$pdf_url"; then
      mv -f -- "${filename}.part" "$filename"
    else
      printf 'fetchdoc: cannot download %s\n' "$pdf_url" >&2
      rm -f -- "${filename}.part"
      status=1
    fi
  done < <(printf '%s\n' "$page_html" | extract_hrefs | grep '\.pdf$' | sort -u)

  return "$status"
}

main() {
  if [[ -z "${1:-}" ]]; then
    usage
    exit 1
  fi

  local tool
  for tool in curl parallel; do
    command -v "$tool" >/dev/null 2>&1 || die "required tool not found: $tool"
  done

  local url=$1
  # scheme://host part of the index URL; links on the pages are resolved
  # against it.
  local -r origin_re='^[A-Za-z][A-Za-z0-9+.-]*://[^/]+'
  [[ $url =~ $origin_re ]] || die "not an absolute URL: $url"
  BASE_URL=${BASH_REMATCH[0]}

  # Worker shells get the helpers and BASE_URL through the environment
  # instead of having them spliced into the command string.
  export BASE_URL
  export -f extract_hrefs absolute_url fetch_pdf

  local index_html links
  index_html=$(curl -fsSL "$url") || die "cannot download index page: $url"

  # De-duplicate so the same document is not fetched by two jobs at once.
  links=$(printf '%s\n' "$index_html" | extract_hrefs | grep '/html/' | sort -u)
  [[ -n "$links" ]] || die "no /html/ document links found on $url"

  # Exported functions only exist in bash, so make parallel start its jobs
  # with bash regardless of the user's login shell.
  printf '%s\n' "$links" \
    | PARALLEL_SHELL="$BASH" SHELL="$BASH" parallel -j 10 fetch_pdf {}
}

main "$@"