padeoe · February 27, 2025 16:25 · RewindL · Sep 26, 2024 · padeoe · Sep 26, 2024
diff --git a/README_hfd.md b/README_hfd.md
diff --git a/hfd.sh b/hfd.sh
 #!/usr/bin/env bash
 # ANSI escape sequences used to colour status messages; they are expanded by
 # printf when a message is printed.
 RED='\033[0;31m'
 GREEN='\033[0;32m'
 YELLOW='\033[1;33m'
 NC='\033[0m' # No Color

 # Runs on SIGINT (Ctrl-C): tell the user the download can be resumed by
 # re-running the command, then exit with status 1.
 handle_interrupt() {
    printf "${YELLOW}\nDownload interrupted. You can resume by re-running the command.\n${NC}"
    exit 1
 }
 trap handle_interrupt INT

 # Print the usage text and terminate the script.
 #
 # Outputs: the usage text on stdout.
 # Exits:   always with status 1.
 #
 # The text is emitted with printf rather than a here-document so the function
 # does not depend on a terminator line sitting exactly at column 0.
 display_help() {
    printf '%s\n' \
        "Usage:" \
        "  hfd <REPO_ID> [--include include_pattern1 include_pattern2 ...] [--exclude exclude_pattern1 exclude_pattern2 ...] [--hf_username username] [--hf_token token] [--tool aria2c|wget] [-x threads] [-j jobs] [--dataset] [--local-dir path] [--revision rev]" \
        "" \
        "Description:" \
        "  Downloads a model or dataset from Hugging Face using the provided repo ID." \
        "" \
        "Arguments:" \
        "  REPO_ID         The Hugging Face repo ID (Required)" \
        "                  Format: 'org_name/repo_name' or legacy format (e.g., gpt2)" \
        "Options:" \
        "  include/exclude_pattern The patterns to match against file path, supports wildcard characters." \
        "                  e.g., '--exclude *.safetensor *.md', '--include vae/*'." \
        "  --include       (Optional) Patterns to include files for downloading (supports multiple patterns)." \
        "  --exclude       (Optional) Patterns to exclude files from downloading (supports multiple patterns)." \
        "  --hf_username   (Optional) Hugging Face username for authentication (not email)." \
        "  --hf_token      (Optional) Hugging Face token for authentication." \
        "  --tool          (Optional) Download tool to use: aria2c (default) or wget." \
        "  -x              (Optional) Number of download threads for aria2c (default: 4)." \
        "  -j              (Optional) Number of concurrent downloads for aria2c (default: 5)." \
        "  --dataset       (Optional) Flag to indicate downloading a dataset." \
        "  --local-dir     (Optional) Directory path to store the downloaded data." \
        "                             Defaults to the current directory with a subdirectory named 'repo_name'" \
        "                             if REPO_ID is composed of 'org_name/repo_name'." \
        "  --revision      (Optional) Model/Dataset revision to download (default: main)." \
        "" \
        "Example:" \
        "  hfd gpt2" \
        "  hfd bigscience/bloom-560m --exclude *.safetensors" \
        "  hfd meta-llama/Llama-2-7b --hf_username myuser --hf_token mytoken -x 4" \
        "  hfd lavita/medical-qa-shared-task-v1-toy --dataset" \
        "  hfd bartowski/Phi-3.5-mini-instruct-exl2 --revision 5_0"
    exit 1
 }

 # Show the usage text when no repo ID is given, or when the first argument
 # starts with -h or --help.
 if [[ -z "$1" || "$1" =~ ^-h || "$1" =~ ^--help ]]; then
    display_help
 fi

 # The first positional argument is the repository; everything after it is
 # handled by the option parser.
 REPO_ID=$1
 shift

 # Default values
 TOOL="aria2c"
 THREADS=4
 CONCURRENT=5
 REVISION="main"
 INCLUDE_PATTERNS=()
 EXCLUDE_PATTERNS=()
 # Keep an HF_ENDPOINT that is already set and non-empty in the environment.
 : "${HF_ENDPOINT:=https://huggingface.co}"

 # Check that a value is a positive integer no greater than a maximum.
 # Arguments: $1 - label used in the error message
 #            $2 - value to check
 #            $3 - largest accepted value (inclusive)
 # Outputs:   an error message on stdout when the check fails.
 # Exits:     with status 1 when the check fails; returns 0 otherwise.
 validate_number() {
    local label=$1 value=$2 max=$3
    # The regex rules out leading zeros, so a value with more digits than the
    # maximum is always too large. Checking the length first keeps very long
    # digit strings out of the shell's integer comparison.
    if [[ "$value" =~ ^[1-9][0-9]*$ && ${#value} -le ${#max} && "$value" -le "$max" ]]; then
        return 0
    fi
    printf '%b[Error] %s must be 1-%s%b\n' "$RED" "$label" "$max" "$NC"
    exit 1
 }

 # Abort unless the option named in $1 is followed by a value.
 # Arguments: the remaining command-line words, option first (pass "$@").
 # Outputs:   an error message on stdout when the value is missing.
 # Exits:     with status 1 when the value is missing.
 require_option_value() {
    if (($# < 2)); then
        printf "%b[Error] Option %s requires a value.%b\n" "$RED" "$1" "$NC"
        exit 1
    fi
 }

 # Argument parsing
 while [[ $# -gt 0 ]]; do
    case "$1" in
        # --include/--exclude collect every following word up to the next
        # word that starts with '-'.
        --include) shift; while [[ $# -gt 0 && ! ($1 =~ ^--) && ! ($1 =~ ^-[^-]) ]]; do INCLUDE_PATTERNS+=("$1"); shift; done ;;
        --exclude) shift; while [[ $# -gt 0 && ! ($1 =~ ^--) && ! ($1 =~ ^-[^-]) ]]; do EXCLUDE_PATTERNS+=("$1"); shift; done ;;
        # Options that take a value. Without the guard, `shift 2` fails when
        # the option is the last word, nothing is consumed, and this loop
        # never terminates.
        --hf_username) require_option_value "$@"; HF_USERNAME="$2"; shift 2 ;;
        --hf_token) require_option_value "$@"; HF_TOKEN="$2"; shift 2 ;;
        --tool)
            require_option_value "$@"
            case "$2" in
                aria2c|wget)
                    TOOL="$2"
                    ;;
                *)
                    printf "%b[Error] Invalid tool. Use 'aria2c' or 'wget'.%b\n" "$RED" "$NC"
                    exit 1
                    ;;
            esac
            shift 2
            ;;
        -x) require_option_value "$@"; validate_number "threads (-x)" "$2" 10; THREADS="$2"; shift 2 ;;
        -j) require_option_value "$@"; validate_number "concurrent downloads (-j)" "$2" 10; CONCURRENT="$2"; shift 2 ;;
        --dataset) DATASET=1; shift ;;
        --local-dir) require_option_value "$@"; LOCAL_DIR="$2"; shift 2 ;;
        --revision) require_option_value "$@"; REVISION="$2"; shift 2 ;;
        # Anything else is unknown: show the usage text.
        *) display_help ;;
    esac
 done

 # Build a one-line fingerprint of the settings that determine the file list.
 # Globals: REPO_ID, TOOL, INCLUDE_PATTERNS, EXCLUDE_PATTERNS, DATASET,
 #          HF_USERNAME, HF_TOKEN, HF_ENDPOINT, REVISION (read)
 # Outputs: the fingerprint on stdout, as space-separated KEY=value pairs.
 # NOTE(review): the fingerprint contains HF_TOKEN in clear text and
 # should_regenerate_filelist writes it to disk; consider whether that is
 # acceptable.
 generate_command_string() {
    local cmd_string="REPO_ID=$REPO_ID"
    cmd_string+=" TOOL=$TOOL"
    cmd_string+=" INCLUDE_PATTERNS=${INCLUDE_PATTERNS[*]}"
    cmd_string+=" EXCLUDE_PATTERNS=${EXCLUDE_PATTERNS[*]}"
    cmd_string+=" DATASET=${DATASET:-0}"
    cmd_string+=" HF_USERNAME=${HF_USERNAME:-}"
    cmd_string+=" HF_TOKEN=${HF_TOKEN:-}"
    # The endpoint gets its own label (it used to be emitted as a second
    # HF_TOKEN entry).
    cmd_string+=" HF_ENDPOINT=${HF_ENDPOINT:-}"
    cmd_string+=" REVISION=$REVISION"
    printf '%s\n' "$cmd_string"
 }

 # Abort the script unless the given program can be found by `command -v`.
 # Arguments: $1 - command name
 # Outputs:   a message on stdout when the command is missing.
 # Exits:     with status 1 when the command is missing.
 check_command() {
    # Quoted so that a name containing whitespace is looked up as one word
    # instead of being split into several lookups.
    if ! command -v "$1" &>/dev/null; then
        printf "%b%s is not installed. Please install it first.%b\n" "$RED" "$1" "$NC"
        exit 1
    fi
 }

 # curl fetches the metadata; the selected tool performs the download.
 check_command curl
 check_command "$TOOL"

 # Default target directory: REPO_ID with everything up to and including the
 # first '/' removed.
 LOCAL_DIR="${LOCAL_DIR:-${REPO_ID#*/}}"
 # The .hfd subdirectory holds the metadata and file list. Stop here if it
 # cannot be created instead of failing later with a less obvious error.
 if ! mkdir -p -- "$LOCAL_DIR/.hfd"; then
    printf "%b[Error] Cannot create directory %s.%b\n" "$RED" "$LOCAL_DIR/.hfd" "$NC" >&2
    exit 1
 fi

 # Datasets use a "datasets/" prefix in both the metadata and download paths,
 # which adds one more leading path component for wget to cut.
 if [[ "$DATASET" == 1 ]]; then
    METADATA_API_PATH="datasets/$REPO_ID"
    DOWNLOAD_API_PATH="datasets/$REPO_ID"
    CUT_DIRS=5
 else
    METADATA_API_PATH="models/$REPO_ID"
    DOWNLOAD_API_PATH="$REPO_ID"
    CUT_DIRS=4
 fi

 # Modify API URL, construct based on revision
 if [[ "$REVISION" != "main" ]]; then
    METADATA_API_PATH="$METADATA_API_PATH/revision/$REVISION"
 fi
 API_URL="$HF_ENDPOINT/api/$METADATA_API_PATH"

 METADATA_FILE="$LOCAL_DIR/.hfd/repo_metadata.json"

 # Download the repository metadata from $API_URL into $METADATA_FILE.
 # Globals: METADATA_FILE, API_URL, HF_TOKEN, RED, NC (read); RESPONSE (written)
 # Outputs: the response body on stdout when the HTTP status is 200;
 #          otherwise an error report (status and body) on stderr.
 # Exits:   with status 1, after deleting $METADATA_FILE, when the status is
 #          not 200.
 fetch_and_save_metadata() {
    local status_code
    # ${HF_TOKEN:+...} adds the -H option and its value only when a token is
    # set; the inner quotes keep the header value a single word.
    status_code=$(curl -L -s -w "%{http_code}" -o "$METADATA_FILE" ${HF_TOKEN:+-H "Authorization: Bearer $HF_TOKEN"} "$API_URL")
    RESPONSE=$(cat "$METADATA_FILE")
    if [[ "$status_code" == 200 ]]; then
        printf "%s\n" "$RESPONSE"
    else
        # URL, status and body are passed as arguments so that '%' or '\' in
        # them is printed literally.
        printf "%b[Error] Failed to fetch metadata from %s. HTTP status code: %s.%b\n%s\n" "$RED" "$API_URL" "$status_code" "$NC" "$RESPONSE" >&2
        rm -f -- "$METADATA_FILE"
        exit 1
    fi
 }

 # Stop the script when the repository is gated and credentials are missing.
 # Arguments: $1 - repository metadata (JSON text)
 # Globals:   HF_TOKEN, HF_USERNAME, RED, NC (read)
 # Outputs:   a hint on stdout when authentication is required but either
 #            HF_TOKEN or HF_USERNAME is empty.
 # Exits:     with status 1 in that case; returns 0 otherwise.
 check_authentication() {
    local metadata="$1"
    local is_gated=0

    if command -v jq &>/dev/null; then
        # With jq: any .gated value other than false (or a missing/null
        # field, which defaults to false) counts as gated.
        local gated_value
        gated_value=$(echo "$metadata" | jq -r '.gated // false')
        [[ "$gated_value" == "false" ]] || is_gated=1
    elif echo "$metadata" | grep -q '"gated":[^f]'; then
        # Without jq: textual probe for a "gated" value not starting with 'f'.
        is_gated=1
    fi

    if ((is_gated)) && [[ -z "$HF_TOKEN" || -z "$HF_USERNAME" ]]; then
        printf "${RED}The repository requires authentication, but --hf_username and --hf_token is not passed. Please get token from https://huggingface.co/settings/tokens.\nExiting.\n${NC}"
        exit 1
    fi
 }

 # Reuse cached metadata only when it is non-empty and the previous run used
 # the same settings. The cache file does not record which repo or revision it
 # was fetched for, so the fingerprint saved by should_regenerate_filelist in
 # .hfd/last_download_command is compared with the current one. On any
 # difference the metadata is fetched again, so a new file list is never built
 # from metadata of another revision.
 last_command_file="$LOCAL_DIR/.hfd/last_download_command"
 if [[ -s "$METADATA_FILE" && -f "$last_command_file" && "$(generate_command_string)" == "$(cat "$last_command_file")" ]]; then
    printf "%bUsing cached metadata: %s%b\n" "$GREEN" "$METADATA_FILE" "$NC"
    RESPONSE=$(cat "$METADATA_FILE")
 else
    printf "%bFetching repo metadata...%b\n" "$YELLOW" "$NC"
    RESPONSE=$(fetch_and_save_metadata) || exit 1
 fi
 check_authentication "$RESPONSE"

 # Decide whether the download list has to be rebuilt.
 # Globals: LOCAL_DIR, fileslist_file (read)
 # Returns: 1 when the list file and the saved command both exist and the
 #          saved command equals the current one; 0 otherwise. Whenever 0 is
 #          returned, the current command has been written to
 #          $LOCAL_DIR/.hfd/last_download_command.
 should_regenerate_filelist() {
    local record_path="$LOCAL_DIR/.hfd/last_download_command"
    local list_path="$LOCAL_DIR/$fileslist_file"
    local fingerprint
    fingerprint=$(generate_command_string)

    # The list can be kept only if nothing is missing and nothing changed.
    if [[ -f "$list_path" && -f "$record_path" ]] && [[ "$(cat "$record_path")" == "$fingerprint" ]]; then
        return 1
    fi

    echo "$fingerprint" > "$record_path"
    return 0
 }

 fileslist_file=".hfd/${TOOL}_urls.txt"

 # Join wildcard patterns into one extended-regex alternation.
 # Every '.' is escaped, every '*' becomes '.*', and the patterns are joined
 # with '|'. The result is not anchored.
 # Arguments: zero or more patterns.
 # Outputs:   the regex on stdout; nothing when no pattern is given.
 patterns_to_regex() {
    (($#)) || return 0
    printf '%s\n' "$@" | sed 's/\./\\./g; s/\*/.*/g' | paste -sd '|' -
 }

 # Build the download list from $RESPONSE with jq.
 # Globals: RESPONSE, TOOL, HF_ENDPOINT, DOWNLOAD_API_PATH, HF_TOKEN,
 #          INCLUDE_REGEX, EXCLUDE_REGEX, REVISION (read)
 # Outputs: for aria2c, one entry per file: the URL line, then indented
 #          dir=/out= lines (plus a header= line when a token is set) and a
 #          blank line. For any other tool, one URL per line.
 # Returns: jq's exit status.
 process_with_jq() {
    printf "%s" "$RESPONSE" | jq -r \
        --arg tool "$TOOL" \
        --arg endpoint "$HF_ENDPOINT" \
        --arg repo_id "$DOWNLOAD_API_PATH" \
        --arg token "${HF_TOKEN:-}" \
        --arg include_regex "$INCLUDE_REGEX" \
        --arg exclude_regex "$EXCLUDE_REGEX" \
        --arg revision "$REVISION" \
        '
        .siblings[]
        | select(
            .rfilename != null
            and ($include_regex == "" or (.rfilename | test($include_regex)))
            and ($exclude_regex == "" or (.rfilename | test($exclude_regex) | not))
          )
        | ($endpoint + "/" + $repo_id + "/resolve/" + $revision + "/" + .rfilename) as $url
        | if $tool == "aria2c" then
            [
              $url,
              " dir=" + (.rfilename | split("/")[:-1] | join("/")),
              " out=" + (.rfilename | split("/")[-1]),
              (if $token != "" then " header=Authorization: Bearer " + $token else empty end),
              ""
            ]
            | join("\n")
          else
            $url
          end
        '
 }

 # Build the download list from $RESPONSE without jq, using grep and awk.
 # Globals: same as process_with_jq.
 # Outputs: same layout as process_with_jq.
 process_with_grep_awk() {
    local files file output=""

    # Pull every "rfilename":"..." value out of the JSON text.
    files=$(printf '%s' "$RESPONSE" | grep -o '"rfilename":"[^"]*"' | awk -F'"' '{print $4}')

    if [[ -n "$INCLUDE_REGEX" ]]; then
        files=$(printf '%s\n' "$files" | grep -E -- "$INCLUDE_REGEX")
    fi
    if [[ -n "$EXCLUDE_REGEX" ]]; then
        files=$(printf '%s\n' "$files" | grep -vE -- "$EXCLUDE_REGEX")
    fi

    while IFS= read -r file; do
        [[ -n "$file" ]] || continue
        output+="$HF_ENDPOINT/$DOWNLOAD_API_PATH/resolve/$REVISION/$file"$'\n'
        if [[ "$TOOL" == "aria2c" ]]; then
            output+=" dir=$(dirname "$file")"$'\n'
            output+=" out=$(basename "$file")"$'\n'
            [[ -n "$HF_TOKEN" ]] && output+=" header=Authorization: Bearer $HF_TOKEN"$'\n'
            output+=$'\n'
        fi
    done <<< "$files"

    printf '%s' "$output"
 }

 if should_regenerate_filelist; then
    # Remove existing file list if it exists
    rm -f -- "$LOCAL_DIR/$fileslist_file"

    printf "%bGenerating file list...%b\n" "$YELLOW" "$NC"

    # Convert include and exclude patterns to regex (empty when none given)
    INCLUDE_REGEX=$(patterns_to_regex "${INCLUDE_PATTERNS[@]}")
    EXCLUDE_REGEX=$(patterns_to_regex "${EXCLUDE_PATTERNS[@]}")

    if command -v jq &>/dev/null; then
        # A jq failure must not end up as an empty list that then looks like
        # a successful download of nothing.
        if ! result=$(process_with_jq); then
            printf "%b[Error] Failed to build the file list from the repo metadata.%b\n" "$RED" "$NC" >&2
            exit 1
        fi
    else
        printf "%b[Warning] jq not installed, using grep/awk for metadata json parsing (slower). Consider installing jq for better parsing performance.%b\n" "$YELLOW" "$NC"
        result=$(process_with_grep_awk)
    fi
    printf "%s\n" "$result" > "$LOCAL_DIR/$fileslist_file"
 else
    printf "%bResume from file list: %s%b\n" "$GREEN" "$LOCAL_DIR/$fileslist_file" "$NC"
 fi

 # Perform download
 printf "%bStarting download with %s to %s...\n%b" "$YELLOW" "$TOOL" "$LOCAL_DIR" "$NC"

 # The file list path is relative to LOCAL_DIR, so the download must not start
 # from any other directory.
 if ! cd "$LOCAL_DIR"; then
    printf "%b[Error] Cannot enter directory %s.%b\n" "$RED" "$LOCAL_DIR" "$NC" >&2
    exit 1
 fi

 download_status=0
 if [[ "$TOOL" == "aria2c" ]]; then
    # --save-session writes the session back to the list file that -i reads.
    aria2c --console-log-level=error --file-allocation=none -x "$THREADS" -j "$CONCURRENT" -s "$THREADS" -k 1M -c -i "$fileslist_file" --save-session="$fileslist_file" || download_status=$?
 elif [[ "$TOOL" == "wget" ]]; then
    # The --header option is added only when a token is set.
    wget -x -nH --cut-dirs="$CUT_DIRS" ${HF_TOKEN:+--header="Authorization: Bearer $HF_TOKEN"} --input-file="$fileslist_file" --continue || download_status=$?
 fi

 if ((download_status == 0)); then
    printf "%bDownload completed successfully. Repo directory: %s\n%b" "$GREEN" "$PWD" "$NC"
 else
    printf "%bDownload encountered errors.\n%b" "$RED" "$NC"
    exit 1
 fi
	#!/usr/bin/env bash
	# ANSI escape sequences used to colour status messages; they are expanded by
	# printf when a message is printed.
	RED='\033[0;31m'
	GREEN='\033[0;32m'
	YELLOW='\033[1;33m'
	NC='\033[0m' # No Color

	# Runs on SIGINT (Ctrl-C): tell the user the download can be resumed by
	# re-running the command, then exit with status 1.
	handle_interrupt() {
	    printf "${YELLOW}\nDownload interrupted. You can resume by re-running the command.\n${NC}"
	    exit 1
	}
	trap handle_interrupt INT

	# Print the usage text and terminate the script.
	#
	# Outputs: the usage text on stdout.
	# Exits:   always with status 1.
	#
	# The text is emitted with printf rather than a here-document so the function
	# does not depend on a terminator line sitting exactly at column 0. The
	# '|' and '*' characters in the usage line and examples are written
	# literally.
	display_help() {
	    printf '%s\n' \
	        "Usage:" \
	        "  hfd <REPO_ID> [--include include_pattern1 include_pattern2 ...] [--exclude exclude_pattern1 exclude_pattern2 ...] [--hf_username username] [--hf_token token] [--tool aria2c|wget] [-x threads] [-j jobs] [--dataset] [--local-dir path] [--revision rev]" \
	        "" \
	        "Description:" \
	        "  Downloads a model or dataset from Hugging Face using the provided repo ID." \
	        "" \
	        "Arguments:" \
	        "  REPO_ID         The Hugging Face repo ID (Required)" \
	        "                  Format: 'org_name/repo_name' or legacy format (e.g., gpt2)" \
	        "Options:" \
	        "  include/exclude_pattern The patterns to match against file path, supports wildcard characters." \
	        "                  e.g., '--exclude *.safetensor *.md', '--include vae/*'." \
	        "  --include       (Optional) Patterns to include files for downloading (supports multiple patterns)." \
	        "  --exclude       (Optional) Patterns to exclude files from downloading (supports multiple patterns)." \
	        "  --hf_username   (Optional) Hugging Face username for authentication (not email)." \
	        "  --hf_token      (Optional) Hugging Face token for authentication." \
	        "  --tool          (Optional) Download tool to use: aria2c (default) or wget." \
	        "  -x              (Optional) Number of download threads for aria2c (default: 4)." \
	        "  -j              (Optional) Number of concurrent downloads for aria2c (default: 5)." \
	        "  --dataset       (Optional) Flag to indicate downloading a dataset." \
	        "  --local-dir     (Optional) Directory path to store the downloaded data." \
	        "                             Defaults to the current directory with a subdirectory named 'repo_name'" \
	        "                             if REPO_ID is composed of 'org_name/repo_name'." \
	        "  --revision      (Optional) Model/Dataset revision to download (default: main)." \
	        "" \
	        "Example:" \
	        "  hfd gpt2" \
	        "  hfd bigscience/bloom-560m --exclude *.safetensors" \
	        "  hfd meta-llama/Llama-2-7b --hf_username myuser --hf_token mytoken -x 4" \
	        "  hfd lavita/medical-qa-shared-task-v1-toy --dataset" \
	        "  hfd bartowski/Phi-3.5-mini-instruct-exl2 --revision 5_0"
	    exit 1
	}

	# Show the usage text when no repo ID is given, or when the first argument
	# starts with -h or --help. The alternatives are joined with a real '||'.
	if [[ -z "$1" || "$1" =~ ^-h || "$1" =~ ^--help ]]; then
	    display_help
	fi

	# The first positional argument is the repository; everything after it is
	# handled by the option parser.
	REPO_ID=$1
	shift

	# Default values
	TOOL="aria2c"
	THREADS=4
	CONCURRENT=5
	REVISION="main"
	INCLUDE_PATTERNS=()
	EXCLUDE_PATTERNS=()
	# Keep an HF_ENDPOINT that is already set and non-empty in the environment.
	: "${HF_ENDPOINT:=https://huggingface.co}"

	# Check that a value is a positive integer no greater than a maximum.
	# Arguments: $1 - label used in the error message
	#            $2 - value to check
	#            $3 - largest accepted value (inclusive)
	# Outputs:   an error message on stdout when the check fails.
	# Exits:     with status 1 when the check fails; returns 0 otherwise.
	validate_number() {
	    local label=$1 value=$2 max=$3
	    # The regex rules out leading zeros, so a value with more digits than the
	    # maximum is always too large. Checking the length first keeps very long
	    # digit strings out of the shell's integer comparison.
	    if [[ "$value" =~ ^[1-9][0-9]*$ && ${#value} -le ${#max} && "$value" -le "$max" ]]; then
	        return 0
	    fi
	    printf '%b[Error] %s must be 1-%s%b\n' "$RED" "$label" "$max" "$NC"
	    exit 1
	}

	# Abort unless the option named in $1 is followed by a value.
	# Arguments: the remaining command-line words, option first (pass "$@").
	# Outputs:   an error message on stdout when the value is missing.
	# Exits:     with status 1 when the value is missing.
	require_option_value() {
	    if (($# < 2)); then
	        printf "%b[Error] Option %s requires a value.%b\n" "$RED" "$1" "$NC"
	        exit 1
	    fi
	}

	# Argument parsing
	while [[ $# -gt 0 ]]; do
	    case "$1" in
	        # --include/--exclude collect every following word up to the next
	        # word that starts with '-'.
	        --include) shift; while [[ $# -gt 0 && ! ($1 =~ ^--) && ! ($1 =~ ^-[^-]) ]]; do INCLUDE_PATTERNS+=("$1"); shift; done ;;
	        --exclude) shift; while [[ $# -gt 0 && ! ($1 =~ ^--) && ! ($1 =~ ^-[^-]) ]]; do EXCLUDE_PATTERNS+=("$1"); shift; done ;;
	        # Options that take a value. Without the guard, `shift 2` fails when
	        # the option is the last word, nothing is consumed, and this loop
	        # never terminates.
	        --hf_username) require_option_value "$@"; HF_USERNAME="$2"; shift 2 ;;
	        --hf_token) require_option_value "$@"; HF_TOKEN="$2"; shift 2 ;;
	        --tool)
	            require_option_value "$@"
	            case "$2" in
	                # An unescaped '|' separates the two accepted tool names.
	                aria2c|wget)
	                    TOOL="$2"
	                    ;;
	                *)
	                    printf "%b[Error] Invalid tool. Use 'aria2c' or 'wget'.%b\n" "$RED" "$NC"
	                    exit 1
	                    ;;
	            esac
	            shift 2
	            ;;
	        -x) require_option_value "$@"; validate_number "threads (-x)" "$2" 10; THREADS="$2"; shift 2 ;;
	        -j) require_option_value "$@"; validate_number "concurrent downloads (-j)" "$2" 10; CONCURRENT="$2"; shift 2 ;;
	        --dataset) DATASET=1; shift ;;
	        --local-dir) require_option_value "$@"; LOCAL_DIR="$2"; shift 2 ;;
	        --revision) require_option_value "$@"; REVISION="$2"; shift 2 ;;
	        # Anything else is unknown: show the usage text.
	        *) display_help ;;
	    esac
	done

	# Build a one-line fingerprint of the settings that determine the file list.
	# Globals: REPO_ID, TOOL, INCLUDE_PATTERNS, EXCLUDE_PATTERNS, DATASET,
	#          HF_USERNAME, HF_TOKEN, HF_ENDPOINT, REVISION (read)
	# Outputs: the fingerprint on stdout, as space-separated KEY=value pairs.
	# NOTE(review): the fingerprint contains HF_TOKEN in clear text and
	# should_regenerate_filelist writes it to disk; consider whether that is
	# acceptable.
	generate_command_string() {
	    local cmd_string="REPO_ID=$REPO_ID"
	    cmd_string+=" TOOL=$TOOL"
	    cmd_string+=" INCLUDE_PATTERNS=${INCLUDE_PATTERNS[*]}"
	    cmd_string+=" EXCLUDE_PATTERNS=${EXCLUDE_PATTERNS[*]}"
	    cmd_string+=" DATASET=${DATASET:-0}"
	    cmd_string+=" HF_USERNAME=${HF_USERNAME:-}"
	    cmd_string+=" HF_TOKEN=${HF_TOKEN:-}"
	    # The endpoint gets its own label (it used to be emitted as a second
	    # HF_TOKEN entry).
	    cmd_string+=" HF_ENDPOINT=${HF_ENDPOINT:-}"
	    cmd_string+=" REVISION=$REVISION"
	    printf '%s\n' "$cmd_string"
	}

	# Abort the script unless the given program can be found by `command -v`.
	# Arguments: $1 - command name
	# Outputs:   a message on stdout when the command is missing.
	# Exits:     with status 1 when the command is missing.
	check_command() {
	    # Quoted so that a name containing whitespace is looked up as one word
	    # instead of being split into several lookups.
	    if ! command -v "$1" &>/dev/null; then
	        printf "%b%s is not installed. Please install it first.%b\n" "$RED" "$1" "$NC"
	        exit 1
	    fi
	}

	# curl fetches the metadata; the selected tool performs the download.
	check_command curl
	check_command "$TOOL"

	# Default target directory: REPO_ID with everything up to and including the
	# first '/' removed.
	LOCAL_DIR="${LOCAL_DIR:-${REPO_ID#*/}}"
	# The .hfd subdirectory holds the metadata and file list. Stop here if it
	# cannot be created instead of failing later with a less obvious error.
	if ! mkdir -p -- "$LOCAL_DIR/.hfd"; then
	    printf "%b[Error] Cannot create directory %s.%b\n" "$RED" "$LOCAL_DIR/.hfd" "$NC" >&2
	    exit 1
	fi

	# Datasets use a "datasets/" prefix in both the metadata and download paths,
	# which adds one more leading path component for wget to cut.
	if [[ "$DATASET" == 1 ]]; then
	    METADATA_API_PATH="datasets/$REPO_ID"
	    DOWNLOAD_API_PATH="datasets/$REPO_ID"
	    CUT_DIRS=5
	else
	    METADATA_API_PATH="models/$REPO_ID"
	    DOWNLOAD_API_PATH="$REPO_ID"
	    CUT_DIRS=4
	fi

	# Modify API URL, construct based on revision
	if [[ "$REVISION" != "main" ]]; then
	    METADATA_API_PATH="$METADATA_API_PATH/revision/$REVISION"
	fi
	API_URL="$HF_ENDPOINT/api/$METADATA_API_PATH"

	METADATA_FILE="$LOCAL_DIR/.hfd/repo_metadata.json"

	# Download the repository metadata from $API_URL into $METADATA_FILE.
	# Globals: METADATA_FILE, API_URL, HF_TOKEN, RED, NC (read); RESPONSE (written)
	# Outputs: the response body on stdout when the HTTP status is 200;
	#          otherwise an error report (status and body) on stderr.
	# Exits:   with status 1, after deleting $METADATA_FILE, when the status is
	#          not 200.
	fetch_and_save_metadata() {
	    local status_code
	    # ${HF_TOKEN:+...} adds the -H option and its value only when a token is
	    # set; the inner quotes keep the header value a single word.
	    status_code=$(curl -L -s -w "%{http_code}" -o "$METADATA_FILE" ${HF_TOKEN:+-H "Authorization: Bearer $HF_TOKEN"} "$API_URL")
	    RESPONSE=$(cat "$METADATA_FILE")
	    if [[ "$status_code" == 200 ]]; then
	        printf "%s\n" "$RESPONSE"
	    else
	        # URL, status and body are passed as arguments so that '%' or '\' in
	        # them is printed literally.
	        printf "%b[Error] Failed to fetch metadata from %s. HTTP status code: %s.%b\n%s\n" "$RED" "$API_URL" "$status_code" "$NC" "$RESPONSE" >&2
	        rm -f -- "$METADATA_FILE"
	        exit 1
	    fi
	}

	# Stop the script when the repository is gated and credentials are missing.
	# Arguments: $1 - repository metadata (JSON text)
	# Globals:   HF_TOKEN, HF_USERNAME, RED, NC (read)
	# Outputs:   a hint on stdout when authentication is required but either
	#            HF_TOKEN or HF_USERNAME is empty.
	# Exits:     with status 1 in that case; returns 0 otherwise.
	check_authentication() {
	    local metadata="$1"
	    local is_gated=0

	    if command -v jq &>/dev/null; then
	        # With jq: any .gated value other than false (or a missing/null
	        # field, which defaults to false) counts as gated. The metadata is
	        # piped into jq with a real '|'.
	        local gated_value
	        gated_value=$(echo "$metadata" | jq -r '.gated // false')
	        [[ "$gated_value" == "false" ]] || is_gated=1
	    elif echo "$metadata" | grep -q '"gated":[^f]'; then
	        # Without jq: textual probe for a "gated" value not starting with 'f'.
	        is_gated=1
	    fi

	    if ((is_gated)) && [[ -z "$HF_TOKEN" || -z "$HF_USERNAME" ]]; then
	        printf "${RED}The repository requires authentication, but --hf_username and --hf_token is not passed. Please get token from https://huggingface.co/settings/tokens.\nExiting.\n${NC}"
	        exit 1
	    fi
	}

	# Reuse cached metadata only when it is non-empty and the previous run used
	# the same settings. The cache file does not record which repo or revision it
	# was fetched for, so the fingerprint saved by should_regenerate_filelist in
	# .hfd/last_download_command is compared with the current one. On any
	# difference the metadata is fetched again, so a new file list is never built
	# from metadata of another revision.
	last_command_file="$LOCAL_DIR/.hfd/last_download_command"
	if [[ -s "$METADATA_FILE" && -f "$last_command_file" && "$(generate_command_string)" == "$(cat "$last_command_file")" ]]; then
	    printf "%bUsing cached metadata: %s%b\n" "$GREEN" "$METADATA_FILE" "$NC"
	    RESPONSE=$(cat "$METADATA_FILE")
	else
	    printf "%bFetching repo metadata...%b\n" "$YELLOW" "$NC"
	    RESPONSE=$(fetch_and_save_metadata) || exit 1
	fi
	check_authentication "$RESPONSE"

	# Decide whether the download list has to be rebuilt.
	# Globals: LOCAL_DIR, fileslist_file (read)
	# Returns: 1 when the list file and the saved command both exist and the
	#          saved command equals the current one; 0 otherwise. Whenever 0 is
	#          returned, the current command has been written to
	#          $LOCAL_DIR/.hfd/last_download_command.
	should_regenerate_filelist() {
	    local record_path="$LOCAL_DIR/.hfd/last_download_command"
	    local list_path="$LOCAL_DIR/$fileslist_file"
	    local fingerprint
	    fingerprint=$(generate_command_string)

	    # The list can be kept only if nothing is missing and nothing changed.
	    if [[ -f "$list_path" && -f "$record_path" ]] && [[ "$(cat "$record_path")" == "$fingerprint" ]]; then
	        return 1
	    fi

	    echo "$fingerprint" > "$record_path"
	    return 0
	}

	fileslist_file=".hfd/${TOOL}_urls.txt"

	# Join wildcard patterns into one extended-regex alternation.
	# Every '.' is escaped, every '*' becomes '.*', and the patterns are joined
	# with '|'. The result is not anchored.
	# Arguments: zero or more patterns.
	# Outputs:   the regex on stdout; nothing when no pattern is given.
	patterns_to_regex() {
	    (($#)) || return 0
	    printf '%s\n' "$@" | sed 's/\./\\./g; s/\*/.*/g' | paste -sd '|' -
	}

	# Build the download list from $RESPONSE with jq.
	# Globals: RESPONSE, TOOL, HF_ENDPOINT, DOWNLOAD_API_PATH, HF_TOKEN,
	#          INCLUDE_REGEX, EXCLUDE_REGEX, REVISION (read)
	# Outputs: for aria2c, one entry per file: the URL line, then indented
	#          dir=/out= lines (plus a header= line when a token is set) and a
	#          blank line. For any other tool, one URL per line.
	# Returns: jq's exit status.
	process_with_jq() {
	    printf "%s" "$RESPONSE" | jq -r \
	        --arg tool "$TOOL" \
	        --arg endpoint "$HF_ENDPOINT" \
	        --arg repo_id "$DOWNLOAD_API_PATH" \
	        --arg token "${HF_TOKEN:-}" \
	        --arg include_regex "$INCLUDE_REGEX" \
	        --arg exclude_regex "$EXCLUDE_REGEX" \
	        --arg revision "$REVISION" \
	        '
	        .siblings[]
	        | select(
	            .rfilename != null
	            and ($include_regex == "" or (.rfilename | test($include_regex)))
	            and ($exclude_regex == "" or (.rfilename | test($exclude_regex) | not))
	          )
	        | ($endpoint + "/" + $repo_id + "/resolve/" + $revision + "/" + .rfilename) as $url
	        | if $tool == "aria2c" then
	            [
	              $url,
	              " dir=" + (.rfilename | split("/")[:-1] | join("/")),
	              " out=" + (.rfilename | split("/")[-1]),
	              (if $token != "" then " header=Authorization: Bearer " + $token else empty end),
	              ""
	            ]
	            | join("\n")
	          else
	            $url
	          end
	        '
	}

	# Build the download list from $RESPONSE without jq, using grep and awk.
	# Globals: same as process_with_jq.
	# Outputs: same layout as process_with_jq.
	process_with_grep_awk() {
	    local files file output=""

	    # Pull every "rfilename":"..." value out of the JSON text.
	    files=$(printf '%s' "$RESPONSE" | grep -o '"rfilename":"[^"]*"' | awk -F'"' '{print $4}')

	    if [[ -n "$INCLUDE_REGEX" ]]; then
	        files=$(printf '%s\n' "$files" | grep -E -- "$INCLUDE_REGEX")
	    fi
	    if [[ -n "$EXCLUDE_REGEX" ]]; then
	        files=$(printf '%s\n' "$files" | grep -vE -- "$EXCLUDE_REGEX")
	    fi

	    while IFS= read -r file; do
	        [[ -n "$file" ]] || continue
	        output+="$HF_ENDPOINT/$DOWNLOAD_API_PATH/resolve/$REVISION/$file"$'\n'
	        if [[ "$TOOL" == "aria2c" ]]; then
	            output+=" dir=$(dirname "$file")"$'\n'
	            output+=" out=$(basename "$file")"$'\n'
	            [[ -n "$HF_TOKEN" ]] && output+=" header=Authorization: Bearer $HF_TOKEN"$'\n'
	            output+=$'\n'
	        fi
	    done <<< "$files"

	    printf '%s' "$output"
	}

	if should_regenerate_filelist; then
	    # Remove existing file list if it exists
	    rm -f -- "$LOCAL_DIR/$fileslist_file"

	    printf "%bGenerating file list...%b\n" "$YELLOW" "$NC"

	    # Convert include and exclude patterns to regex (empty when none given)
	    INCLUDE_REGEX=$(patterns_to_regex "${INCLUDE_PATTERNS[@]}")
	    EXCLUDE_REGEX=$(patterns_to_regex "${EXCLUDE_PATTERNS[@]}")

	    if command -v jq &>/dev/null; then
	        # A jq failure must not end up as an empty list that then looks like
	        # a successful download of nothing.
	        if ! result=$(process_with_jq); then
	            printf "%b[Error] Failed to build the file list from the repo metadata.%b\n" "$RED" "$NC" >&2
	            exit 1
	        fi
	    else
	        printf "%b[Warning] jq not installed, using grep/awk for metadata json parsing (slower). Consider installing jq for better parsing performance.%b\n" "$YELLOW" "$NC"
	        result=$(process_with_grep_awk)
	    fi
	    printf "%s\n" "$result" > "$LOCAL_DIR/$fileslist_file"
	else
	    printf "%bResume from file list: %s%b\n" "$GREEN" "$LOCAL_DIR/$fileslist_file" "$NC"
	fi

	# Perform download
	printf "%bStarting download with %s to %s...\n%b" "$YELLOW" "$TOOL" "$LOCAL_DIR" "$NC"

	# The file list path is relative to LOCAL_DIR, so the download must not start
	# from any other directory.
	if ! cd "$LOCAL_DIR"; then
	    printf "%b[Error] Cannot enter directory %s.%b\n" "$RED" "$LOCAL_DIR" "$NC" >&2
	    exit 1
	fi

	download_status=0
	if [[ "$TOOL" == "aria2c" ]]; then
	    # --save-session writes the session back to the list file that -i reads.
	    aria2c --console-log-level=error --file-allocation=none -x "$THREADS" -j "$CONCURRENT" -s "$THREADS" -k 1M -c -i "$fileslist_file" --save-session="$fileslist_file" || download_status=$?
	elif [[ "$TOOL" == "wget" ]]; then
	    # The --header option is added only when a token is set.
	    wget -x -nH --cut-dirs="$CUT_DIRS" ${HF_TOKEN:+--header="Authorization: Bearer $HF_TOKEN"} --input-file="$fileslist_file" --continue || download_status=$?
	fi

	if ((download_status == 0)); then
	    printf "%bDownload completed successfully. Repo directory: %s\n%b" "$GREEN" "$PWD" "$NC"
	else
	    printf "%bDownload encountered errors.\n%b" "$RED" "$NC"
	    exit 1
	fi