Skip to content

Instantly share code, notes, and snippets.

@ip413
Last active December 11, 2024 09:14
Show Gist options
  • Save ip413/d1c6df5c5b695ea36db84a68f5cce796 to your computer and use it in GitHub Desktop.
Save ip413/d1c6df5c5b695ea36db84a68f5cce796 to your computer and use it in GitHub Desktop.
Download files from a list of URLs, with or without random delays between downloads
#!/usr/bin/env zsh
# zsh is required: its arithmetic handles decimal (floating-point) values natively, which the progress percentage relies on
# Nothing to do without a URL list: explain how to call the script, then stop
# with a non-zero status.
if [[ $# -lt 1 ]]; then
  printf "No arguments provided!\n\n"
  # Quoted delimiter: the help text is emitted literally, so the one-liner
  # example needs no backslash escapes.
  cat <<'EOF'
Script accepts three arguments:
- file with URLs (one url = one line) (required)
- min time of random wait between downloads (seconds, optional, default: 1)
- max time of random wait between downloads (seconds, optional, default: 10)
Examples:
./download-files.sh ../../urls.txt # default values applied
./download-files.sh ../../urls.txt 3 14
./download-files.sh ../../urls.txt 3 3
./download-files.sh ../../urls.txt 0 0
Alternative one-liner:
for url in $(cat urls.txt); do curl -O "$url"; sleep $((RANDOM % 10 + 1)); done
EOF
  exit 1
fi
# min value of sleep between downloads (seconds), default value
MIN_SLEEP=1
# max value of sleep between downloads (seconds), default value
MAX_SLEEP=10
# value of next sleep timeout between downloading (seconds)
CURRENT_SLEEP=0
# user agent sent with every request
USER_AGENT='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
# for testing use "$(date -d "1 hour ago" +"%Y-%m-%d %H:%M:%S")"
SCRIPT_START_TIME="$(date +"%Y-%m-%d %H:%M:%S")"

# Prints the number of lines in the file given as $1.
# `grep -c ''` is used instead of `wc -l` because it also counts a final line
# that has no trailing newline; the download loop processes such a line, so
# the total has to include it. Prints 0 when the file cannot be read (grep's
# own error message still goes to stderr).
function count_urls() {
  local total
  total=$(grep -c '' -- "$1")
  printf '%d\n' "${total:-0}"
}

# total number of lines (URLs) in the input file
NUMBER_OF_URLS=$(count_urls "$1")
# Validates the environment and the command-line arguments.
# Globals:
#   MIN_SLEEP, MAX_SLEEP - read as defaults, overwritten with $2 / $3 if given
# Arguments:
#   $1 - file with URLs (must be an existing, readable regular file)
#   $2 - min sleep in whole seconds (optional)
#   $3 - max sleep in whole seconds (optional, must be >= min)
# Outputs:
#   error messages on stderr
# Exits with status 1 on any validation failure.
function validate_input() {
  # is curl accessible? (stdout first, then stderr, so both are silenced)
  if ! command -v curl >/dev/null 2>&1; then
    echo "curl could not be found" >&2
    exit 1
  fi
  # fail early with a clear message instead of a redirection error later on
  if [[ ! -f "$1" || ! -r "$1" ]]; then
    echo "File with URLs not found or not readable: $1" >&2
    exit 1
  fi
  # assign default value to MIN_SLEEP if not provided
  MIN_SLEEP=${2:-$MIN_SLEEP}
  # assign default value to MAX_SLEEP if not provided
  MAX_SLEEP=${3:-$MAX_SLEEP}
  # check min and max values: only non-negative whole numbers are accepted
  if [[ ! $MIN_SLEEP =~ ^[0-9]+$ ]] || [[ ! $MAX_SLEEP =~ ^[0-9]+$ ]]; then
    echo "Arguments 2 and 3 must be numbers" >&2
    exit 1
  fi
  if [[ $MAX_SLEEP -lt $MIN_SLEEP ]]; then
    echo "Argument 3 must be greater than or equal to argument 2" >&2
    exit 1
  fi
}
# Downloads every URL listed in the file $1, one per line, printing progress
# and sleeping between downloads.
# Globals:
#   NUMBER_OF_URLS, USER_AGENT, SCRIPT_START_TIME, MIN_SLEEP, MAX_SLEEP - read
#   CURRENT_SLEEP - written when MAX_SLEEP is not 0
# Arguments:
#   $1 - file with URLs
function download_files() {
  local counter=0
  local url progress
  # never divide by zero if the line count is missing or zero
  local total=${NUMBER_OF_URLS:-0}
  (( total > 0 )) || total=1
  # be sure that script will process the very last line from the input file
  # and prevents read from trimming whitespace and interpreting backslashes
  while IFS= read -r url || [ -n "$url" ]; do
    # tolerate files with Windows (CRLF) line endings
    url=${url%$'\r'}
    # floating-point percentage of lines already processed (zsh arithmetic)
    progress=$(( 100.0 * counter / total ))
    printf "line: %d/%d, progress: %.2f%%\n" "$(( counter + 1 ))" "$total" "$progress"
    printf 'downloading %s\n' "${url}"
    # -L follows redirects; the original -l (--list-only) is an FTP/POP3
    # listing option and has no effect on HTTP downloads
    curl -O -J -L -H "User-Agent: $USER_AGENT" -H "Connection: keep-alive" -sw 'status code: %{http_code}\n' "$url"
    calc_remaining_time "$progress" "$SCRIPT_START_TIME"
    # with MAX_SLEEP=0 the sleep stays at its current value (0 by default)
    if [ "$MAX_SLEEP" -ne 0 ]; then
      CURRENT_SLEEP=$(random_number "$MIN_SLEEP" "$MAX_SLEEP")
    fi
    printf "sleep %ds\n\n\n" "$CURRENT_SLEEP"
    ((counter++))
    sleep "$CURRENT_SLEEP"
  done <"$1"
}
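# $RANDOM repeats the same value when it is read inside $(...) in zsh (unlike bash),
# because the subshell does not advance the parent shell's sequence; reseeding did
# not help, so the random bytes are read from /dev/urandom instead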
# Prints a random integer between $1 and $2 (both inclusive).
# Arguments:
#   $1 - lower bound
#   $2 - upper bound
function random_number() {
  local lo="$1" hi="$2"
  local raw
  # two bytes from /dev/urandom, printed by od as one decimal integer
  raw=$(od -An -N2 -i /dev/urandom | tr -d ' ')
  local span=$(( hi - lo + 1 ))
  # fold the raw value into the requested interval
  printf '%s\n' "$(( raw % span + lo ))"
}
# for debugging purposes, please leave this
# function test_random_number() {
# for _ in {1..10}; do
# echo "Random sleep time: $(random_number 1 10)"
# done
# }
# test_random_number
# Private helper: prints the number of seconds in $1 as hh:mm:ss
# (no trailing newline).
function _format_hms() {
  local secs="$1"
  printf "%02d:%02d:%02d" $((secs / 3600)) $((secs % 3600 / 60)) $((secs % 60))
}

# Prints the elapsed time and an estimate of the remaining time via print_time.
# Arguments:
#   $1 - progress in percent; while it is 0 the remaining time is "unknown"
#   $2 - start time in a format understood by `date -d`
function calc_remaining_time() {
  local pct="$1"
  local started_at="$2"
  local now_epoch=$(date +%s)
  local start_epoch=$(date +%s -d "$started_at")
  local elapsed_seconds=$(( now_epoch - start_epoch ))
  local elapsed_hms=$(_format_hms "$elapsed_seconds")
  if [[ $pct -eq 0 ]]; then
    # nothing finished yet, so there is nothing to extrapolate from
    print_time "$elapsed_hms" "unknown"
  else
    # extrapolate the total duration from the share done so far
    local projected_total=$(( elapsed_seconds * 100 / pct ))
    local left_seconds=$(( projected_total - elapsed_seconds ))
    local left_hms=$(_format_hms "$left_seconds")
    print_time "$elapsed_hms" "$left_hms"
  fi
}
# for debugging purposes, please leave this
# function test_calc_remaining_time() {
# calc_remaining_time 0 "$(date -d "1 hour ago" +"%Y-%m-%d %H:%M:%S")"
# calc_remaining_time 0.11535662342566345 "$(date -d "1 hour ago" +"%Y-%m-%d %H:%M:%S")"
# calc_remaining_time 10 "$(date -d "1 hour ago" +"%Y-%m-%d %H:%M:%S")"
# calc_remaining_time 90 "$(date -d "1 minute ago" +"%Y-%m-%d %H:%M:%S")"
# calc_remaining_time 100 "$(date -d "1 minute ago" +"%Y-%m-%d %H:%M:%S")"
# }
# test_calc_remaining_time
# Prints one status line with the elapsed and the remaining time.
# Arguments:
#   $1 - elapsed time, already formatted
#   $2 - remaining time, already formatted (or a placeholder such as "unknown")
function print_time() {
  local elapsed="$1" remaining="$2"
  echo "time elapsed: ${elapsed}, remaining: ${remaining} [hh:mm:ss]"
}
# Prints the total run time and the average time spent per URL.
# Globals:
#   SCRIPT_START_TIME - read; parsed back with `date -d`
#   NUMBER_OF_URLS    - read; when it is 0 or empty the average is reported
#                       as 0 instead of dividing by zero
function print_summary() {
  local now_epoch start_epoch total_seconds
  local seconds_per_url=0
  now_epoch=$(date +%s)
  start_epoch=$(date -d "$SCRIPT_START_TIME" +%s)
  total_seconds=$(( now_epoch - start_epoch ))
  if (( ${NUMBER_OF_URLS:-0} > 0 )); then
    # integer division: the average is rounded down to whole seconds
    seconds_per_url=$(( total_seconds / NUMBER_OF_URLS ))
  fi
  # values are passed as arguments so they are never interpreted as a format
  printf '\n\nFinished! It took: %d seconds, %d seconds/url\n' "$total_seconds" "$seconds_per_url"
}
# Entry point: announce the start, validate the arguments, download everything
# and print the summary.
printf '%s started!\n\n' "$SCRIPT_START_TIME"
validate_input "$@"
download_files "$@"
print_summary
# The desktop notification is best-effort: a missing or failing notify-send
# must not turn a successful run into a non-zero exit status.
if command -v notify-send >/dev/null 2>&1; then
  notify-send "Finished downloading files $1 !" 2>/dev/null || true
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment