Last active
December 11, 2024 09:14
-
-
Save ip413/d1c6df5c5b695ea36db84a68f5cce796 to your computer and use it in GitHub Desktop.
Download files from list of URLs, with random timeouts or without it
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env zsh
# zsh handles decimal values automatically

# Called without any argument: print the usage text and stop with status 1.
if (( $# == 0 )); then
  # Quoted delimiter, so the help text (including the $ signs of the
  # one-liner) is printed literally without any expansion.
  cat <<'EOF'
No arguments provided!

Script accepts three arguments:
- file with URLs (one url = one line) (required)
- min time of random wait between downloads (seconds, optional, default: 1)
- max time of random wait between downloads (seconds, optional, default: 10)
Examples:
./download-files.sh ../../urls.txt # default values applied
./download-files.sh ../../urls.txt 3 14
./download-files.sh ../../urls.txt 3 3
./download-files.sh ../../urls.txt 0 0
Alternative one-liner:
for url in $(cat urls.txt); do curl -O "$url"; sleep $((RANDOM % 10 + 1)); done
EOF
  exit 1
fi
# min value of sleep between downloads (seconds), default value
MIN_SLEEP=1
# max value of sleep between downloads (seconds), default value
MAX_SLEEP=10
# value of next sleep timeout between downloading (seconds)
CURRENT_SLEEP=0
# user agent sent with every request
USER_AGENT='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
# for testing use "$(date -d "1 hour ago" +"%Y-%m-%d %H:%M:%S")"
SCRIPT_START_TIME="$(date +"%Y-%m-%d %H:%M:%S")"

# Print the number of lines in a file, counting a last line that has no
# trailing newline as well (`wc -l` counts newline characters only, so it
# reports one line too few for such files, and 0 for a single-line file).
# Arguments: $1 - path to the file with URLs
# Outputs:   the line count on stdout; a diagnostic on stderr when unreadable
# Returns:   0 on success, 1 (after printing 0) when the file cannot be read
function count_urls() {
  local file="$1"
  if [[ ! -f "$file" || ! -r "$file" ]]; then
    printf 'cannot read file with URLs: %s\n' "$file" >&2
    printf '0\n'
    return 1
  fi
  # NR at END is the number of records read, terminated or not.
  awk 'END { print NR }' < "$file"
}

# total number of lines (URLs) in the input file
NUMBER_OF_URLS=$(count_urls "$1")
# Check the environment and the command-line arguments before downloading.
# Globals:   MIN_SLEEP, MAX_SLEEP - read as defaults, overwritten by $2 / $3
# Arguments: $1 - file with URLs
#            $2 - min sleep in seconds (optional)
#            $3 - max sleep in seconds (optional)
# Exits with status 1 and a message on stderr when curl is missing, the
# file cannot be read, or the sleep bounds are not valid whole numbers.
function validate_input() {
  # is curl accessible? (stdout is redirected first so that both streams
  # end up in /dev/null)
  if ! command -v curl >/dev/null 2>&1; then
    echo "curl could not be found" >&2
    exit 1
  fi
  # the list of URLs has to be an existing, readable regular file
  if [[ ! -f "$1" || ! -r "$1" ]]; then
    echo "File with URLs does not exist or is not readable: $1" >&2
    exit 1
  fi
  # assign default value to MIN_SLEEP if not provided
  MIN_SLEEP=${2:-$MIN_SLEEP}
  # assign default value to MAX_SLEEP if not provided
  MAX_SLEEP=${3:-$MAX_SLEEP}
  # both bounds must consist of digits only
  if [[ ! $MIN_SLEEP =~ ^[0-9]+$ ]] || [[ ! $MAX_SLEEP =~ ^[0-9]+$ ]]; then
    echo "Arguments 2 and 3 must be numbers" >&2
    exit 1
  fi
  if [[ $MAX_SLEEP -lt $MIN_SLEEP ]]; then
    echo "Argument 3 must be greater than or equal to argument 2" >&2
    exit 1
  fi
}
# Download every URL listed in the file, printing progress and sleeping a
# random time between downloads.
# Globals:   NUMBER_OF_URLS, USER_AGENT, SCRIPT_START_TIME, MIN_SLEEP,
#            MAX_SLEEP (read); CURRENT_SLEEP (written)
# Arguments: $1 - file with URLs, one per line
# Outputs:   progress, curl status code and timing information on stdout
function download_files() {
  local counter=0
  local url progress
  # $(( )) also normalises padded or empty counts to a plain integer
  local total=$(( NUMBER_OF_URLS ))
  # never divide by zero when the recorded line count is 0
  (( total > 0 )) || total=1
  # be sure that script will process the very last line from the input file
  # and prevents read from trimming whitespace and interpreting backslashes
  while IFS= read -r url || [ -n "$url" ]; do
    # an empty line is not a URL: count it, but do not call curl or sleep
    if [[ -z "$url" ]]; then
      (( counter++ ))
      continue
    fi
    # 100.0 forces floating point arithmetic (zsh)
    progress=$(( 100.0 * counter / total ))
    printf "line: %d/%d, progress: %.2f%%\n" "$(( counter + 1 ))" "$total" "$progress"
    printf 'downloading %s\n' "${url}"
    # -L follows redirects; the previous lowercase -l is --list-only, which
    # only affects FTP/POP3 listings and does nothing for HTTP downloads
    curl -O -J -L -H "User-Agent: $USER_AGENT" -H "Connection: keep-alive" -sw 'status code: %{http_code}\n' "$url"
    calc_remaining_time "$progress" "$SCRIPT_START_TIME"
    # MAX_SLEEP of 0 disables the waiting; CURRENT_SLEEP keeps its value
    if [ "$MAX_SLEEP" -ne 0 ]; then
      CURRENT_SLEEP=$(random_number "$MIN_SLEEP" "$MAX_SLEEP")
    fi
    printf "sleep %ds\n\n\n" "$CURRENT_SLEEP"
    (( counter++ ))
    sleep "$CURRENT_SLEEP"
  done <"$1"
}
# RANDOM in zsh returns the same number here (different behavior than bash;
# presumably because this function is called inside a $(...) subshell),
# so we need to work on /dev/urandom (reseed doesn't work)
# Arguments: $1 - lower bound, $2 - upper bound (whole numbers, inclusive)
# Outputs:   a random whole number between $1 and $2 on stdout
# Returns:   0 on success, 1 when $2 < $1 or no random data could be read
function random_number() {
  local min="$1"
  local max="$2"
  local range=$((max - min + 1))
  local random_num
  # an empty interval would mean a modulo by zero or by a negative number
  if (( range < 1 )); then
    echo "random_number: max ($max) must not be lower than min ($min)" >&2
    return 1
  fi
  # -N2 -tu2 decodes exactly one unsigned 16-bit value (0..65535); the old
  # -i decoded a 4-byte signed int from the 2 bytes read, so the result
  # depended on zero padding and on the byte order of the machine
  random_num=$(od -An -N2 -tu2 /dev/urandom | tr -d '[:space:]')
  if [[ ! $random_num =~ ^[0-9]+$ ]]; then
    echo "random_number: cannot read /dev/urandom" >&2
    return 1
  fi
  echo $((random_num % range + min))
}
# for debugging purposes, please leave this | |
# function test_random_number() { | |
# for _ in {1..10}; do | |
# echo "Random sleep time: $(random_number 1 10)" | |
# done | |
# } | |
# test_random_number | |
# Print the time elapsed since the start and an estimate of the time left.
# Arguments: $1 - progress in percent (0 means "no estimate yet")
#            $2 - start time in a format understood by `date -d`
# Outputs:   one line on stdout, produced by print_time
function calc_remaining_time() {
  local pct="$1"
  local began="$2"
  local now_epoch began_epoch spent spent_hms projected left
  # "unknown" is reported until some progress has been made
  local left_hms="unknown"

  now_epoch=$(date +%s)
  began_epoch=$(date +%s -d "$began")
  spent=$(( now_epoch - began_epoch ))
  spent_hms=$(printf "%02d:%02d:%02d" $((spent / 3600)) $((spent % 3600 / 60)) $((spent % 60)))

  if [[ $pct -ne 0 ]]; then
    # projected total run time, extrapolated from the progress so far
    projected=$(( spent * 100 / pct ))
    left=$(( projected - spent ))
    left_hms=$(printf "%02d:%02d:%02d" $((left / 3600)) $((left % 3600 / 60)) $((left % 60)))
  fi

  print_time "$spent_hms" "$left_hms"
}
# for debugging purposes, please leave this | |
# function test_calc_remaining_time() { | |
# calc_remaining_time 0 "$(date -d "1 hour ago" +"%Y-%m-%d %H:%M:%S")" | |
# calc_remaining_time 0.11535662342566345 "$(date -d "1 hour ago" +"%Y-%m-%d %H:%M:%S")" | |
# calc_remaining_time 10 "$(date -d "1 hour ago" +"%Y-%m-%d %H:%M:%S")" | |
# calc_remaining_time 90 "$(date -d "1 minute ago" +"%Y-%m-%d %H:%M:%S")" | |
# calc_remaining_time 100 "$(date -d "1 minute ago" +"%Y-%m-%d %H:%M:%S")" | |
# } | |
# test_calc_remaining_time | |
# Print one timing line.
# Arguments: $1 - elapsed time, $2 - remaining time (or a placeholder)
function print_time() {
  local elapsed="$1"
  local remaining="$2"
  echo "time elapsed: ${elapsed}, remaining: ${remaining} [hh:mm:ss]"
}
# Print the total run time and the average time spent per URL.
# Globals: SCRIPT_START_TIME (read) - start time, parsed with `date -d`
#          NUMBER_OF_URLS (read)    - number of lines in the input file
# Outputs: summary on stdout; the average is whole seconds (integer
#          division) or "n/a" when the URL count is zero or empty
function print_summary() {
  local now_epoch start_epoch elapsed per_url
  now_epoch=$(date +%s)
  start_epoch=$(date -d "$SCRIPT_START_TIME" +%s)
  elapsed=$(( now_epoch - start_epoch ))
  # a count of 0 would otherwise abort with a division by zero error
  if (( NUMBER_OF_URLS > 0 )); then
    per_url=$(( elapsed / NUMBER_OF_URLS ))
  else
    per_url="n/a"
  fi
  # values are passed as arguments, never as part of the format string
  printf '\n\nFinished! It took: %s seconds, %s seconds/url\n' "$elapsed" "$per_url"
}
# Main sequence: announce the start, validate, download, summarise.
# The start time is passed as an argument, not used as the format string.
printf '%s started!\n\n' "$SCRIPT_START_TIME"
# "$@" keeps every argument intact; unquoted $@ drops empty arguments in
# zsh, which would shift the min/max sleep values into the wrong position.
validate_input "$@"
download_files "$@"
print_summary
# The desktop notification is best effort only: a missing or failing
# notify-send must not become the exit status of the whole script.
if command -v notify-send >/dev/null 2>&1; then
  notify-send "Finished downloading files $1 !" 2>/dev/null || true
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment