Skip to content

Instantly share code, notes, and snippets.

@h8rt3rmin8r
Created November 15, 2020 09:53
Show Gist options
  • Save h8rt3rmin8r/85f03d2c26357c119051840298a1f2a9 to your computer and use it in GitHub Desktop.
Save h8rt3rmin8r/85f03d2c26357c119051840298a1f2a9 to your computer and use it in GitHub Desktop.
Download all links in a referenced input file using GNU parallel
#! /usr/bin/env bash
#>------------------------------------------------------------------------------
#>
#> [ wfox-parallel ]
#>
#> Download all links in a referenced input file using GNU parallel
#>
#> Outputs will be situated into a new directory hierarchy that mirrors the
#> origin website URL path for each file. The origin URL (and referer) of each
#> download will be recorded in the file's extended attributes (xattr) when
#> the local wget build and filesystem support them.
#>
#> USAGE:
#>
#> wfox-parallel <FILE>
#> wfox-parallel <OPTION>
#>
#> where "FILE" is a valid file reference; and where "FILE" contains a
#> vertical list of valid URLs; and where "OPTION" is one of the following:
#>
#> |
#> -h, --help | Print this help text to the terminal
#> |
#>
#> ATTRIBUTION:
#>
#> Created on 20201115 by h8rt3rmin8r ([email protected])
#>
#>------------------------------------------------------------------------------
# Declare functions
function _get() {
    # Download a single URL with wget, sending browser-like request headers.
    #
    # Arguments: $1 - the URL to download
    # Outputs:   one pipe-delimited log line on stderr; the downloaded file is
    #            written by wget (--force-directories, --no-clobber)
    # Returns:   the exit status of wget
    #
    # The helpers are defined inside this function on purpose: the caller
    # exports only '_get' (export -f), so everything the worker shell needs
    # must travel inside this one function body.

    function _domain() {
        # Print the authority (host[:port]) portion of a URL.
        #
        # Arguments: $1 - the full URL
        #            $2 - the scheme prefix already detected (e.g. 'https://'),
        #                 may be empty
        local url="${1}"
        local scheme="${2}"
        # Quoting the inner expansion makes the prefix match literally, so no
        # manual escaping of slashes is needed
        local rest="${url#"${scheme}"}"
        # Cut path, query and fragment BEFORE stripping userinfo; otherwise an
        # '@' inside a query string (http://host?mail=a@b) is mistaken for the
        # userinfo delimiter and the wrong host is produced
        rest="${rest%%[/?#]*}"
        printf '%s\n' "${rest##*@}"
    }

    function _uag() {
        # Print one of ten fixed user-agent strings, chosen by the last
        # decimal digit of $RANDOM.
        local -a agents=(
            'Chrome/15.0.860.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/15.0.860.0'
            'Mozilla/4.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/11.0.1245.0 Safari/537.36'
            'Mozilla/5.0 (Linux; U; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.27 Safari/525.13'
            'Mozilla/5.0 (Macintosh; AMD Mac OS X 10_8_2) AppleWebKit/535.22 (KHTML, like Gecko) Chrome/18.6.872'
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36'
            'Mozilla/5.0 (Macintosh; PPC Mac OS X 10_6_7) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.790.0 Safari/535.1'
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
            'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.202.0 Safari/532.0'
            'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.222.5 Safari/532.2'
            'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.127 Safari/534.16'
        )
        printf '%s\n' "${agents[RANDOM % ${#agents[@]}]}"
    }

    local trg_url="${1}"
    # Declared separately from the assignments so 'local' does not mask the
    # exit status of the command substitutions
    local in_p in_m in_u

    # Scheme prefix including '//'; grep exits 1 when the URL has none, which
    # is not an error here
    in_p=$(grep -Eo '^(([a-z0-9]+[-]?[a-z0-9]+[:])|([a-z0-9]+[:]))(//)' <<<"${trg_url}" 2>/dev/null) || in_p=''
    in_m=$(_domain "${trg_url}" "${in_p}")
    in_u=$(_uag)

    # Log line: <epoch.nanos>|<ppid>|<tool>|<level>|<url>
    # Written with '>&2' rather than '>/dev/stderr': the latter re-opens the
    # stream by path, which can truncate or mis-position the output when
    # stderr is a regular file
    printf '%s|%s|\e[38;5;25mwfox-parallel\e[0m|\e[38;5;25minfo\e[0m___|%s\n' \
        "$(date '+%s.%N')" "${PPID}" "${trg_url}" >&2

    # NOTE(review): certificate validation is disabled (--no-check-certificate)
    # and the Host header is pinned to the original authority; the latter
    # presumably misbehaves on cross-host redirects - confirm before relying on it
    wget -q --xattr --force-directories --content-disposition --no-clobber --adjust-extension --no-check-certificate --timeout=3 --tries=1 \
        -U "${in_u}" \
        --referer="${trg_url}" \
        --header="Authority: ${in_m}" \
        --header="Origin: ${in_p}${in_m}" \
        --header="Host: ${in_m}" \
        --header="Accept: */*" \
        --header="Upgrade-Insecure-Requests: 1" \
        --header="Cache-Control: max-age=0" \
        --header="TE: Trailers" \
        --header="Pragma: no-cache" \
        --header="Connection: keep-alive" \
        --header="DNT: 1" "${trg_url}"
    return $?
}
# Print the embedded help text: every line of this script that begins with
# the two-character help marker (hash followed by '>') is emitted with that
# marker removed.
# Outputs: help text on stdout
# Returns: exit status of the final pipeline stage (sed)
function _help() {
    grep -E '^#>' "${0}" \
        | sed 's/^#>//'
    return $?
}
# Parse inputs and execute operations

# Handle the help flag before anything touches "${1}" as a path; handing
# '-h' or '--help' to readlink makes it parse them as its own options
if [[ "${1}" =~ ^[-][hH]$ || "${1}" =~ ^[-]+help$ ]]; then
    _help
    exit $?
fi

# The only other accepted argument is an existing regular file
if [[ ! -f "${1}" ]]; then
    # '>&2' instead of '>/dev/stderr': avoids re-opening the stream by path
    printf '%s|%s|\e[38;5;196mwfox-parallel\e[0m|\e[38;5;196merror\e[0m__|%s\n' \
        "$(date '+%s.%N')" "${PPID}" 'Missing required input: <FILE>' >&2
    printf '%s|%s|\e[38;5;25mwfox-parallel\e[0m|\e[38;5;25minfo\e[0m___|%s\n' \
        "$(date '+%s.%N')" "${PPID}" "Use '--help' for more information" >&2
    exit 1
fi

# Resolve the input file to an absolute path; fall back to the argument as
# given if readlink cannot resolve it
in_f=$(readlink -f -- "${1}") || in_f="${1}"

# Export the worker so the shells spawned by GNU parallel can call it, then
# feed it one URL per input line (-k keeps output in input order)
export -f _get
parallel --no-notice -k _get "{}" < "${in_f}"
e_c="$?"

# Remove the function explicitly (-f) rather than a same-named variable
unset -f _get
exit "${e_c}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment