Created
November 15, 2020 09:53
-
-
Save h8rt3rmin8r/85f03d2c26357c119051840298a1f2a9 to your computer and use it in GitHub Desktop.
Download all links in a referenced input file using GNU parallel
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env bash
#>------------------------------------------------------------------------------
#>
#> [ wfox-parallel ]
#>
#> Download all links in a referenced input file using GNU parallel
#>
#> Outputs will be situated into a new directory hierarchy that mirrors the
#> origin website URL path for each file. File exif data will be retained
#> if the 'xattr' package is installed on the local machine.
#>
#> USAGE:
#>
#> wfox-parallel <FILE>
#> wfox-parallel <OPTION>
#>
#> where "FILE" is a valid file reference; and where "FILE" contains a
#> vertical list of valid URLs; and where "OPTION" is one of the following:
#>
#> |
#> -h, --help | Print this help text to the terminal
#> |
#>
#> ATTRIBUTION:
#>
#> Created on 20201115 by h8rt3rmin8r ([email protected])
#>
#>------------------------------------------------------------------------------
# Declare functions
#######################################
# Download one URL with wget into a directory tree mirroring the URL path.
#
# A progress line is written to stderr, then wget is invoked quietly with a
# randomly chosen browser User-Agent and a set of browser-like headers.
#
# Arguments:
#   $1 - URL to download (a trailing carriage return is ignored)
# Outputs:
#   One log line on stderr; files written by wget under the current directory
# Returns:
#   wget's exit status, or 1 when no URL was supplied
#######################################
function _get() {
  # The helpers are declared INSIDE _get on purpose: the script runs
  # 'export -f _get' and GNU parallel executes it in fresh shells, so only
  # definitions contained in this function body are available there.

  # Print the host part of URL $1, given its scheme prefix $2 (may be empty).
  function _domain() {
    local rest="${1#"${2}"}"
    # Cut path, query and fragment first, so that an '@' appearing in any of
    # them cannot be mistaken for the userinfo delimiter.
    rest="${rest%%[/?#]*}"
    printf '%s\n' "${rest##*@}"
  }

  # Print one of ten browser User-Agent strings, picked at random.
  function _uag() {
    local -a agents=(
      'Chrome/15.0.860.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/15.0.860.0'
      'Mozilla/4.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/11.0.1245.0 Safari/537.36'
      'Mozilla/5.0 (Linux; U; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.27 Safari/525.13'
      'Mozilla/5.0 (Macintosh; AMD Mac OS X 10_8_2) AppleWebKit/535.22 (KHTML, like Gecko) Chrome/18.6.872'
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36'
      'Mozilla/5.0 (Macintosh; PPC Mac OS X 10_6_7) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.790.0 Safari/535.1'
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
      'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.202.0 Safari/532.0'
      'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.222.5 Safari/532.2'
      'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.127 Safari/534.16'
    )
    # RANDOM % 10 is the last decimal digit of RANDOM, i.e. an index 0-9.
    printf '%s\n' "${agents[RANDOM % 10]}"
  }

  local trg_url="${1:-}"
  # URL lists saved with CRLF line endings leave a stray CR on every entry.
  trg_url="${trg_url%$'\r'}"

  if [[ -z "${trg_url}" ]]; then
    printf '%s|%s|\e[38;5;196mwfox-parallel\e[0m|\e[38;5;196merror\e[0m__|Missing required input: <URL>\n' \
      "$(date '+%s.%N')" "${PPID}" >&2
    return 1
  fi

  # Scheme prefix such as "https://"; stays empty when the URL has none.
  # Schemes are case-insensitive, so upper-case letters are accepted as well.
  local -r scheme_re='^[A-Za-z0-9][A-Za-z0-9+.-]*://'
  local in_p=''
  if [[ "${trg_url}" =~ ${scheme_re} ]]; then
    in_p="${BASH_REMATCH[0]}"
  fi

  local in_m in_u
  in_m=$(_domain "${trg_url}" "${in_p}")
  in_u=$(_uag)

  # '>&2' duplicates the existing stderr descriptor. Re-opening /dev/stderr
  # would truncate a log file that stderr has been redirected to.
  printf '%s|%s|\e[38;5;25mwfox-parallel\e[0m|\e[38;5;25minfo\e[0m___|%s\n' \
    "$(date '+%s.%N')" "${PPID}" "${trg_url}" >&2

  # wget is the last command, so its exit status is what _get returns.
  wget -q --xattr --force-directories --content-disposition --no-clobber --adjust-extension --no-check-certificate --timeout=3 --tries=1 \
    -U "${in_u}" \
    --referer="${trg_url}" \
    --header="Authority: ${in_m}" \
    --header="Origin: ${in_p}${in_m}" \
    --header="Host: ${in_m}" \
    --header="Accept: */*" \
    --header="Upgrade-Insecure-Requests: 1" \
    --header="Cache-Control: max-age=0" \
    --header="TE: Trailers" \
    --header="Pragma: no-cache" \
    --header="Connection: keep-alive" \
    --header="DNT: 1" "${trg_url}"
}
#######################################
# Print the embedded help text of this script.
#
# Every line of the script file ($0) that begins with the two-character
# marker '#' + '>' is printed with that marker removed.
#
# Outputs:
#   Help text on stdout
# Returns:
#   sed's exit status (non-zero when the script file cannot be read)
#######################################
function _help() {
  # A single sed pass both selects the marked lines and strips the marker,
  # and lets a read failure surface in the return status.
  sed -n 's/^#>//p' -- "${0}"
}
# Parse inputs and execute operations
#
# Flow: answer a help request, validate the <FILE> argument and the GNU
# parallel dependency, then feed every line of <FILE> to _get through
# parallel. The script exits with parallel's exit status.

# Handle the help request first so that no filesystem work is done for it.
if [[ "${1:-}" =~ ^[-][hH]$ || "${1:-}" =~ ^[-]+help$ ]]; then
  _help
  exit $?
fi

# Diagnostics use '>&2' rather than re-opening /dev/stderr, which would
# truncate a log file that stderr has been redirected to.
if [[ ! -f "${1:-}" ]]; then
  printf '%s|%s|\e[38;5;196mwfox-parallel\e[0m|\e[38;5;196merror\e[0m__|Missing required input: <FILE>\n' \
    "$(date '+%s.%N')" "${PPID}" >&2
  printf '%s|%s|\e[38;5;25mwfox-parallel\e[0m|\e[38;5;25minfo\e[0m___|Use '\''--help'\'' for more information\n' \
    "$(date '+%s.%N')" "${PPID}" >&2
  exit 1
fi

if ! command -v parallel >/dev/null 2>&1; then
  printf '%s|%s|\e[38;5;196mwfox-parallel\e[0m|\e[38;5;196merror\e[0m__|Missing required dependency: GNU parallel\n' \
    "$(date '+%s.%N')" "${PPID}" >&2
  exit 127
fi

# Resolve to an absolute path; fall back to the argument as given when
# readlink cannot resolve it.
in_f=$(readlink -f -- "${1}") || in_f="${1}"

# Export _get so the shells spawned by parallel can call it.
export -f _get
# -k keeps parallel's output in the same order as the input lines.
parallel --no-notice -k _get "{}" < "${in_f}"
e_c=$?
unset -f _get
exit "${e_c}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment