or: "A growing bunch of pseudo-one-liners from when I needed to accomplish something that turned into a shell scripting practice sesh instead because ADHD."
Parallel download jobs + extraction & parsing (pretty fucking fast). Requires: curl, jq, GNU parallel, unfurl
# Tally how often each subdomain value appears across all datasets listed in
# the ProjectDiscovery Chaos index. Results are written to ./chaos_subs.txt as
# "<count> <value>" lines, highest count first.
#
# Needs: curl, jq, GNU parallel, unfurl, awk, sort (with --parallel), nproc.
#
# Stages:
#   1. curl + jq   - fetch index.json and print every entry's .URL.
#   2. parallel #1 - download the URLs and unpack each download to stdout
#                    (tar xO).
#   3. parallel #2 - split that stream into blocks (--spreadstdin) and run
#                    `unfurl format %S` on each block.
#   4. awk         - count occurrences of the first field of every line.
#   5. sort        - numeric, descending, 1G buffer, CORES_X2 threads.

CORES=$(nproc)
CORES_X2=$(( CORES * 2 ))
CORES_HALF=$(( CORES / 2 ))
# Integer division yields 0 on a single-core host; clamp so that parallel
# still substitutes at least one URL per command.
[ "$CORES_HALF" -ge 1 ] || CORES_HALF=1

# Passed to parallel's --block. NOTE(review): GNU parallel documents --block
# as a size in bytes, not a number of lines - confirm 100000 bytes is intended.
LINES_PER_BLOCK=100000

# Byte-wise locale for the tools below (awk, sort).
export LC_ALL=C

# NOTE(review): --max-args controls how many URLs replace a single {} (one
# curl may receive several URLs); if the goal was to cap concurrent downloads,
# --jobs is the option for that - confirm the intent.
echo "Downloading/Extracting..." \
  && curl -s https://chaos-data.projectdiscovery.io/index.json \
  | jq -r -M '.[] | .URL' \
  | parallel --will-cite --bar --max-args "$CORES_HALF" 'curl -sL {} | tar xO' \
  | parallel --will-cite --block "$LINES_PER_BLOCK" --spreadstdin 'unfurl format %S' \
  | awk '{c[$1]++} END {print "Sorting lines..." > "/dev/stderr"; for (i in c){print c[i],i}}' \
  | sort -r -n -S1G --parallel="$CORES_X2" > chaos_subs.txt
echo 'Done!'