Last active
September 6, 2017 08:55
-
-
Save azet/e81d7bc54332ba6ebffb to your computer and use it in GitHub Desktop.
Retrieves MX and A records for 'Alexa Top 1 Million' hosts and prints them as pretty formatted JSON objects to stdout.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Retrieves MX and A records for 'Alexa Top 1 Million' hosts
# and prints them as pretty formatted JSON objects to stdout.
#
# *Optional* parallelism support with GNU Parallel (recommended):
# $ sudo apt-get install parallel
#
# Authors: Aaron Zauner <[email protected]>
# License: CC0 1.0 (https://creativecommons.org/publicdomain/zero/1.0)
#
# Abort on the first unhandled command failure and let a pipeline fail when
# any of its stages fails. `-u` is intentionally not enabled: the argument
# dispatch further down expands ${1} and ${2}, which are unset when the
# script is started without arguments.
set -eo pipefail

# Download URL of the zipped host list.
readonly top1m_s3l="https://s3.amazonaws.com/alexa-static/top-1m.csv.zip"
# Archive file name: the URL with everything up to the last "/" removed.
readonly top1m_zip="${top1m_s3l##*/}"
# CSV file name: the archive name with its last ".<extension>" removed.
readonly top1m_csv="${top1m_zip%.*}"
#######################################
# Print the last comma-separated field of every line of the host list.
# Globals:
#   top1m_csv (read) - path of the CSV file to read
# Arguments:
#   none
# Outputs:
#   Each field followed by a single space on stdout (no trailing newline).
#######################################
function hosts() {
  local line
  # Read line by line instead of word-splitting the whole file: this keeps
  # memory use flat and prevents pathname expansion of the file content.
  # The `|| [[ -n ... ]]` part handles a last line without trailing newline.
  while IFS= read -r line || [[ -n ${line} ]]; do
    # Blank lines carry no host; skip them.
    [[ -n ${line} ]] || continue
    # Strip everything up to and including the last comma.
    printf '%s ' "${line##*,}"
  done < "${top1m_csv}"
}
#######################################
# Print a JSON-like object that maps each MX record of a host to the first
# IPv4 address `getent ahostsv4` reports for that record.
# Arguments:
#   $1 - host name to look up
# Outputs:
#   The formatted object on stdout. A record whose address lookup yields
#   nothing is printed with an empty address string.
# NOTE(review): the emitted `"name": "ip",` lines sit inside `[ ]` and end
#   with a trailing comma, so the result is not strict JSON. The format is
#   kept byte-for-byte so existing consumers of the output keep working.
#######################################
function get_mx() {
  local -a mx_records=()
  local record mx ip

  # Collect the records line by line (no word splitting / globbing of the
  # resolver output). sed keeps only the text after the last space of each
  # line, which for dig's short MX output is the mail exchanger name.
  while IFS= read -r record; do
    if [[ -n ${record} ]]; then
      mx_records+=("${record}")
    fi
  done < <(dig +short +nosearch +keepopen \
    +time=2 mx "${1}" | sed 's/.*\ //')

  printf '{\n\t"%s": {\n\t\t"mx_records": [\n' "${1}"
  for mx in "${mx_records[@]}"; do
    # In our case, v4 suffices. Only the first field of the first output
    # line is used; a failed lookup leaves the address empty instead of
    # aborting the whole run.
    ip=""
    read -r ip _ < <(getent ahostsv4 "${mx}") || true
    printf '\t\t\t"%s": \t"%s",\n' "${mx}" "${ip}"
  done
  printf "\t\t]\n\t}\n}\n"
}
# main
#
# 1. Make sure the host list CSV is present (download and unzip if missing).
# 2. Worker mode: "<script> get_mx <host>" prints one host's object and exits.
# 3. Otherwise process every host of the list, through GNU Parallel when it
#    is installed, sequentially if not.

#######################################
# Remove the downloaded files and report the end of the run.
# Globals:
#   top1m_zip, top1m_csv (read)
# Outputs:
#   A timestamped notice on stderr.
#######################################
function cleanup() {
  # -f: the archive may be absent when the CSV already existed before the run.
  rm -f -- "${top1m_zip}" "${top1m_csv}"
  printf '\n\n<< finished run. >> %s \n' "$(date --rfc-3339=ns)" >&2
}

if [[ ! -e ${top1m_csv} ]]; then
  # Tool output stays suppressed, but a failure is reported instead of
  # letting the script die silently.
  wget "${top1m_s3l}" &> /dev/null || {
    printf 'error: could not download %s\n' "${top1m_s3l}" >&2
    exit 1
  }
  unzip "${top1m_zip}" &> /dev/null || {
    printf 'error: could not unzip %s\n' "${top1m_zip}" >&2
    exit 1
  }
fi

# Worker mode (this is how the parallel branch below re-invokes the script).
# Always exit here, with get_mx's status, so that a failed worker can never
# fall through and start a full run of its own.
if [[ ${1-} == "get_mx" ]]; then
  get_mx "${2-}"
  exit "$?"
fi

# Registered only after the worker dispatch: workers must not delete the
# host list that the controlling process is still reading. Registered before
# the long-running part so the files are removed on every exit path.
trap cleanup EXIT

if command -v parallel > /dev/null; then
  printf "<< parallel mode >>\n\n" >&2
  parallel --progress --colsep ',' "${0} get_mx {2}" :::: "${top1m_csv}"
else
  printf "<< sequential mode (slow! install \`parallel\`.) >>\n\n" >&2
  # hosts prints every entry followed by one space; read them one at a time
  # on a dedicated descriptor so nothing inside the loop can consume the
  # list, and without building (or glob-expanding) a huge word list.
  while IFS= read -r -d ' ' -u 3 host; do
    [[ -n ${host} ]] || continue
    get_mx "${host}"
  done 3< <(hosts)
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment