Created
March 2, 2021 11:41
-
-
Save victorkane/e964cb1a77d1a5185375e97fbe5378c4 to your computer and use it in GitHub Desktop.
Alain Kelder's little shell script to recursively check a site for broken links, copied from http://giantdorks.org/alain/little-shell-script-to-recursively-check-a-site-for-broken-links/
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Copy the content to a file and save it.
# Make the file executable with:
#   chmod +x your_file
# Now actually run it: ./your_file yourURL
#
# See results as they happen (the log is named after the URL with the scheme
# and a leading "www." removed and every "/" turned into "."):
#   tail -f /tmp/yourURL.log
# error handling
#
# err_exit [message...]
#   Print the given message to stderr and terminate the script with exit
#   status 1. When called without arguments a generic notice is printed, so a
#   failure never ends with just a blank line.
err_exit() {
  printf '%s\n' "${*:-Error: aborting.}" >&2
  exit 1
}
# check if proper arguments are supplied: exactly one argument (the URL)
if [[ $# -ne 1 ]]; then
  # A usage error is a diagnostic, so it goes to stderr and does not pollute
  # stdout for anyone capturing the report.
  printf '\n Usage error!\n Please provide URL to check.\n Example: %s http://example.com\n\n' "$0" >&2
  exit 1
fi
# check if wget is a valid command
# `command -v` is a shell builtin, so this check does not itself depend on an
# external `which` binary being installed.
if ! command -v wget > /dev/null 2>&1; then
  echo "wget not found" >&2
  exit 1
fi
# normalize url for log name
#
# normalize_url URL
#   Print URL reduced to a form usable as a file name:
#     - a leading http:// or https:// is removed
#     - a leading "www." is removed
#     - every "/" becomes "."
#     - runs of dots collapse to a single dot
#     - a trailing dot is dropped
#   The scheme and "www." patterns are anchored to the start so that the same
#   text occurring later in the URL (e.g. in a path or host like "awww.") is
#   left alone.
normalize_url() {
  printf '%s\n' "$1" |
    sed -E 's_^https?://__;s/^www\.//;s_/_._g;s/\.+/./g;s/\.$//'
}

url=$(normalize_url "$1")
# remove log if exists, so this run's counts are not mixed with a previous one
if [[ -f "/tmp/$url.log" ]]; then
  echo "Removing existing log.."
  # Quoted path and `--` keep unusual characters in $url from being split or
  # read as options by rm.
  rm -- "/tmp/$url.log" || err_exit
fi
# Start a recursive spider crawl (-r --spider) in the background, ignoring
# robots.txt, and send everything wget prints -- including the server response
# headers requested with -S -- to the log file.
wget -e robots=off --spider -S -r -nH -nd --delete-after "$1" &> "/tmp/$url.log" &
wget_pid=$!

# Report progress every 3 seconds for as long as the background wget is alive.
# The PID of the job started above is tracked directly; matching process
# command lines against $url would treat it as a regex and could also pick up
# unrelated wget processes.
while kill -0 "$wget_pid" 2> /dev/null; do
  sleep 3
  total=$(grep -c "HTTP request sent" "/tmp/$url.log")
  echo "$total HTTP requests sent thus far"
done

# Reap the background job. Its exit status is deliberately ignored: wget
# reports a non-zero status when the server returns error responses, which is
# exactly what a broken-link check expects to encounter.
wait "$wget_pid" || true
# summarize_response_codes LOGFILE
#   Read a wget log and print one line per distinct HTTP status
#   ("<count> <code> <reason>"), most frequent first. The status is taken from
#   the line that follows each "HTTP request sent" line in the log.
summarize_response_codes() {
  grep -A1 "^HTTP request sent" "$1" |
    grep -E -o "[0-9]{3} [A-Za-z]+(.*)" |
    sort |
    uniq -c |
    sort -nr
}

echo -e "\nAll done, calculating response codes.."
# NOTE(review): the heading says "sorted by HTTP code", but the list is
# ordered by descending count (sort -nr on the uniq -c output).
echo -e "\nResponse counts, sorted by HTTP code"
summarize_response_codes "/tmp/$url.log" || err_exit
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment