Last active
August 15, 2023 03:48
-
-
Save augustohp/fdac0243bbd14ca3d1dff09725726a10 to your computer and use it in GitHub Desktop.
Given a broken URL, one that doesn't work anymore, finds the last working URL (you can choose how far back to go) using the Wayback Machine.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env sh
#
# Helps links on your blog/docs to stay healthy.
#
# vim: ft=sh sw=4 ts=4 noet:

# Script identity, shown by --help and --version.
APP_NAME=$(basename "$0")
APP_VERSION="1.0.0"
# NOTE(review): this value looks like an e-mail obfuscation placeholder rather
# than a real address -- restore the author's address before publishing.
APP_AUTHOR="[email protected]"

# Current UTC time formatted as YYYYmmddHHMMSS (14 digits).
DEFAULT_NOW=$(date -u +"%Y%m%d%H%M%S")
readonly APP_NAME APP_VERSION APP_AUTHOR DEFAULT_NOW

# Timestamp used for lookups: WAYBACK_TIMESTAMP when it is set and non-empty,
# otherwise "now". Left writable because -t/--timestamp overrides it.
OPTION_TIMESTAMP="${WAYBACK_TIMESTAMP:-$DEFAULT_NOW}"
# Hook that runs when the script exits. The script acquires nothing that has
# to be released, so this is an explicit no-op that always succeeds.
cleanup()
{
    :
}
# Run cleanup on exit. INT and TERM must terminate the script themselves: a
# signal handler that merely returns lets the shell carry on with the command
# following the interrupted one. Exiting from the handler still triggers the
# EXIT trap. 130 and 143 are the conventional 128+signal statuses.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
# Verifies that the external programs the script relies on (jq and curl) can
# be found by the shell.
#
# Outputs: one "Error: <name> is required." line on stderr per missing program.
# Exits:   with status 1 when at least one program is missing, after all of
#          them have been reported; returns normally otherwise.
sanity_check()
{
    missing_dependencies=0
    for dependency in jq curl
    do
        if ! command -v "$dependency" > /dev/null 2>&1
        then
            echo "Error: $dependency is required." >&2
            missing_dependencies=1
        fi
    done
    # Stop here: continuing without a dependency only produces confusing
    # "command not found" failures later on.
    if [ "$missing_dependencies" -ne 0 ]
    then
        exit 1
    fi
}
# Prints the usage text to stdout.
#
# Reads the globals APP_NAME, APP_AUTHOR, OPTION_TIMESTAMP and DEFAULT_NOW,
# which are expanded inside the here-document.
display_help()
{
    # Plain <<EOF (not <<-EOF): the text and its terminator start at column 0,
    # so the output does not depend on the file being indented with tabs.
    # The time format is spelled HHMMSS to match the %H%M%S layout the script
    # uses for its default timestamp.
    cat <<EOF
Usage: $APP_NAME [options] <link> [link2 [link3] ...]
       $APP_NAME --help
       $APP_NAME --version

Options:
  -h | --help                   Displays help and exits.
  -v | --version                Displays version and exits.
  -x | --debug                  Show commands (-x) before executing them.
  -t | --timestamp <YYYYmmdd>   Fetch site before given date/time, you
                                can also specify time with HHMMSS format.
                                (Default: $OPTION_TIMESTAMP)

Environment variables
  WAYBACK_TIMESTAMP             Used on --timestamp option. If not set,
                                defaults to "now".
                                (Default: $DEFAULT_NOW)

Suggestion and bugs to $APP_AUTHOR.
EOF
}
# Applies the leading command-line options.
#
# Arguments: the script's command line ("$@").
# Globals:   writes OPTION_TIMESTAMP (-t/--timestamp); reads APP_VERSION.
# Parsing stops at "--" or at the first argument that is not a known option;
# the remaining arguments are left for the caller to interpret.
# Exits:     0 after --help / --version; 2 when -t/--timestamp has no usable
#            value.
parse_options()
{
    while [ $# -gt 0 ]
    do
        case "$1" in
            -h|--help)
                display_help
                exit 0
                ;;
            -x|--debug)
                set -x
                ;;
            -v|--version)
                echo "$APP_VERSION"
                exit 0
                ;;
            -t|--timestamp)
                # Without this guard a trailing "-t" stores an empty timestamp
                # and the extra shift below runs past the argument list.
                if [ $# -lt 2 ] || [ -z "$2" ]
                then
                    echo "Error: $1 requires a <timestamp> argument." >&2
                    exit 2
                fi
                # The value ends up in a query string; accept digits only.
                case "$2" in
                    *[!0-9]*)
                        echo "Error: invalid timestamp '$2', digits only (YYYYmmdd[HHMMSS])." >&2
                        exit 2
                        ;;
                esac
                OPTION_TIMESTAMP="$2"
                shift
                ;;
            --|*)
                break
                ;;
        esac
        shift
    done
}
# Looks up every link given on the command line.
#
# Arguments: the script's full command line ("$@"), options included.
# Option words are skipped here because parse_options already handled them.
# Each remaining argument is passed to fetch_archived_url, in order.
main()
{
    options_done=0
    while [ $# -gt 0 ]
    do
        if [ "$options_done" -eq 0 ]
        then
            case "$1" in
                --)
                    # Everything after "--" is a link, even if it starts
                    # with a dash.
                    options_done=1
                    shift
                    continue
                    ;;
                -t|--timestamp)
                    # Skip the option AND its value; otherwise the timestamp
                    # itself would be looked up as if it were a link.
                    shift
                    if [ $# -gt 0 ]
                    then
                        shift
                    fi
                    continue
                    ;;
                -?*)
                    # "--option" parsed elsewhere
                    shift
                    continue
                    ;;
            esac
        fi
        fetch_archived_url "$1"
        shift
    done
}
# Uses Archive.Org to find a working <URL> on Wayback Machine.
# Timestamp is in format <YYYYMMDD>[hhmmss].
#
# Usage: fetch_archived_url <url> [timestamp]
# Example: fetch_archived_url "https://example.org" "20111231235959"
#
# Arguments:
#   $1  link to look up
#   $2  optional timestamp; defaults to $OPTION_TIMESTAMP
# Outputs: the closest snapshot URL reported by the service, on stdout;
#          an error message on stderr when none is reported.
# Returns: 0 when a snapshot URL was printed, 1 otherwise.
fetch_archived_url()
{
    site="$1"
    timestamp="${2:-$OPTION_TIMESTAMP}"
    # --get + --data-urlencode lets curl build the query string, so links that
    # contain "&", "?", "#" or spaces stay a single "url" parameter.
    # "// empty" turns a missing snapshot into no output instead of "null".
    snapshot=$(
        curl --silent --location --get \
            --data-urlencode "url=${site}" \
            --data-urlencode "timestamp=${timestamp}" \
            "https://archive.org/wayback/available" \
        | jq -r '.archived_snapshots.closest.url // empty'
    )
    # Also covers a failed request: jq prints nothing for empty input.
    if [ -z "$snapshot" ]
    then
        echo "Error: no archived snapshot found for ${site}." >&2
        return 1
    fi
    printf '%s\n' "$snapshot"
}
# Entry point: check dependencies, apply the command-line options, then look
# up every link. Both functions receive the full, unmodified argument list.
sanity_check
parse_options "$@"
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment