Created
August 2, 2021 01:37
-
-
Save mkroman/5d0ab1178a1c4631a332f9527d300f55 to your computer and use it in GitHub Desktop.
Simple script that fetches a page, extracts the element matching a CSS selector, and compares it with a cached version from the previous run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# This is a simple script that fetches the page from a given url and extracts a
# specific element using a CSS selector and compares it with the last invocation
# of this command with the same arguments.
#
# Some requests might not actually be HTML, but pup will parse it as HTML as if
# it had a body regardless, so one can still use the "body" css selector for
# non-html files.
#
# Usage:
#
#   pupdiff <url> <selector>
#
# Exit codes:
#
#   0    The element differs from the cached copy (the cache is updated).
#   1    The element is unchanged, OR this is the first run for the given
#        url/selector pair (the element is written to the cache), OR the
#        arguments were wrong, the fetch failed, the selector matched nothing,
#        or the cache could not be written.
#   127  A required external tool (pup, curl, sha256sum, cmp) is missing.
#
# All diagnostics are written to stderr; the exit code is the only result.
#
# Example:
#
#   pupdiff "https://www.cnx-software.com/" "#main article:first-child .entry-title" \
#     && echo "Latest article changed" || echo "No change since last we checked"

# Fail loudly on unset variables (e.g. HOME) instead of building bogus paths.
set -u

# Print all arguments as a single diagnostic line on stderr.
err() {
  printf '%s\n' "$*" >&2
}

if [[ $# -ne 2 ]]; then
  err "Usage: $0 <url> <selector>"
  exit 1
fi

if ! command -v pup &> /dev/null; then
  err "This script requires \`pup\` to be in your PATH!"
  err "Please install https://github.com/ericchiang/pup"
  exit 127
fi

if ! command -v curl &> /dev/null; then
  err "This script requires \`curl\`!"
  exit 127
fi

# sha256sum names the cache entry and cmp does the comparison; without them the
# script would silently misbehave, so check for them up front as well.
for _tool in sha256sum cmp; do
  if ! command -v "${_tool}" &> /dev/null; then
    err "This script requires \`${_tool}\`!"
    exit 127
  fi
done

url="${1}"
selector="${2}"

# DO NOT TOUCH.
# The cache entry is looked up by this directory plus the SHA-256 of
# "<url> <selector>"; changing either makes previously cached elements
# unreachable.
_cache_dir="${HOME}/.cache/htmldiff"
_cache_file=$(printf '%s' "${url} ${selector}" | sha256sum)
# sha256sum prints "<hash>  -"; keep only the hash.
_cache_file="${_cache_file%% *}"

if [[ -z "${_cache_file}" ]]; then
  err "Could not compute the cache key for the given arguments!"
  exit 1
fi

_cache_file_path="${_cache_dir}/${_cache_file}"

# Ensure cache directory exists
if ! mkdir -p "${_cache_dir}"; then
  err "Could not create cache directory ${_cache_dir}!"
  exit 1
fi

# Fetch the page. stderr is kept out of the captured body, and a failed
# transfer aborts instead of being parsed as if it were page content.
# NOTE: HTTP error responses (4xx/5xx) still count as a successful transfer.
if ! page=$(curl -sS "${url}"); then
  err "Failed to fetch ${url}!"
  exit 1
fi

element=$(printf '%s' "${page}" | pup "${selector}")

if [[ -z "${element}" ]]; then
  err 'The selector did not match any elements!'
  exit 1
fi

if [[ ! -e "${_cache_file_path}" ]]; then
  # This is the first time we're fetching this page
  err "-- INFO: Exiting with code 1 as this is the first time we're running"

  if ! printf '%s' "${element}" > "${_cache_file_path}"; then
    err "Could not write cache file ${_cache_file_path}!"
  fi

  exit 1
fi

# cmp -s stays silent and reports only through its exit status.
if printf '%s' "${element}" | cmp -s "${_cache_file_path}" -; then
  # The element is the same as the previous run, exit with exit code 1
  exit 1
fi

# The element differs, write it to cache and exit successfully
if ! printf '%s' "${element}" > "${_cache_file_path}"; then
  err "Could not write cache file ${_cache_file_path}!"
  exit 1
fi

exit 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment