Skip to content

Instantly share code, notes, and snippets.

@rawiriblundell
Last active February 28, 2022 14:31
Show Gist options
  • Save rawiriblundell/38742646602aa42d6ce47345eaaae2c1 to your computer and use it in GitHub Desktop.
Save rawiriblundell/38742646602aa42d6ce47345eaaae2c1 to your computer and use it in GitHub Desktop.
CheckMK local check for capturing available updates
#!/bin/bash
# check_linux_lastpatched - Check when a Linux host was last patched and warn
# if a specified time period is exceeded
# Purpose: checkmk local check to ensure we keep Linux hosts patched
# Author: Rawiri Blundell
# Copyright: See provided LICENCE file
# Date: 20211102 partial rewrite, originally written 20170828
# Usage: ./check_linux_lastpatched [warn threshold (days)] [crit threshold (days)]
# n.b. warn threshold defaults to 9 months, crit threshold to 12.
########################################
service_name="Linux Patching"

# Lifetime of the cached result in seconds; the cache is only consulted when
# this is greater than zero
cache_ttl=86400

# Thresholds in days. Positional arguments win when supplied:
#   $1 - warn threshold, defaulting to 9 months (rounded up by half a day)
#   $2 - crit threshold, defaulting to a year. No host should be unpatched
#        for longer, but this can be overridden e.g. for legacy hosts or
#        version locked hosts
warn_threshold="${1:-274}"
crit_threshold="${2:-365}"

# The same two thresholds expressed as epoch timestamps
warn_thres="$(date -d "${warn_threshold} days ago" +%s)"
crit_thres="$(date -d "${crit_threshold} days ago" +%s)"

# Define check_version here if you need to specify it
#check_version=

# With no check_version defined above, take the version from the checkmk
# agent script found on PATH. (A global MK_AGENT_VERSION variable would be
# useful here.) If nothing can be determined, fall back to 'UNKNOWN'.
if [[ -z "${check_version}" ]]; then
  if command -v check_mk_agent >/dev/null 2>&1; then
    check_version="$(awk '/Version:/{print $3}' "$(command -v check_mk_agent)")"
  fi
  : "${check_version:=UNKNOWN}"
fi
# The following functions are broad approximations of functions originally
# supplied by libconfmap.sh. They emit one result line in the form
#   <status> "<service_name>" <text>
# When given with '-x', we exit immediately after printing the output
# This is required sometimes to prevent further processing and unwanted extra output

#######################################
# Print a single result line.
# Globals:   service_name (read)
# Arguments: [-x]  optional; exit 0 after printing
#            $1    numeric status (0 ok, 1 warn, 2 crit, 3 debug)
#            $2..  text; the words are joined with single spaces
# Outputs:   the result line on stdout
#######################################
print_out() {
  local exit_after=0 status text
  local nl=$'\n' escaped_nl='\n'

  if [[ "${1}" == "-x" ]]; then
    exit_after=1
    shift
  fi

  # Take the status off the argument list so it is not repeated in the text
  status="${1}"
  shift
  text="${*}"

  # The result has to stay on one physical line: drop trailing newlines and
  # turn embedded ones into a literal '\n' sequence
  while [[ "${text}" == *"${nl}" ]]; do
    text="${text%"${nl}"}"
  done
  text=${text//${nl}/${escaped_nl}}

  printf -- '%s "%s" %s\n' "${status}" "${service_name}" "${text}"

  if (( exit_after )); then
    exit 0
  fi
}

# Print a critical (status 2) result. Usage: print_crit [-x] text...
print_crit() {
  case "${1}" in
    (-x) shift; print_out -x 2 "${@}" ;;
    (*)  print_out 2 "${@}" ;;
  esac
}

# Print a debug (status 3) result. Usage: print_debug [-x] text...
print_debug() {
  case "${1}" in
    (-x) shift; print_out -x 3 "${@}" ;;
    (*)  print_out 3 "${@}" ;;
  esac
}

# Print an ok (status 0) result. Usage: print_ok [-x] text...
print_ok() {
  case "${1}" in
    (-x) shift; print_out -x 0 "${@}" ;;
    (*)  print_out 0 "${@}" ;;
  esac
}

# Print a warning (status 1) result. Usage: print_warn [-x] text...
print_warn() {
  case "${1}" in
    (-x) shift; print_out -x 1 "${@}" ;;
    (*)  print_out 1 "${@}" ;;
  esac
}
# Check that our cache variable, directory and file are in place
if (( "${#MK_VARDIR}" == 0 )); then
  if command -v check_mk_agent >/dev/null 2>&1; then
    # Read the default from the agent script itself. awk must be given the
    # script as its input file, otherwise it would sit reading stdin.
    # Only the first matching line is used, and double quotes / closing braces
    # are removed from the value.
    # NOTE(review): assumes the agent declares the default on a line
    # containing 'MK_VARDIR:' with the path after the '=' - confirm against
    # the deployed agent version.
    MK_VARDIR="$(
      awk -F '=' '/MK_VARDIR:/{gsub(/["}]/, "", $2); print $2; exit}' \
        "$(command -v check_mk_agent)"
    )"
  else
    print_debug -x "Unable to source env variables from agent"
  fi
  : "${MK_VARDIR:?MK_VARDIR undefined}"
fi
if [[ ! -d "${MK_VARDIR}/cache" ]]; then
  mkdir -p "${MK_VARDIR}/cache" || print_debug -x "Could not create ${MK_VARDIR}/cache"
fi
# One cache file per service, with spaces in the name swapped for underscores
cache_file="${MK_VARDIR}/cache/${service_name// /_}.cache"
# With caching enabled (cache_ttl > 0) and a cache file present, compare the
# time since the file was last modified against the ttl. While it is still
# within the ttl, dump the cached result and exit; otherwise fall through.
# TO-DO: Build a bit more smarts to auto-renew the cache within its ttl under some conditions
if (( cache_ttl > 0 )) && [[ -e "${cache_file}" ]]; then
  time_now="${EPOCHSECONDS:-$(date +%s)}"
  # Despite its name this holds the file's modification time (epoch seconds)
  cache_file_age="$(stat -c %Y "${cache_file}")"
  # ...and this holds the number of seconds since that modification
  cache_file_threshold=$(( time_now - cache_file_age ))
  if (( cache_file_threshold < cache_ttl )); then
    cat "${cache_file}"
    exit 0
  fi
fi
# Release string of the kernel that is running right now
current_kernel="$(uname -r)"
# Join the lines of a file into a single comma separated line.
# Usage: n2c [file]   (reads stdin when no file is given)
# See also c2n() for the opposite behaviour
# shellcheck disable=SC2120
n2c() {
  local input="${1:--}"
  paste -s -d ',' "${input}"
}
# A small function to test connectivity to a remote host's port.
# Usage: probe_port [remote host] [port (default: 22)] [tcp/udp (default: tcp)]
# Returns: 0 when the connection could be opened within one second,
#          non-zero otherwise. A missing host aborts with 'No target'.
probe_port() {
  local target="${1:?No target}"
  local port="${2:-22}"
  local proto="${3:-tcp}"

  # Hand the values to the child shell as positional parameters rather than
  # splicing them into the command string, so that a value such as
  # 'host/1; some_command' can never be executed as shell code.
  timeout 1 bash -c '</dev/"${1}"/"${2}"/"${3}"' _ \
    "${proto}" "${target}" "${port}" 2>/dev/null
}
# First check if it's a RHEL host
if [[ -f /etc/redhat-release ]]; then
  # Possible TO-DO: Add support for 'tracer', requires katello tools e.g. yum install katello-host-tools-tracer

  # Try to figure out which Satellite server we're hooked up to
  if [[ -f /etc/rhsm/rhsm.conf ]]; then
    sat_server=$(awk '/^hostname/{print $3}' /etc/rhsm/rhsm.conf)
    sat_port=$(awk '/^port/{print $3}' /etc/rhsm/rhsm.conf)
  elif [[ -f /etc/sysconfig/rhn/up2date ]]; then
    sat_server=$(awk -F '=' '/serverURL=h/{print $2}' /etc/sysconfig/rhn/up2date)
    sat_server=$(cut -d '/' -f3 <<< "${sat_server}")
  else
    print_debug -x "Could not determine Satellite server for this host (${check_version})"
  fi

  # A config file that exists but names no server is just as fatal. Catch it
  # here so we report a result instead of handing probe_port an empty target.
  if [[ -z "${sat_server}" ]]; then
    print_debug -x "Could not determine Satellite server for this host (${check_version})"
  fi

  # Settle the port once so the probe and the error message always agree
  sat_port="${sat_port:-443}"

  # If we can't talk to Satellite, there's no reason to go on...
  if ! probe_port "${sat_server}" "${sat_port}"; then
    print_crit -x "Unable to communicate with Satellite (${sat_server}:${sat_port}) (${check_version})"
  fi

  # Otherwise, let's grab the version information
  rhel_ver_maj=$(rpm --qf='%{VERSION}' -q --whatprovides /etc/redhat-release)
  # Keep only the leading digits, so '7.9' -> 7, '6Server' -> 6, '10.0' -> 10.
  # Taking just the first character would turn RHEL 10 into '1'.
  rhel_ver_maj="${rhel_ver_maj%%[!0-9]*}"

  # Preflight check - ensure that yum security is installed where required
  if (( rhel_ver_maj < 5 )); then
    print_debug -x "This check is compatible with RHEL5 and newer (${check_version})"
  elif (( rhel_ver_maj == 5 )); then
    if ! rpm -q yum-security >/dev/null 2>&1; then
      print_debug -x "'yum-security' package required, but not found (${check_version})"
    fi
    # Setup yum security output. If we're on RHEL5, behave this way
    yumsec() { yum list-security 2>/dev/null; }
  elif (( rhel_ver_maj >= 6 )); then
    if (( rhel_ver_maj == 6 )) && ! rpm -q yum-plugin-security >/dev/null 2>&1; then
      print_debug -x "'yum-plugin-security' package required, but not found (${check_version})"
    fi
    yumsec() { yum updateinfo list available --quiet; }
  fi

  # Build two arrays, one with a list of available upgrades
  # We do this to prevent multiple potentially slow calls to rpmdb etc
  upgrade_array=()
  while IFS='' read -r; do
    upgrade_array+=("${REPLY}")
  done < <(repoquery -q -a --qf="%{name}-%{version}-%{release}.%{arch}" --pkgnarrow=updates)

  # The other with all available security patches
  yumsec_array=()
  while IFS='' read -r; do
    yumsec_array+=("${REPLY}")
  done < <(yumsec)

  # Grab a count of packages that are upgradable
  avail_updates="${#upgrade_array[@]}"

  # If the upgrade array is populated, then we need to gather more metrics
  if (( avail_updates > 0 )); then
    # Start by producing a third array.
    # We filter yumsec_array down to only what upgrade_array matches.
    # -F: package names are literal strings, not regular expressions
    patch_array=()
    while IFS='' read -r; do
      patch_array+=("${REPLY}")
    done < <(
      printf -- '%s\n' "${yumsec_array[@]}" \
        | grep -F -f <(printf -- '%s\n' "${upgrade_array[@]}")
    )

    # Per severity: how many patches are outstanding, and which advisories
    crit_count=$(printf -- '%s\n' "${patch_array[@]}" | grep -ci "Critical/Sec")
    crit_output=$(printf -- '%s\n' "${patch_array[@]}" | awk '/Critical\/Sec/{print $1}' | sort -t '-' -k2 | uniq | n2c)
    important_count=$(printf -- '%s\n' "${patch_array[@]}" | grep -ci "Important/Sec")
    important_output=$(printf -- '%s\n' "${patch_array[@]}" | awk '/Important\/Sec/{print $1}' | sort -t '-' -k2 | uniq | n2c)
    moderate_count=$(printf -- '%s\n' "${patch_array[@]}" | grep -ci "Moderate/Sec")
    moderate_output=$(printf -- '%s\n' "${patch_array[@]}" | awk '/Moderate\/Sec/{print $1}' | sort -t '-' -k2 | uniq | n2c)
    low_count=$(printf -- '%s\n' "${patch_array[@]}" | grep -ci "Low/Sec")
    low_output=$(printf -- '%s\n' "${patch_array[@]}" | awk '/Low\/Sec/{print $1}' | sort -t '-' -k2 | uniq | n2c)
    other_count=$(( avail_updates - (crit_count+important_count+moderate_count+low_count) ))
  fi

  # We grab the installtime of the last installed version of the 'kernel' package
  # This is a fairly reliable indicator of when the host was last patched
  # At the same time, we capture the most recent kernel version
  # (build times are numbers, so sort them numerically)
  read -r _ instTime next_kernel < <(
    rpm -q kernel --qf "%{buildtime} %{installtime} %{version}-%{release}.%{arch}\\n" \
      | sort -n \
      | tail -n 1
  )

  # This only matters if patch_array is populated
  if (( "${#patch_array[@]}" >= 1 )); then
    # Determine the oldest available patch
    oldest_patch=$(printf -- '%s\n' "${patch_array[@]}" | sort -t '-' -k2 | awk '{print $3;exit}')
    # Get the build time of the oldest available patch. Ask for the build
    # time alone: this value is compared arithmetically later on, so it must
    # be a bare number, not 'name buildtime'.
    aged_pkg_epoch=$(repoquery -q "${oldest_patch}" --qf="%{buildtime}" | head -n 1)
    # Anything that isn't a plain number is treated as unknown
    if [[ ! "${aged_pkg_epoch}" =~ ^[0-9]+$ ]]; then
      aged_pkg_epoch=""
    fi
  fi

  # And the epoch time from 'warn_threshold' days ago
  warn_epoch=$(date -d "${warn_threshold} days ago" +%s)

  # Format our extra package info output: one line per severity, each
  # terminated by a newline
  printf -v extra_output '%s\n' \
    "Critical updates: ${crit_count:-0} missing ${crit_output}" \
    "Important updates: ${important_count:-0} missing ${important_output}" \
    "Moderate updates: ${moderate_count:-0} missing ${moderate_output}" \
    "Low updates: ${low_count:-0} missing ${low_output}" \
    "Other updates: ${other_count:-0} missing"

# Debian family systems don't have tooling that's quite as developed
# Below are some notes, for anyone who wants to try to fill the gaps
# debsecan seems like a path worth investigating.
# Alternatively:
# Build tool to parse https://usn.ubuntu.com/months/ to replicate https://www.debian.org/security/201x
#elif [[ -f /etc/debian_version ]]; then
# instTime=$(zgrep -h "status installed linux-kernel" /var/log/dpkg.log* | tail -n 1 | awk '{print $1}')
# next_kernel=$(dpkg --list | awk '/linux-image-[0-9]/{print $2}' | cut -d "-" -f3- | tail -n 1)
# if [[ -x /usr/lib/update-notifier/apt-check ]]; then
# read -r avail_updates secUpdates < <(/usr/lib/update-notifier/apt-check --human-readable \
# | awk '{print $1}' \
# | paste -sd ' ' -
# )
# elif ! command -v apt-check &>/dev/null; then
# print_debug -x "'apt-check' not found, please install it into /usr/local/sbin" \
# "Grab it from https://git.launchpad.net/ubuntu/+source/update-notifier/tree/data/apt_check.py"
# else
# avail_updates=$(LANG=C apt-get upgrade -s | grep -P '^\d+ upgraded' | cut -d" " -f1)
# # This sometimes works, sometimes doesn't... seems to depend on the version of Debian/Ubuntu
# # Consider it an unreliable option, possibly it needs to be followed up with something like
# # (( secUpdates = 0 )) && secUpdates="${avail_updates}"
# secUpdates=$(LANG=C apt-get upgrade -s | grep -ci "-security")
# #secUpdates=$(apt-get -s -o Debug::NoLocking=true upgrade | grep ^Inst) #alternative option
# fi
#
# get list of package names where updates are available
# apt-get -s -o Debug::NoLocking=true upgrade | awk -F ' \\[' '/^Inst/{print $1}' | awk '{print $2}'
# # Some notes
# Try to see how apt-check does it
# aptPkg_array=()
# while IFS='' read -r; do
# aptPkg_array+=("${REPLY}")
# done < <(apt-get -s -o Debug::NoLocking=true upgrade | awk -F ' \\[' '/^Inst/{print $1}' | awk '{print $2}')
# apt-cache madison "${aptPkg_array[@]}"
#
#
else
  print_debug -x "This check is currently RHEL specific (${check_version})"
fi
# Finally, we subtract the last patch date from today's date and convert
patch_age=$(( ($(date +%s) - instTime) / 86400 ))

# Format our performance data
perf_data="Age=${patch_age};${warn_threshold};${crit_threshold};0;730|Updates=${avail_updates};1;;;"

# Format our output tail
output_tail="(Approx ${patch_age} days ago), ${avail_updates} packages have upgrades available (${check_version})"

# Build time of the oldest outstanding patch as a bare epoch number.
# The stored value may be empty (no security patches matched) or carry a
# leading package name, so keep only the last word and require digits.
# When it is unknown we assume "now", i.e. the patch is not treated as aged.
oldest_patch_epoch="${aged_pkg_epoch##* }"
if [[ ! "${oldest_patch_epoch}" =~ ^[0-9]+$ ]]; then
  oldest_patch_epoch="$(date +%s)"
fi

# Everything printed below goes both to stdout (so this run reports a result)
# and into the cache file (so later runs within the ttl can replay it).
# The group runs in a subshell because of the pipe; the 'exit' performed by
# the print_* -x helpers therefore only ends that subshell.
{
  # Now that we've gathered what we want, start by testing the kernel versions
  # Note that this is a somewhat crude measure of whether a host requires a reboot
  # On Debian family hosts there is /var/run/reboot-required
  # On *some* RedHat family hosts, there is the 'needs-restarting' command
  # Implementing handling for those is getting into feature-creep territory
  if [[ "${current_kernel}" != "${next_kernel}" ]]; then
    print_warn -x "${perf_data}" \
      "System has a newer kernel installed and needs to be rebooted" \
      "Current: ${current_kernel}" \
      "Latest: ${next_kernel}" \
      "(${check_version})"
  fi

  # Let's start some testing!
  # If the last patch time has exceeded our critical boundary, alert
  if (( instTime <= crit_thres )); then
    print_crit -x "${perf_data}" \
      "System last patched more than ${crit_threshold} days ago ${output_tail}" \
      "${extra_output}"
  # If the last patch time has exceeded our warning boundary, alert
  elif (( instTime <= warn_thres )); then
    print_warn -x "${perf_data}" \
      "System last patched more than ${warn_threshold} days ago ${output_tail}" \
      "${extra_output}"
  # If we're within a patching period, with outstanding patches older than the
  # warn boundary, alert. "Older" means built at or before the boundary, i.e.
  # a smaller epoch value.
  elif (( avail_updates >= 1 )) && (( oldest_patch_epoch <= warn_epoch )); then
    print_crit -x "${perf_data}" \
      "System patched within the last ${warn_threshold} days ${output_tail}" \
      "${extra_output}"
  # If we're within a patching period, with outstanding patches newer than the warn boundary, alert
  elif (( avail_updates >= 1 )); then
    print_warn -x "${perf_data}" \
      "System patched within the last ${warn_threshold} days ${output_tail}" \
      "${extra_output}"
  # Patched within the cycle and all available updates installed? All ok then.
  else
    print_ok -x "${perf_data}" \
      "System patched within the last ${warn_threshold} days ${output_tail}"
  fi
} | tee "${cache_file}"

# The result has been reported; finish here as every branch above intends
exit 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment