Last active
February 28, 2022 14:31
-
-
Save rawiriblundell/38742646602aa42d6ce47345eaaae2c1 to your computer and use it in GitHub Desktop.
CheckMK local check for capturing available updates
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# check_linux_lastpatched - Check when a Linux host was last patched and warn
#                           if a specified time period is exceeded
# Purpose: checkmk local check to ensure we keep Linux hosts patched
# Author: Rawiri Blundell
# Copyright: See provided LICENCE file
# Date: 20211102 partial rewrite, originally written 20170828
# Usage: ./check_linux_lastpatched [warn threshold (days)] [crit threshold (days)]
# n.b. warn threshold defaults to 9 months, crit threshold to 12.
########################################
# Name of the service as it is printed in the check result line
service_name="Linux Patching"

# If we want this data to be cached, define a TTL here in seconds
# (the cache is only consulted when this is greater than 0)
cache_ttl=86400

# Warning threshold in days. If an argument is supplied, we use it,
# otherwise we default to 9 months (rounded up by half a day)
warn_threshold="${1:-274}"

# Critical threshold in days. No host should be unpatched for more than a
# year, but this can be overridden e.g. legacy hosts or version locked hosts
crit_threshold="${2:-365}"

# Convert both thresholds into epoch timestamps ("N days ago")
warn_thres=$(date -d "${warn_threshold} days ago" +%s)
crit_thres=$(date -d "${crit_threshold} days ago" +%s)

# Define check_version here if you need to specify it
#check_version=

# If check_version isn't defined, we default to the checkmk agent version,
# scraped from the third field of the 'Version:' line(s) of the agent script.
# This is an(other) instance where having a global MK_AGENT_VERSION variable
# would be useful.
# If we can't determine the agent version, we fall back to 'UNKNOWN'
if (( "${#check_version}" == 0 )); then
  if command -v check_mk_agent >/dev/null 2>&1; then
    check_version="$(awk '/Version:/{print $3}' "$(command -v check_mk_agent)")"
  fi
  check_version="${check_version:-UNKNOWN}"
fi
# The following functions are broad approximations of functions originally
# supplied by libconfmap.sh

# Print one result line in the form: <status> "<service_name>" <text>
# Usage:   print_out [-x] <status> <text...>
# Globals: service_name (read)
# Args:    -x      optional; exit 0 immediately after printing. This is
#                  required sometimes to prevent further processing and
#                  unwanted extra output
#          status  numeric state, printed as the first field
#          text    remaining arguments, joined with spaces. Callers are
#                  expected to put the metrics field (or '-') first
# Trailing newlines in the text are dropped and embedded newlines are
# written as a literal '\n' so that the result always stays on one line.
print_out() {
  local exit_after=0
  local status text
  local nl=$'\n'

  if [[ "${1}" == "-x" ]]; then
    exit_after=1
    shift
  fi

  # Take the status off the argument list so that it is printed once only
  status="${1}"
  shift
  text="${*}"

  while [[ "${text}" == *"${nl}" ]]; do
    text="${text%"${nl}"}"
  done
  text="${text//${nl}/\\n}"

  printf -- '%s "%s" %s\n' "${status}" "${service_name}" "${text}"

  if (( exit_after == 1 )); then
    exit 0
  fi
}
# Report a result with status 2 through print_out.
# Usage: print_crit [-x] <text...>
# With '-x' as the first argument, the flag is removed from the text and
# handed to print_out so that it exits after printing.
print_crit() {
  if [[ "${1}" == "-x" ]]; then
    shift
    print_out -x 2 "${*}"
  else
    print_out 2 "${*}"
  fi
}
# Report a result with status 3 through print_out.
# Usage: print_debug [-x] <text...>
# With '-x' as the first argument, the flag is removed from the text and
# handed to print_out so that it exits after printing.
print_debug() {
  if [[ "${1}" == "-x" ]]; then
    shift
    print_out -x 3 "${*}"
  else
    print_out 3 "${*}"
  fi
}
# Report a result with status 0 through print_out.
# Usage: print_ok [-x] <text...>
# With '-x' as the first argument, the flag is removed from the text and
# handed to print_out so that it exits after printing.
print_ok() {
  if [[ "${1}" == "-x" ]]; then
    shift
    print_out -x 0 "${*}"
  else
    print_out 0 "${*}"
  fi
}
# Report a result with status 1 through print_out.
# Usage: print_warn [-x] <text...>
# With '-x' as the first argument, the flag is removed from the text and
# handed to print_out so that it exits after printing.
print_warn() {
  if [[ "${1}" == "-x" ]]; then
    shift
    print_out -x 1 "${*}"
  else
    print_out 1 "${*}"
  fi
}
# Check that our cache variable, directory and file are in place
if (( "${#MK_VARDIR}" == 0 )); then
  if command -v check_mk_agent >/dev/null 2>&1; then
    # Scrape the value from the agent script itself. awk must be given the
    # file explicitly, otherwise it waits on stdin.
    # NOTE(review): the 'MK_VARDIR:' pattern and '=' split are kept as they
    # were; confirm they match the agent version in use
    MK_VARDIR="$(awk -F '=' '/MK_VARDIR:/{print $2}' "$(command -v check_mk_agent)")"
  else
    # '-' fills the metrics field, as this result carries no metrics
    print_debug -x "-" "Unable to source env variables from agent"
  fi
  : "${MK_VARDIR:?MK_VARDIR undefined}"
fi

if [[ ! -d "${MK_VARDIR}/cache" ]]; then
  mkdir -p "${MK_VARDIR}/cache" ||
    print_debug -x "-" "Could not create ${MK_VARDIR}/cache"
fi

# The cache file is named after the service, with spaces made underscores
cache_file="${MK_VARDIR}/cache/${service_name// /_}.cache"

# If cache_ttl is set, we test the cache file's age
# If it's still within the ttl, just dump it and exit
# TO-DO: Build a bit more smarts to auto-renew the cache within its ttl under some conditions
if (( cache_ttl > 0 )); then
  if [[ -e "${cache_file}" ]]; then
    time_now="${EPOCHSECONDS:-$(date +%s)}"
    # Despite the name, this holds the file's modification time (epoch)
    cache_file_age="$(stat -c %Y "${cache_file}")"
    # ...and this holds the number of seconds since that modification
    cache_file_threshold=$(( time_now - cache_file_age ))
    if (( cache_file_threshold < cache_ttl )); then
      cat "${cache_file}"
      exit 0
    fi
  fi
fi
# Capture the release string of the currently running kernel
current_kernel=$(uname -r)
# Convert multiple lines to comma separated format
# Usage: n2c [file]
# Reads the named file, or stdin when no argument is given
# See also c2n() for the opposite behaviour
# shellcheck disable=SC2120
n2c() {
  paste -sd ',' "${1:--}"
}
# A small function to test connectivity to a remote host's port.
# Usage: probe_port [remote host] [port (default: 22)] [tcp/udp (default: tcp)]
# Returns: 0 if /dev/<proto>/<host>/<port> could be opened within one second,
#          non-zero otherwise. A missing host aborts with 'No target'.
probe_port() {
  local target="${1:?No target}"
  local port="${2:-22}"
  local proto="${3:-tcp}"

  # The values are handed to the child shell as positional parameters rather
  # than being pasted into its command string, so they cannot be run as code
  timeout 1 bash -c '</dev/"${1}"/"${2}"/"${3}"' _ \
    "${proto}" "${target}" "${port}" 2>/dev/null
}
# First check if it's a RHEL host
if [[ -f /etc/redhat-release ]]; then
  # Possible TO-DO: Add support for 'tracer', requires katello tools e.g. yum install katello-host-tools-tracer

  # Try to figure out which Satellite server we're hooked up to
  if [[ -f /etc/rhsm/rhsm.conf ]]; then
    sat_server=$(awk '/^hostname/{print $3}' /etc/rhsm/rhsm.conf)
    sat_port=$(awk '/^port/{print $3}' /etc/rhsm/rhsm.conf)
  elif [[ -f /etc/sysconfig/rhn/up2date ]]; then
    sat_server=$(awk -F '=' '/serverURL=h/{print $2}' /etc/sysconfig/rhn/up2date)
    sat_server=$(cut -d '/' -f3 <<< "${sat_server}")
  else
    # '-' fills the metrics field, as this result carries no metrics
    print_debug -x "-" "Could not determine Satellite server for this host (${check_version})"
  fi

  # Settle the port once, so that the probe and its error message agree
  sat_port="${sat_port:-443}"

  # If we can't talk to Satellite, there's no reason to go on...
  if ! probe_port "${sat_server}" "${sat_port}"; then
    print_crit -x "-" "Unable to communicate with Satellite (${sat_server}:${sat_port}) (${check_version})"
  fi

  # Otherwise, let's grab the version information
  rhel_ver_maj=$(rpm --qf='%{VERSION}' -q --whatprovides /etc/redhat-release)
  # Keep only the leading run of digits. Taking a single character would
  # misread a two-digit major version such as 10 as 1.
  if [[ "${rhel_ver_maj}" =~ ^[0-9]+ ]]; then
    rhel_ver_maj="${BASH_REMATCH[0]}"
  else
    rhel_ver_maj=0
  fi

  # Preflight check - ensure that yum security is installed where required
  if (( rhel_ver_maj < 5 )); then
    print_debug -x "-" "This check is compatible with RHEL5 and newer (${check_version})"
  elif (( rhel_ver_maj == 5 )); then
    if ! rpm -q yum-security >/dev/null 2>&1; then
      print_debug -x "-" "'yum-security' package required, but not found (${check_version})"
    fi
    # Setup yum security output. If we're on RHEL5, behave this way
    yumsec() { yum list-security 2>/dev/null; }
  elif (( rhel_ver_maj >= 6 )); then
    if (( rhel_ver_maj == 6 )) && ! rpm -q yum-plugin-security >/dev/null 2>&1; then
      print_debug -x "-" "'yum-plugin-security' package required, but not found (${check_version})"
    fi
    yumsec() { yum updateinfo list available --quiet; }
  fi

  # Build two arrays, one with a list of available upgrades
  # We do this to prevent multiple potentially slow calls to rpmdb etc
  upgrade_array=()
  while IFS='' read -r; do
    upgrade_array+=("${REPLY}")
  done < <(repoquery -q -a --qf="%{name}-%{version}-%{release}.%{arch}" --pkgnarrow=updates)

  # The other with all available security patches
  yumsec_array=()
  while IFS='' read -r; do
    yumsec_array+=("${REPLY}")
  done < <(yumsec)

  # Grab a count of packages that are upgradable
  avail_updates="${#upgrade_array[@]}"

  # If the upgrade array is populated, then we need to gather more metrics
  if (( avail_updates > 0 )); then
    # Start by producing a third array.
    # We filter yumsec_array down to only what upgrade_array matches.
    # -F: the package names are matched as fixed strings, not as regexes
    patch_array=()
    while IFS='' read -r; do
      patch_array+=("${REPLY}")
    done < <(
      printf -- '%s\n' "${yumsec_array[@]}" |
        grep -F -f <(printf -- '%s\n' "${upgrade_array[@]}")
    )

    # Per severity: a count of matching lines, plus a comma separated list
    # of the first field of those lines
    crit_count=$(printf -- '%s\n' "${patch_array[@]}" | grep -ci "Critical/Sec")
    crit_output=$(printf -- '%s\n' "${patch_array[@]}" | awk '/Critical\/Sec/{print $1}' | sort -t '-' -k2 | uniq | n2c)
    important_count=$(printf -- '%s\n' "${patch_array[@]}" | grep -ci "Important/Sec")
    important_output=$(printf -- '%s\n' "${patch_array[@]}" | awk '/Important\/Sec/{print $1}' | sort -t '-' -k2 | uniq | n2c)
    moderate_count=$(printf -- '%s\n' "${patch_array[@]}" | grep -ci "Moderate/Sec")
    moderate_output=$(printf -- '%s\n' "${patch_array[@]}" | awk '/Moderate\/Sec/{print $1}' | sort -t '-' -k2 | uniq | n2c)
    low_count=$(printf -- '%s\n' "${patch_array[@]}" | grep -ci "Low/Sec")
    low_output=$(printf -- '%s\n' "${patch_array[@]}" | awk '/Low\/Sec/{print $1}' | sort -t '-' -k2 | uniq | n2c)
    other_count=$(( avail_updates - (crit_count+important_count+moderate_count+low_count) ))
  fi

  # We grab the installtime of the last installed version of the 'kernel' package
  # This is a fairly reliable indicator of when the host was last patched
  # At the same time, we capture the most recent kernel version
  # The lines are ordered numerically by their first field (buildtime) and
  # the last one is kept
  read -r _ instTime next_kernel < <(
    rpm -q kernel --qf "%{buildtime} %{installtime} %{version}-%{release}.%{arch}\\n" \
      | sort -n \
      | tail -n 1
  )

  # This only matters if patch_array is populated
  if (( "${#patch_array[@]}" >= 1 )); then
    # Determine the oldest available patch
    oldest_patch=$(printf -- '%s\n' "${patch_array[@]}" | sort -t '-' -k2 | awk '{print $3;exit}')
    # Get the build time of the oldest available patch. Only the epoch is
    # requested, and only the first line kept, because the value is later
    # used in arithmetic comparisons
    aged_pkg_epoch=$(repoquery -q "${oldest_patch}" --qf="%{buildtime}" | head -n 1)
  fi

  # And the epoch time from 'warn_threshold' days ago
  warn_epoch=$(date -d "${warn_threshold} days ago" +%s)

  # Format our extra package info output
  # (read returns non-zero here as it never sees its NUL delimiter; the
  # variable is still populated)
  IFS='' read -r -d '' extra_output <<EOF
Critical updates: ${crit_count:-0} missing ${crit_output}
Important updates: ${important_count:-0} missing ${important_output}
Moderate updates: ${moderate_count:-0} missing ${moderate_output}
Low updates: ${low_count:-0} missing ${low_output}
Other updates: ${other_count:-0} missing
EOF

# Debian family systems don't have tooling that's quite as developed
# Below are some notes, for anyone who wants to try to fill the gaps
# debsecan seems like a path worth investigating.
# Alternatively:
# Build tool to parse https://usn.ubuntu.com/months/ to replicate https://www.debian.org/security/201x
#elif [[ -f /etc/debian_version ]]; then
#  instTime=$(zgrep -h "status installed linux-kernel" /var/log/dpkg.log* | tail -n 1 | awk '{print $1}')
#  next_kernel=$(dpkg --list | awk '/linux-image-[0-9]/{print $2}' | cut -d "-" -f3- | tail -n 1)
#  if [[ -x /usr/lib/update-notifier/apt-check ]]; then
#    read -r avail_updates secUpdates < <(/usr/lib/update-notifier/apt-check --human-readable \
#      | awk '{print $1}' \
#      | paste -sd ' ' -
#    )
#  elif ! command -v apt-check &>/dev/null; then
#    print_debug -x "'apt-check' not found, please install it into /usr/local/sbin" \
#      "Grab it from https://git.launchpad.net/ubuntu/+source/update-notifier/tree/data/apt_check.py"
#  else
#    avail_updates=$(LANG=C apt-get upgrade -s | grep -P '^\d+ upgraded' | cut -d" " -f1)
#    # This sometimes works, sometimes doesn't... seems to depend on the version of Debian/Ubuntu
#    # Consider it an unreliable option, possibly it needs to be followed up with something like
#    # (( secUpdates = 0 )) && secUpdates="${avail_updates}"
#    secUpdates=$(LANG=C apt-get upgrade -s | grep -ci "-security")
#    #secUpdates=$(apt-get -s -o Debug::NoLocking=true upgrade | grep ^Inst) #alternative option
#  fi
#
#  get list of package names where updates are available
#  apt-get -s -o Debug::NoLocking=true upgrade | awk -F ' \\[' '/^Inst/{print $1}' | awk '{print $2}'
#  # Some notes
#  Try to see how apt-check does it
#  aptPkg_array=()
#  while IFS='' read -r; do
#    aptPkg_array+=("${REPLY}")
#  done < <(apt-get -s -o Debug::NoLocking=true upgrade | awk -F ' \\[' '/^Inst/{print $1}' | awk '{print $2}')
#  apt-cache madison "${aptPkg_array[@]}"
#
#
else
  print_debug -x "-" "This check is currently RHEL specific (${check_version})"
fi
# Finally, we subtract the last patch date from today's date and convert
# the difference from seconds into whole days
patch_age=$(( ($(date +%s) - instTime) / 86400 ))

# Format our performance data: the patch age against the warn and crit
# thresholds, and the number of available updates
perf_data="Age=${patch_age};${warn_threshold};${crit_threshold};0;730|Updates=${avail_updates};1;;;"

# Format our output tail, which is appended to the status messages
output_tail="(Approx ${patch_age} days ago), ${avail_updates} packages have upgrades available (${check_version})"
# Decide on the final state, print it, and store a copy in the cache file.

# aged_pkg_epoch may hold either a bare epoch or "<name> <epoch>", so only
# the last space-separated word is used, and only if it is a plain number.
# When it is not usable, the age of the oldest patch is treated as unknown.
oldest_patch_epoch="${aged_pkg_epoch##* }"
if [[ ! "${oldest_patch_epoch}" =~ ^[0-9]+$ ]]; then
  oldest_patch_epoch=""
fi

# The tests run in a subshell: the 'print_* -x' calls exit after printing,
# which here ends only the subshell. Its output is then both printed and
# written to the cache file by tee, so a cache refresh still reports a result.
(
  # Now that we've gathered what we want, start by testing the kernel versions
  # Note that this is a somewhat crude measure of whether a host requires a reboot
  # On Debian family hosts there is /var/run/reboot-required
  # On *some* RedHat family hosts, there is the 'needs-restarting' command
  # Implementing handling for those is getting into feature-creep territory
  if [[ "${current_kernel}" != "${next_kernel}" ]]; then
    print_warn -x "${perf_data}" \
      "System has a newer kernel installed and needs to be rebooted" \
      "Current: ${current_kernel}" \
      "Latest: ${next_kernel}" \
      "(${check_version})"
  fi

  # Let's start some testing!
  # If the last patch time has exceeded our critical boundary, alert
  if (( instTime <= crit_thres )); then
    print_crit -x "${perf_data}" \
      "System last patched more than ${crit_threshold} days ago ${output_tail}" \
      "${extra_output}"
  # If the last patch time has exceeded our warning boundary, alert
  elif (( instTime <= warn_thres )); then
    print_warn -x "${perf_data}" \
      "System last patched more than ${warn_threshold} days ago ${output_tail}" \
      "${extra_output}"
  # If we're within a patching period, with outstanding patches older than the warn boundary, alert
  # (older means a smaller epoch than the boundary)
  elif (( avail_updates >= 1 )) && [[ -n "${oldest_patch_epoch}" ]] \
    && (( oldest_patch_epoch <= warn_epoch )); then
    print_crit -x "${perf_data}" \
      "System patched within the last ${warn_threshold} days ${output_tail}" \
      "${extra_output}"
  # If we're within a patching period, with outstanding patches that are newer than
  # the warn boundary (or of unknown age), alert
  elif (( avail_updates >= 1 )); then
    print_warn -x "${perf_data}" \
      "System patched within the last ${warn_threshold} days ${output_tail}" \
      "${extra_output}"
  # Patched within the cycle and all available updates installed? All ok then.
  else
    print_ok -x "${perf_data}" \
      "System patched within the last ${warn_threshold} days ${output_tail}"
  fi
) | tee "${cache_file}"

# Every path above ends the check, so stop here
exit 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment