Skip to content

Instantly share code, notes, and snippets.

@pracucci
Created June 25, 2019 20:18
Show Gist options
  • Save pracucci/8e8d0eecfa6fa603be9e239e6b5fd396 to your computer and use it in GitHub Desktop.
Save pracucci/8e8d0eecfa6fa603be9e239e6b5fd396 to your computer and use it in GitHub Desktop.
#!/bin/bash
#
# On 2018-08-21 we found out the existence of a bug in the Kubernetes cluster
# which leaves stale / dangling cgroups on the system related to secret volume
# mounts.
#
# The root cause of this bug is not clear yet, even if it's likely to be a systemd
# bug because the kubelet runs `systemd-run` command to mount secret volumes and,
# according to logs, successfully unmounts such volumes once the pods terminate.
#
# Systemd should (and sometimes does) stop the systemd unit (and release its cgroups)
# once the mount is unmounted, but sometimes it doesn't. For this reason, it may be
# a systemd bug.
#
# Log functions
log_info() {
  # Send the first argument to `logger` with the user.notice priority,
  # prefixed with the script name; the -s flag is passed through as well.
  local message="$1"
  logger -s -p user.notice "cleanup-stale-cgroups.sh: ${message}"
}
# Find the systemd units created for 'Kubernetes transient mount' whose pod
# directory no longer exists, and stop them so their cgroups are released.
#
# Exit status: 0 when there is nothing to do or every stale unit was stopped,
# 1 when the units cannot be listed or at least one unit failed to stop.

# Extended regex matching a pod directory inside a unit description.
POD_PATH_REGEX='/var/lib/kubelet/pods/[^/]+'

# List all transient mount units
log_info "Listing all systemd units related to 'Kubernetes transient mount' to find stale cgroups to delete"

# --plain/--no-legend keep status bullets, indentation, header and footer out
# of the output so that every line starts with the unit name; --full prevents
# the pod path in the description from being ellipsized.
if ! UNITS=$(systemctl list-units --plain --no-legend --no-pager --full); then
  log_info "Unable to list systemd units"
  exit 1
fi

# Keep only the lines describing a transient mount of a pod volume, one array
# element per line (no global IFS change needed).
mapfile -t ENTRIES < <(
  grep -E "^[^ ]+ .*Kubernetes transient mount .*${POD_PATH_REGEX}" <<< "$UNITS"
)
if (( ${#ENTRIES[@]} == 0 )); then
  log_info "No systemd units related to 'Kubernetes transient mount' found"
  exit 0
fi

# For each transient mount systemd unit check if the pod exists. If not,
# stop the unit to cleanup the related cgroups.
log_info "Checking all systemd units related to 'Kubernetes transient mount' to find stale cgroups to delete"
FAILED=0
for ENTRY in "${ENTRIES[@]}"; do
  # Get the unit name (first column) and pod path (first match in the line)
  UNIT_NAME=${ENTRY%% *}
  # Never stop a unit whose pod path could not be determined.
  [[ "$ENTRY" =~ $POD_PATH_REGEX ]] || continue
  POD_PATH=${BASH_REMATCH[0]}

  if [[ ! -e "$POD_PATH" ]]; then
    log_info "Stopping systemd unit ${UNIT_NAME} related to ${POD_PATH}"
    # Keep going on failure so that the remaining stale units still get
    # cleaned up, but remember it for the final status.
    if ! systemctl stop -- "$UNIT_NAME"; then
      log_info "Failed to stop systemd unit ${UNIT_NAME}"
      FAILED=1
    fi
  fi
done

if (( FAILED )); then
  log_info "Cleanup completed with errors"
  exit 1
fi
log_info "Cleanup successfully completed"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment