Skip to content

Instantly share code, notes, and snippets.

@ffppmm
Forked from aneagoe/cm-garbage-collector.yaml
Created January 20, 2023 08:46
Show Gist options
  • Save ffppmm/a35f94aaab5f016f23fcde290727e001 to your computer and use it in GitHub Desktop.
Save ffppmm/a35f94aaab5f016f23fcde290727e001 to your computer and use it in GitHub Desktop.
Kubernetes garbage collector DaemonSet (for https://github.com/kubernetes/kubernetes/issues/106957)
---
# ConfigMap "gc-script" in namespace "garbage-collector".
# Its single data key "gc" holds the bash garbage-collector script as a
# literal block scalar ("|-": keep line breaks, strip the final newline).
# NOTE(review): nesting indentation appears to have been lost in this copy;
# the children of "metadata" and "data", and the script body that follows
# "gc: |-", need to be indented for the manifest to parse - confirm against
# the original file.
apiVersion: v1
metadata:
name: gc-script
namespace: garbage-collector
kind: ConfigMap
data:
gc: |-
#!/bin/bash
# Defaults.
# Seconds to sleep between two garbage-collection runs; overridden by -s.
SLEEP_INTERVAL=300
# cgroup scope names collected by gc_pods and consulted by gc_cgroups so that
# scopes belonging to pods seen in the last run are never removed.
POD_SCOPES=()
#######################################
# Print a log line prefixed with the current UTC time in ISO-8601 format
# (second resolution).
# Arguments:
#   The message words; they are joined with single spaces.
# Outputs:
#   "<timestamp> <message>" on stdout.
#######################################
logger() {
  local stamp
  stamp="$(TZ=UTC date --iso-8601=seconds)"
  printf '%s %s\n' "${stamp}" "$*"
}
#######################################
# Print the one-line command synopsis.
# Outputs:
#   The usage string on stdout.
#######################################
usage() {
  printf '%s\n' "usage: $0 [-s SLEEP_INTERVAL (seconds)]"
}
# Parse command-line options:
#   -h            print usage and exit 0
#   -s SECONDS    override SLEEP_INTERVAL
# getopts runs in silent mode (leading ':'), so a missing argument and an
# unknown option are reported here. They used to be ignored silently; they are
# now logged but still non-fatal, so an empty "-s" keeps falling back to the
# default interval.
while getopts ":s:h" opt; do
  case "${opt}" in
    h)
      usage
      exit 0
      ;;
    s)
      SLEEP_INTERVAL="${OPTARG}"
      ;;
    :)
      logger "Option -${OPTARG} requires an argument, keeping SLEEP_INTERVAL=${SLEEP_INTERVAL}"
      ;;
    \?)
      logger "Ignoring unknown option -${OPTARG}"
      ;;
  esac
done

# The interval is handed to sleep, which rejects negative numbers. Require a
# plain non-negative integer so a bad value fails here instead of turning the
# main loop into a busy loop.
if [[ ! "${SLEEP_INTERVAL}" =~ ^[0-9]+$ ]]; then
  logger "${SLEEP_INTERVAL} is not a non-negative integer"
  usage
  exit 1
fi
#######################################
# Remove pod sandboxes that the Kubernetes API no longer knows about.
#
# Every sandbox listed by `crictl pods -q` is inspected. Its cgroup scope name
# ("crio-" + last ':'-separated field of cgroupsPath + ".scope") is recorded in
# POD_SCOPES. A sandbox is stopped and removed only when all of these hold:
#   * `kubectl get pod` fails for its name/namespace,
#   * the API server still answers /readyz (guards against false positives
#     while the API is down),
#   * no process is left in its network namespace.
#
# Globals:
#   POD_SCOPES (written): reset, then one entry per sandbox whose cgroupsPath
#                         could be read.
# Arguments:
#   None
# Returns:
#   1 if the sandbox list cannot be obtained, 0 otherwise.
#######################################
gc_pods(){
  local kubeconfig=/var/lib/kubelet/kubeconfig
  local pod_list pod_id json_dump cgroups_path
  local pod_name pod_namespace pod_netns pod_pids
  local -a pod_ids=()

  POD_SCOPES=()

  if ! pod_list="$(crictl pods -q)"; then
    logger "Error listing pods"
    return 1
  fi
  if [[ -n "${pod_list}" ]]; then
    mapfile -t pod_ids <<<"${pod_list}"
  fi

  for pod_id in "${pod_ids[@]}"; do
    [[ -n "${pod_id}" ]] || continue

    if ! json_dump="$(crictl inspectp "${pod_id}")"; then
      logger "Error inspecting pod with ID ${pod_id}"
      continue
    fi

    # "// empty" turns a missing/null field into empty output instead of the
    # literal string "null", so the -z checks below really detect it.
    cgroups_path="$(jq -r '.info.runtimeSpec.linux.cgroupsPath // empty' <<<"${json_dump}")"
    if [[ -z "${cgroups_path}" ]]; then
      logger "Error fetching pod SCOPE for pod with ID ${pod_id}"
      continue
    fi
    POD_SCOPES+=("crio-${cgroups_path##*:}.scope")

    # An empty name must never reach kubectl: the lookup below would then say
    # nothing about this particular pod.
    pod_name="$(jq -r '.status.metadata.name // empty' <<<"${json_dump}")"
    if [[ -z "${pod_name}" ]]; then
      logger "Error fetching pod NAME for pod with ID ${pod_id}"
      continue
    fi

    pod_namespace="$(jq -r '.status.metadata.namespace // empty' <<<"${json_dump}")"
    if [[ -z "${pod_namespace}" ]]; then
      logger "Error fetching pod NAMESPACE for pod with ID ${pod_id}"
      continue
    fi

    # Basename of the namespace paths that live under a "run" directory.
    pod_netns="$(
      jq -r '.info.runtimeSpec.linux.namespaces[]? | .path // empty' <<<"${json_dump}" \
        | grep run \
        | awk -F/ '{print $NF}' \
        | sort -u
    )"
    if [[ -z "${pod_netns}" ]]; then
      logger "Error fetching pod namespace for pod with ID ${pod_id}"
      continue
    fi

    # Collect the PIDs still running inside the pod's network namespace, if
    # that namespace exists. The first column of `ip netns list` is the name;
    # compare it exactly instead of as a substring regex.
    pod_pids=""
    if ip netns list | awk '{print $1}' | grep -qxF -- "${pod_netns}"; then
      if ! pod_pids="$(ip netns pids "${pod_netns}")"; then
        logger "Error fetching pod PIDs for pod ${pod_name}"
        continue
      fi
    fi

    # check if pod is known to k8s control plane; known pods are left alone
    if KUBECONFIG="${kubeconfig}" kubectl get pod "${pod_name}" -n "${pod_namespace}" &>/dev/null; then
      continue
    fi

    # additional check for safety, making sure that if there's a problem with
    # apiserver we don't blindly remove pods with running processes
    if ! KUBECONFIG="${kubeconfig}" kubectl get --raw='/readyz' &>/dev/null; then
      logger "Kubernetes API unavailable. Could be false positive so skipping deletion of POD ${pod_name}"
      continue
    fi

    if [[ -z "${pod_pids//[[:space:]]/}" ]]; then
      logger "Found POD ${pod_name} unknown to k8s control plane and without any PIDs, will delete it..."
      crictl stopp "${pod_id}" || logger "Failed to stop POD ${pod_name}"
      crictl rmp "${pod_id}" || logger "Error removing POD ${pod_name}"
    fi
  done
  return 0
}
#######################################
# Remove leftover "crio-*.scope" cgroup directories (and their parent
# directory) that the journal complained about during the last 10 minutes.
#
# Candidate scopes come from two kinds of journal messages:
#   * "Failed to update stats for container" / "Failed to create existing
#     container": the scope name is taken straight from the message;
#   * "Unable to fetch pod log stats": the last '_'-separated field of the
#     /var... path (presumably the pod UID - confirm) is converted from '-' to
#     '_' and looked up among the cgroup directories to find its crio scopes.
# Scopes listed in POD_SCOPES are skipped.
#
# Globals:
#   POD_SCOPES (read): scope names filled in by gc_pods.
# Arguments:
#   None
# Returns:
#   1 if the journal cannot be read, 0 otherwise.
#######################################
gc_cgroups(){
  local journal pod_uid scope_name scope_dir
  local -a cgroup_dirs=() leftover_tmp=() leftover=()

  mapfile -t cgroup_dirs < <(find /sys/fs/cgroup -type d)

  # Read the journal once and reuse it for both searches.
  if ! journal="$(journalctl --since "10m ago")"; then
    logger "Failed to read journal, skipping cgroup cleanup"
    return 1
  fi

  mapfile -t leftover_tmp < <(
    grep -E 'Failed to update stats for container|Failed to create existing container' <<<"${journal}" \
      | grep -o 'crio-.*scope' \
      | sort -u
  )

  while IFS= read -r pod_uid; do
    [[ -n "${pod_uid}" ]] || continue
    while IFS= read -r scope_name; do
      [[ -n "${scope_name}" ]] || continue
      leftover_tmp+=("${scope_name}")
    done < <(
      printf -- '%s\n' "${cgroup_dirs[@]}" \
        | grep -F -- "${pod_uid}" \
        | grep -o 'crio.*$'
    )
  done < <(
    # Stop at the first ':' after the path so trailing text that happens to
    # contain a colon cannot leak into the extracted ID.
    grep 'Unable to fetch pod log stats' <<<"${journal}" \
      | grep -o '/var[^:]*:' \
      | tr -d ':' \
      | awk -F_ '{print $NF}' \
      | sed 's/-/_/g' \
      | sort -u
  )

  mapfile -t leftover < <(printf -- '%s\n' "${leftover_tmp[@]}" | sort -u)

  for scope_name in "${leftover[@]}"; do
    # An empty name would match every cgroup directory below - never allow it.
    [[ -n "${scope_name}" ]] || continue

    if [[ " ${POD_SCOPES[*]} " == *" ${scope_name} "* ]]; then
      logger "Scope ${scope_name} found under running pod, skipping..."
      continue
    fi

    while IFS= read -r scope_dir; do
      [[ -n "${scope_dir}" ]] || continue
      logger "Removing CGROUP ${scope_name} and its parent..."
      if rmdir -- "${scope_dir}"; then
        rmdir -- "$(dirname -- "${scope_dir}")" \
          || logger "Failed to remove parent for CGROUP ${scope_dir}..."
      else
        logger "Failed to remove CGROUP ${scope_dir}..."
      fi
    done < <(printf -- '%s\n' "${cgroup_dirs[@]}" | grep -F -- "${scope_name}")
  done
  return 0
}
# Wait one minute before the first run so the collector can set up properly
# and does not race with pods that are still starting.
sleep 60

# Main loop: one garbage-collection pass per SLEEP_INTERVAL seconds. A pass is
# skipped entirely while the API server does not answer /readyz.
while :; do
  logger "Starting k8s garbage collector run..."
  if KUBECONFIG=/var/lib/kubelet/kubeconfig kubectl get --raw='/readyz' &>/dev/null; then
    gc_pods
    gc_cgroups
  else
    logger "Kubernetes API unavailable. Cancelling run."
  fi
  logger "Sleeping for ${SLEEP_INTERVAL} seconds..."
  sleep "${SLEEP_INTERVAL}"
done
---
# DaemonSet that runs the garbage-collector script on every Linux node.
# The container copies the script from the gc-script ConfigMap onto the host
# filesystem (mounted at /host) and runs it chrooted into the host.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: k8s-gc
  namespace: garbage-collector
spec:
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      app: k8s-gc
  template:
    metadata:
      labels:
        app: k8s-gc
    spec:
      containers:
        - name: garbage-collector
          image: image-registry.openshift-image-registry.svc:5000/openshift/cli
          imagePullPolicy: IfNotPresent
          command: ["/bin/sh"]
          # Steps are chained with && so a failed copy or chmod never ends up
          # executing a stale script left on the host by an earlier run.
          args: ["-c", "cp /tmp/gc.sh /host/tmp/gc.sh && chmod +x /host/tmp/gc.sh && chroot /host ./tmp/gc.sh -s $SLEEP_INTERVAL"]
          env:
            # Seconds between two runs; passed to the script via -s.
            - name: SLEEP_INTERVAL
              value: "600"
          securityContext:
            privileged: true
            runAsUser: 0
          volumeMounts:
            - mountPath: /host
              name: host
            # Mount only the "gc" key of the ConfigMap as a single file.
            - mountPath: "/tmp/gc.sh"
              subPath: gc
              name: gc-script
      enableServiceLinks: true
      hostNetwork: true
      hostPID: true
      # The deprecated "serviceAccount" alias is omitted; serviceAccountName
      # is the supported field.
      serviceAccountName: garbage-collector
      terminationGracePeriodSeconds: 30
      volumes:
        - name: gc-script
          configMap:
            name: gc-script
        - name: host
          hostPath:
            path: /
            type: Directory
      nodeSelector:
        kubernetes.io/os: linux
      # Tolerate every taint so the collector also runs on tainted nodes.
      tolerations:
        - operator: Exists
  updateStrategy:
    type: OnDelete
---
# Namespace that holds all garbage-collector resources.
apiVersion: v1
kind: Namespace
metadata:
  name: garbage-collector
  annotations:
    # Empty node selector for this namespace.
    openshift.io/node-selector: ""
  labels:
    openshift.io/cluster-monitoring: "true"
---
# ServiceAccount used by the k8s-gc DaemonSet pods.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: garbage-collector
  namespace: garbage-collector
---
# Binds the garbage-collector ServiceAccount to the
# system:openshift:scc:anyuid ClusterRole within this namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: system:openshift:scc:anyuid
  namespace: garbage-collector
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:openshift:scc:anyuid
subjects:
  - kind: ServiceAccount
    name: garbage-collector
    namespace: garbage-collector
---
# Binds the garbage-collector ServiceAccount to the
# system:openshift:scc:privileged ClusterRole within this namespace; the
# DaemonSet container runs with securityContext.privileged: true.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: system:openshift:scc:privileged
  namespace: garbage-collector
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:openshift:scc:privileged
subjects:
  - kind: ServiceAccount
    name: garbage-collector
    namespace: garbage-collector
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment