-
-
Save ffppmm/a35f94aaab5f016f23fcde290727e001 to your computer and use it in GitHub Desktop.
k8s garbage collector daemon set (for https://github.com/kubernetes/kubernetes/issues/106957)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- | |
apiVersion: v1 | |
metadata: | |
name: gc-script | |
namespace: garbage-collector | |
kind: ConfigMap | |
data: | |
gc: |- | |
#!/bin/bash | |
# set defaults | |
SLEEP_INTERVAL=300 | |
POD_SCOPES=() | |
logger() | |
{ | |
echo "`TZ=UTC date --iso-8601=seconds` $@" | |
} | |
usage() | |
{ | |
echo "usage: $0 [-s SLEEP_INTERVAL (seconds)]" | |
} | |
while getopts ":s:h" opt; do | |
case $opt in | |
h ) | |
usage | |
exit 0 | |
;; | |
s ) | |
SLEEP_INTERVAL=${OPTARG} | |
;; | |
esac | |
done | |
if [[ ${SLEEP_INTERVAL} != ?(-)+([0-9]) ]]; then | |
logger "${SLEEP_INTERVAL} is not an integer" | |
usage | |
exit 1 | |
fi | |
gc_pods(){ | |
POD_IDS=($(crictl pods -q)) | |
POD_SCOPES=() | |
for POD_ID in ${POD_IDS[@]}; do | |
JSONDUMP="`crictl inspectp ${POD_ID}`" | |
POD_NAME="`echo ${JSONDUMP} | jq -r '.status.metadata.name'`" | |
POD_SCOPE="`echo ${JSONDUMP} | jq -r '.info.runtimeSpec.linux.cgroupsPath' | awk -F: '{print "crio-" $NF ".scope"}'`" | |
if [[ $? -ne 0 || -z "${POD_SCOPE}" ]]; then | |
logger "Error fetching pod SCOPE for pod with ID ${POD_ID}" | |
continue | |
else | |
POD_SCOPES+=($POD_SCOPE) | |
fi | |
POD_NAMESPACE="`echo ${JSONDUMP} | jq -r '.status.metadata.namespace'`" | |
if [[ $? -ne 0 || -z "${POD_NAMESPACE}" ]]; then | |
logger "Error fetching pod NAMESPACE for pod with ID ${POD_ID}" | |
continue | |
fi | |
POD_CREATED="`echo ${JSONDUMP} | jq -r '.status.createdAt'`" | |
if [[ $? -ne 0 || -z "${POD_CREATED}" ]]; then | |
logger "Error fetching pod created timestamp for pod with ID ${POD_ID}" | |
continue | |
fi | |
POD_NS="`echo ${JSONDUMP} | jq -r '.info.runtimeSpec.linux.namespaces[]|.path' | grep run | awk -F\/ '{print $NF}' | sort -u`" | |
if [[ $? -ne 0 || -z "${POD_NS}" ]]; then | |
logger "Error fetching pod namespace for pod with ID ${POD_ID}" | |
continue | |
fi | |
if ip netns list | grep -q ${POD_NS}; then | |
POD_PIDS=($(ip netns pids ${POD_NS})) | |
if [[ $? -ne 0 ]]; then | |
logger "Error fetching pod PIDs for pod ${POD_NAME}" | |
continue | |
fi | |
else | |
POD_PIDS=() | |
fi | |
# check if pod is known to k8s control plane | |
KUBECONFIG=/var/lib/kubelet/kubeconfig kubectl get pod ${POD_NAME} -n ${POD_NAMESPACE} &>/dev/null | |
if [[ $? -ne 0 ]]; then | |
# additional check for safety, making sure that if there's a problem with apiserver we don't blindly remove pods with running processes | |
KUBECONFIG=/var/lib/kubelet/kubeconfig kubectl get --raw='/readyz' &>/dev/null | |
if [[ $? -ne 0 ]]; then | |
logger "Kubernetes API unavailable. Could be false positive so skipping deletion of POD ${POD_NAME}" | |
continue | |
fi | |
if [[ ${#POD_PIDS[@]} -eq 0 ]]; then | |
logger "Found POD ${POD_NAME} unknown to k8s control plane and without any PIDs, will delete it..." | |
crictl stopp ${POD_ID} || logger "Failed to stop POD ${POD_NAME}" | |
crictl rmp ${POD_ID} || logger "Error removing POD ${POD_NAME}" | |
fi | |
# else | |
# logger "Pod ${POD_NAME} in namespace ${POD_NAMESPACE} is still known to control plane, skipping..." | |
fi | |
done | |
} | |
gc_cgroups(){ | |
CGROUPDIRS=($(find /sys/fs/cgroup -type d)) | |
LEFTOVER_SCOPES_TMP=($(journalctl --since "10m ago" | egrep 'Failed to update stats for container|Failed to create existing container' | grep -o 'crio-.*scope' | sort -u)) | |
for PODID in `journalctl --since "10 ago" | grep 'Unable to fetch pod log stats' | grep -o '\/var.*:' | tr -d ':' | awk -F_ '{print $NF}' | sed 's/-/_/g' | sort -u`; do | |
LEFTOVER_SCOPES_TMP+=($(printf -- '%s\n' "${testarray[@]}" | grep ${PODID} | grep -o crio.*$)) | |
done | |
LEFTOVER_SCOPES=($(printf -- '%s\n' "${LEFTOVER_SCOPES_TMP[@]}" | sort -u)) | |
for SCOPENAME in ${LEFTOVER_SCOPES[@]}; do | |
if [[ " ${POD_SCOPES[*]} " =~ " ${SCOPENAME} " ]]; then | |
logger "Scope ${SCOPENAME} found under running pod, skipping..." | |
continue | |
else | |
for SCOPE in `printf -- '%s\n' "${CGROUPDIRS[@]}" | grep ${SCOPENAME}`; do | |
logger "Removing CGROUP ${SCOPENAME} and its parent..." | |
rmdir ${SCOPE} | |
if [[ $? -eq 0 ]]; then | |
rmdir `dirname ${SCOPE}` | |
if [[ $? -ne 0 ]]; then | |
logger "Failed to remove parent for CGROUP ${SCOPE}..." | |
fi | |
fi | |
done | |
fi | |
done | |
} | |
# sleep for 1m to allow garbage collector to setup properly and avoid pod start race-condition | |
sleep 60 | |
while true; do | |
logger "Starting k8s garbage collector run..." | |
KUBECONFIG=/var/lib/kubelet/kubeconfig kubectl get --raw='/readyz' &>/dev/null | |
if [[ $? -ne 0 ]]; then | |
logger "Kubernetes API unavailable. Cancelling run." | |
else | |
gc_pods | |
gc_cgroups | |
fi | |
logger "Sleeping for ${SLEEP_INTERVAL} seconds..." | |
sleep ${SLEEP_INTERVAL} | |
done |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
apiVersion: apps/v1 | |
kind: DaemonSet | |
metadata: | |
name: k8s-gc | |
namespace: garbage-collector | |
spec: | |
revisionHistoryLimit: 10 | |
selector: | |
matchLabels: | |
app: k8s-gc | |
template: | |
metadata: | |
creationTimestamp: null | |
labels: | |
app: k8s-gc | |
spec: | |
containers: | |
- command: ["/bin/sh"] | |
args: ["-c", "cp /tmp/gc.sh /host/tmp/gc.sh ; chmod +x /host/tmp/gc.sh ; chroot /host ./tmp/gc.sh -s $SLEEP_INTERVAL"] | |
image: image-registry.openshift-image-registry.svc:5000/openshift/cli | |
imagePullPolicy: IfNotPresent | |
name: garbage-collector | |
env: | |
- name: SLEEP_INTERVAL | |
value: "600" | |
securityContext: | |
privileged: true | |
runAsUser: 0 | |
volumeMounts: | |
- mountPath: /host | |
name: host | |
- mountPath: "/tmp/gc.sh" | |
subPath: gc | |
name: gc-script | |
enableServiceLinks: true | |
hostNetwork: true | |
hostPID: true | |
serviceAccount: garbage-collector | |
serviceAccountName: garbage-collector | |
terminationGracePeriodSeconds: 30 | |
volumes: | |
- name: gc-script | |
configMap: | |
name: gc-script | |
- hostPath: | |
path: / | |
type: Directory | |
name: host | |
nodeSelector: | |
kubernetes.io/os: linux | |
tolerations: | |
- operator: Exists | |
updateStrategy: | |
type: OnDelete |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
apiVersion: v1 | |
kind: Namespace | |
metadata: | |
name: garbage-collector | |
annotations: | |
openshift.io/node-selector: "" | |
labels: | |
openshift.io/cluster-monitoring: "true" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- | |
apiVersion: v1 | |
kind: ServiceAccount | |
metadata: | |
name: garbage-collector | |
namespace: garbage-collector | |
--- | |
apiVersion: rbac.authorization.k8s.io/v1 | |
kind: RoleBinding | |
metadata: | |
name: system:openshift:scc:anyuid | |
namespace: garbage-collector | |
roleRef: | |
apiGroup: rbac.authorization.k8s.io | |
kind: ClusterRole | |
name: system:openshift:scc:anyuid | |
subjects: | |
- kind: ServiceAccount | |
name: garbage-collector | |
namespace: garbage-collector | |
--- | |
apiVersion: rbac.authorization.k8s.io/v1 | |
kind: RoleBinding | |
metadata: | |
name: system:openshift:scc:privileged | |
namespace: garbage-collector | |
roleRef: | |
apiGroup: rbac.authorization.k8s.io | |
kind: ClusterRole | |
name: system:openshift:scc:privileged | |
subjects: | |
- kind: ServiceAccount | |
name: garbage-collector | |
namespace: garbage-collector |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment