Last active
December 1, 2025 19:39
-
-
Save Souheil-Yazji/fe35909cbed2406ef6460fcc470c672c to your computer and use it in GitHub Desktop.
Delete Orphaned Kubeflow Notebook StatefulSets
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# =============================== WARNING ===============================
# This script can make changes to your cluster resources. Please review and
# understand it before running.
# =======================================================================
#
# It identifies and deletes orphaned StatefulSets in a Kubernetes cluster that
# are associated with Kubeflow Notebooks but whose corresponding Notebook
# Custom Resources no longer exist.
# It supports a dry-run mode to preview the deletions before executing them.

# Strict mode: exit when a command fails (-e), treat unset variables as errors
# (-u), and make a pipeline report failure when any of its stages fails rather
# than only the last one (pipefail).
set -euo pipefail

# Usage:
#   ./sts-deleter.sh             # dry-run (default)
#   ./sts-deleter.sh --execute   # actually delete orphaned StatefulSets
#
# Environment:
#   NAMESPACE_SELECTOR   label selector for the namespaces to scan
#                        (default: app.kubernetes.io/part-of=kubeflow-profile)
#
# Requirements:
#   - kubectl
#   - jq
#
# Behavior:
#   - Finds StatefulSets whose pod template carries a `notebook-name` label.
#   - If the corresponding Notebook CR does NOT exist in the same namespace,
#     the StatefulSet is considered orphaned.
#   - For each orphan, the Service named after the notebook is also removed,
#     plus the notebook's Istio VirtualService and AuthorizationPolicy when
#     those CRDs are installed.
#   - In dry-run mode, prints the kubectl delete commands.
#   - With --execute, runs the delete commands.
# Mode selection. Dry-run is the default; deletions only happen when the first
# argument is exactly --execute. DRY_RUN is read by the cleanup code further
# down and always holds the literal string "true" or "false".
DRY_RUN=true
if [[ "${1:-}" == "--execute" ]]; then
  DRY_RUN=false
  echo "⚠️ EXECUTION MODE: orphaned StatefulSets will be DELETED."
  echo
else
  # Anything other than --execute keeps the safe default, but say so: a typo
  # such as --excute would otherwise be ignored without any hint.
  if (( $# > 0 )); then
    echo "WARNING: ignoring unrecognized argument '${1}'; only --execute is supported." >&2
  fi
  echo "🔎 DRY-RUN MODE: showing orphaned StatefulSets and delete commands only."
  echo " Pass --execute to actually delete them."
  echo
fi
# Verify the external tools this script shells out to before doing any work.
# Both kubectl and jq are required; abort with a clear message if one is
# missing instead of failing later in the middle of a run.
for required_cmd in jq kubectl; do
  if ! command -v "${required_cmd}" >/dev/null 2>&1; then
    echo "ERROR: '${required_cmd}' is required but not installed. Please install ${required_cmd} and retry." >&2
    exit 1
  fi
done
# Label selector choosing which namespaces are scanned. Optional: export
# NAMESPACE_SELECTOR to override; by default only namespaces associated with
# Kubeflow profiles are targeted.
NAMESPACE_SELECTOR="${NAMESPACE_SELECTOR:-app.kubernetes.io/part-of=kubeflow-profile}"

echo "Discovering namespaces labeled with '$NAMESPACE_SELECTOR'..."

# NAMESPACES receives the matching namespace names as one space-separated
# string. A failed lookup is fatal: without a namespace list nothing can be
# checked, and guessing would be unsafe for a script that deletes resources.
if ! NAMESPACES=$(kubectl get ns -l "$NAMESPACE_SELECTOR" -o jsonpath='{.items[*].metadata.name}'); then
  echo "ERROR: failed to list namespaces with label '$NAMESPACE_SELECTOR'." >&2
  exit 1
fi

# No matching namespace is not an error; there is simply nothing to do.
if [[ -z "$NAMESPACES" ]]; then
  echo "WARNING: No namespaces found with label '$NAMESPACE_SELECTOR'. Exiting." >&2
  exit 0
fi
#######################################
# Reports whether a CustomResourceDefinition can be fetched from the cluster.
# Arguments: $1 - full CRD name, e.g. virtualservices.networking.istio.io
# Returns:   0 if `kubectl get crd` succeeds, non-zero otherwise (kubectl
#            output is discarded, so any failure reads as "not installed")
#######################################
crd_installed() {
  kubectl get crd "$1" >/dev/null 2>&1
}

# Check if Istio CRDs (virtualservices/authorizationpolicies) exist. The flags
# hold the literal strings "true"/"false" and gate the Istio cleanup steps.
HAS_VS=false
HAS_AP=false
if crd_installed virtualservices.networking.istio.io; then
  HAS_VS=true
fi
if crd_installed authorizationpolicies.security.istio.io; then
  HAS_AP=true
fi
# ---------------------------------------------------------------------------
# Orphan cleanup: for every selected namespace, find notebook StatefulSets
# whose Notebook CR is gone and remove them plus their companion resources.
# Reads NAMESPACES, DRY_RUN, HAS_VS and HAS_AP set earlier in the script.
# ---------------------------------------------------------------------------

# Succeeds unless DRY_RUN is exactly "false", so an unset or unexpected value
# can never trigger a deletion.
is_dry_run() {
  [[ "${DRY_RUN:-true}" != "false" ]]
}

#######################################
# Reports whether a Notebook CR exists.
# Arguments: $1 - namespace
#            $2 - notebook name
# Returns:   0 - the Notebook exists
#            1 - the API server confirmed it does not exist
#            2 - the lookup itself failed (kubectl's error goes to stderr)
#######################################
notebook_exists() {
  local ns=$1 name=$2 found
  # --ignore-not-found makes a missing object exit 0 with empty output, so a
  # non-zero exit can only mean a real failure (timeout, RBAC, missing CRD).
  # Such a failure must never be read as "the Notebook is gone".
  found=$(kubectl get notebooks.kubeflow.org "${name}" -n "${ns}" \
    --ignore-not-found -o name) || return 2
  [[ -n "${found}" ]]
}

#######################################
# Deletes one companion resource if it currently exists; honours dry-run.
# Arguments: $1 - resource kind as passed to kubectl
#            $2 - human-readable kind used in the progress message
#            $3 - resource name
#            $4 - namespace
# Returns:   always 0; the deletion is best-effort
#######################################
delete_if_present() {
  local kind=$1 label=$2 name=$3 ns=$4
  kubectl get "${kind}" "${name}" -n "${ns}" >/dev/null 2>&1 || return 0
  if is_dry_run; then
    echo " → Would run: kubectl delete ${kind} ${name} -n ${ns} --wait=false"
  else
    echo " → Deleting ${label} ${ns}/${name}..."
    # Intentional best-effort: a leftover companion resource must not abort
    # the remaining cleanup. kubectl still prints its own error to stderr.
    kubectl delete "${kind}" "${name}" -n "${ns}" --wait=false || true
  fi
}

# Convert space-separated namespaces into an array
read -ra NAMESPACES_ARRAY <<< "${NAMESPACES:-}"

for ns in "${NAMESPACES_ARRAY[@]}"; do
  echo "Checking namespace: ${ns}"

  # --- StatefulSets ---
  # List first and check the result, so a failed listing is reported instead
  # of looking like a namespace that has no StatefulSets.
  if ! sts_json=$(kubectl get sts -n "${ns}" -o json); then
    echo "WARNING: could not list StatefulSets in namespace '${ns}'; skipping it." >&2
    echo
    continue
  fi

  # One "<statefulset>\t<notebook-name>" line per StatefulSet whose pod
  # template carries the `notebook-name` label.
  if ! sts_pairs=$(jq -r '.items[] | select(.spec.template.metadata.labels["notebook-name"] != null) | [.metadata.name, .spec.template.metadata.labels["notebook-name"]] | @tsv' <<< "${sts_json}"); then
    echo "WARNING: could not parse StatefulSets of namespace '${ns}'; skipping it." >&2
    echo
    continue
  fi

  # Read on fd 3 so the kubectl calls in the body cannot consume the list.
  while IFS=$'\t' read -r -u 3 sts nb_name; do
    # Skip blank lines and empty label values: without a notebook name there
    # is nothing to look up, and an empty name must not count as an orphan.
    if [[ -z "${sts}" || -z "${nb_name}" ]]; then
      continue
    fi

    lookup_rc=0
    notebook_exists "${ns}" "${nb_name}" || lookup_rc=$?
    if (( lookup_rc == 0 )); then
      # Notebook exists – this StatefulSet is not orphaned
      continue
    fi
    if (( lookup_rc != 1 )); then
      echo " WARNING: could not verify Notebook ${ns}/${nb_name}; leaving StatefulSet ${ns}/${sts} untouched." >&2
      continue
    fi

    echo " Orphan detected: Notebook ${ns}/${nb_name} is missing, but StatefulSet ${ns}/${sts} exists."

    # 1) Delete StatefulSet. Not best-effort: a failure here stops the script
    #    when it runs with `set -e`.
    if is_dry_run; then
      echo " → Would run: kubectl delete sts ${sts} -n ${ns}"
    else
      echo " → Deleting: kubectl delete sts ${sts} -n ${ns}"
      kubectl delete sts "${sts}" -n "${ns}"
    fi

    # 2) Delete Service (name == notebook name)
    delete_if_present svc "Service" "${nb_name}" "${ns}"

    # --- Istio resources cleanup (if applicable) ---
    # 3) Delete VirtualService (name == notebook-<ns>-<nb_name>)
    if [[ "${HAS_VS:-false}" == "true" ]]; then
      delete_if_present virtualservice.networking.istio.io "VirtualService" \
        "notebook-${ns}-${nb_name}" "${ns}"
    fi

    # 4) Delete AuthorizationPolicy (name == notebook-<ns>-<nb_name>-block-downloads)
    if [[ "${HAS_AP:-false}" == "true" ]]; then
      delete_if_present authorizationpolicy.security.istio.io "AuthorizationPolicy" \
        "notebook-${ns}-${nb_name}-block-downloads" "${ns}"
    fi
  done 3<<< "${sts_pairs}"
  echo
done

echo "✅ Done."
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Update #1, substantial revision to logic and output to end user.