Last active
December 1, 2025 21:00
-
-
Save Souheil-Yazji/fadb24b5edb7ab38d6f23f5b50195b29 to your computer and use it in GitHub Desktop.
Kubeflow Notebook StatefulSet OwnerReference Reconciler
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# ██     ██  █████  ██████  ███    ██ ██ ███    ██  ██████
# ██     ██ ██   ██ ██   ██ ████   ██ ██ ████   ██ ██
# ██  █  ██ ███████ ██████  ██ ██  ██ ██ ██ ██  ██ ██   ███
# ██ ███ ██ ██   ██ ██   ██ ██  ██ ██ ██ ██  ██ ██ ██    ██
#  ███ ███  ██   ██ ██   ██ ██   ████ ██ ██   ████  ██████
#
# This script can make changes to your cluster resources. Please review and
# understand it before running.
#
# It reconciles Notebook-related resources in a Kubernetes cluster by adding
# owner references to them. This allows Kubernetes garbage collection to clean
# them up when the owning Notebook is deleted.
# Resources patched: StatefulSets, Services, VirtualServices (Istio),
# AuthorizationPolicies (Istio).
# It supports a dry-run mode to preview the patches before executing them.
#
# Usage:
#   ./sts-reconsiler.sh             # dry-run (default)
#   ./sts-reconsiler.sh --dry-run   # same as above, spelled out
#   ./sts-reconsiler.sh --execute   # actually patch resources with owner references
#   ./sts-reconsiler.sh --help      # print the usage line and exit
#
# Environment:
#   NAMESPACE_SELECTOR  label selector for the namespaces to scan
#                       (default: app.kubernetes.io/part-of=kubeflow-profile)
#
# Requirements:
#   - kubectl
#   - jq
#
# Behavior:
#   - Finds Notebook-related resources (STS, Service, VirtualService,
#     AuthorizationPolicy) with no owner references.
#   - If the corresponding Notebook CR exists in the same namespace,
#     adds an owner reference to all matching resources pointing to the Notebook.
#   - In dry-run mode, prints the patches that would be applied.
#   - With --execute, applies the patches.

# Exit immediately if a command fails, treat unset variables as errors, and
# fail pipelines on the first error.
set -euo pipefail

# Dry-run is the default; the cluster is only mutated on an explicit --execute.
# Anything unrecognised is rejected instead of being silently ignored, so a
# mistyped flag is reported rather than quietly falling back to a dry-run.
DRY_RUN=true
case "${1:-}" in
  --execute)
    DRY_RUN=false
    echo "⚠️ EXECUTION MODE: Notebook-related resources will be PATCHED with owner references."
    echo
    ;;
  "" | --dry-run)
    echo "🔍 DRY-RUN MODE: showing patches that would be applied only."
    echo " Pass --execute to actually patch them."
    echo
    ;;
  -h | --help)
    echo "Usage: ${0##*/} [--dry-run | --execute]"
    exit 0
    ;;
  *)
    echo "ERROR: unknown argument '${1}'. Usage: ${0##*/} [--dry-run | --execute]" >&2
    exit 2
    ;;
esac

# Both tools listed under "Requirements" are needed; verify them up front so a
# missing binary is reported clearly before any cluster query is attempted.
for required_tool in kubectl jq; do
  if ! command -v "${required_tool}" >/dev/null 2>&1; then
    echo "ERROR: '${required_tool}' is required but not installed. Please install ${required_tool} and retry." >&2
    exit 1
  fi
done

# Optional: we only want to target namespaces associated with Kubeflow
# profiles. Override by exporting NAMESPACE_SELECTOR before running.
NAMESPACE_SELECTOR="${NAMESPACE_SELECTOR:-app.kubernetes.io/part-of=kubeflow-profile}"

# Check if Istio CRDs (virtualservices/authorizationpolicies) exist. Any
# failure of the lookup is treated as "CRD not available" and the
# corresponding resources are simply not reconciled.
HAS_VS=false
HAS_AP=false
if kubectl get crd virtualservices.networking.istio.io >/dev/null 2>&1; then
  HAS_VS=true
fi
if kubectl get crd authorizationpolicies.security.istio.io >/dev/null 2>&1; then
  HAS_AP=true
fi
#######################################
# Attach a Notebook ownerReference to a single resource, provided the resource
# exists and currently has no owner references.
# Globals:
#   DRY_RUN (read) - only the literal "false" enables patching; any other
#                    value (or an unset variable) is treated as dry-run so the
#                    function cannot mutate the cluster by accident.
# Arguments:
#   $1 - resource type as understood by kubectl (e.g. sts, svc)
#   $2 - resource name
#   $3 - namespace of the resource
#   $4 - apiVersion of the owning Notebook
#   $5 - kind of the owning Notebook
#   $6 - name of the owning Notebook
#   $7 - UID of the owning Notebook
# Outputs:
#   Progress lines on stdout; a failed patch is reported on stderr together
#   with kubectl's own error text.
# Returns:
#   Always 0. Patching is best-effort: callers invoke this function bare under
#   `set -e`, so a non-zero status would abort the whole run.
#######################################
patch_resource_with_owner() {
  local resource_type="$1"
  local resource_name="$2"
  local namespace="$3"
  local nb_apiversion="$4"
  local nb_kind="$5"
  local nb_name="$6"
  local nb_uid="$7"

  # A single GET answers both questions: a non-zero exit means the resource
  # cannot be fetched (treated as "nothing to do"), and a non-empty result
  # means it already has at least one owner reference.
  local existing_owner_uids
  if ! existing_owner_uids=$(kubectl get "${resource_type}" "${resource_name}" \
    -n "${namespace}" \
    -o jsonpath='{.metadata.ownerReferences[*].uid}' 2>/dev/null); then
    return 0
  fi
  if [[ -n "${existing_owner_uids}" ]]; then
    return 0
  fi

  # Compact JSON Patch document. "add" creates /metadata/ownerReferences, which
  # is safe here because the resource was just confirmed to have no owners.
  # The interpolated values come from the API server (Notebook metadata), not
  # from free-form user input, so they are inserted without extra escaping.
  local owner_ref
  printf -v owner_ref \
    '{"apiVersion":"%s","kind":"%s","name":"%s","uid":"%s","controller":true,"blockOwnerDeletion":true}' \
    "${nb_apiversion}" "${nb_kind}" "${nb_name}" "${nb_uid}"
  local patch_json
  printf -v patch_json \
    '[{"op":"add","path":"/metadata/ownerReferences","value":[%s]}]' \
    "${owner_ref}"

  if [[ "${DRY_RUN:-true}" != "false" ]]; then
    # Show the real patch so the dry-run output can be reviewed and replayed.
    echo " → Would patch: kubectl patch ${resource_type} ${resource_name} -n ${namespace} --type=json -p '${patch_json}'"
    return 0
  fi

  echo " → Patching ${resource_type}: ${namespace}/${resource_name}"
  # Capture stderr only (stdout is discarded) so the reason for a failure can
  # be shown to the operator instead of being thrown away.
  local patch_error
  if patch_error=$(kubectl patch "${resource_type}" "${resource_name}" \
    -n "${namespace}" --type=json -p "${patch_json}" 2>&1 >/dev/null); then
    echo " ✓ Successfully patched ${resource_type} ${namespace}/${resource_name}"
  else
    echo " ✗ Failed to patch ${resource_type} ${namespace}/${resource_name}: ${patch_error}" >&2
  fi
  return 0
}
# ---------------------------------------------------------------------------
# Main reconcile loop.
#
# For every namespace matching NAMESPACE_SELECTOR, walk the StatefulSets whose
# pod template carries a non-empty "notebook-name" label. When the named
# Notebook exists, hand the StatefulSet and its related Service /
# VirtualService / AuthorizationPolicy to patch_resource_with_owner, which
# skips anything that is missing or already has owner references.
#
# StatefulSets that already have an owner are still visited: otherwise a
# Service / VirtualService / AuthorizationPolicy left unpatched by an earlier,
# partially failed run would never be picked up again on re-run.
# ---------------------------------------------------------------------------
echo "Discovering namespaces labeled with '$NAMESPACE_SELECTOR'..."
NAMESPACES=$(kubectl get ns -l "$NAMESPACE_SELECTOR" -o jsonpath='{.items[*].metadata.name}')
if [[ -z "$NAMESPACES" ]]; then
  echo "WARNING: No namespaces found with label '$NAMESPACE_SELECTOR'. Exiting." >&2
  exit 0
fi

# Convert space-separated namespaces into an array
read -ra NAMESPACES_ARRAY <<< "$NAMESPACES"

for ns in "${NAMESPACES_ARRAY[@]}"; do
  echo "Checking namespace: ${ns}"

  # Each input row is: <statefulset name> TAB <notebook name> TAB <owner count>.
  while IFS=$'\t' read -r sts nb_name sts_owner_count; do
    if [[ -z "${sts}" || -z "${nb_name}" ]]; then
      continue
    fi

    # Fetch uid, apiVersion and kind of the Notebook in ONE request. A failed
    # GET is reported as "does not exist", matching the previous behavior.
    if ! nb_meta=$(kubectl get notebooks.kubeflow.org "${nb_name}" -n "${ns}" \
      -o jsonpath='{.metadata.uid}{"\t"}{.apiVersion}{"\t"}{.kind}' 2>/dev/null); then
      echo " Skipping ${ns}/${sts}: Notebook ${nb_name} does not exist in ${ns}."
      continue
    fi
    # read assigns an empty string to any trailing name it has no field for,
    # so a short or empty reply is caught by the check below.
    IFS=$'\t' read -r NB_UID NB_APIVERSION NB_KIND <<< "${nb_meta}" || true
    if [[ -z "${NB_UID}" || -z "${NB_APIVERSION}" || -z "${NB_KIND}" ]]; then
      echo " ERROR: Could not retrieve Notebook metadata. Skipping." >&2
      continue
    fi

    if [[ "${sts_owner_count:-0}" == "0" ]]; then
      echo " Found orphaned StatefulSet: ${ns}/${sts} for notebook ${nb_name}"
    fi

    # Patch StatefulSet (no-op inside the helper when it already has an owner)
    patch_resource_with_owner "sts" "${sts}" "${ns}" "${NB_APIVERSION}" "${NB_KIND}" "${nb_name}" "${NB_UID}"

    # Patch Service (name == notebook name)
    patch_resource_with_owner "svc" "${nb_name}" "${ns}" "${NB_APIVERSION}" "${NB_KIND}" "${nb_name}" "${NB_UID}"

    # Patch VirtualService (name == notebook-<ns>-<nb_name>)
    if [[ "${HAS_VS}" == "true" ]]; then
      VS_NAME="notebook-${ns}-${nb_name}"
      patch_resource_with_owner "virtualservice.networking.istio.io" "${VS_NAME}" "${ns}" "${NB_APIVERSION}" "${NB_KIND}" "${nb_name}" "${NB_UID}"
    fi

    # Patch AuthorizationPolicy (name == notebook-<ns>-<nb_name>-block-downloads)
    if [[ "${HAS_AP}" == "true" ]]; then
      AP_NAME="notebook-${ns}-${nb_name}-block-downloads"
      patch_resource_with_owner "authorizationpolicy.security.istio.io" "${AP_NAME}" "${ns}" "${NB_APIVERSION}" "${NB_KIND}" "${nb_name}" "${NB_UID}"
    fi
  # Rows with a missing or empty label are dropped in jq: tab is an
  # IFS-whitespace character, so an empty middle column would otherwise make
  # read shift the owner count into nb_name.
  done < <(kubectl get sts -n "${ns}" -o json 2>/dev/null | jq -r '
    .items[]
    | .spec.template.metadata.labels["notebook-name"] as $nb
    | select(($nb | type) == "string" and ($nb | length) > 0)
    | [.metadata.name, $nb, ((.metadata.ownerReferences // []) | length)]
    | @tsv')
  echo
done
echo "✅ Done."
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Update #1: a substantial revision of the logic and of the output shown to the end user. The script now also handles all Notebook-related resources, not just StatefulSets.