Last active
August 2, 2024 13:57
-
-
Save WalBeh/60e93aa816d4f6af5027dd2e7432970a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| This script is used to update the JMX exporter version in the CrateDB StatefulSet. | |
| The JMX exporter is referenced in | |
| - the initContainer, | |
| - the CRATE_JAVA_OPTS environment variable, and | |
| - the volumeMounts. | |
| The script also updates the sql_exporter image tag. | |
| After updating the StatefulSet, a rolling restart of the pods is initiated. Before | |
| a pod is deleted, we check the health of the CrateDB cluster by querying the "cratedbs" | |
| custom resource and waiting until it reports the "GREEN" status. | |
| After deleting a pod, we wait until it is in the "Running" phase and all containers are ready. | |
| Once the pod is running, we check the health of the CrateDB cluster again by querying the | |
| custom resource for the CrateDB cluster and waiting until it reports "GREEN" again. | |
| The script will loop through all namespaces with the label "app.kubernetes.io/part-of=cratedb". | |
| If the environment variable NS_LIST is set to "all" or "ALL", it will update all namespaces. | |
| """ | |
| from datetime import datetime | |
| from enum import Enum | |
| from icecream import ic | |
| from kubernetes import client, config | |
| from distutils.util import strtobool | |
| # import json | |
| import time | |
| import os | |
def custom_ic_output(*args):
    """Build a log prefix: the current local time followed by any arguments.

    The result has the form ``"YYYY-MM-DD HH:MM:SS | arg1 | arg2"``. When
    called without arguments it is just the timestamp followed by ``" | "``.

    Args:
        *args: Values to append after the timestamp; each is converted
            with ``str()``.

    Returns:
        The formatted prefix string.
    """
    stamp = f"{datetime.now():%Y-%m-%d %H:%M:%S}"
    rendered_args = " | ".join(map(str, args))
    return stamp + " | " + rendered_args
# Use custom_ic_output as the prefix callable for every ic() line, so each
# log line starts with a timestamp.
ic.configureOutput(prefix=custom_ic_output)
# JMX exporter artifacts handled by this script: the jar file names and
# version strings of the release being rolled out (NEW) and of the release
# being replaced (OLD). Built with the functional Enum API; members keep
# the listed order.
jmx_exporter = Enum(
    "jmx_exporter",
    {
        "JAR_NEW": "crate-jmx-exporter-1.1.0.jar",
        "JAR_OLD": "crate-jmx-exporter-1.0.0.jar",
        "VERSION_NEW": "1.1.0",
        "VERSION_OLD": "1.0.0",
    },
)
# Target image tag for the sql-exporter container. Built with the
# functional Enum API; the single member is VERSION.
sql_exporter = Enum("sql_exporter", {"VERSION": "0.14.2"})
# MARK: Constants
API_GROUP = "cloud.crate.io"  # API group used when listing the CrateDB custom resources
NODE_NAME_HOT = "cloud.crate.io/node-name=hot"  # label selector for the StatefulSets and pods to process
PART_OF_CRATEDB = "app.kubernetes.io/part-of=cratedb"  # label selector for the namespaces to visit
RESOURCE_CRATEDB = "cratedbs"  # plural name of the CrateDB custom resource
SLEEPY_TIME = 10  # base polling interval in seconds


def _env_flag(name, default="false"):
    """Read the boolean environment variable *name*.

    Accepts the same spellings as ``distutils.util.strtobool`` (y/yes/t/
    true/on/1 and n/no/f/false/off/0, case-insensitive) without depending
    on ``distutils``, which was removed from the standard library in
    Python 3.12.

    Args:
        name: Name of the environment variable.
        default: String used when the variable is unset. It must be a
            string: the previous code passed the bool ``False`` to
            ``strtobool``, which crashed with AttributeError whenever the
            variable was not set.

    Returns:
        True or False.

    Raises:
        ValueError: If the value is not one of the accepted spellings.
    """
    raw = os.getenv(name, default).strip().lower()
    if raw in ("y", "yes", "t", "true", "on", "1"):
        return True
    if raw in ("n", "no", "f", "false", "off", "0"):
        return False
    raise ValueError(f"invalid truth value {raw!r} for {name}")


# MARK: Environment settings
FORCE_ROLLING_RESTART = _env_flag("FORCE_ROLLING_RESTART")
KUBE_CONTEXT = os.getenv("KUBE_CONTEXT", "eks1-us-east-1-dev")
NS_LIST = os.getenv(
    "NS_LIST", "43e19d14-1b74-4bd9-8fd8-f5cc59f57411"
)  # "all" or "ALL" to update all namespaces
def patch_sts(sts_name, namespace_name, path, value):
    """Replace a single field of a StatefulSet.

    Sends a one-element patch list in JSON Patch form
    (``op``/``path``/``value``) with ``op`` fixed to ``"replace"``.

    Args:
        sts_name: Name of the StatefulSet to patch.
        namespace_name: Namespace the StatefulSet lives in.
        path: Pointer to the field to replace, e.g.
            ``/spec/template/spec/containers/0/image``.
        value: New value for that field.
    """
    replace_op = {"op": "replace", "path": path, "value": value}
    apps_api = client.AppsV1Api()
    apps_api.patch_namespaced_stateful_set(sts_name, namespace_name, [replace_op])
def check_cratedb_status(crd, namespace_name):
    """Block until the CrateDB cluster in *namespace_name* reports GREEN.

    Lists the ``cratedbs`` custom resources of the namespace, looks at the
    first one and polls every ``SLEEPY_TIME`` seconds until its
    ``status.crateDBStatus.health`` equals ``"GREEN"``. A resource whose
    health is not populated yet is treated as "not GREEN yet" and polled
    again, rather than aborting a rollout midway with a KeyError.

    Args:
        crd: Object providing ``list_namespaced_custom_object`` (a
            ``CustomObjectsApi`` in this script).
        namespace_name: Namespace to query.

    Raises:
        IndexError: If the namespace contains no ``cratedbs`` resource;
            waiting would never succeed in that case.
    """
    while True:
        response = crd.list_namespaced_custom_object(
            group=API_GROUP,
            version="v1",
            namespace=namespace_name,
            plural=RESOURCE_CRATEDB,
        )
        items = response.get("items") or []
        if not items:
            raise IndexError(
                f"No {RESOURCE_CRATEDB} resource found in namespace {namespace_name}"
            )
        resource = items[0]
        # Walk the nested dicts defensively: any missing level yields None.
        status = (
            (resource.get("status") or {}).get("crateDBStatus") or {}
        ).get("health")
        cluster = ((resource.get("spec") or {}).get("cluster") or {}).get("name")
        ic(cluster, status)
        if status == "GREEN":
            return
        time.sleep(SLEEPY_TIME)
def _wait_for_replacement_pod(v1, pod_name, namespace_name, old_uid):
    """Poll until a new pod called *pod_name* is Running with all containers ready.

    The StatefulSet recreates the pod under the same name, so the name
    alone cannot tell the old pod from its replacement. A pod that is
    still shutting down gracefully keeps reporting phase "Running" with
    ready containers, so the UID is compared against *old_uid* to be sure
    the replacement is being observed.
    """
    while True:
        try:
            pod_status = v1.read_namespaced_pod_status(pod_name, namespace_name)
        except client.rest.ApiException as exc:
            # 404: the old pod is gone and the replacement does not exist yet.
            if exc.status != 404:
                raise
            ic("Pod has not been recreated yet.")
            time.sleep(SLEEPY_TIME)
            continue

        if pod_status.metadata.uid == old_uid:
            ic("Old pod is still terminating.")
            time.sleep(SLEEPY_TIME)
            continue

        ic(pod_name, pod_status.status.phase)
        if pod_status.status.phase != "Running":
            ic("Pod is not in Running phase yet.")
            time.sleep(SLEEPY_TIME)
            continue

        # container_statuses can be None; an empty list must not count as
        # "all ready" (all([]) is True).
        container_statuses = pod_status.status.container_statuses or []
        all_containers_running = bool(container_statuses) and all(
            container.ready for container in container_statuses
        )
        ic(all_containers_running)
        if all_containers_running:
            return
        time.sleep(SLEEPY_TIME * 2)


def rolling_restart(pod_list, namespace_name):
    """Restart the given pods one at a time, keeping the cluster healthy.

    For every pod: wait for the CrateDB cluster to be GREEN, delete the
    pod, wait for its replacement to be Running with all containers ready,
    pause, and wait for GREEN again before moving on.

    Args:
        pod_list: Pod objects to restart; ``metadata.name`` and
            ``metadata.uid`` are read from each.
        namespace_name: Namespace the pods live in.

    Raises:
        kubernetes.client.rest.ApiException: On API errors other than a
            404 while waiting for the replacement pod.
    """
    v1 = client.CoreV1Api()
    crd = client.CustomObjectsApi()
    for pod in pod_list:
        # Check if CrateDB cluster is GREEN before deleting the pod
        check_cratedb_status(crd, namespace_name)

        # Deleting the cratedb pod to restart it
        pod_name = pod.metadata.name
        old_uid = pod.metadata.uid
        ic(pod_name)
        v1.delete_namespaced_pod(pod_name, namespace_name)
        ic("Pod deleted.")
        time.sleep(SLEEPY_TIME * 2)

        _wait_for_replacement_pod(v1, pod_name, namespace_name, old_uid)

        ic("giving healthprobe a chance to run.")
        time.sleep(SLEEPY_TIME * 3)
        # Check if CrateDB cluster is GREEN after pod restart
        # seems like double action, but prints the status of the cluster
        # before continuing with the next pod/namespace
        check_cratedb_status(crd, namespace_name)
def _selected_namespaces(namespaces):
    """Return the names of *namespaces* that NS_LIST selects.

    NS_LIST is either "all" (any case) or one or more namespace names
    separated by commas, semicolons or whitespace. Names are matched
    exactly, not as substrings of the NS_LIST string.
    """
    names = [namespace.metadata.name for namespace in namespaces]
    if NS_LIST.strip().lower() == "all":
        return names
    wanted = set(NS_LIST.replace(",", " ").replace(";", " ").split())
    return [name for name in names if name in wanted]


def _find_by_name(containers, name):
    """Return ``(index, container)`` of the first container called *name*, else ``(None, None)``."""
    for index, container in enumerate(containers or []):
        if container.name == name:
            return index, container
    return None, None


def _find_crate_container(containers):
    """Return ``(index, container)`` of the first container defining CRATE_JAVA_OPTS, else ``(None, None)``."""
    for index, container in enumerate(containers or []):
        if any(env_var.name == "CRATE_JAVA_OPTS" for env_var in container.env or []):
            return index, container
    return None, None


def _retag_image(image, tag):
    """Return *image* with its tag (or digest) replaced by *tag*.

    Only a colon after the last slash is treated as the tag separator, so
    a registry port such as ``registry:5000/img`` is left intact.
    """
    name = image.split("@", 1)[0]  # drop an "@sha256:..." digest if present
    colon = name.rfind(":")
    if colon > name.rfind("/"):
        name = name[:colon]
    return f"{name}:{tag}"


def _update_statefulset(sts, namespace_name):
    """Patch one StatefulSet to the new JMX exporter and sql-exporter versions.

    Containers are located by name (or, for the CrateDB container, by the
    presence of the CRATE_JAVA_OPTS variable) and patched at the index
    where they were found. Components that are not present are skipped
    with a log line.

    Returns:
        ``(jmx_restart_pods, sql_exporter_restart_pods)``: whether the
        pods still ran the old JMX exporter / sql-exporter version before
        the patch and therefore need a restart.
    """
    sts_name = sts.metadata.name
    pod_spec = sts.spec.template.spec
    containers = pod_spec.containers or []
    init_containers = pod_spec.init_containers or []
    old_jar = jmx_exporter.JAR_OLD.value
    new_jar = jmx_exporter.JAR_NEW.value
    patches = []  # (path, value) pairs, applied in order at the end

    # MARK: version_exists
    jmx_restart_pods = not any(
        new_jar in " ".join(container.command)
        for container in init_containers
        if container.command is not None
    )
    if not jmx_restart_pods:
        ic("JMX seems to be current.")

    # MARK: sql_exporter
    sql_exporter_restart_pods = False
    sql_index, sql_container = _find_by_name(containers, "sql-exporter")
    if sql_container is None:
        ic("No sql-exporter container found, skipping image update.")
    else:
        ic(sql_container.image)
        updated_sql_exporter_image = _retag_image(
            sql_container.image, sql_exporter.VERSION.value
        )
        sql_exporter_restart_pods = sql_container.image != updated_sql_exporter_image
        if not sql_exporter_restart_pods:
            ic("SQL Exporter seems to be current.")
        patches.append(
            (
                f"/spec/template/spec/containers/{sql_index}/image",
                updated_sql_exporter_image,
            )
        )

    # MARK: VolumeMounts / CRATE_JAVA_OPTS
    crate_index, crate_container = _find_crate_container(containers)
    if crate_container is None:
        ic("No container with CRATE_JAVA_OPTS found, skipping env/volumeMounts update.")
    else:
        # The whole volumeMounts list is sent back, so update it in place.
        for volume_mount in crate_container.volume_mounts or []:
            if volume_mount.mount_path:
                volume_mount.mount_path = volume_mount.mount_path.replace(
                    old_jar, new_jar
                )
            if volume_mount.sub_path:
                volume_mount.sub_path = volume_mount.sub_path.replace(
                    old_jar, new_jar
                )
        for env_var in crate_container.env:
            if env_var.name == "CRATE_JAVA_OPTS":
                if env_var.value:
                    env_var.value = env_var.value.replace(old_jar, new_jar)
                break
        crate_path = f"/spec/template/spec/containers/{crate_index}"
        if crate_container.volume_mounts:
            patches.append((f"{crate_path}/volumeMounts", crate_container.volume_mounts))
        patches.append((f"{crate_path}/env", crate_container.env))

    # MARK: initContainer
    wget_index, wget_container = _find_by_name(init_containers, "fetch-jmx-exporter")
    if wget_container is None or len(wget_container.command or []) < 4:
        ic("No usable fetch-jmx-exporter initContainer found, skipping its update.")
    else:
        # command[2] receives the jar's target path and command[3] its
        # download URL (presumably a "wget -O <target> <url>" style command).
        wget_container.command[2] = f"/jmxdir/{new_jar}"
        wget_container.command[3] = (
            "https://repo1.maven.org/maven2/io/crate/crate-jmx-exporter/"
            f"{jmx_exporter.VERSION_NEW.value}/{new_jar}"
        )
        patches.append(
            (f"/spec/template/spec/initContainers/{wget_index}", wget_container)
        )

    # MARK: Patch STS
    for path, value in patches:
        patch_sts(sts_name, namespace_name, path, value)

    return jmx_restart_pods, sql_exporter_restart_pods


def main():
    """Update the exporters in every selected CrateDB namespace and restart pods.

    Asks for interactive confirmation, loads the kube config for
    KUBE_CONTEXT, then visits every namespace labelled PART_OF_CRATEDB
    that NS_LIST selects. In each one, the first StatefulSet matching
    NODE_NAME_HOT is patched. Its pods are restarted one at a time when a
    version changed or FORCE_ROLLING_RESTART is set.
    """
    ic(KUBE_CONTEXT, NS_LIST, FORCE_ROLLING_RESTART)
    ready = input("Ready to continue? (y/n): ")
    if ready.strip().lower() != "y":
        # raise SystemExit directly; the exit() builtin is only installed
        # by the site module.
        raise SystemExit(0)

    config.load_kube_config(context=KUBE_CONTEXT)
    v1 = client.CoreV1Api()
    sts_api = client.AppsV1Api()

    # MARK: List namespaces
    # Get all namespaces with the label "app.kubernetes.io/part-of=cratedb"
    namespaces = v1.list_namespace(label_selector=PART_OF_CRATEDB).items
    for namespace_name in _selected_namespaces(namespaces):
        ic(namespace_name)
        sts_list = sts_api.list_namespaced_stateful_set(
            namespace_name, label_selector=NODE_NAME_HOT
        ).items
        if not sts_list:
            ic("No StatefulSets found in the namespace.")
            continue

        # Only the first matching StatefulSet is processed.
        jmx_restart_pods, sql_exporter_restart_pods = _update_statefulset(
            sts_list[0], namespace_name
        )

        # MARK: Restart pods
        pod_list = v1.list_namespaced_pod(
            namespace_name, label_selector=NODE_NAME_HOT
        ).items
        if not pod_list:
            ic("No pods found, suspended cluster?")
            continue
        ic(jmx_restart_pods, sql_exporter_restart_pods, FORCE_ROLLING_RESTART)
        if jmx_restart_pods or sql_exporter_restart_pods or FORCE_ROLLING_RESTART:
            rolling_restart(pod_list, namespace_name)
# Entry point of the script: run main() only when the file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment