Skip to content

Instantly share code, notes, and snippets.

@WalBeh
Last active August 2, 2024 13:57
Show Gist options
  • Select an option

  • Save WalBeh/60e93aa816d4f6af5027dd2e7432970a to your computer and use it in GitHub Desktop.

Select an option

Save WalBeh/60e93aa816d4f6af5027dd2e7432970a to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""
This script is used to update the JMX exporter version in the CrateDB StatefulSet.
JMX exporter is referenced in
- the initContainer,
- the CRATE_JAVA_OPTS environment variable, and
- the volumeMounts and
- sql_exporter image.
After updating the StatefulSet, a rolling restart of the pods is initiated. Before
a pod is deleted, we check the health of the CrateDB cluster by polling the "cratedbs"
custom resource until it reports the "GREEN" status.
After deleting a pod, we wait until it is in the "Running" phase and all of its containers are ready.
Once the pod is running, we poll the "cratedbs" custom resource again until the cluster
reports "GREEN" before continuing with the next pod.
The script loops through all namespaces with the label "app.kubernetes.io/part-of=cratedb".
Only the namespaces named in the environment variable NS_LIST are updated; if NS_LIST is
set to "all" or "ALL", all of them are updated.
"""
from datetime import datetime
from enum import Enum
from icecream import ic
from kubernetes import client, config
from distutils.util import strtobool
# import json
import time
import os
def custom_ic_output(*args):
    """Build a timestamped, pipe-separated log prefix.

    Args:
        *args: Optional values appended after the timestamp. Each one is
            converted with ``str`` and the results are joined by ``" | "``.

    Returns:
        The current local time formatted as ``YYYY-MM-DD HH:MM:SS``, followed
        by ``" | "`` and the joined arguments. With no arguments the string
        ends right after the separator.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    return f"{timestamp} | " + " | ".join(str(arg) for arg in args)
# Register custom_ic_output as the prefix callable for all ic() output.
ic.configureOutput(prefix=custom_ic_output)
class jmx_exporter(Enum):
    """Jar file names and version strings of the CrateDB JMX exporter.

    The ``*_OLD`` members identify the release that is searched for in the
    StatefulSet; the ``*_NEW`` members identify the release it is replaced
    with.
    """

    # Jar file name written into the StatefulSet.
    JAR_NEW = "crate-jmx-exporter-1.1.0.jar"
    # Jar file name that is searched for and replaced.
    JAR_OLD = "crate-jmx-exporter-1.0.0.jar"
    # Version strings matching the two jar names above.
    VERSION_NEW = "1.1.0"
    VERSION_OLD = "1.0.0"
class sql_exporter(Enum):
    """Target release of the sql-exporter sidecar image."""

    # Image tag the sql-exporter container is moved to.
    VERSION = "0.14.2"
# MARK: Fixed settings
# API group and plural name of the custom resource that is polled for health.
API_GROUP = "cloud.crate.io"
# Label selectors used to find the StatefulSets/pods and the namespaces.
NODE_NAME_HOT = "cloud.crate.io/node-name=hot"
PART_OF_CRATEDB = "app.kubernetes.io/part-of=cratedb"
RESOURCE_CRATEDB = "cratedbs"
# Base polling interval in seconds; callers scale it as needed.
SLEEPY_TIME = 10

# Spellings accepted for boolean environment variables (same set that
# distutils.util.strtobool accepts).
_TRUE_VALUES = ("y", "yes", "t", "true", "on", "1")
_FALSE_VALUES = ("n", "no", "f", "false", "off", "0")


def _env_flag(name, default="false"):
    """Read the environment variable *name* as a boolean.

    Args:
        name: Name of the environment variable.
        default: String used when the variable is not set. It must be a
            string; a bool default is what made the previous
            ``strtobool(os.getenv(name, False))`` call fail with
            AttributeError whenever the variable was unset.

    Returns:
        True or False according to the (case-insensitive) value.

    Raises:
        ValueError: If the value is not one of the accepted spellings.
    """
    raw = os.getenv(name, default).strip().lower()
    if raw in _TRUE_VALUES:
        return True
    if raw in _FALSE_VALUES:
        return False
    raise ValueError(f"invalid truth value {raw!r} for environment variable {name}")


# MARK: Environment settings
# NOTE(review): this block no longer needs the file-level
# `from distutils.util import strtobool`; distutils was removed in Python 3.12.
FORCE_ROLLING_RESTART = _env_flag("FORCE_ROLLING_RESTART")
KUBE_CONTEXT = os.getenv("KUBE_CONTEXT", "eks1-us-east-1-dev")
NS_LIST = os.getenv(
    "NS_LIST", "43e19d14-1b74-4bd9-8fd8-f5cc59f57411"
)  # "all" or "ALL" to update all namespaces
def patch_sts(sts_name, namespace_name, path, value):
    """Replace one field of a StatefulSet with a single "replace" patch operation.

    Args:
        sts_name: Name of the StatefulSet to patch.
        namespace_name: Namespace the StatefulSet lives in.
        path: Path of the field to replace, e.g.
            ``/spec/template/spec/containers/0/image``.
        value: New value for that field.
    """
    sts_api = client.AppsV1Api()
    # The body is a list holding one op/path/value operation.
    # NOTE(review): presumably the client sends a list body as a JSON Patch;
    # confirm against the installed kubernetes client version.
    replace_operation = {
        "op": "replace",
        "path": path,
        "value": value,
    }
    sts_api.patch_namespaced_stateful_set(
        sts_name,
        namespace_name,
        [replace_operation],
    )
def check_cratedb_status(crd, namespace_name):
    """Block until the CrateDB resource in a namespace reports health "GREEN".

    The first object returned for ``RESOURCE_CRATEDB`` in the namespace is
    inspected every ``SLEEPY_TIME`` seconds. There is no timeout: the call
    only returns once the health is "GREEN".

    Args:
        crd: A ``client.CustomObjectsApi`` instance.
        namespace_name: Namespace whose CrateDB resource is polled.

    Raises:
        IndexError: If the namespace contains no such resource.
    """
    while True:
        cratedb = crd.list_namespaced_custom_object(
            group=API_GROUP,
            version="v1",
            namespace=namespace_name,
            plural=RESOURCE_CRATEDB,
        )
        items = cratedb["items"]
        if not items:
            raise IndexError(
                f"no {RESOURCE_CRATEDB} resource found in namespace {namespace_name}"
            )
        # A resource whose status has not been populated yet is treated as
        # "not GREEN yet" instead of failing with KeyError.
        status = (
            items[0].get("status", {}).get("crateDBStatus", {}).get("health")
        )
        cluster = items[0]["spec"]["cluster"]["name"]
        ic(cluster, status)
        if status == "GREEN":
            return
        time.sleep(SLEEPY_TIME)
def _wait_for_replacement_pod(v1, pod_name, namespace_name, old_uid):
    """Block until a new pod called pod_name is Running with all containers ready."""
    while True:
        try:
            pod_status = v1.read_namespaced_pod_status(pod_name, namespace_name)
        except client.rest.ApiException as exc:
            # 404 means the old pod is gone and the replacement does not
            # exist yet; anything else is a real error.
            if exc.status != 404:
                raise
            ic("Pod has not been recreated yet.")
            time.sleep(SLEEPY_TIME)
            continue
        # The deleted pod can still be terminating (and still report
        # "Running" with ready containers); only a different UID proves that
        # we are looking at the replacement.
        if pod_status.metadata.uid == old_uid:
            ic("Old pod is still terminating.")
            time.sleep(SLEEPY_TIME)
            continue
        ic(pod_name, pod_status.status.phase)
        if pod_status.status.phase != "Running":
            ic("Pod is not in Running phase yet.")
            time.sleep(SLEEPY_TIME)
            continue
        # container_statuses can be None while the pod is starting up.
        container_statuses = pod_status.status.container_statuses or []
        all_containers_running = bool(container_statuses) and all(
            container.ready for container in container_statuses
        )
        ic(all_containers_running)
        if all_containers_running:
            return
        time.sleep(SLEEPY_TIME * 2)


def rolling_restart(pod_list, namespace_name):
    """Restart the given pods one at a time, keeping the cluster healthy.

    For every pod: wait until the CrateDB cluster is GREEN, delete the pod,
    wait until a replacement pod with the same name is Running with all
    containers ready, pause, then wait for GREEN again.

    Args:
        pod_list: Pod objects to restart (as returned by ``list_namespaced_pod``).
        namespace_name: Namespace the pods live in.
    """
    v1 = client.CoreV1Api()
    crd = client.CustomObjectsApi()
    for pod in pod_list:
        # Check if CrateDB cluster is GREEN before deleting the pod
        check_cratedb_status(crd, namespace_name)
        # Deleting the cratedb pod to restart it
        pod_name = pod.metadata.name
        ic(pod_name)
        v1.delete_namespaced_pod(pod_name, namespace_name)
        ic("Pod deleted.")
        time.sleep(SLEEPY_TIME * 2)
        _wait_for_replacement_pod(v1, pod_name, namespace_name, pod.metadata.uid)
        ic("giving healthprobe a chance to run.")
        time.sleep(SLEEPY_TIME * 3)
        # Check if CrateDB cluster is GREEN after pod restart
        # seems like double action, but prints the status of the cluster
        # before continuing with the next pod/namespace
        check_cratedb_status(crd, namespace_name)
def _namespace_selected(namespace_name, ns_list):
    """Return True when namespace_name is selected by the NS_LIST value ns_list."""
    if ns_list in ["all", "ALL"]:
        return True
    # Compare whole names (comma- or whitespace-separated) instead of doing a
    # substring test on the raw string.
    return namespace_name in ns_list.replace(",", " ").split()


def _find_named(items, name):
    """Return (index, item) of the first item whose .name equals name, else (None, None)."""
    for index, item in enumerate(items or []):
        if item.name == name:
            return index, item
    return None, None


def _find_container_with_env(containers, env_name):
    """Return (index, container, env_var) of the first container defining env_name."""
    for index, container in enumerate(containers or []):
        for env_var in container.env or []:
            if env_var.name == env_name:
                return index, container, env_var
    return None, None, None


def _retag_image(image, tag):
    """Return image with its tag (and any digest) replaced by tag."""
    name = image.split("@", 1)[0]  # drop an "@sha256:..." digest if present
    base, sep, old_tag = name.rpartition(":")
    if not sep or "/" in old_tag:
        # No tag present; a colon, if any, belongs to a registry port.
        base = name
    return f"{base}:{tag}"


def main():
    """Update the JMX exporter and sql-exporter in all selected namespaces.

    Asks for confirmation, then for every namespace labelled
    ``PART_OF_CRATEDB`` that is selected by ``NS_LIST``:

    1. takes the first StatefulSet matching ``NODE_NAME_HOT``,
    2. works out the new sql-exporter image, the new fetch-jmx-exporter
       command, and the updated CRATE_JAVA_OPTS / volumeMounts,
    3. patches the StatefulSet at the positions where those containers were
       actually found,
    4. performs a rolling restart of the matching pods when something was
       outdated or ``FORCE_ROLLING_RESTART`` is set.
    """
    ic(KUBE_CONTEXT, NS_LIST, FORCE_ROLLING_RESTART)
    ready = input("Ready to continue? (y/n): ")
    if ready.lower() != "y":
        return

    config.load_kube_config(context=KUBE_CONTEXT)
    v1 = client.CoreV1Api()
    sts_api = client.AppsV1Api()

    # MARK: List namespaces
    # Get all namespaces with the label "app.kubernetes.io/part-of=cratedb"
    namespaces = v1.list_namespace(label_selector=PART_OF_CRATEDB).items
    for namespace in namespaces:
        namespace_name = namespace.metadata.name
        if not _namespace_selected(namespace_name, NS_LIST):
            continue
        ic(namespace_name)

        sts_list = sts_api.list_namespaced_stateful_set(
            namespace_name, label_selector=NODE_NAME_HOT
        ).items
        if not sts_list:
            ic("No StatefulSets found in the namespace.")
            continue
        sts_name = sts_list[0].metadata.name
        pod_spec = sts_list[0].spec.template.spec
        # Both lists may be None on the API object.
        containers = pod_spec.containers or []
        init_containers = pod_spec.init_containers or []

        jmx_restart_pods = True
        sql_exporter_restart_pods = True

        # MARK: version_exists
        for container in init_containers:
            if container.command is None:
                continue
            if jmx_exporter.JAR_NEW.value in " ".join(container.command):
                jmx_restart_pods = False
                ic("JMX seems to be current.")
                break  # Stop searching once we've found the version

        # MARK: sql_exporter
        updated_sql_exporter_image = None
        sql_exporter_index, container = _find_named(containers, "sql-exporter")
        if container is None:
            ic("No sql-exporter container found.")
            sql_exporter_restart_pods = False
        else:
            ic(container.image)
            updated_sql_exporter_image = _retag_image(
                container.image, sql_exporter.VERSION.value
            )
            if container.image == updated_sql_exporter_image:
                ic("SQL Exporter seems to be current.")
                sql_exporter_restart_pods = False

        # MARK: initContainer
        wget_index, updated_wget = _find_named(init_containers, "fetch-jmx-exporter")
        if updated_wget is not None:
            command = updated_wget.command
            # Positions 2 and 3 are overwritten below.
            # NOTE(review): presumably the output path and the download URL
            # of a wget call; confirm against the StatefulSet.
            if command is None or len(command) < 4:
                ic("Unexpected fetch-jmx-exporter command, not patching it.")
                updated_wget = None
            else:
                command[2] = f"/jmxdir/{jmx_exporter.JAR_NEW.value}"
                command[3] = (
                    "https://repo1.maven.org/maven2/io/crate/crate-jmx-exporter/"
                    f"{jmx_exporter.VERSION_NEW.value}/{jmx_exporter.JAR_NEW.value}"
                )

        # MARK: CRATE_JAVA_OPTS
        # The container to update is the one that defines CRATE_JAVA_OPTS,
        # wherever it sits in the container list.
        crate_index, crate_container, java_opts = _find_container_with_env(
            containers, "CRATE_JAVA_OPTS"
        )
        container_env = None
        updated_container_volumemounts = []
        if crate_container is None:
            ic("No container with CRATE_JAVA_OPTS found.")
        else:
            container_env = crate_container.env
            if java_opts.value:
                java_opts.value = java_opts.value.replace(
                    jmx_exporter.JAR_OLD.value, jmx_exporter.JAR_NEW.value
                )
            # MARK: VolumeMounts
            # For the volumeMounts, we need to update the mountPath and subPath and
            # then return the whole list of volumeMounts again!
            for volume_mount in crate_container.volume_mounts or []:
                if volume_mount.mount_path:
                    volume_mount.mount_path = volume_mount.mount_path.replace(
                        jmx_exporter.JAR_OLD.value, jmx_exporter.JAR_NEW.value
                    )
                if volume_mount.sub_path:
                    volume_mount.sub_path = volume_mount.sub_path.replace(
                        jmx_exporter.JAR_OLD.value, jmx_exporter.JAR_NEW.value
                    )
                updated_container_volumemounts.append(volume_mount)

        # MARK: Patch STS
        # Every patch targets the index where the container was found and is
        # skipped when there is nothing to replace.
        if updated_sql_exporter_image is not None:
            patch_sts(
                sts_name,
                namespace_name,
                f"/spec/template/spec/containers/{sql_exporter_index}/image",
                updated_sql_exporter_image,
            )
        if updated_container_volumemounts:
            patch_sts(
                sts_name,
                namespace_name,
                f"/spec/template/spec/containers/{crate_index}/volumeMounts",
                updated_container_volumemounts,
            )
        if container_env is not None:
            patch_sts(
                sts_name,
                namespace_name,
                f"/spec/template/spec/containers/{crate_index}/env",
                container_env,
            )
        if updated_wget is not None:
            patch_sts(
                sts_name,
                namespace_name,
                f"/spec/template/spec/initContainers/{wget_index}",
                updated_wget,
            )

        # MARK: Restart pods
        pod_list = v1.list_namespaced_pod(
            namespace_name, label_selector=NODE_NAME_HOT
        ).items
        if not pod_list:
            ic("No pods found, suspended cluster?")
            continue
        ic(jmx_restart_pods, sql_exporter_restart_pods, FORCE_ROLLING_RESTART)
        if jmx_restart_pods or sql_exporter_restart_pods or FORCE_ROLLING_RESTART:
            rolling_restart(pod_list, namespace_name)
# Entry point of the script: run the update only when the file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment