Last active
November 26, 2024 07:40
-
-
Save minrk/7ad21b18f7a5b3908d74f108dc564cd5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Script to patch pvc_name associations in KubeSpawner
for https://github.com/jupyterhub/zero-to-jupyterhub-k8s/issues/3574

Meant to be run via `kubectl exec` in the hub pod.

If a server has multiple matching PVCs, `select_pvc` is used to pick which one to associate.
Default: pick the oldest match. Edit `select_pvc` to change this behavior.

For a dry run (to see what it will do):

    cat collect_pvcs.py | kubectl exec -i $(kubectl get pod -l component=hub -o name) -- python3

To apply actual changes:

    cat collect_pvcs.py | kubectl exec -i $(kubectl get pod -l component=hub -o name) -- python3 - --apply

License: CC-0 (public domain)
https://gist.github.com/minrk/7ad21b18f7a5b3908d74f108dc564cd5
""" | |
import asyncio | |
import os | |
from argparse import ArgumentParser | |
from kubernetes_asyncio.config import load_incluster_config | |
from kubernetes_asyncio.client import CoreV1Api | |
from kubernetes_asyncio.client.api_client import ApiClient | |
from sqlalchemy.orm import raiseload, joinedload, load_only | |
from jupyterhub.app import JupyterHub | |
from jupyterhub import orm | |
# Kubernetes namespace to search for PVCs, read from the POD_NAMESPACE
# environment variable; raises KeyError at import time if it is not set.
namespace = os.environ["POD_NAMESPACE"]
def select_pvc(pvc_list):
    """Pick one pvc when there are multiple matches.

    Edit this function to change how PVCs are chosen.

    Default: pick the oldest one because the z2jh 4.0 upgrade bug
    will result in creating new PVCs that it shouldn't have,
    so keep the old one.

    Args:
        pvc_list: iterable of PVC objects exposing
            ``metadata.creation_timestamp``.

    Returns:
        The PVC with the earliest creation timestamp (the first such PVC
        when several share it), or ``None`` when ``pvc_list`` is empty.
    """

    def get_creation_time(pvc):
        return pvc.metadata.creation_timestamp

    # min() returns the first minimal element, the same pick as a stable
    # sort followed by [0], without sorting and without failing on no input.
    return min(pvc_list, key=get_creation_time, default=None)
async def yield_pvcs(k8s, namespace):
    """Yield every PVC labelled component=singleuser-storage in a namespace.

    Pages through ``k8s.list_namespaced_persistent_volume_claim``, passing
    the continue token of each response to the next request, until a
    response carries no token.
    """
    # None requests the first page
    page_token = None
    while True:
        page = await k8s.list_namespaced_persistent_volume_claim(
            namespace,
            _continue=page_token,
            label_selector="component=singleuser-storage",
        )
        page_token = page._metadata._continue
        for claim in page.items:
            yield claim
        if page_token is None:
            # no token on the response: that was the last page
            return
async def collect_user_pvcs():
    """Collect a dict of (username, servername): [pvc]

    Loads the in-cluster Kubernetes configuration, lists the PVCs in
    ``namespace`` via ``yield_pvcs`` and groups them by their
    ``hub.jupyter.org/username`` and ``hub.jupyter.org/servername``
    annotations.

    Returns:
        dict mapping ``(username, servername)`` to the list of matching PVC
        objects. ``servername`` defaults to ``""`` when the annotation is
        missing; ``username`` is ``None`` when that annotation is missing
        (including PVCs that carry no annotations at all).
    """
    load_incluster_config()
    user_pvcs = {}
    async with ApiClient() as api_client:
        k8s = CoreV1Api(api_client)
        async for pvc in yield_pvcs(k8s, namespace):
            # annotations is None rather than {} on an object without any
            # annotations; fall back to an empty mapping so such a PVC is
            # grouped under (None, "") instead of raising AttributeError
            annotations = pvc.metadata.annotations or {}
            username = annotations.get("hub.jupyter.org/username")
            servername = annotations.get("hub.jupyter.org/servername", "")
            user_pvcs.setdefault((username, servername), []).append(pvc)
    return user_pvcs
def connect_db():
    """Open a session on the JupyterHub database.

    Builds a JupyterHub application object, loads its config file, prints
    the resulting database URL and returns a new session created from
    ``orm.new_session_factory`` with the hub's ``db_kwargs``.
    """
    app = JupyterHub()
    app.load_config_file(app.config_file)
    url = app.db_url
    print(f"Connecting to {url}")
    session_factory = orm.new_session_factory(url, **app.db_kwargs)
    return session_factory()
def _link_spawner_to_pvc(db, spawner, state, label, pvc_name, dry_run):
    """Announce linking `label` to `pvc_name` and, unless dry_run, commit it to spawner.state."""
    # build a new dict instead of mutating the one read from the spawner
    new_state = dict(state)
    new_state["pvc_name"] = pvc_name
    print(
        f"!!!!! linking server {label} to pvc {pvc_name} {'(dry run)' * dry_run} !!!!"
    )
    if not dry_run:
        # actually persist pvc_name in state
        spawner.state = new_state
        db.commit()


async def main():
    """Report, and with ``--apply`` repair, the pvc_name stored in each spawner's state.

    For every spawner in the hub database, look up the PVCs whose annotations
    match its (username, servername):

    - exactly one match: link it if the spawner has no pvc_name yet; a
      spawner already linked to a *different* pvc is only reported
    - several matches: link the one chosen by ``select_pvc`` unless the
      spawner is already linked to it
    - no match: only reported

    PVCs that matched no spawner are listed at the end, followed by a summary.
    Without ``--apply`` nothing is written to the database.
    """
    parser = ArgumentParser()
    parser.add_argument(
        "--apply",
        action="store_true",
        help="persist the reported changes (default: dry run, only print them)",
    )
    args = parser.parse_args()
    dry_run = not args.apply

    user_pvcs = await collect_user_pvcs()
    db = connect_db()
    changes_made = 0
    try:
        # echo the SQL statements issued through this connection
        db.get_bind().echo = True
        # load only the columns used below; any other relationship access raises
        spawners = (
            db.query(orm.Spawner)
            .options(load_only(orm.Spawner.state, orm.Spawner.name))
            .options(joinedload(orm.Spawner.user).load_only(orm.User.name))
            .options(raiseload('*'))
        )
        for spawner in spawners:
            state = spawner.state or {}
            username = spawner.user.name
            servername = spawner.name
            label = f"{username}/{servername}"
            # pop, so whatever is left in user_pvcs afterwards matched no spawner
            found_pvcs = user_pvcs.pop((username, servername), [])
            state_pvc_name = state.get("pvc_name")

            if not found_pvcs:
                print(f"{label} has no matching pvc")
                continue

            if len(found_pvcs) == 1:
                pvc = found_pvcs[0]
                pvc_name = pvc.metadata.name
                print(f"{label} has pvc {pvc_name}")
                if state_pvc_name == pvc_name:
                    print(f" {label} is linked to only matching pvc {pvc_name} (good)")
                elif state_pvc_name:
                    print(
                        f" {label} is linked to pvc {state_pvc_name}, but labels match {pvc_name}!"
                    )
                    # TODO: try to fix it? Perhaps more likely labeling is wrong
                else:
                    print(f" {label} is not linked to a pvc")
                print(f" {pvc_name}: created at {pvc.metadata.creation_timestamp}")
                if not state_pvc_name:
                    _link_spawner_to_pvc(db, spawner, state, label, pvc_name, dry_run)
                    changes_made += 1
                continue

            # more than one PVC matches this server
            found_pvc_names = [pvc.metadata.name for pvc in found_pvcs]
            print(f"{label} has multiple matching pvcs! {found_pvc_names}")
            if state_pvc_name:
                print(f" {label} is linked to {state_pvc_name}")
            else:
                print(f" {label} is not linked to a pvc")
            for pvc in found_pvcs:
                print(
                    f" {pvc.metadata.name}: created at {pvc.metadata.creation_timestamp}"
                )
            # selection rules for picking a pvc to link
            selected_pvc = select_pvc(found_pvcs)
            if selected_pvc and selected_pvc.metadata.name != state_pvc_name:
                _link_spawner_to_pvc(
                    db, spawner, state, label, selected_pvc.metadata.name, dry_run
                )
                changes_made += 1
    finally:
        # release the database connection even if a lookup or commit fails
        db.close()

    # print any PVCs that we didn't find links for
    for key, pvcs in user_pvcs.items():
        pvc_names = [pvc.metadata.name for pvc in pvcs]
        # display every pvc we found, just in case our identification is wrong
        print(f"PVCs not linked to a server ({key}): {pvc_names}")

    if changes_made:
        if dry_run:
            print("This was a dry run, no changes were made.")
            print(f"To actually apply the above {changes_made} changes, re-run with `--apply`")
        else:
            print(f"Made {changes_made} changes")
    else:
        print("Nothing to do!")
# Script entry point: run the async main() to completion when executed
# directly (including when piped to `python3` on stdin).
if __name__ == "__main__":
    asyncio.run(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment