Last active
May 6, 2026 13:17
-
-
Save tdewin/8254b00a03ac073623f7359ff2566739 to your computer and use it in GitHub Desktop.
gather-insights (MIT LICENSE, VAGUELY TESTED, BE CAREFUL, NO WARRANTY, FOR DEMO ENV ONLY)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # MIT LICENSE: sample code only, review and only for demo env | |
| # BE CAREFUL AI GENERATED BASED ON gather.py PROMPT with kubernetes api | |
| # BE CAREFUL AI GENERATED BASED ON gather.py PROMPT with kubernetes api | |
| # BE CAREFUL AI GENERATED BASED ON gather.py PROMPT with kubernetes api | |
| # BE CAREFUL AI GENERATED BASED ON gather.py PROMPT with kubernetes api | |
| # BE CAREFUL AI GENERATED BASED ON gather.py PROMPT with kubernetes api | |
| # Makefile | |
| ''' | |
| run: | |
| venv/bin/python3 collect.py | |
| py: | |
| venv/bin/python3 | |
| terminal: | |
| sh -c "source venv/bin/activate;bash" | |
| env: | |
| python -m venv venv | |
| dep: | |
| venv/bin/pip install kubernetes | |
| pip: | |
| sudo dnf install python3-pip | |
| ''' | |
| # disable warnings by manually importing first | |
| import urllib3 | |
| urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) | |
| import socket | |
| import time | |
| import base64 | |
| import tempfile | |
| import tarfile | |
| import yaml | |
| from pathlib import Path | |
| from kubernetes import client, config, stream | |
| from kubernetes.client.exceptions import ApiException | |
# Namespace that holds the insights-operator deployment and the gather Job.
NS = "openshift-insights"
# Name of the one-off gather Job; main() also uses it in the `job-name=` label
# selector to find the Job's pod.
JOB_NAME = "insights-operator-job-manual"
# Local directory that receives the downloaded data.
LOCAL_DIR = Path("./insights-data")

# The Template embedded directly.
# Rendered with str.format():
#   {job_name}  -> metadata.name of the Job
#   {image_url} -> image of the `insights-operator` init container
# Literal YAML braces are doubled ({{ }}) so str.format() emits them as { }.
GATHER_JOB_YAML = """
apiVersion: batch/v1
kind: Job
metadata:
  name: {job_name}
  annotations:
    config.openshift.io/inject-proxy: insights-operator
spec:
  backoffLimit: 6
  ttlSecondsAfterFinished: 600
  template:
    spec:
      restartPolicy: OnFailure
      serviceAccountName: operator
      # nodeSelector:
      #   beta.kubernetes.io/os: linux
      #   node-role.kubernetes.io/master: ""
      tolerations:
      - effect: NoSchedule
        key: node-role.kubernetes.io/master
        operator: Exists
      - effect: NoExecute
        key: node.kubernetes.io/unreachable
        operator: Exists
        tolerationSeconds: 900
      - effect: NoExecute
        key: node.kubernetes.io/not-ready
        operator: Exists
        tolerationSeconds: 900
      volumes:
      - name: snapshots
        emptyDir: {{}}
      - name: service-ca-bundle
        configMap:
          name: service-ca-bundle
          optional: true
      initContainers:
      - name: insights-operator
        image: {image_url}
        terminationMessagePolicy: FallbackToLogsOnError
        volumeMounts:
        - name: snapshots
          mountPath: /var/lib/insights-operator
        - name: service-ca-bundle
          mountPath: /var/run/configmaps/service-ca-bundle
          readOnly: true
        ports:
        - containerPort: 8443
          name: https
        resources:
          requests:
            cpu: 10m
            memory: 70Mi
        args:
        - gather
        - -v=4
        - --config=/etc/insights-operator/server.yaml
      containers:
      - name: sleepy
        image: quay.io/openshift/origin-base:latest
        args:
        - /bin/sh
        - -c
        - sleep 10m
        volumeMounts: [{{name: snapshots, mountPath: /var/lib/insights-operator}}]
"""
def main():
    """Run a one-off Insights Operator gather Job and download its output.

    Steps:
      1. Check up to 10 times (10 s apart) whether a Job named JOB_NAME
         still exists; give up if it never disappears.
      2. Read the image of the first container of the live
         `insights-operator` deployment.
      3. Create the Job from GATHER_JOB_YAML with that image.
      4. Wait for the Job's pod to appear.
      5. Wait for the first init container to exit with code 0, then for
         the first regular container to be running.
      6. Stream /var/lib/insights-operator out of the `sleepy` container as
         a base64-encoded tar.gz and extract it into LOCAL_DIR.
      7. Delete the Job.
      8. Print a suggested `scp` command for the newest recent .gz bundle.

    Returns:
        None. Problems are reported on stdout and end the run early.

    Raises:
        ApiException: if the Job lookup fails with a status other than 404.
            The create/list/read/exec/delete calls are not guarded, so
            their errors propagate as well.
    """
    # Local import: only this function needs an in-memory file object.
    import io

    # Load kubernetes configuration (e.g., ~/.kube/config)
    config.load_kube_config()
    v1 = client.CoreV1Api()
    apps_v1 = client.AppsV1Api()
    batch_v1 = client.BatchV1Api()

    # 1. Check for existing job and wait up to 10 times
    print(f"Checking for existing job: {JOB_NAME}...")
    job_clear = False
    for i in range(10):
        try:
            batch_v1.read_namespaced_job(name=JOB_NAME, namespace=NS)
        except ApiException as e:
            # 404 means the Job is gone, which is the state we are waiting for.
            if e.status == 404:
                job_clear = True
                break
            raise
        print(f"Job is busy. Waiting 10 seconds... ({i+1}/10)")
        time.sleep(10)
    if not job_clear:
        print(f"Error: Job {JOB_NAME} still exists after waiting. Exiting.")
        return

    # 2. Extract Image URL from the live deployment
    print("Fetching current insights-operator image...")
    try:
        deployment = apps_v1.read_namespaced_deployment(name="insights-operator", namespace=NS)
        img_url = deployment.spec.template.spec.containers[0].image
    except ApiException as e:
        print(f"Error: Could not find insights-operator deployment. Details: {e}")
        return

    # 3. Apply the YAML with injected Image
    print("Deploying Gather Job...")
    formatted_yaml = GATHER_JOB_YAML.format(job_name=JOB_NAME, image_url=img_url)
    job_dict = yaml.safe_load(formatted_yaml)
    batch_v1.create_namespaced_job(namespace=NS, body=job_dict)

    # 4. Identify the specific Pod Name
    # NOTE(review): the waits in steps 4, 5 and 5.5 have no timeout.
    pod_name = ""
    print("Waiting for Pod to be created...")
    while not pod_name:
        pods = v1.list_namespaced_pod(namespace=NS, label_selector=f"job-name={JOB_NAME}")
        if pods.items:
            pod_name = pods.items[0].metadata.name
        else:
            time.sleep(1)
    print(f"Tracking Pod: {pod_name}")

    # 5. Wait for the Init Container to terminate with exit code 0
    # NOTE(review): the early returns below leave the Job in place,
    # presumably so its logs can still be inspected -- confirm.
    print("Waiting for init container 'insights-operator' to complete...")
    init_completed = False
    while not init_completed:
        pod = v1.read_namespaced_pod(name=pod_name, namespace=NS)
        if pod.status.init_container_statuses:
            init_state = pod.status.init_container_statuses[0].state
            if init_state.terminated:
                if init_state.terminated.exit_code == 0:
                    print("✅ Data collection complete.")
                    init_completed = True
                else:
                    print(f"⚠️ Init container failed with exit code: {init_state.terminated.exit_code}")
                    return
        if pod.status.phase == "Failed":
            print("⚠️ Pod is in error state (Failed). Check logs.")
            return
        if not init_completed:
            time.sleep(5)

    # 5.5 Wait for the main container ('sleepy') to actually be running
    # This prevents the 'container not found' 500 Error when exec-ing too quickly
    print("Waiting for main container 'sleepy' to start...")
    while True:
        pod = v1.read_namespaced_pod(name=pod_name, namespace=NS)
        if pod.status.phase == "Running":
            if pod.status.container_statuses:
                sleepy_state = pod.status.container_statuses[0].state
                if sleepy_state.running:
                    break
        time.sleep(2)

    # 6. Transfer Data locally (Native replacement for 'oc cp')
    LOCAL_DIR.mkdir(exist_ok=True)
    print(f"Downloading data natively to {LOCAL_DIR}...")
    # We use tar + base64 inside the pod to safely stream binary data over the K8s WebSocket
    exec_cmd = ['/bin/sh', '-c', 'tar czf - -C /var/lib/insights-operator . 2>/dev/null | base64 -w 0']
    resp = stream.stream(
        v1.connect_get_namespaced_pod_exec,
        pod_name,
        NS,
        container="sleepy",
        command=exec_cmd,
        stderr=True, stdin=False,
        stdout=True, tty=False
    )
    clean_resp = resp.strip()
    if not clean_resp:
        print("❌ Error: Received empty data stream from the pod.")
        return
    try:
        tar_data = base64.b64decode(clean_resp)
        # The whole archive is already in memory, so read it from a BytesIO
        # instead of round-tripping through a temporary file.
        with tarfile.open(fileobj=io.BytesIO(tar_data), mode='r:gz') as tar:
            if hasattr(tarfile, "data_filter"):
                # Refuse members that would escape LOCAL_DIR (absolute paths,
                # "..", unsafe links) on Pythons with extraction filters.
                tar.extractall(path=LOCAL_DIR, filter="data")
            else:
                tar.extractall(path=LOCAL_DIR)
    except Exception as e:
        # Deliberately broad: any decode/extract problem ends the run with a message.
        print(f"❌ Error decoding or extracting archive: {e}")
        return

    # 7. Cleanup the remote Job
    print("Cleaning up remote Job...")
    batch_v1.delete_namespaced_job(
        name=JOB_NAME,
        namespace=NS,
        body=client.V1DeleteOptions(propagation_policy='Background')
    )

    # 8. Find the most recent .gz file (last 5 mins)
    now = time.time()
    recent_bundles = []
    for candidate in LOCAL_DIR.rglob("*.gz"):
        mtime = candidate.stat().st_mtime
        if (now - mtime) < 300:
            recent_bundles.append((mtime, candidate))
    if recent_bundles:
        # rglob order is arbitrary, so pick the newest by modification time.
        latest_bundle = max(recent_bundles, key=lambda entry: entry[0])[1]
        hostname = socket.gethostname()
        print("\n" + "="*40)
        print(f"BUNDLE READY: {latest_bundle.name}")
        print("SUGGESTED TRANSFER:")
        print(f"scp {latest_bundle} user@host:~/collect/{hostname}")
        print("="*40)
    else:
        print("\n❌ No recent .gz bundle found in local directory.")


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| # BE CAREFUL AI GENERATED BASED ON BASH SCRIPT ABOVE | |
| # BE CAREFUL AI GENERATED BASED ON BASH SCRIPT ABOVE | |
| # BE CAREFUL AI GENERATED BASED ON BASH SCRIPT ABOVE | |
| # BE CAREFUL AI GENERATED BASED ON BASH SCRIPT ABOVE | |
| # MIT LICENSE: sample code only, review and only for demo env | |
| # | |
| import socket | |
| import json | |
| import subprocess | |
| import time | |
| from pathlib import Path | |
| # --- Configuration --- | |
# Namespace that holds the insights-operator deployment and the gather Job.
NS = "openshift-insights"
# Name of the gather Job; main() also uses it in the `job-name=` label
# selector to find the Job's pod.
JOB_NAME = "insights-operator-job"
# Local directory that `oc cp` copies the gathered data into.
LOCAL_DIR = Path("./insights-data")

# The Template embedded directly.
# Rendered with str.format():
#   {job_name}  -> metadata.name of the Job
#   {image_url} -> image of the `insights-operator` init container
# Literal YAML braces are doubled ({{ }}) so str.format() emits them as { }.
GATHER_JOB_YAML = """
apiVersion: batch/v1
kind: Job
metadata:
  name: {job_name}
  annotations:
    config.openshift.io/inject-proxy: insights-operator
spec:
  backoffLimit: 6
  ttlSecondsAfterFinished: 600
  template:
    spec:
      restartPolicy: OnFailure
      serviceAccountName: operator
      # nodeSelector:
      #   beta.kubernetes.io/os: linux
      #   node-role.kubernetes.io/master: ""
      tolerations:
      - effect: NoSchedule
        key: node-role.kubernetes.io/master
        operator: Exists
      - effect: NoExecute
        key: node.kubernetes.io/unreachable
        operator: Exists
        tolerationSeconds: 900
      - effect: NoExecute
        key: node.kubernetes.io/not-ready
        operator: Exists
        tolerationSeconds: 900
      volumes:
      - name: snapshots
        emptyDir: {{}}
      - name: service-ca-bundle
        configMap:
          name: service-ca-bundle
          optional: true
      initContainers:
      - name: insights-operator
        image: {image_url}
        terminationMessagePolicy: FallbackToLogsOnError
        volumeMounts:
        - name: snapshots
          mountPath: /var/lib/insights-operator
        - name: service-ca-bundle
          mountPath: /var/run/configmaps/service-ca-bundle
          readOnly: true
        ports:
        - containerPort: 8443
          name: https
        resources:
          requests:
            cpu: 10m
            memory: 70Mi
        args:
        - gather
        - -v=4
        - --config=/etc/insights-operator/server.yaml
      containers:
      - name: sleepy
        image: quay.io/openshift/origin-base:latest
        args:
        - /bin/sh
        - -c
        - sleep 10m
        volumeMounts: [{{name: snapshots, mountPath: /var/lib/insights-operator}}]
"""
def run_shell(cmd, input_data=None):
    """Run *cmd* through the shell and return its stdout, whitespace-trimmed.

    Args:
        cmd: Command line handed to the shell as a single string.
        input_data: Optional text fed to the command's stdin.

    Returns:
        The captured stdout with leading/trailing whitespace removed.
        stderr and the exit status are captured but not reported.
    """
    completed = subprocess.run(
        cmd,
        shell=True,
        input=input_data,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    captured = completed.stdout
    return captured.strip()
def run_oc(cmd, input_data=None):
    """Run `oc <cmd>` through the shell and return its stdout, whitespace-trimmed.

    Args:
        cmd: Arguments for `oc`, as one shell-interpreted string.
        input_data: Optional text fed to the command's stdin.

    Returns:
        The trimmed stdout; an empty string when the command printed
        nothing to stdout (failures are not reported separately).
    """
    # Same subprocess invocation as run_shell, with the `oc` prefix added.
    oc_command = f"oc {cmd}"
    return run_shell(oc_command, input_data)
def main():
    """Run a one-off Insights Operator gather Job via `oc` and fetch its output.

    Steps:
      1. Wait until no pod labelled `job-name=JOB_NAME` is listed.
      2. Read the image of the first container of the live
         `insights-operator` deployment.
      3. Apply GATHER_JOB_YAML with that image.
      4. Wait for the Job's pod to be listed.
      5. Wait for the first init container to report exit code 0.
      6. Copy /var/lib/insights-operator from the pod into LOCAL_DIR.
      7. Delete the Job.
      8. Print a suggested `scp` command for the newest recent .gz bundle.

    Returns:
        None. Progress and problems are reported on stdout.

    NOTE(review): run_oc returns only stdout, so a failing `oc` call looks
    like empty output here; the waits in steps 1, 4 and 5 have no timeout.
    """
    # 1. Cleanup: Ensure no old job is hanging around
    print(f"Checking for existing job: {JOB_NAME}...")
    while run_oc(f"get pod -n {NS} -l job-name={JOB_NAME} -o name"):
        print(f"Job is busy. Please run: oc delete job -n {NS} {JOB_NAME}")
        time.sleep(3)

    # 2. Extract Image URL from the live deployment
    print("Fetching current insights-operator image...")
    deploy_raw = run_oc(f"get -n {NS} deployment insights-operator -o json")
    if not deploy_raw:
        print("Error: Could not find insights-operator deployment.")
        return
    img_url = json.loads(deploy_raw)['spec']['template']['spec']['containers'][0]['image']

    # 3. Apply the YAML with injected Image
    print("Deploying Gather Job...")
    formatted_yaml = GATHER_JOB_YAML.format(job_name=JOB_NAME, image_url=img_url)
    run_oc(f"apply -n {NS} -f -", input_data=formatted_yaml)

    # 4. Identify the specific Pod Name
    pod_name = ""
    while not pod_name:
        listing = run_oc(f"get pod -n {NS} -l job-name={JOB_NAME} -o custom-columns=:metadata.name --no-headers")
        if listing:
            # Track only the first pod: a multi-line listing would otherwise
            # be spliced verbatim into the shell commands below.
            pod_name = listing.splitlines()[0].strip()
        else:
            time.sleep(1)
    print(f"Tracking Pod: {pod_name}")

    # 5. Wait for the Init Container to terminate with exit code 0
    print("Waiting for init container 'insights-operator' to complete...")
    exit_path = "'{.status.initContainerStatuses[0].state.terminated.exitCode}'"
    while True:
        exit_code = run_oc(f"get -n {NS} pod {pod_name} -o jsonpath={exit_path}")
        if exit_code == "0":
            print("✅ Data collection complete.")
            break
        # Check for error state (warn only; polling continues)
        if "error" in run_oc(f"get -n {NS} pod {pod_name}").lower():
            print("⚠️ Pod is in error state. Check logs.")
        time.sleep(5)

    # 6. Transfer Data locally
    LOCAL_DIR.mkdir(exist_ok=True)
    print(f"Downloading data to {LOCAL_DIR}...")
    run_oc(f"cp {NS}/{pod_name}:/var/lib/insights-operator {LOCAL_DIR}")

    # 7. Cleanup the remote Job
    run_oc(f"delete job -n {NS} {JOB_NAME}")

    # 8. Find the most recent .gz file (last 5 mins)
    now = time.time()
    recent_bundles = []
    for candidate in LOCAL_DIR.rglob("*.gz"):
        mtime = candidate.stat().st_mtime
        if (now - mtime) < 300:
            recent_bundles.append((mtime, candidate))
    if recent_bundles:
        # rglob order is arbitrary, so pick the newest by modification time.
        latest_bundle = max(recent_bundles, key=lambda entry: entry[0])[1]
        hostname = socket.gethostname()
        print("\n" + "="*40)
        print(f"BUNDLE READY: {latest_bundle.name}")
        print("SUGGESTED TRANSFER:")
        print(f"scp {latest_bundle} user@host:~/collect/{hostname}")
        print("="*40)
    else:
        print("\n❌ No recent .gz bundle found in local directory.")


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Run a one-off Insights Operator gather Job with `oc`, copy the result to
# ./insights-data and print a suggested scp command for the recent bundle(s).
# Uses: oc, jq, sed, find, and gather-job.yaml from the current directory.
NS=openshift-insights
JOB=insights-operator-job

# Wait until no pod of a previous run is listed any more.
while [ -n "$(oc get pod -n "$NS" -l "job-name=$JOB" -o name)" ]; do
  echo 'still busy..'
  echo "consider: oc delete job -n $NS $JOB"
  sleep 3
done

# Image of the live operator deployment. The jq filter is quoted so the shell
# cannot glob-expand "[0]". jq prints a JSON string (quotes included), which is
# substituted as-is into the manifest.
IMG=$(oc get -n "$NS" deployment insights-operator -o json | jq '.spec.template.spec.containers[0].image')
if ! sed "s,{{INSIGHTS_IMG}},$IMG," gather-job.yaml | oc apply -n "$NS" -f -; then
  # Without a Job there will never be a pod to wait for.
  echo "oc apply failed" >&2
  exit 1
fi

# The pod shows up some time after `oc apply`; poll until it is listed so the
# wait loop below never runs with an empty pod name.
POD_NAME=""
until [ -n "$POD_NAME" ]; do
  POD_NAME=$(oc get pod -n "$NS" -l "job-name=$JOB" -o custom-columns=:metadata.name --no-headers | head -n 1)
  [ -n "$POD_NAME" ] || sleep 1
done
echo "$NS" "$JOB" "$POD_NAME"

# Wait for the first init container to terminate with exit code 0.
until oc get -n "$NS" pod "$POD_NAME" -o jsonpath='{.status.initContainerStatuses[0].state.terminated.exitCode}' | grep -q "^0$"; do
  echo "waiting.."
  oc get -n "$NS" pod "$POD_NAME" --no-headers
  sleep 5
done

oc cp "$NS/$POD_NAME:/var/lib/insights-operator" ./insights-data
oc delete job -n "$NS" "$JOB"

# find recent GZ bundle
GZ=$(find insights-data -iname '*.gz' -mmin -5)
# $GZ is left unquoted on purpose: several matches are joined onto one line.
echo scp $GZ "user@host:~/collect/$(hostname)"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Job manifest template for a one-off Insights Operator gather run.
# {{INSIGHTS_IMG}} is a placeholder: the accompanying shell script replaces it
# with the live insights-operator image (via sed) before `oc apply`.
apiVersion: batch/v1
kind: Job
metadata:
  name: insights-operator-job
  annotations:
    config.openshift.io/inject-proxy: insights-operator
spec:
  backoffLimit: 6
  ttlSecondsAfterFinished: 600
  template:
    spec:
      restartPolicy: OnFailure
      serviceAccountName: operator
      # nodeSelector:
      #   beta.kubernetes.io/os: linux
      #   node-role.kubernetes.io/master: ""
      tolerations:
      - effect: NoSchedule
        key: node-role.kubernetes.io/master
        operator: Exists
      - effect: NoExecute
        key: node.kubernetes.io/unreachable
        operator: Exists
        tolerationSeconds: 900
      - effect: NoExecute
        key: node.kubernetes.io/not-ready
        operator: Exists
        tolerationSeconds: 900
      volumes:
      # Shared scratch volume: written by the init container, read from `sleepy`.
      - name: snapshots
        emptyDir: {}
      - name: service-ca-bundle
        configMap:
          name: service-ca-bundle
          optional: true
      initContainers:
      - name: insights-operator
        image: {{INSIGHTS_IMG}}
        terminationMessagePolicy: FallbackToLogsOnError
        volumeMounts:
        - name: snapshots
          mountPath: /var/lib/insights-operator
        - name: service-ca-bundle
          mountPath: /var/run/configmaps/service-ca-bundle
          readOnly: true
        ports:
        - containerPort: 8443
          name: https
        resources:
          requests:
            cpu: 10m
            memory: 70Mi
        args:
        - gather
        - -v=4
        - --config=/etc/insights-operator/server.yaml
      containers:
      # Keeps the pod alive for 10 minutes so the snapshots volume can be copied out.
      - name: sleepy
        image: quay.io/openshift/origin-base:latest
        args:
        - /bin/sh
        - -c
        - sleep 10m
        volumeMounts: [{name: snapshots, mountPath: /var/lib/insights-operator}]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment