Skip to content

Instantly share code, notes, and snippets.

@neelabalan
Created March 20, 2025 06:03
Show Gist options
  • Save neelabalan/20d2855e9570f9a5501e9a8d630ec551 to your computer and use it in GitHub Desktop.
Save neelabalan/20d2855e9570f9a5501e9a8d630ec551 to your computer and use it in GitHub Desktop.
Extract from helm files
#!/usr/bin/env python3
# Generated with Claude 3.7 Sonnet. This needs some tweaking, obviously.
import os
import re
import yaml
import glob
from pathlib import Path
from typing import dict, list, set, tuple
def extract_image_from_string(image_string: str) -> tuple[str, str]:
    """Extract repository and tag from an image string.

    The tag separator is the first ``:`` that follows the last ``/``, so a
    registry port (``localhost:5000/app:v1``) stays part of the repository.
    A trailing ``@digest`` is split off before the tag is looked for.

    Args:
        image_string: Image reference such as ``repo:tag``,
            ``host:port/repo:tag`` or ``repo@sha256:...``.

    Returns:
        ``(repository, tag)``. When the tag is missing or empty, the digest
        is returned in its place if there is one, otherwise ``'latest'``.
    """
    # A digest contains its own ':' (e.g. "sha256:..."), so remove it first.
    name, _, digest = image_string.partition('@')

    # Only a ':' in the final path segment can introduce a tag; an earlier
    # one belongs to a "host:port" registry prefix.
    last_segment_start = name.rfind('/') + 1
    colon = name.find(':', last_segment_start)
    if colon == -1:
        repo, tag = name, ''
    else:
        repo, tag = name[:colon], name[colon + 1:]

    return repo, tag or digest or 'latest'
def process_image_repository_tag(repo: str, tag_value) -> tuple[str, str]:
    """Pair a repository with a normalized tag.

    Numeric tags (int or float, including bool since it subclasses int) are
    converted with ``str()``. Any other falsy value falls back to
    ``'latest'``; everything else is passed through unchanged.
    """
    if isinstance(tag_value, (int, float)):
        # Numbers are stringified even when they are falsy (0 -> '0').
        return repo, str(tag_value)
    return repo, tag_value or 'latest'
def search_for_images_in_dict(obj: dict) -> list[tuple[str, str]]:
    """Return the image described directly by this mapping, if any.

    A string ``image`` key takes precedence and is parsed as a single
    reference. Otherwise a string ``repository`` key is combined with the
    optional ``tag`` key (``'latest'`` when absent). Nested values are not
    inspected here. The result has at most one entry.
    """
    # Combined form: image: repo:tag
    image_ref = obj.get('image')
    if isinstance(image_ref, str):
        return [extract_image_from_string(image_ref)]

    # Split form: repository: ... / tag: ...
    repository = obj.get('repository')
    if isinstance(repository, str):
        raw_tag = obj.get('tag', 'latest')
        return [process_image_repository_tag(repository, raw_tag)]

    return []
def extract_images_recursive(obj) -> list[tuple[str, str]]:
    """Walk a parsed YAML value and gather every image found in it.

    Dicts contribute their own direct image (if any) followed by whatever
    their values contain; lists contribute whatever their items contain.
    Any other value yields an empty list.
    """
    if isinstance(obj, dict):
        # The dict's own image comes first, then its children in order.
        found = list(search_for_images_in_dict(obj))
        children = obj.values()
    elif isinstance(obj, list):
        found = []
        children = obj
    else:
        return []

    for child in children:
        found += extract_images_recursive(child)
    return found
def extract_images_from_yaml_file(file_path: str) -> list[tuple[str, str]]:
    """Extract Docker image names and tags from a YAML file.

    Every document in the file is scanned, so files that hold several
    ``---``-separated documents are handled. Empty files and empty
    documents contribute nothing.

    Args:
        file_path: Path of the YAML file to read.

    Returns:
        ``(repository, tag)`` pairs in document order, or an empty list if
        the file cannot be read or parsed (the error is printed).
    """
    images = []
    try:
        # Binary mode lets PyYAML detect the encoding (UTF-8 or BOM-marked
        # UTF-16) instead of depending on the platform's locale default.
        with open(file_path, 'rb') as f:
            # safe_load_all is lazy, so consume it while the file is open.
            for document in yaml.safe_load_all(f):
                images.extend(extract_images_recursive(document))
    except (yaml.YAMLError, OSError) as e:
        print(f"Error processing {file_path}: {e}")
        return []
    return images
def find_chart_yaml_files(directory: str) -> list[str]:
    """Find all Chart.yaml files which indicate Helm charts.

    Args:
        directory: Root directory to search. It is taken literally: glob
            metacharacters in it (``[``, ``*``, ``?``) are escaped.

    Returns:
        Paths of every ``Chart.yaml`` at or below ``directory``, in the
        order ``glob`` yields them.
    """
    # Escape only the caller-supplied part so "**" keeps its recursive meaning.
    pattern = f"{glob.escape(directory)}/**/Chart.yaml"
    return glob.glob(pattern, recursive=True)
def get_yaml_files_in_templates(templates_dir: str) -> list[str]:
    """Get all YAML files in a templates directory.

    Args:
        templates_dir: Directory to search recursively. It is taken
            literally: glob metacharacters in it are escaped.

    Returns:
        All ``*.yaml`` matches followed by all ``*.yml`` matches found at or
        below ``templates_dir``.
    """
    # Escape the directory once; the wildcard suffixes must stay unescaped.
    base = glob.escape(templates_dir)
    yaml_files = glob.glob(f"{base}/**/*.yaml", recursive=True)
    yml_files = glob.glob(f"{base}/**/*.yml", recursive=True)
    return yaml_files + yml_files
def find_helm_files(directory: str) -> set[str]:
    """Collect the YAML files belonging to every Helm chart under a directory.

    A chart is any directory holding a ``Chart.yaml``. For each chart the
    result includes its ``values.yaml`` (when present) and every YAML file
    under its ``templates`` directory (when present).
    """
    collected = set()

    for chart_path in find_chart_yaml_files(directory):
        chart_root = os.path.dirname(chart_path)

        # Chart-level default values
        values_path = os.path.join(chart_root, "values.yaml")
        if os.path.exists(values_path):
            collected.add(values_path)

        # Rendered-manifest templates
        templates_path = os.path.join(chart_root, "templates")
        if os.path.exists(templates_path):
            collected.update(get_yaml_files_in_templates(templates_path))

    return collected
def collect_images_from_files(helm_files: set[str]) -> dict[str, set[str]]:
    """Group the tags found across all given files by image repository.

    Returns a dict mapping each repository name to the set of distinct
    tags seen for it in any of the files.
    """
    tags_by_image = {}
    for path in helm_files:
        for repository, tag in extract_images_from_yaml_file(path):
            # setdefault creates the tag set the first time a repo is seen.
            tags_by_image.setdefault(repository, set()).add(tag)
    return tags_by_image
def print_results(all_images: dict[str, set[str]]) -> None:
    """Write a report of images and their tags to standard output.

    Images are listed in sorted order, each followed by its sorted tags,
    and a summary line with the image count and total tag count closes the
    report. An empty mapping prints a single "nothing found" message.
    """
    if not all_images:
        print("No Docker images found in Helm charts.")
        return

    print("\nExtracted Docker images and tags:")
    tag_count = 0
    for image in sorted(all_images):
        image_tags = all_images[image]
        print(f"\n{image}:")
        for tag in sorted(image_tags):
            print(f" - {tag}")
        tag_count += len(image_tags)

    # Summary line
    print(f"\nTotal: {len(all_images)} unique images with {tag_count} tags")
def main():
    """Scan the current working directory for Helm charts and report their images."""
    scan_root = os.getcwd()
    print(f"Scanning for Helm charts in: {scan_root}")

    # Gather the chart files, then report how many were found.
    chart_files = find_helm_files(scan_root)
    print(f"Found {len(chart_files)} Helm chart files")

    # Extract the images from those files and print the report.
    print_results(collect_images_from_files(chart_files))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment