Created
March 20, 2025 06:03
-
-
Save neelabalan/20d2855e9570f9a5501e9a8d630ec551 to your computer and use it in GitHub Desktop.
Extract from helm files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# generated with claude 3.7 Sonnet. This needs some tweaking. obviously. | |
import glob
import os
import re
from pathlib import Path
from typing import Dict, List, Set, Tuple

import yaml
def extract_image_from_string(image_string: str) -> tuple[str, str]:
    """Split an image reference into ``(repository, tag)``.

    The reference is read as ``name[:tag][@digest]``. A colon only starts a
    tag when nothing after it contains a ``/``; otherwise it belongs to a
    registry ``host:port`` prefix (``localhost:5000/app``) and is kept as
    part of the repository.

    Returns:
        ``(repository, tag)``. When no tag is present the tag slot holds the
        digest if the reference is digest-pinned, else ``'latest'``.
    """
    # Peel the digest off first: it contains a colon of its own ("sha256:...").
    name, _, digest = image_string.partition('@')

    repo, colon, tag = name.rpartition(':')
    if not colon or '/' in tag:
        # No colon at all, or the last colon is a registry port separator.
        repo, tag = name, ''

    if tag:
        return repo, tag
    if digest:
        return repo, digest
    return repo, 'latest'
def process_image_repository_tag(repo: str, tag_value) -> tuple[str, str]:
    """Normalise a repository/tag pair so the tag is always a string.

    Args:
        repo: Repository name, returned unchanged.
        tag_value: Tag as loaded from YAML. It may be a string, a number, or
            another scalar such as a date when the tag was written unquoted.

    Returns:
        ``(repo, tag)`` where ``tag`` is ``str(tag_value)``, or ``'latest'``
        when the value is missing or empty.
    """
    # Numbers are handled first so that a literal tag of 0 is kept instead of
    # being treated as "missing" by the truthiness check below.
    if isinstance(tag_value, (int, float)):
        return repo, str(tag_value)
    if not tag_value:
        return repo, 'latest'
    # Stringify everything else so callers can hash and sort tags uniformly.
    return repo, str(tag_value)
def search_for_images_in_dict(obj: dict) -> list[tuple[str, str]]:
    """Return the image described directly by the keys of ``obj``, if any.

    Two layouts are recognised, checked in this order:

    * ``image: "repo:tag"`` - a single string reference.
    * ``repository: repo`` with an optional sibling ``tag`` key.

    Only ``obj`` itself is inspected; nested values are not visited. A string
    ``image`` key takes precedence over ``repository``. Blank strings (for
    example an ``image: ""`` placeholder) are skipped instead of being
    reported as an image with an empty name.

    Returns:
        A list holding zero or one ``(repository, tag)`` tuple.
    """
    image = obj.get('image')
    if isinstance(image, str):
        if not image.strip():
            return []
        return [extract_image_from_string(image)]

    repo = obj.get('repository')
    if isinstance(repo, str) and repo.strip():
        return [process_image_repository_tag(repo, obj.get('tag', 'latest'))]

    return []
def extract_images_recursive(obj) -> list[tuple[str, str]]:
    """Walk a parsed YAML value depth-first and gather every image found.

    Dictionaries contribute their own direct image (if any) before their
    values are visited; lists are visited element by element; any other
    value yields nothing.
    """
    if isinstance(obj, dict):
        # The mapping's own image comes first, then whatever its values hold.
        found = list(search_for_images_in_dict(obj))
        children = obj.values()
    elif isinstance(obj, list):
        found = []
        children = obj
    else:
        return []

    for child in children:
        found += extract_images_recursive(child)
    return found
def extract_images_from_yaml_file(file_path: str) -> list[tuple[str, str]]:
    """Extract Docker image names and tags from a YAML file.

    Every document in the file is inspected, so manifests that hold several
    ``---``-separated documents are supported. Empty documents are skipped.

    Args:
        file_path: Path of the YAML file to read (decoded as UTF-8).

    Returns:
        The ``(repository, tag)`` tuples found. If the file cannot be read,
        decoded, or parsed, an error line is printed and ``[]`` is returned.
    """
    images = []
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            # safe_load_all is lazy, so it must be consumed while the file is open.
            for document in yaml.safe_load_all(f):
                if document is None:  # Handle empty files / empty documents
                    continue
                images.extend(extract_images_recursive(document))
    except OSError as e:
        print(f"Error processing {file_path}: {e}")
        return []
    except (UnicodeDecodeError, yaml.YAMLError) as e:
        # Files that are not valid UTF-8 or not valid YAML are reported and skipped.
        print(f"Error processing {file_path}: {e}")
        return []
    return images
def find_chart_yaml_files(directory: str) -> list[str]:
    """Find all Chart.yaml files under ``directory``, searching recursively.

    Each Chart.yaml marks the root of a Helm chart. The directory is escaped
    before it is placed in the glob pattern, so paths containing ``[``, ``*``
    or ``?`` are matched literally instead of being read as wildcards.
    """
    return glob.glob(f"{glob.escape(directory)}/**/Chart.yaml", recursive=True)
def get_yaml_files_in_templates(templates_dir: str) -> list[str]:
    """Get all ``.yaml`` and ``.yml`` files under a templates directory.

    The search is recursive. ``templates_dir`` is escaped before it is placed
    in the glob pattern, so directory names containing glob metacharacters
    are matched literally.

    Returns:
        Paths of the ``.yaml`` matches followed by the ``.yml`` matches.
    """
    base = glob.escape(templates_dir)
    yaml_files = glob.glob(f"{base}/**/*.yaml", recursive=True)
    yml_files = glob.glob(f"{base}/**/*.yml", recursive=True)
    return yaml_files + yml_files
def find_helm_files(directory: str) -> set[str]:
    """Collect the YAML files belonging to every Helm chart under ``directory``.

    A chart is any directory that holds a Chart.yaml. For each chart the
    result includes its values.yaml (when present) and every YAML file found
    under its templates directory (when present).
    """
    helm_files = set()

    for chart_manifest in find_chart_yaml_files(directory):
        chart_root = os.path.dirname(chart_manifest)

        # Default values shipped with the chart.
        values_path = os.path.join(chart_root, "values.yaml")
        if os.path.exists(values_path):
            helm_files.add(values_path)

        # Manifests under templates/, if the chart has any.
        templates_path = os.path.join(chart_root, "templates")
        if not os.path.exists(templates_path):
            continue
        helm_files.update(get_yaml_files_in_templates(templates_path))

    return helm_files
def collect_images_from_files(helm_files: set[str]) -> dict[str, set[str]]:
    """Group the tags found across ``helm_files`` by image name.

    Returns:
        A mapping from image name to the set of distinct tags seen for it.
    """
    all_images = {}
    for file_path in helm_files:
        for image, tag in extract_images_from_yaml_file(file_path):
            # setdefault creates the tag set the first time an image is seen.
            all_images.setdefault(image, set()).add(tag)
    return all_images
def print_results(all_images: dict[str, set[str]]) -> None:
    """Write a report of images and their tags to standard output.

    Images are listed in sorted order, each followed by its sorted tags, and
    a summary line closes the report. An empty mapping prints a single
    "nothing found" message instead.
    """
    if all_images:
        print("\nExtracted Docker images and tags:")
        for image in sorted(all_images):
            print(f"\n{image}:")
            for tag in sorted(all_images[image]):
                print(f" - {tag}")

        # Summary: number of distinct images and total tags across them.
        total_images = sum(map(len, all_images.values()))
        print(f"\nTotal: {len(all_images)} unique images with {total_images} tags")
    else:
        print("No Docker images found in Helm charts.")
def main():
    """Scan the current working directory for Helm charts and report their images."""
    current_dir = os.getcwd()
    print(f"Scanning for Helm charts in: {current_dir}")

    # Locate the chart files, then report how many were found.
    helm_files = find_helm_files(current_dir)
    print(f"Found {len(helm_files)} Helm chart files")

    # Gather the images from those files and print the report.
    print_results(collect_images_from_files(helm_files))
# Run the scan only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment