Skip to content

Instantly share code, notes, and snippets.

@benoittgt
Last active December 18, 2025 14:16
Show Gist options
  • Select an option

  • Save benoittgt/4dc2961c3ab011bc7e1bd0d8d6aae4c3 to your computer and use it in GitHub Desktop.

Select an option

Save benoittgt/4dc2961c3ab011bc7e1bd0d8d6aae4c3 to your computer and use it in GitHub Desktop.
Show the output of `docker buildx du --verbose` as a tree structure
#!/usr/bin/env python3
"""
Parse docker buildx du --verbose output and generate a graph visualization.
Usage:
docker buildx du --verbose | python visualize_cache_graph.py
# Or from file:
python visualize_cache_graph.py < cache-output.txt
"""
import sys
import re
from collections import defaultdict
# ANSI color codes
class Colors:
    """Namespace of ANSI escape sequences used to colorize terminal output.

    The attributes are plain strings meant to be concatenated around text;
    ``RESET`` must follow any colored span to restore the default style.
    """

    # Restores the terminal's default attributes.
    RESET = '\033[0m'

    # Foreground colors.
    RED = '\033[91m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    BLUE = '\033[94m'
    MAGENTA = '\033[95m'
    CYAN = '\033[96m'
    GRAY = '\033[90m'

    # Text attribute, combinable with a color by concatenation.
    BOLD = '\033[1m'
def parse_cache_entries(text):
    """Parse ``docker buildx du --verbose`` output into a list of entry dicts.

    Each record starts at a line beginning with ``ID:``. Every returned dict
    has the keys ``'id'`` and ``'parent'`` (``None`` when no parent was
    given) and, when the matching lines are present, ``'size'``,
    ``'description'``, ``'last_used'`` and ``'usage_count'`` (all strings).

    Two parent notations are understood:

    * old format: a single ``Parent: <id>`` line;
    * new format: a ``Parents:`` line followed by ``- <id>`` items, of which
      only the first is kept.

    Malformed input is tolerated instead of raising: an ``ID:`` line without
    a value discards that record (its following fields are ignored until the
    next valid ``ID:``), and a ``Parent:`` line without a value leaves the
    parent unset.

    Args:
        text: Raw command output.

    Returns:
        A list of dicts, one per cache record, in input order.
    """
    # Labels whose value is "the rest of the line", mapped to the dict key.
    text_fields = (
        ('Description:', 'description'),
        ('Last used:', 'last_used'),
        ('Usage count:', 'usage_count'),
    )
    # Labels we do not store but which terminate a "Parents:" list.
    ignored_fields = ('Created at:', 'Mutable:', 'Reclaimable:', 'Shared:', 'Type:')

    entries = []
    current_entry = {}
    in_parents_section = False

    for line in text.strip().split('\n'):
        if line.startswith('ID:'):
            if current_entry:
                entries.append(current_entry)
            id_tokens = line[len('ID:'):].split()
            # An empty dict is falsy, so every "and current_entry" guard
            # below skips the fields of a record that has no usable ID.
            current_entry = {'id': id_tokens[0], 'parent': None} if id_tokens else {}
            in_parents_section = False
        elif line.startswith('Parent:') and current_entry:
            # Old format: Parent: abc123
            parent_tokens = line[len('Parent:'):].split()
            if parent_tokens:
                current_entry['parent'] = parent_tokens[0]
            in_parents_section = False
        elif line.startswith('Parents:') and current_entry:
            # New format: Parents: (followed by a list of "- <id>" items)
            in_parents_section = True
        elif in_parents_section and line.strip().startswith('- '):
            # New format parent entry: " - abc123"; only the first is kept.
            if not current_entry.get('parent'):
                current_entry['parent'] = line.strip()[2:].strip()
        elif line.startswith('Size:') and current_entry:
            # Re-join the tokens so a value such as "1.5 GB" survives intact.
            current_entry['size'] = ' '.join(line.split()[1:])
            in_parents_section = False
        elif line.startswith(ignored_fields):
            # Other fields end the Parents section
            in_parents_section = False
        elif current_entry:
            for label, key in text_fields:
                if line.startswith(label):
                    # Slice off only the leading label: str.replace would
                    # also delete the label text if it recurs in the value.
                    current_entry[key] = line[len(label):].strip()
                    in_parents_section = False
                    break

    if current_entry:
        entries.append(current_entry)
    return entries
def build_tree(entries):
    """Split entries into root nodes and a parent-id -> children mapping.

    An entry is a child when its ``'parent'`` value is truthy and matches
    the ``'id'`` of some entry in ``entries``; otherwise it is a root
    (no parent recorded, or the parent is not part of the input).

    Args:
        entries: List of dicts, each with an ``'id'`` key and optionally a
            ``'parent'`` key.

    Returns:
        A ``(roots, children)`` tuple: ``roots`` is a list of entries in
        input order, ``children`` is a ``defaultdict(list)`` keyed by
        parent id.
    """
    known_ids = set()
    for item in entries:
        known_ids.add(item['id'])

    roots = []
    children = defaultdict(list)
    for item in entries:
        parent_id = item.get('parent')
        if parent_id and parent_id in known_ids:
            children[parent_id].append(item)
        else:
            # No parent, or the parent is absent from the cache listing.
            roots.append(item)

    return roots, children
def format_size(size_str):
    """Return ``size_str`` wrapped in brackets and colored by magnitude.

    * contains ``GB`` or ``TB``: bold red;
    * contains ``MB`` with a numeric value above 100: yellow;
    * anything else: default color.

    Args:
        size_str: Human-readable size such as ``"12.5MB"``; may be empty
            or ``None``.

    Returns:
        The colored ``"[size]"`` string, or ``""`` when ``size_str`` is
        falsy.
    """
    if not size_str:
        return ""

    color = Colors.RESET
    if 'GB' in size_str or 'TB' in size_str:
        # Large sizes in red/bold
        color = Colors.RED + Colors.BOLD
    elif 'MB' in size_str:
        try:
            mb_val = float(size_str.split('MB')[0])
        except ValueError:
            # Unparseable number: keep the default color rather than fail.
            # Only ValueError is caught so unrelated errors still surface.
            mb_val = 0.0
        if mb_val > 100:
            color = Colors.YELLOW

    return f"{color}[{size_str}]{Colors.RESET}"
def get_age_color(last_used):
    """Pick a color code reflecting how recently a cache entry was used.

    Args:
        last_used: Relative-time text such as ``"2 hours ago"``; may be
            empty.

    Returns:
        ``Colors.RESET`` for a falsy value; ``Colors.GREEN`` when the text
        mentions seconds or minutes, or hours with a leading count below 2
        (a non-numeric first word counts as 0); ``Colors.YELLOW`` for 2 or
        more hours; ``Colors.RED`` for everything else.
    """
    if not last_used:
        return Colors.RESET

    if 'second' in last_used or 'minute' in last_used:
        return Colors.GREEN

    if 'hour' not in last_used:
        # No second/minute/hour unit in the text: treated as old.
        return Colors.RED

    first_word = last_used.split()[0]
    hours = int(first_word) if first_word.isdigit() else 0
    return Colors.GREEN if hours < 2 else Colors.YELLOW
def print_tree(entry, children, prefix="", is_last=True, visited=None, depth=0, max_depth=50):
    """Print ``entry`` and its descendants as a box-drawing tree on stdout.

    Each node is printed as one line: colored ID (first 15 characters),
    colored size, optional ``(last used, used Nx)`` metadata and the
    description truncated to 80 characters. Empty fields are omitted so
    the line never contains doubled separators.

    Args:
        entry: Dict with an ``'id'`` key and optional ``'size'``,
            ``'last_used'``, ``'usage_count'`` and ``'description'`` keys.
        children: Mapping from parent id to a list of child entry dicts.
        prefix: Text printed before this node's connector (the
            accumulated indentation of its ancestors).
        is_last: Whether this node is the last among its siblings;
            selects the connector glyph and the prefix for its children.
        visited: Set of ids already printed; shared across the whole
            traversal so an id is printed at most once. Created when
            ``None``.
        depth: Current depth, 0 for a root.
        max_depth: Depth beyond which a ``[MAX DEPTH REACHED]`` marker is
            printed instead of descending further.
    """
    if visited is None:
        visited = set()

    # Prevent infinite loops - skip duplicates silently
    entry_id = entry['id']
    if entry_id in visited:
        return

    connector = "└── " if is_last else "├── "
    if depth > max_depth:
        print(f"{prefix}{connector}[MAX DEPTH REACHED]")
        return
    visited.add(entry_id)

    last_used = entry.get('last_used', '')
    usage_count = entry.get('usage_count', '')

    # Build metadata string with color
    metadata = []
    if last_used:
        metadata.append(f"{get_age_color(last_used)}{last_used}{Colors.RESET}")
    if usage_count:
        # Low usage count in gray
        count_color = Colors.GRAY if usage_count == '1' else Colors.CYAN
        metadata.append(f"{count_color}used {usage_count}x{Colors.RESET}")

    fields = [
        f"{Colors.CYAN}{entry_id[:15]}{Colors.RESET}",
        format_size(entry.get('size', '')),
        f"({', '.join(metadata)})" if metadata else "",
        entry.get('description', 'No description')[:80],
    ]
    # Skip empty fields so a missing size/metadata leaves no double space.
    print(prefix + connector + " ".join(field for field in fields if field))

    # The continuation prefix must be exactly as wide as the connector
    # (4 columns) in both cases; otherwise subtrees at the same depth are
    # indented differently depending on whether the parent was last.
    child_prefix = prefix + ("    " if is_last else "│   ")
    child_list = children.get(entry_id, [])
    final_index = len(child_list) - 1
    for index, child in enumerate(child_list):
        # The SAME visited set is passed to every branch (not a copy).
        print_tree(child, children, child_prefix, index == final_index,
                   visited, depth + 1, max_depth)
def main():
    """Read ``docker buildx du --verbose`` output on stdin and print a tree.

    Prints a diagnostic message and returns early when stdin is empty or
    when no cache entry could be parsed. Otherwise prints summary counts
    followed by one tree per root entry, separated by blank lines.
    """
    input_text = sys.stdin.read()

    # Nothing (or only whitespace) arrived on stdin.
    if not (input_text and input_text.strip()):
        print("ERROR: No input received from docker buildx du --verbose")
        print("Tip: Run 'docker buildx du --verbose' manually to check if it works")
        return

    entries = parse_cache_entries(input_text)
    if not entries:
        # Show the start of the input to help diagnose a format mismatch.
        diagnostics = (
            "No cache entries parsed from input",
            f"Input length: {len(input_text)} characters",
            "First 500 characters:",
            input_text[:500],
            "\nPossible issues:",
            " - docker buildx du --verbose format changed",
            " - No cache exists yet",
            " - BuildKit not available",
        )
        for message in diagnostics:
            print(message)
        return

    print(f"Found {len(entries)} cache entries")

    with_parent = [item for item in entries if item.get('parent')]
    print(f"Entries with parent field: {len(with_parent)}")

    roots, children = build_tree(entries)

    # Count only parents that are themselves present in the listing.
    known_ids = {item['id'] for item in entries}
    matched = [item for item in with_parent if item.get('parent') in known_ids]
    print(f"Parent-child relationships found: {len(matched)}")
    print(f"Root nodes: {len(roots)}")
    print()

    print("Cache Hierarchy:")
    print("=" * 80)
    final_index = len(roots) - 1
    for index, root in enumerate(roots):
        root_is_last = index == final_index
        print_tree(root, children, "", root_is_last)
        if not root_is_last:
            # Blank line between consecutive root trees.
            print()


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment