Skip to content

Instantly share code, notes, and snippets.

@kartben
Created March 6, 2025 09:26
Show Gist options
  • Save kartben/e0b561bfe9f718d0731e3006562008aa to your computer and use it in GitHub Desktop.
Save kartben/e0b561bfe9f718d0731e3006562008aa to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import os
import yaml
import sys
import argparse
from pathlib import Path
import statistics
import re
def get_first_sentence(text):
    """Extract the first sentence from a text block (typically a node description).

    Newlines are first replaced by spaces, so a blank line between two
    paragraphs turns into a run of two spaces. The text is then split on
    double spaces and only the first non-empty paragraph is examined.

    Args:
        text: The text to extract the first sentence from. ``None`` and the
            empty string are both accepted.

    Returns:
        The first sentence of the first paragraph without its terminating
        period, or the entire first paragraph if no sentence boundary (a
        period followed by whitespace or by the end of the paragraph) is
        found. An empty string is returned for empty or blank input.
    """
    if not text:
        return ""

    flattened = text.replace('\n', ' ')

    # Paragraph breaks (blank lines) are now double spaces. Splitting on a
    # single space here would reduce the "paragraph" to its first word.
    paragraphs = (chunk.strip() for chunk in flattened.split('  '))

    # Skip empty chunks so leading blank lines do not hide the real text.
    first_paragraph = next((chunk for chunk in paragraphs if chunk), "")

    # A period only ends the sentence when followed by whitespace or the end
    # of the paragraph, so "1.2" or "foo.bar" do not split the sentence.
    period_match = re.search(r'(.*?)\.(?:\s|$)', first_paragraph)
    if period_match:
        return period_match.group(1).strip()

    # No sentence boundary: the whole first paragraph is the "sentence".
    return first_paragraph
def _read_description(file_path):
    """Return the top-level ``description`` string of a YAML file, or None.

    None is returned when the file cannot be read or parsed, when its root is
    not a mapping, or when ``description`` is missing or is not a string.
    """
    try:
        # Explicit encoding so results do not depend on the platform locale.
        with open(file_path, 'r', encoding='utf-8') as f:
            content = yaml.safe_load(f)
    except (OSError, ValueError, yaml.YAMLError) as exc:
        # Best effort: one unreadable or malformed file must not abort the
        # scan, but it is reported on stderr instead of vanishing silently.
        # ValueError also covers UnicodeDecodeError.
        print(f"Warning: skipping {file_path} ({type(exc).__name__})",
              file=sys.stderr)
        return None

    # A YAML document may be a list or a scalar; only mappings have fields.
    if not isinstance(content, dict):
        return None

    description = content.get('description')
    return description if isinstance(description, str) else None


def find_descriptions(bindings_dir, threshold=None):
    """
    Recursively search through binding files and collect description information.

    Every ``*.yaml`` file below ``bindings_dir`` is loaded with
    ``yaml.safe_load``. Files that cannot be read or parsed, or that have no
    string ``description`` at the top level, are skipped.

    Args:
        bindings_dir: Path to the bindings directory
        threshold: Optional character count threshold. When given, only
            entries whose first sentence is strictly longer than the
            threshold are returned; when None, all entries are returned.

    Returns:
        List of tuples (file_path, description_length, first_sentence,
        full_description), where file_path is relative to ``bindings_dir``
        and description_length is the length of the first sentence.
    """
    descriptions = []

    for root, dirs, files in os.walk(bindings_dir):
        # Sorting in place makes os.walk descend in a deterministic order.
        dirs.sort()
        for file_name in sorted(files):
            if not file_name.endswith('.yaml'):
                continue

            file_path = os.path.join(root, file_name)
            description = _read_description(file_path)
            if description is None:
                continue

            first_sentence = get_first_sentence(description)
            length = len(first_sentence)
            if threshold is None or length > threshold:
                rel_path = os.path.relpath(file_path, bindings_dir)
                descriptions.append((rel_path, length, first_sentence, description))

    return descriptions
def _build_arg_parser():
    """Create the command-line argument parser for the script."""
    parser = argparse.ArgumentParser(description='Find binding files with long descriptions')
    parser.add_argument('bindings_dir', help='Path to the bindings directory')
    parser.add_argument('-t', '--threshold', type=int, default=80,
                        help='Character count threshold for considering a description "long" (default: 80)')
    parser.add_argument('-a', '--all', action='store_true',
                        help='Show all descriptions, not just long ones')
    parser.add_argument('-f', '--full', action='store_true',
                        help='Show full descriptions, not just the first line')
    parser.add_argument('-o', '--output', help='Output file to write results to')
    parser.add_argument('-s', '--stats', action='store_true',
                        help='Show statistics about description lengths')
    return parser


def _format_statistics(lengths):
    """Return the report lines summarising a non-empty list of lengths."""
    lines = [
        "\nStatistics:",
        f" Min length: {min(lengths)}",
        f" Max length: {max(lengths)}",
        f" Average length: {sum(lengths) / len(lengths):.2f}",
        f" Median length: {statistics.median(lengths):.2f}",
    ]
    # statistics.stdev() needs at least two data points.
    if len(lengths) > 1:
        lines.append(f" Standard deviation: {statistics.stdev(lengths):.2f}")
    return lines


def _build_report(descriptions, args):
    """Return the report lines for the already sorted description tuples."""
    if not descriptions:
        return ["No binding files with descriptions found."]

    if args.all:
        header = f"Found {len(descriptions)} binding files with descriptions:"
    else:
        header = f"Found {len(descriptions)} binding files with descriptions longer than {args.threshold} characters:"
    lines = [header]

    if args.stats:
        lines.extend(_format_statistics([length for _, length, _, _ in descriptions]))

    lines.append("\nFormat: LENGTH | FILE | DESCRIPTION")
    lines.append("-" * 100)

    for file_path, length, first_line, full_description in descriptions:
        if args.full:
            display_desc = full_description
        elif len(first_line) > 100:
            # Truncate very long first sentences for display
            display_desc = first_line[:100] + "..."
        else:
            display_desc = first_line
        lines.append(f"{length:4d} | {file_path:50s} | {display_desc}")

    return lines


def main():
    """Command-line entry point: report binding files with long descriptions.

    Parses the command line, collects descriptions below the given bindings
    directory, sorts them by first-sentence length (longest first) and either
    prints the report or writes it to the file given with ``--output``.
    Statistics requested with ``--stats`` cover only the collected entries,
    i.e. those above the threshold unless ``--all`` is given.

    Exits with status 1, after printing an error on stderr, when the
    bindings directory does not exist.
    """
    args = _build_arg_parser().parse_args()

    if not os.path.isdir(args.bindings_dir):
        # Diagnostics go to stderr so they never mix with report output.
        print(f"Error: {args.bindings_dir} is not a valid directory", file=sys.stderr)
        sys.exit(1)

    # Get all descriptions if --all is specified, otherwise only those exceeding threshold
    threshold = None if args.all else args.threshold
    descriptions = find_descriptions(args.bindings_dir, threshold)

    # Sort by description length (longest first)
    descriptions.sort(key=lambda x: x[1], reverse=True)

    output_text = "\n".join(_build_report(descriptions, args))

    if args.output:
        # Explicit encoding: descriptions may contain non-ASCII characters.
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(output_text)
            f.write("\n")  # end the text file with a newline
        print(f"Results written to {args.output}")
    else:
        print(output_text)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment