Created
March 6, 2025 09:26
-
-
Save kartben/e0b561bfe9f718d0731e3006562008aa to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import os | |
import yaml | |
import sys | |
import argparse | |
from pathlib import Path | |
import statistics | |
import re | |
def get_first_sentence(text):
    """Extract the first sentence from a text block (typically a node description).

    Only the first paragraph is considered, where paragraphs are separated by
    one or more blank lines. Line breaks and runs of whitespace inside that
    paragraph are collapsed to single spaces before the sentence is located.

    Args:
        text: The text to extract the first sentence from. May be empty or None.

    Returns:
        The first sentence of the first paragraph without its terminating
        period, or the whole (whitespace-normalized) first paragraph if no
        sentence boundary is found. Returns "" for empty input.
    """
    if not text:
        return ""

    # Isolate the first paragraph *before* flattening newlines, so the
    # blank-line paragraph boundary is still recognizable. (Splitting the
    # flattened text on a single space would yield only the first word.)
    first_paragraph = re.split(r'\n\s*\n', text.strip(), maxsplit=1)[0]

    # Join wrapped lines and squeeze repeated whitespace into single spaces.
    first_paragraph = ' '.join(first_paragraph.split())

    # A sentence ends at a period followed by whitespace or end of text; this
    # keeps dotted tokens such as "v1.2" or "foo.bar" intact.
    period_match = re.search(r'(.*?)\.(?:\s|$)', first_paragraph)
    if period_match:
        return period_match.group(1).strip()

    # No sentence boundary: the whole first paragraph is the "sentence".
    return first_paragraph
def find_descriptions(bindings_dir, threshold=None):
    """Recursively search through binding files and collect description information.

    Every ``.yaml`` file below ``bindings_dir`` is parsed with
    ``yaml.safe_load``. Files whose top-level document is a mapping with a
    string ``description`` key contribute one entry. Files that cannot be read
    or parsed are skipped with a warning on stderr.

    Args:
        bindings_dir: Path to the bindings directory.
        threshold: Optional character count threshold. When given, only
            entries whose first sentence is strictly longer than this value
            are returned; when None, every description is returned.

    Returns:
        List of tuples (relative_path, first_sentence_length, first_sentence,
        full_description), where relative_path is relative to bindings_dir.
    """
    descriptions = []

    for root, dirs, files in os.walk(bindings_dir):
        # Sorting in place makes os.walk() descend in a stable order, so the
        # result (and any stable sort applied to it) is reproducible.
        dirs.sort()

        for filename in sorted(files):
            if not filename.endswith('.yaml'):
                continue

            file_path = os.path.join(root, filename)
            try:
                # Explicit UTF-8 so decoding does not depend on the locale.
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = yaml.safe_load(f)
            except (OSError, ValueError, yaml.YAMLError) as exc:
                # Best effort: an unreadable, undecodable or malformed file
                # must not abort the scan, but it should not vanish silently.
                print(f"Warning: skipping {file_path}: {exc}", file=sys.stderr)
                continue

            # A YAML document may be a scalar or a list; only mappings can
            # carry a 'description' key.
            if not isinstance(content, dict):
                continue

            description = content.get('description')
            if not isinstance(description, str):
                continue

            first_sentence = get_first_sentence(description)
            length = len(first_sentence)

            if threshold is None or length > threshold:
                rel_path = os.path.relpath(file_path, bindings_dir)
                descriptions.append((rel_path, length, first_sentence, description))

    return descriptions
def _format_report(descriptions, args):
    """Render the report lines for already-sorted description tuples.

    Args:
        descriptions: List of (file_path, length, first_sentence,
            full_description) tuples in display order.
        args: Parsed command-line namespace (uses all, threshold, stats, full).

    Returns:
        List of output lines (without trailing newlines).
    """
    if not descriptions:
        if args.all:
            return ["No binding files with descriptions found."]
        # With a threshold active, "none found" would be misleading: files
        # with shorter descriptions may well exist.
        return [f"No binding files with descriptions longer than {args.threshold} characters found."]

    lines = []
    if args.all:
        lines.append(f"Found {len(descriptions)} binding files with descriptions:")
    else:
        lines.append(f"Found {len(descriptions)} binding files with descriptions longer than {args.threshold} characters:")

    if args.stats:
        lengths = [length for _, length, _, _ in descriptions]
        lines.append("\nStatistics:")
        lines.append(f" Min length: {min(lengths)}")
        lines.append(f" Max length: {max(lengths)}")
        lines.append(f" Average length: {sum(lengths) / len(lengths):.2f}")
        lines.append(f" Median length: {statistics.median(lengths):.2f}")
        # statistics.stdev() requires at least two data points.
        if len(lengths) > 1:
            lines.append(f" Standard deviation: {statistics.stdev(lengths):.2f}")

    lines.append("\nFormat: LENGTH | FILE | DESCRIPTION")
    lines.append("-" * 100)

    for file_path, length, first_line, full_description in descriptions:
        if args.full:
            display_desc = full_description
        elif len(first_line) > 100:
            # Truncate very long first sentences to keep the table readable.
            display_desc = first_line[:100] + "..."
        else:
            display_desc = first_line
        lines.append(f"{length:4d} | {file_path:50s} | {display_desc}")

    return lines


def main(argv=None):
    """Command-line entry point: report binding files with long descriptions.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). When None, argparse reads them from sys.argv.

    Raises:
        SystemExit: With status 1 if the bindings directory does not exist
            (argparse also exits on invalid arguments).
    """
    parser = argparse.ArgumentParser(description='Find binding files with long descriptions')
    parser.add_argument('bindings_dir', help='Path to the bindings directory')
    parser.add_argument('-t', '--threshold', type=int, default=80,
                        help='Character count threshold for considering a description "long" (default: 80)')
    parser.add_argument('-a', '--all', action='store_true',
                        help='Show all descriptions, not just long ones')
    parser.add_argument('-f', '--full', action='store_true',
                        help='Show full descriptions, not just the first line')
    parser.add_argument('-o', '--output', help='Output file to write results to')
    parser.add_argument('-s', '--stats', action='store_true',
                        help='Show statistics about description lengths')
    args = parser.parse_args(argv)

    if not os.path.isdir(args.bindings_dir):
        # Diagnostics go to stderr so they never mix with report output.
        print(f"Error: {args.bindings_dir} is not a valid directory", file=sys.stderr)
        sys.exit(1)

    # Get all descriptions if --all is specified, otherwise only those exceeding threshold
    threshold = None if args.all else args.threshold
    descriptions = find_descriptions(args.bindings_dir, threshold)

    # Sort by description length (longest first)
    descriptions.sort(key=lambda x: x[1], reverse=True)

    output_text = "\n".join(_format_report(descriptions, args))

    if args.output:
        # Explicit UTF-8 and a trailing newline so the file matches what
        # print() would have written to stdout.
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(output_text + "\n")
        print(f"Results written to {args.output}")
    else:
        print(output_text)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment