Created
August 3, 2025 23:03
-
-
Save jamesmcm/3093ce69840ec4882d5eed2ee06f5f19 to your computer and use it in GitHub Desktop.
LLM Directory Context Generator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| LLM Directory Context Generator | |
| Generates a tree view of a directory followed by the contents of all text files | |
| with appropriate comment syntax for the file path. Respects .gitignore patterns | |
| and skips binary files. | |
| Usage: ./llmdir_context.py <directory> | |
| """ | |
| import os | |
| import sys | |
| import subprocess | |
| import fnmatch | |
| import mimetypes | |
| from pathlib import Path | |
| from typing import List, Set, Optional | |
# Maps a file extension to the token that opens a comment in that language.
# Openers that also need a closing token ('<!--' and '/*') are completed by
# format_file_header when it builds the header line.
COMMENT_SYNTAX = {
    # Hash comments: Python, shells, Perl, Ruby, R and common config formats
    **dict.fromkeys(
        (
            '.py', '.pyw', '.pyi', '.sh', '.bash', '.zsh', '.fish', '.pl',
            '.pm', '.rb', '.r', '.R', '.yaml', '.yml', '.toml', '.ini',
            '.cfg', '.conf', '.dockerfile',
        ),
        '#',
    ),
    # Double-slash comments: the C family and languages that borrowed it
    **dict.fromkeys(
        (
            '.c', '.cpp', '.cc', '.cxx', '.c++', '.h', '.hpp', '.hh', '.hxx',
            '.h++', '.java', '.js', '.jsx', '.ts', '.tsx', '.cs', '.go',
            '.rs', '.php', '.swift', '.kt', '.kts', '.scala', '.dart',
        ),
        '//',
    ),
    # Markup comments: HTML / XML dialects
    **dict.fromkeys(('.html', '.htm', '.xml', '.xhtml', '.svg'), '<!--'),
    # Stylesheets: plain CSS only has block comments, the preprocessors
    # also accept line comments
    '.css': '/*',
    **dict.fromkeys(('.scss', '.sass', '.less'), '//'),
    # Double-dash comments: SQL and Lua
    **dict.fromkeys(('.sql', '.lua'), '--'),
    # Percent comments: MATLAB/Octave and LaTeX
    **dict.fromkeys(('.m', '.tex', '.sty', '.cls'), '%'),
    # Assembly
    '.asm': ';',
    '.s': '#',
    # Documentation and data formats
    '.txt': '#',
    **dict.fromkeys(('.md', '.markdown'), '<!--'),
    '.rst': '..',
    '.org': '#',
    **dict.fromkeys(('.json', '.jsonc'), '//'),
}
def run_tree(directory: str) -> str:
    """Return a tree listing of *directory* produced by the ``tree`` command.

    Common dependency and build-output directories are excluded from the
    listing.  When the ``tree`` executable is not installed the pure-Python
    ``generate_simple_tree`` fallback is used instead.

    Args:
        directory: Path of the directory to list.

    Returns:
        The listing followed by a single newline, or an ``Error: ...``
        message when ``tree`` exits with a non-zero status.
    """
    # Library / build directories that would only add noise to the listing
    ignored_names = [
        'node_modules',
        'target',
        '.venv',
        'venv',
        '__pycache__',
        '.git',
        'dist',
        'build',
        '.next',
        '.nuxt',
        'vendor',
        'Pods',
        'xcuserdata',
        'DerivedData',
        '.gradle',
        'bin',
        'obj',
        'packages',
        '.pub-cache',
        'flutter_build',
    ]
    command = [
        'tree',
        # A single '|'-separated pattern works on every tree release;
        # repeating -I is only honoured reliably by newer ones.
        '-I', '|'.join(ignored_names),
        # Suppress the "N directories, M files" footer at the source instead
        # of guessing which output line is the summary.
        '--noreport',
        directory,
    ]

    try:
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError:
        return f"Error: Could not run tree command on {directory}"
    except FileNotFoundError:
        # The tree executable is not installed
        return generate_simple_tree(directory)

    # The trailing newline leaves a blank line between the listing and
    # whatever the caller appends next, as the fallback's output does.
    return result.stdout.strip() + '\n'
def generate_simple_tree(directory: str, prefix: str = "", is_last: bool = True) -> str:
    """Render *directory* as a text tree without relying on the tree command.

    Directories are listed before files, each group sorted case-insensitively.
    Common dependency / build directories are left out.  Only real
    directories are descended into: symbolic links are listed but never
    followed, so a link cycle cannot blow up the listing, and special files
    (FIFOs, sockets, ...) are listed as plain entries.

    Args:
        directory: Path of the directory to render.
        prefix: Indentation placed in front of every entry line (the root
            line itself is not prefixed).
        is_last: Unused; accepted so existing callers keep working.

    Returns:
        The listing, terminated by a newline, or an ``Error: ...`` message
        when *directory* does not exist.
    """
    path = Path(directory)
    if not path.exists():
        return f"Error: Directory {directory} does not exist"

    # Directories to skip in tree display
    skip_dirs = {
        'node_modules', 'target', '.venv', 'venv', '__pycache__', '.git',
        'dist', 'build', '.next', '.nuxt', 'vendor', 'Pods', 'xcuserdata',
        'DerivedData', '.gradle', 'bin', 'obj', 'packages', '.pub-cache',
        'flutter_build',
    }

    def render(folder, indent):
        """Return the entry lines for everything below *folder*."""
        try:
            children = sorted(
                folder.iterdir(),
                key=lambda child: (child.is_file(), child.name.lower()),
            )
            children = [
                child for child in children
                if not (child.is_dir() and child.name in skip_dirs)
            ]
            # Decide what to descend into while still inside the try block,
            # so every stat call that can be refused is covered by it.
            descend = [
                child.is_dir() and not child.is_symlink() for child in children
            ]
        except PermissionError:
            return [f"{indent}[Permission Denied]"]

        rendered = []
        final_index = len(children) - 1
        for index, (child, is_real_dir) in enumerate(zip(children, descend)):
            last = index == final_index
            connector = "└── " if last else "├── "
            if is_real_dir:
                rendered.append(f"{indent}{connector}{child.name}/")
                # Continuation prefixes are as wide as the connectors so that
                # nested levels line up under their parent.
                rendered.extend(
                    render(child, indent + ("    " if last else "│   "))
                )
            else:
                rendered.append(f"{indent}{connector}{child.name}")
        return rendered

    # Path('.').name is empty, so fall back to the path as given
    root_label = path.name or str(path).rstrip("/")
    return "\n".join([f"{root_label}/"] + render(path, prefix)) + "\n"
def load_gitignore_patterns(directory: str) -> Set[str]:
    """Collect the ignore patterns that apply to *directory*.

    The result always contains a built-in set of patterns (VCS metadata,
    editor and OS droppings, lock files, ...).  On top of that, every
    ``.gitignore`` found in *directory* and its ancestors is read, stopping
    at the first ancestor that contains a ``.git`` entry (the repository
    root).  When no repository root is found the search continues up to the
    filesystem root.

    Blank lines, ``#`` comments and ``!`` negation lines are skipped.
    Negations are dropped because the patterns are returned as an unordered
    set and so cannot express "re-include"; kept verbatim they would be
    treated as a literal file name starting with ``!``.

    Args:
        directory: Directory whose applicable patterns are wanted.

    Returns:
        The set of pattern strings, each stripped of surrounding whitespace.
    """
    # Default patterns to always ignore
    patterns = {
        '.git/',
        '.git',
        '*.pyc',
        '__pycache__/',
        '.DS_Store',
        'Thumbs.db',
        '*.log',
        '.env',
        'node_modules/',
        '.vscode/',
        '.idea/',
        '*.tmp',
        '*.temp',
        '*.swp',
        '*.swo',
        '*~',
        '*.svg',
        '.gitignore',
        'package-lock.json',
        '*.lock',
        'target/',
        '.venv/',
    }

    start = Path(directory).resolve()
    for folder in (start, *start.parents):
        try:
            with open(folder / '.gitignore', 'r', encoding='utf-8', errors='ignore') as f:
                for raw_line in f:
                    line = raw_line.strip()
                    if not line or line.startswith('#'):
                        continue
                    if line.startswith('!'):
                        continue
                    patterns.add(line)
        except OSError:
            # Missing or unreadable .gitignore: nothing to add at this level
            pass

        # .gitignore files above the repository root belong to something
        # else and must not influence this project.
        if (folder / '.git').exists():
            break

    return patterns
def _matches_directory_pattern(components: List[str], pattern: str) -> bool:
    """Tell whether directory pattern *pattern* matches a run of *components*.

    *pattern* ends with '/'.  Each of its '/'-separated pieces is matched as
    a glob against one path component.  A leading '/' pins the match to the
    first component; otherwise the run may start at any depth.
    """
    anchored = pattern.startswith('/')
    wanted = [piece for piece in pattern.split('/') if piece]
    # A leading '**/' means "at any depth", i.e. an unanchored pattern
    while wanted and wanted[0] == '**':
        wanted.pop(0)
        anchored = False
    span = len(wanted)
    if span == 0 or span > len(components):
        return False

    starts = [0] if anchored else range(len(components) - span + 1)
    return any(
        all(
            fnmatch.fnmatch(name, glob)
            for name, glob in zip(components[start:start + span], wanted)
        )
        for start in starts
    )


def should_ignore_file(file_path: str, base_dir: str, patterns: Set[str]) -> bool:
    """Check whether *file_path* is excluded by any gitignore-style pattern.

    This is an approximation of gitignore matching, not a full
    implementation:

    * ``name/`` matches a path component (or run of components) at any
      depth, with glob characters allowed, e.g. ``*.egg-info/``.
    * ``/name/`` does the same but only at the top of *base_dir*.
    * ``/glob`` is matched against the whole relative path from the top.
    * A pattern without any '/' is matched against every path component, so
      it also covers files inside a matching directory.
    * Any other pattern is matched against the whole relative path.

    Args:
        file_path: Path of the file or directory to test.
        base_dir: Directory the patterns are relative to.
        patterns: Pattern strings to test against.

    Returns:
        True if at least one pattern matches, False otherwise.
    """
    # Patterns always use '/', whatever the platform separator is
    rel_path = os.path.relpath(file_path, base_dir).replace(os.sep, '/')
    components = rel_path.split('/')

    for pattern in patterns:
        if pattern.endswith('/'):
            if _matches_directory_pattern(components, pattern):
                return True
        elif fnmatch.fnmatch(rel_path, pattern):
            return True
        elif pattern.startswith('/'):
            # Anchored pattern: compare from the top of base_dir
            if fnmatch.fnmatch('/' + rel_path, pattern):
                return True
        elif '/' not in pattern and any(
            fnmatch.fnmatch(name, pattern) for name in components
        ):
            return True

    return False
def is_text_file(file_path: str) -> bool:
    """Check if a file is likely a text file.

    The decision is made in three steps, stopping at the first that answers:

    1. The extension, or for extension-less dotfiles such as ``.gitignore``
       the whole file name, is in a list of known text types.
    2. ``mimetypes`` guesses a ``text/*`` type from the name.
    3. The first 8 KiB are inspected: an empty file is text, and any other
       file is text only if that chunk contains no NUL byte and no control
       byte other than the ones ordinary text uses.

    Step 3 accepts every byte from 0x80 upwards, so UTF-8 and legacy 8-bit
    encodings are both treated as text.

    Args:
        file_path: Path of the file to classify.

    Returns:
        True if the file looks like text; False if it looks binary or cannot
        be read.
    """
    # Known text extensions and dotfile names
    text_extensions = {
        '.txt', '.md', '.rst', '.py', '.js', '.html', '.css', '.json', '.xml',
        '.yaml', '.yml', '.toml', '.ini', '.cfg', '.conf', '.sh', '.bash',
        '.c', '.cpp', '.h', '.hpp', '.java', '.rs', '.go', '.php', '.rb',
        '.pl', '.r', '.sql', '.lua', '.m', '.tex', '.asm', '.s', '.dockerfile',
        '.gitignore', '.gitattributes', '.editorconfig', '.env', '.npmrc',
        '.ts', '.tsx', '.jsx', '.vue', '.svelte', '.scss', '.sass', '.less',
        '.swift', '.kt', '.scala', '.dart', '.cs', '.fs', '.clj', '.hs',
        '.elm', '.ex', '.exs', '.erl', '.hrl', '.jl', '.nim', '.cr', '.zig',
    }

    lowered = file_path.lower()
    _, ext = os.path.splitext(lowered)
    # splitext() reports no extension for names like '.gitignore', so the
    # dotfile entries can only match when compared with the whole name.
    if ext in text_extensions or os.path.basename(lowered) in text_extensions:
        return True

    # Check MIME type
    mime_type, _ = mimetypes.guess_type(file_path)
    if mime_type and mime_type.startswith('text/'):
        return True

    # For files without extension or unknown types, sniff a small portion
    try:
        with open(file_path, 'rb') as f:
            chunk = f.read(8192)
    except OSError:
        return False

    if not chunk:
        return True  # Empty files are text

    # Bytes that plain text may contain: BEL, BS, TAB, LF, FF, CR, ESC, the
    # printable ASCII range and every high byte.  Trying to decode with
    # latin-1 cannot serve as a check because it accepts every byte value.
    text_bytes = bytes(
        {7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x7f)) | set(range(0x80, 0x100))
    )
    # Whatever survives the deletion (NUL included) marks the file binary
    return not chunk.translate(None, text_bytes)
def get_comment_syntax(file_path: str) -> str:
    """Return the comment-opening token to use for *file_path*.

    The lookup is case-insensitive.  A handful of well-known build files
    that carry no extension are recognised by name; everything else is
    looked up by extension in COMMENT_SYNTAX, with '#' as the default.
    """
    lowered = file_path.lower()

    # Extension-less build files are identified by their whole name
    if os.path.basename(lowered) in ('dockerfile', 'makefile', 'rakefile', 'gemfile'):
        return '#'

    extension = os.path.splitext(lowered)[1]
    return COMMENT_SYNTAX.get(extension, '#')
def format_file_header(file_path: str, base_dir: str) -> str:
    """Build the one-line comment that introduces a file's contents.

    The line holds the path of *file_path* relative to *base_dir*, written
    in the comment syntax that get_comment_syntax selects for the file.
    """
    relative = os.path.relpath(file_path, base_dir)
    opener = get_comment_syntax(file_path)

    # Block-comment openers need their matching closer; line-comment
    # markers (including reST's '..') take none.
    closer = {'<!--': ' -->', '/*': ' */'}.get(opener, '')
    return f"{opener} {relative}{closer}"
def process_directory(directory: str) -> str:
    """Process the directory and generate the LLM context.

    The result is the tree listing of *directory* followed, for every text
    file that is not ignored, by a header comment naming the file and then
    the file's contents.  Directories and files are visited in sorted order
    so that repeated runs over the same tree produce the same text.

    Args:
        directory: Path of the directory to process.

    Returns:
        The assembled context, or an ``Error: ...`` message when *directory*
        is not a directory.  A file that cannot be read contributes an
        ``# Error reading ...`` line instead of its contents.
    """
    if not os.path.isdir(directory):
        return f"Error: {directory} is not a valid directory"

    # The tree listing always comes first
    output = [run_tree(directory)]

    # Load gitignore patterns
    ignore_patterns = load_gitignore_patterns(directory)

    for root, dirs, files in os.walk(directory):
        # Assigning in place prunes ignored directories from the walk;
        # sorting fixes the otherwise arbitrary traversal order.
        dirs[:] = sorted(
            d for d in dirs
            if not should_ignore_file(os.path.join(root, d), directory, ignore_patterns)
        )

        for file in sorted(files):
            file_path = os.path.join(root, file)

            # Skip ignored files
            if should_ignore_file(file_path, directory, ignore_patterns):
                continue

            # Skip non-text files
            if not is_text_file(file_path):
                continue

            try:
                # Undecodable bytes are dropped rather than failing the run
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                    content = f.read()
            except OSError as e:
                output.append(f"# Error reading {os.path.relpath(file_path, directory)}: {e}")
                continue

            output.append(format_file_header(file_path, directory))
            output.append(content)

    return "\n".join(output)
def main():
    """Command-line entry point: print the context for one directory.

    Expects exactly one argument, the directory to process.  Usage errors
    and an invalid directory are reported on stderr with exit status 1, so
    that an error message is never mistaken for generated context when the
    output is piped or redirected.
    """
    if len(sys.argv) != 2:
        print("Usage: ./llmdir_context.py <directory>", file=sys.stderr)
        sys.exit(1)

    directory = sys.argv[1]
    if not os.path.isdir(directory):
        print(f"Error: {directory} is not a valid directory", file=sys.stderr)
        sys.exit(1)

    print(process_directory(directory))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment