Skip to content

Instantly share code, notes, and snippets.

@jamesmcm
Created August 3, 2025 23:03
Show Gist options
  • Select an option

  • Save jamesmcm/3093ce69840ec4882d5eed2ee06f5f19 to your computer and use it in GitHub Desktop.

Select an option

Save jamesmcm/3093ce69840ec4882d5eed2ee06f5f19 to your computer and use it in GitHub Desktop.
LLM Directory Context Generator
#!/usr/bin/env python3
"""
LLM Directory Context Generator
Generates a tree view of a directory followed by the contents of all text files
with appropriate comment syntax for the file path. Respects .gitignore patterns
and skips binary files.
Usage: ./llmdir_context.py <directory>
"""
import os
import sys
import subprocess
import fnmatch
import mimetypes
from pathlib import Path
from typing import List, Set, Optional
# Comment marker used for the path header of each file, keyed by extension.
# Extensions are listed once per marker and flattened into a single lookup
# table; unknown extensions are handled by the caller's default.
COMMENT_SYNTAX = {
    extension: marker
    for marker, extensions in (
        # Hash comments: scripting languages, config formats, GNU assembly,
        # and plain-text formats
        ('#', (
            '.py', '.pyw', '.pyi',
            '.sh', '.bash', '.zsh', '.fish',
            '.pl', '.pm', '.rb', '.r', '.R',
            '.yaml', '.yml', '.toml', '.ini', '.cfg', '.conf',
            '.dockerfile',
            '.s',
            '.txt', '.org',
        )),
        # Double-slash comments: C-family languages, CSS preprocessors, JSON
        ('//', (
            '.c', '.cpp', '.cc', '.cxx', '.c++',
            '.h', '.hpp', '.hh', '.hxx', '.h++',
            '.java', '.js', '.jsx', '.ts', '.tsx',
            '.cs', '.go', '.rs', '.php', '.swift',
            '.kt', '.kts', '.scala', '.dart',
            '.scss', '.sass', '.less',
            '.json', '.jsonc',
        )),
        # Markup comments: HTML/XML family and Markdown
        ('<!--', (
            '.html', '.htm', '.xml', '.xhtml', '.svg',
            '.md', '.markdown',
        )),
        # Plain CSS block comments
        ('/*', ('.css',)),
        # SQL and Lua
        ('--', ('.sql', '.lua')),
        # MATLAB/Octave and LaTeX
        ('%', ('.m', '.tex', '.sty', '.cls')),
        # Assembly
        (';', ('.asm',)),
        # reStructuredText
        ('..', ('.rst',)),
    )
    for extension in extensions
}
def run_tree(directory: str) -> str:
    """Return the output of the external ``tree`` command for ``directory``.

    Common dependency and build directories are excluded from the listing and
    the trailing summary line (e.g. "4 directories, 16 files") is removed.

    Args:
        directory: Path of the directory to list.

    Returns:
        The tree listing. If ``tree`` exits with a non-zero status an error
        message is returned instead; if the ``tree`` executable is not
        installed, the result of ``generate_simple_tree`` is returned.
    """
    # Common library/build directories that only add noise to the listing
    tree_ignore_patterns = [
        'node_modules',
        'target',
        '.venv',
        'venv',
        '__pycache__',
        '.git',
        'dist',
        'build',
        '.next',
        '.nuxt',
        'vendor',
        'Pods',
        'xcuserdata',
        'DerivedData',
        '.gradle',
        'bin',
        'obj',
        'packages',
        '.pub-cache',
        'flutter_build',
    ]
    # tree accepts alternatives separated by '|' inside one -I pattern. A
    # single combined pattern is used because repeating -I is not honoured by
    # every tree release (older ones appear to keep only the last -I given).
    ignore_args = ['-I', '|'.join(tree_ignore_patterns)]

    try:
        result = subprocess.run(
            ['tree'] + ignore_args + [directory],
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError:
        return f"Error: Could not run tree command on {directory}"
    except FileNotFoundError:
        # Fallback if tree command is not available
        return generate_simple_tree(directory)

    # Filter out the summary line (e.g., "4 directories, 16 files")
    lines = result.stdout.strip().split('\n')
    if lines and lines[-1].strip() and ('director' in lines[-1] or 'file' in lines[-1]):
        lines = lines[:-1]
    return '\n'.join(lines)
def generate_simple_tree(directory: str, prefix: str = "", is_last: bool = True) -> str:
    """Render a tree-style listing of ``directory`` in pure Python.

    Used as a fallback when the external ``tree`` command is unavailable.
    Directories are listed before files, each group sorted case-insensitively,
    and common dependency/build directories are left out.

    Args:
        directory: Path of the directory to render.
        prefix: Indentation already accumulated for this nesting level; it is
            prepended to every entry line produced by this call.
        is_last: Accepted for call compatibility; not used by the rendering.

    Returns:
        The listing as newline-terminated text whose first line is the
        directory's own name followed by ``/``, or an error message if the
        path does not exist.
    """
    path = Path(directory)
    if not path.exists():
        return f"Error: Directory {directory} does not exist"

    # Directories to skip in tree display
    skip_dirs = {
        'node_modules', 'target', '.venv', 'venv', '__pycache__', '.git',
        'dist', 'build', '.next', '.nuxt', 'vendor', 'Pods', 'xcuserdata',
        'DerivedData', '.gradle', 'bin', 'obj', 'packages', '.pub-cache',
        'flutter_build',
    }

    # Collect lines in a list and join once instead of growing a string.
    lines = [f"{path.name}/"]
    try:
        entries = sorted(path.iterdir(), key=lambda x: (x.is_file(), x.name.lower()))
        entries = [e for e in entries if not (e.is_dir() and e.name in skip_dirs)]
        last_index = len(entries) - 1
        for i, entry in enumerate(entries):
            is_last_entry = i == last_index
            connector = "└── " if is_last_entry else "├── "

            # Only real directories get a trailing slash; anything else
            # (regular files, broken symlinks, sockets, ...) is a leaf.
            if not entry.is_dir():
                lines.append(f"{prefix}{connector}{entry.name}")
                continue

            lines.append(f"{prefix}{connector}{entry.name}/")
            # Never descend through a symlink: a link pointing at an ancestor
            # would otherwise be expanded over and over.
            if entry.is_symlink():
                continue

            # The continuation prefix must be as wide as the 4-column
            # connector so that nested levels line up under their parent.
            next_prefix = prefix + ("    " if is_last_entry else "│   ")
            subtree = generate_simple_tree(str(entry), next_prefix, is_last_entry)
            # Skip the first line (directory name) of subtree
            lines.extend(subtree.rstrip("\n").split("\n")[1:])
    except PermissionError:
        lines.append(f"{prefix}[Permission Denied]")

    return "\n".join(lines) + "\n"
def load_gitignore_patterns(directory: str) -> Set[str]:
    """Collect ignore patterns that apply to ``directory``.

    The result always contains a built-in set of default patterns. On top of
    that, every ``.gitignore`` found in ``directory`` and its parents is read,
    walking upwards until the repository root (the first directory containing
    a ``.git`` entry) has been processed. Outside a repository the walk
    continues up to the filesystem root.

    Blank lines and lines starting with ``#`` are skipped; every other line is
    added verbatim after stripping surrounding whitespace. Negation patterns
    (``!pattern``) are therefore stored as plain strings, not interpreted.

    Args:
        directory: Directory whose applicable patterns should be loaded.

    Returns:
        The set of pattern strings.
    """
    # Default patterns to always ignore
    patterns = {
        '.git/',
        '.git',
        '*.pyc',
        '__pycache__/',
        '.DS_Store',
        'Thumbs.db',
        '*.log',
        '.env',
        'node_modules/',
        '.vscode/',
        '.idea/',
        '*.tmp',
        '*.temp',
        '*.swp',
        '*.swo',
        '*~',
        '*.svg',
        '.gitignore',
        'package-lock.json',
        '*.lock',
        'target/',
        '.venv/',
    }

    current_path = Path(directory).resolve()
    for path in [current_path, *current_path.parents]:
        try:
            with open(path / '.gitignore', 'r', encoding='utf-8', errors='ignore') as f:
                for line in f:
                    line = line.strip()
                    if line and not line.startswith('#'):
                        patterns.add(line)
        except OSError:
            # No .gitignore at this level, or it cannot be read: best effort.
            pass

        # git only consults .gitignore files up to the top of the working
        # tree, so files above the repository root must not leak in. `.git`
        # may be a directory or a file, hence exists().
        if (path / '.git').exists():
            break

    return patterns
def should_ignore_file(file_path: str, base_dir: str, patterns: Set[str]) -> bool:
    """Decide whether a path is excluded by any of the ignore patterns.

    This is a simplified approximation of gitignore matching built on
    ``fnmatch`` (so ``*`` may also match ``/``):

    * Patterns ending in ``/`` are directory patterns. They match when a run
      of consecutive path components matches the pattern, so both the
      directory itself and everything beneath it are ignored. A leading ``/``
      anchors the pattern to ``base_dir``; a leading ``**/`` is treated the
      same as no prefix.
    * Other patterns match against the whole relative path, against the final
      component alone, or (for patterns starting with ``/``) against the
      relative path prefixed with ``/``.

    Args:
        file_path: Path of the file or directory to test.
        base_dir: Directory that the patterns are relative to.
        patterns: Ignore patterns, e.g. from ``load_gitignore_patterns``.

    Returns:
        True if at least one pattern matches, otherwise False.
    """
    # Normalize path separators so patterns can always use '/'
    rel_path = os.path.relpath(file_path, base_dir).replace(os.sep, '/')
    parts = rel_path.split('/')
    basename = parts[-1]

    for pattern in patterns:
        if pattern.endswith('/'):
            dir_pattern = pattern.rstrip('/')
            anchored = dir_pattern.startswith('/')
            dir_pattern = dir_pattern.lstrip('/')
            if dir_pattern.startswith('**/'):
                # "**/name/" means "name at any depth", i.e. unanchored.
                dir_pattern = dir_pattern[3:]
                anchored = False
            if not dir_pattern:
                continue

            if anchored:
                # Anchored: only leading portions of the path may match.
                candidates = ['/'.join(parts[:end]) for end in range(1, len(parts) + 1)]
            else:
                # Unanchored: any run of as many components as the pattern has.
                width = dir_pattern.count('/') + 1
                candidates = [
                    '/'.join(parts[start:start + width])
                    for start in range(len(parts) - width + 1)
                ]
            # Glob-match instead of comparing literally so that patterns such
            # as "*.egg-info/" work.
            if any(fnmatch.fnmatch(candidate, dir_pattern) for candidate in candidates):
                return True
        elif fnmatch.fnmatch(rel_path, pattern):
            return True
        elif fnmatch.fnmatch(basename, pattern):
            return True
        # Handle absolute patterns (starting with /)
        elif pattern.startswith('/') and fnmatch.fnmatch('/' + rel_path, pattern):
            return True
    return False
def is_text_file(file_path: str) -> bool:
    """Guess whether a file contains text.

    Checks are applied from cheapest to most expensive:

    1. The lower-cased extension, or the whole lower-cased file name for
       dotfiles such as ``.gitignore``, is in a list of known text types.
    2. ``mimetypes`` reports a ``text/*`` type for the name.
    3. The first 8192 bytes are inspected: an empty file is text, a NUL byte
       means binary, valid UTF-8 means text, and otherwise the chunk counts as
       text only if it contains no control bytes outside the usual
       whitespace/escape set (which still accepts Latin-1 style text).

    Args:
        file_path: Path of the file to inspect.

    Returns:
        True if the file looks like text; False if it looks binary or cannot
        be read.
    """
    lowered = file_path.lower()
    _, ext = os.path.splitext(lowered)
    # splitext() reports no extension for names like ".gitignore", so the
    # dotfile entries below have to be compared against the whole name.
    name = os.path.basename(lowered)

    # Known text extensions
    text_extensions = {
        '.txt', '.md', '.rst', '.py', '.js', '.html', '.css', '.json', '.xml',
        '.yaml', '.yml', '.toml', '.ini', '.cfg', '.conf', '.sh', '.bash',
        '.c', '.cpp', '.h', '.hpp', '.java', '.rs', '.go', '.php', '.rb',
        '.pl', '.r', '.sql', '.lua', '.m', '.tex', '.asm', '.s', '.dockerfile',
        '.gitignore', '.gitattributes', '.editorconfig', '.env', '.npmrc',
        '.ts', '.tsx', '.jsx', '.vue', '.svelte', '.scss', '.sass', '.less',
        '.swift', '.kt', '.scala', '.dart', '.cs', '.fs', '.clj', '.hs',
        '.elm', '.ex', '.exs', '.erl', '.hrl', '.jl', '.nim', '.cr', '.zig',
    }
    if ext in text_extensions or name in text_extensions:
        return True

    # Check MIME type
    mime_type, _ = mimetypes.guess_type(file_path)
    if mime_type and mime_type.startswith('text/'):
        return True

    # For files without extension or unknown types, sniff a small portion
    try:
        with open(file_path, 'rb') as f:
            chunk = f.read(8192)
    except OSError:
        return False

    if not chunk:
        return True  # Empty files are text
    # Check for null bytes (binary indicator)
    if b'\x00' in chunk:
        return False
    try:
        chunk.decode('utf-8')
        return True
    except UnicodeDecodeError:
        pass

    # Decoding as latin-1 succeeds for every byte string, so it cannot tell
    # text from binary. Instead, delete every byte that plausibly occurs in
    # text (BEL, BS, TAB, LF, VT, FF, CR, ESC, printable ASCII, high bytes);
    # anything left over is a control byte, which marks the file as binary.
    text_bytes = bytes(
        {7, 8, 9, 10, 11, 12, 13, 27} | set(range(0x20, 0x7F)) | set(range(0x80, 0x100))
    )
    return not chunk.translate(None, text_bytes)
def get_comment_syntax(file_path: str) -> str:
    """Return the comment marker to use for ``file_path``.

    The lookup is case-insensitive. A few well-known extensionless build
    files always use ``#``; every other file is resolved through
    ``COMMENT_SYNTAX`` by extension, with ``#`` as the default.
    """
    lowered = file_path.lower()

    # Extensionless build files cannot be looked up by extension
    if os.path.basename(lowered) in ('dockerfile', 'makefile', 'rakefile', 'gemfile'):
        return '#'

    extension = os.path.splitext(lowered)[1]
    return COMMENT_SYNTAX.get(extension, '#')
def format_file_header(file_path: str, base_dir: str) -> str:
    """Build the one-line header comment naming a file.

    The header is the file's path relative to ``base_dir``, written in the
    comment syntax chosen by ``get_comment_syntax``. Markers that open a
    block comment (``<!--`` and ``/*``) get their matching closer appended;
    all other markers are simply followed by the path.
    """
    marker = get_comment_syntax(file_path)
    relative = os.path.relpath(file_path, base_dir)

    # Only block-style markers need to be closed again
    closers = {'<!--': ' -->', '/*': ' */'}
    return f"{marker} {relative}{closers.get(marker, '')}"
def process_directory(directory: str) -> str:
    """Build the complete LLM context text for ``directory``.

    The result starts with a tree listing of the directory, followed by each
    included file as a header comment (its relative path) and its content.
    Paths matched by the ignore patterns and files that do not look like text
    are left out. Directories and files are visited in sorted order so that
    the output is the same regardless of the order in which the filesystem
    lists entries.

    Args:
        directory: Directory to process.

    Returns:
        The generated text, or an error message if ``directory`` is not a
        directory.
    """
    if not os.path.isdir(directory):
        return f"Error: {directory} is not a valid directory"

    output = []
    # Generate tree output
    output.append(run_tree(directory))

    # Load gitignore patterns
    ignore_patterns = load_gitignore_patterns(directory)

    # Walk through all files
    for root, dirs, files in os.walk(directory):
        # Prune ignored directories in place so os.walk never descends into
        # them, and sort the survivors to fix the traversal order.
        dirs[:] = sorted(
            d for d in dirs
            if not should_ignore_file(os.path.join(root, d), directory, ignore_patterns)
        )
        for file in sorted(files):
            file_path = os.path.join(root, file)
            # Skip ignored files
            if should_ignore_file(file_path, directory, ignore_patterns):
                continue
            # Skip non-text files
            if not is_text_file(file_path):
                continue
            try:
                # Undecodable bytes are dropped rather than aborting the run
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                    content = f.read()
            except OSError as e:
                output.append(f"# Error reading {os.path.relpath(file_path, directory)}: {e}")
                continue
            # Add file header and content
            output.append(format_file_header(file_path, directory))
            output.append(content)

    return "\n".join(output)
def main():
    """Command-line entry point: print the LLM context for one directory.

    Expects exactly one argument, the directory to process. Usage errors and
    an invalid directory are reported on stderr with exit status 1, so that
    error text is never mistaken for generated context on stdout.
    """
    if len(sys.argv) != 2:
        print("Usage: ./llmdir_context.py <directory>", file=sys.stderr)
        sys.exit(1)

    directory = sys.argv[1]
    # Validate here rather than printing the error string returned by
    # process_directory to stdout with a success status.
    if not os.path.isdir(directory):
        print(f"Error: {directory} is not a valid directory", file=sys.stderr)
        sys.exit(1)

    print(process_directory(directory))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment