-
-
Save shawn-albert/6352144b03e4650916d2fcbac25aa533 to your computer and use it in GitHub Desktop.
| #!/usr/bin/env python3 | |
| """ | |
| Code Context Consolidator | |
| Consolidate code files from a specified directory into a single text file for LLM context. | |
| This script traverses a given directory, collects code files, and compiles them into a single | |
| text file with appropriate code blocks and directory structure for providing context to an LLM. | |
| It supports inclusion and exclusion of specific file types, files, and folders. | |
| **Running with `uv`** | |
| To ensure that all dependencies are managed without manually handling environments, it's recommended | |
| to run this script using [`uv`](https://github.com/astral-sh/uv). `uv` automatically manages virtual | |
| environments and dependencies for your scripts. | |
| Install `uv` by following the instructions [here](https://docs.astral.sh/uv/getting-started/installation/). | |
| Run the script using: | |
| uv run --with typer code_context_consolidator.py [OPTIONS] DIRECTORY | |
| For more details on running scripts with `uv`, see the [documentation](https://docs.astral.sh/uv/guides/scripts/). | |
| **Usage:** | |
| python code_context_consolidator.py [OPTIONS] DIRECTORY | |
| **Options:** | |
| --exclude-file-types TEXT Comma-separated list of file extensions to exclude. | |
| --exclude-files TEXT Comma-separated list of file names to exclude. | |
| --exclude-folders TEXT Comma-separated list of folder names to exclude. | |
| --include-file-types TEXT Comma-separated list of file extensions to include. | |
| --include-folders TEXT Comma-separated list of folder names to include. | |
| --output-file TEXT Path and name of the output text file. | |
| **Example:** | |
| To exclude multiple files and folders, such as `node_modules`, `dist`, `cdk.out`, `__pycache__`, | |
| a specific folder under `src`, all Jupyter Notebook files (`*.ipynb`), and `__init__.py` files, run: | |
| uv run --with typer code_context_consolidator.py \ | |
| --exclude-file-types ipynb \ | |
| --exclude-files __init__.py \ | |
| --exclude-folders node_modules,dist,cdk.out,__pycache__,src/specific_folder \ | |
| path/to/your/project | |
| This will consolidate all code files in `path/to/your/project`, excluding the specified files and folders, | |
| into a single `output.txt` file in the current working directory. | |
| """ | |
| import traceback | |
| from pathlib import Path | |
| from typing import List, Optional | |
| import typer | |
| app = typer.Typer(add_completion=False) | |
def generate_directory_structure(directory: Path, exclude_folders: List[str]) -> str:
    """
    Generate the directory structure of the given directory.

    Entries are listed in sorted order, one per line, using box-drawing
    connectors. An entry is left out when its name equals one of the
    single-name exclusions, or when the trailing components of its path
    relative to ``directory`` equal a multi-segment exclusion such as
    ``src/specific_folder``.

    Args:
        directory (Path): The root directory to generate the structure from.
        exclude_folders (List[str]): Folder names, or slash-separated folder
            paths, to exclude. Surrounding whitespace is ignored.

    Returns:
        str: A string representing the directory structure in tree format,
        starting with the root directory's name on its own line.
    """
    plain_names = set()
    nested_specs = []
    for spec in exclude_folders:
        segments = tuple(
            part
            for part in spec.strip().replace("\\", "/").split("/")
            if part and part != "."
        )
        if len(segments) == 1:
            plain_names.add(segments[0])
        elif segments:
            nested_specs.append(segments)

    def is_excluded(relative_parts: tuple) -> bool:
        # Single names match at any depth; multi-segment specs must match the
        # tail of the path walked so far. Excluded directories are pruned
        # before descending, so checking the tail covers matches at any depth.
        if relative_parts[-1] in plain_names:
            return True
        return any(
            relative_parts[-len(spec):] == spec for spec in nested_specs
        )

    lines = [f"{directory.name}\n"]

    def tree(dir_path: Path, relative_parts: tuple = (), prefix: str = "") -> None:
        entries = sorted(
            entry
            for entry in dir_path.iterdir()
            if not is_excluded(relative_parts + (entry.name,))
        )
        last_index = len(entries) - 1
        for index, entry in enumerate(entries):
            is_last = index == last_index
            connector = "└── " if is_last else "├── "
            lines.append(f"{prefix}{connector}{entry.name}\n")
            # Symlinked directories are listed but not expanded, so a link
            # that points back up the tree cannot cause endless recursion.
            if entry.is_dir() and not entry.is_symlink():
                # Continuation prefixes are four characters wide so children
                # line up underneath the four-character connectors.
                extension = "    " if is_last else "│   "
                tree(entry, relative_parts + (entry.name,), prefix + extension)

    tree(directory)
    return "".join(lines)
def collect_files(
    directory: Path,
    exclude_file_types: List[str],
    exclude_folders: List[str],
    include_file_types: Optional[List[str]],
    include_folders: Optional[List[str]],
    exclude_files: List[str],
) -> List[Path]:
    """
    Collect files from the directory respecting inclusion and exclusion criteria.

    Folder filters are evaluated against each file's path *relative to*
    ``directory``, so the names of directories above the scanned root never
    influence the result. A folder spec may be a single name (``dist``) or a
    slash-separated path (``src/specific_folder``); it matches when its
    segments appear as a contiguous run of components in the relative path.

    Args:
        directory (Path): The root directory to collect files from.
        exclude_file_types (List[str]): List of file extensions to exclude,
            compared against ``Path.suffix`` (so they include the leading dot).
        exclude_folders (List[str]): List of folder names or paths to exclude.
        include_file_types (Optional[List[str]]): List of file extensions to
            include; when empty or None every extension is allowed.
        include_folders (Optional[List[str]]): List of folder names or paths
            to include; when empty or None every folder is allowed.
        exclude_files (List[str]): List of file names to exclude.

    Returns:
        List[Path]: A list of file paths collected, in traversal order.
    """

    def parse_specs(specs: Optional[List[str]]) -> List[tuple]:
        # Turn "src/specific_folder" into ("src", "specific_folder"); blank
        # specs are dropped so they can never match everything.
        parsed = []
        for spec in specs or []:
            segments = tuple(
                part
                for part in spec.strip().replace("\\", "/").split("/")
                if part and part != "."
            )
            if segments:
                parsed.append(segments)
        return parsed

    def matches_any(parts: tuple, specs: List[tuple]) -> bool:
        # True when any spec occurs as a contiguous run inside ``parts``.
        for spec in specs:
            width = len(spec)
            if any(
                parts[start:start + width] == spec
                for start in range(len(parts) - width + 1)
            ):
                return True
        return False

    excluded_specs = parse_specs(exclude_folders)
    included_specs = parse_specs(include_folders)

    files = []
    for path in directory.rglob("*"):
        if not path.is_file():
            continue
        # Only components below the scanned root take part in folder matching.
        relative_parts = path.relative_to(directory).parts
        if matches_any(relative_parts, excluded_specs):
            continue
        if included_specs and not matches_any(relative_parts, included_specs):
            continue
        if include_file_types and path.suffix not in include_file_types:
            continue
        if path.suffix in exclude_file_types:
            continue
        if path.name in exclude_files:
            continue
        files.append(path)
    return files
# Built once at import time instead of on every call.
_EXTENSION_LANGUAGE_MAP = {
    ".py": "python",
    ".js": "javascript",
    ".ts": "typescript",
    ".java": "java",
    ".cpp": "cpp",
    ".c": "c",
    ".h": "c",
    ".html": "html",
    ".css": "css",
    ".md": "markdown",
    ".sh": "shell",
    ".json": "json",
    ".yml": "yaml",
    ".yaml": "yaml",
    ".xml": "xml",
    ".go": "go",
    ".rb": "ruby",
    ".php": "php",
    ".rs": "rust",
}


def get_code_block_language(file_extension: str) -> str:
    """
    Map file extensions to code block languages.

    The lookup ignores case, so ``.PY`` and ``.py`` both map to ``python``.

    Args:
        file_extension (str): The file extension, including the leading dot.

    Returns:
        str: The code block language identifier, or an empty string when the
        extension is not in the mapping.
    """
    return _EXTENSION_LANGUAGE_MAP.get(file_extension.lower(), "")
def _split_csv(raw: Optional[str]) -> List[str]:
    """Split a comma-separated option value into trimmed, non-empty items."""
    if not raw:
        return []
    return [item.strip() for item in raw.split(",") if item.strip()]


def _normalize_extensions(raw: Optional[str]) -> List[str]:
    """Parse a comma-separated extension list into ``.ext`` form."""
    return [
        f".{ext.lstrip('.')}" for ext in _split_csv(raw) if ext.lstrip(".")
    ]


@app.command()
def main(
    directory: Path = typer.Argument(
        ...,
        exists=True,
        file_okay=False,
        dir_okay=True,
        readable=True,
        help="Directory to parse and compile the code from.",
    ),
    exclude_file_types: Optional[str] = typer.Option(
        None, help="Comma-separated list of file extensions to exclude."
    ),
    exclude_files: Optional[str] = typer.Option(
        None, help="Comma-separated list of file names to exclude."
    ),
    exclude_folders: Optional[str] = typer.Option(
        None, help="Comma-separated list of folder names to exclude."
    ),
    include_file_types: Optional[str] = typer.Option(
        None, help="Comma-separated list of file extensions to include."
    ),
    include_folders: Optional[str] = typer.Option(
        None, help="Comma-separated list of folder names to include."
    ),
    output_file: Optional[Path] = typer.Option(
        None, help="Path and name of the output text file."
    ),
):
    """
    Consolidate code files from a specified directory into a single text file for LLM context.
    """
    try:
        if not output_file:
            output_file = Path.cwd() / "output.txt"

        # Every list option is trimmed item by item so "a, b" behaves like "a,b".
        exclude_file_types_list = _normalize_extensions(exclude_file_types)
        exclude_files_list = _split_csv(exclude_files)
        exclude_folders_list = _split_csv(exclude_folders)
        # The include filters stay None when nothing usable was supplied,
        # which the collector treats as "no restriction".
        include_file_types_list = _normalize_extensions(include_file_types) or None
        include_folders_list = _split_csv(include_folders) or None

        directory_structure = generate_directory_structure(
            directory, exclude_folders_list
        )
        content_lines = ["```shell", directory_structure, "```", "\n"]

        files = collect_files(
            directory,
            exclude_file_types_list,
            exclude_folders_list,
            include_file_types_list,
            include_folders_list,
            exclude_files_list,
        )

        resolved_output = output_file.resolve()
        for file_path in sorted(files):
            # A previous run may have left the output inside the scanned
            # directory; never fold the consolidated file into itself.
            if file_path.resolve() == resolved_output:
                continue
            relative_path = file_path.relative_to(directory)
            language = get_code_block_language(file_path.suffix)
            file_content = file_path.read_text(encoding="utf-8", errors="ignore")
            # Lengthen the fence until it no longer occurs in the content, so
            # files that themselves contain ``` cannot close the block early.
            fence = "```"
            while fence in file_content:
                fence += "`"
            content_lines.append(f"File: {relative_path}")
            content_lines.append(f"{fence}{language}")
            content_lines.append(file_content)
            content_lines.append(fence)
            content_lines.append("\n")

        output_file.write_text("\n".join(content_lines), encoding="utf-8")
        typer.echo(f"Consolidated code written to {output_file}")
    except Exception as e:
        # Top-level boundary: report the failure on stderr and exit non-zero
        # so shells and CI can detect that no output was produced.
        typer.echo(f"An error occurred: {e}", err=True)
        typer.echo(traceback.format_exc(), err=True)
        raise typer.Exit(code=1) from e
# Invoke the Typer application only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    app()
If you're open to contributions, will you add a license? I have a suggestion that would exclude --exclude-files and --exclude-file-types from the directory tree.
If you're open to contributions, will you add a license? I have a suggestion that would exclude
`--exclude-files` and `--exclude-file-types` from the directory tree.
@kielmarj Thank you for your interest and for the thoughtful suggestion!
I've updated the comment on this Gist to include the MIT License and a new Contributions section. Feel free to fork the Gist and share your updates, especially your proposed improvement to exclude --exclude-files and --exclude-file-types from the directory tree. Looking forward to your contributions! 😊
Feel free to fork the Gist and share your updates, especially your proposed improvement to exclude
`--exclude-files` and `--exclude-file-types` from the directory tree. Looking forward to your contributions! 😊
Here's the link to my revision.
generate_directory_structure was updated to also exclude any files specified by the --exclude-files and --exclude-file-types options. This is the primary change.
The other minor changes were my attempt to implement improvements recommended by pylint.
Here's the link to my revision.
Also added option to exclude empty directories from the directory tree: --exclude-empty-dirs
Code Context Consolidator
Consolidate code files from a specified directory into a single text file for LLM context.
This script traverses a given directory, collects code files, and compiles them into a single text file with appropriate code blocks and directory structure. It supports inclusion and exclusion of specific file types, files, and folders.
Features
`python`, `javascript`).

Running with `uv`

To ensure that all dependencies are managed without manually handling environments, it's recommended to run this script using `uv`. `uv` automatically manages virtual environments and dependencies for your scripts.

Install `uv`: Follow the instructions here.

Run the Script:

Replace `[OPTIONS]` with any of the available options and `DIRECTORY` with the path to the directory you want to consolidate.

For more details on running scripts with `uv`, see the documentation.

Usage
Options
`--exclude-file-types TEXT`: Comma-separated list of file extensions to exclude.
`--exclude-files TEXT`: Comma-separated list of file names to exclude.
`--exclude-folders TEXT`: Comma-separated list of folder names to exclude.
`--include-file-types TEXT`: Comma-separated list of file extensions to include.
`--include-folders TEXT`: Comma-separated list of folder names to include.
`--output-file TEXT`: Path and name of the output text file.
Example
To exclude multiple files and folders, such as `node_modules`, `dist`, `cdk.out`, `__pycache__`, a specific folder under `src`, all Jupyter Notebook files (`*.ipynb`), and `__init__.py` files, run:

uv run --with typer code_context_consolidator.py \
    --exclude-file-types ipynb \
    --exclude-files __init__.py \
    --exclude-folders node_modules,dist,cdk.out,__pycache__,src/specific_folder \
    path/to/your/project

This will consolidate all code files in `path/to/your/project`, excluding the specified files and folders, into a single `output.txt` file in the current working directory.

Notes

`typer` for command-line interface management.
`output.txt` in the current working directory if no output file is specified.

License
This script is provided under the MIT License.
Contributions
Contributions to this Gist are always appreciated! While GitHub doesn’t allow pull requests for Gists through the GUI, you can still share your improvements by following these steps:
Fork the Gist:
Make Your Changes:
Share Your Fork:
Pulling Changes Locally (For Reference):
Example:
Thank you for taking the time to contribute and improve this Gist! 😊