Created
November 24, 2024 23:50
-
-
Save lsemenenko/aa17b6eed0e2e863c8cbdbd68160170e to your computer and use it in GitHub Desktop.
Combine files, with filename/dir hints, to load into AI
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/bash | |
| # =========================================================================== | |
| # File Combiner - Combines multiple files into a single file with path headers | |
| # =========================================================================== | |
| # | |
| # Description: | |
| # This script recursively combines files from a directory into a single file, | |
| # adding the file path as a comment before each file's content. Useful for | |
| # preparing codebases for AI analysis or documentation. | |
| # | |
| # Features: | |
| # - Adds file paths as comments (# path/to/file.ext) | |
| # - Skips binary files automatically | |
| # - Can filter by file extensions | |
| # - Excludes common dirs (.git, node_modules, etc.) by default | |
| # - Handles spaces in filenames | |
| # - Maintains relative paths | |
| # | |
| # Usage: | |
| # ./combine_files.sh [options] source_directory output_file | |
| # | |
| # Options: | |
| # -e 'ext1 ext2 ...' Only include files with these extensions | |
| # -x 'dir1 dir2 ...' Additional directories to exclude | |
| # -h Show help message | |
| # | |
| # Examples: | |
| # # Basic usage - combine all text files: | |
| # ./combine_files.sh ./my_project combined_files.txt | |
| # | |
| # # Only combine Python and JavaScript files: | |
| # ./combine_files.sh -e "py js" ./my_project combined_files.txt | |
| # | |
| # # Exclude additional directories: | |
| # ./combine_files.sh -x "build dist" ./my_project combined_files.txt | |
| # | |
| # # Combine only files with specific extensions and exclude additional dirs: | |
| # ./combine_files.sh -e "py js jsx" -x "build dist tests" ./my_project combined_files.txt | |
| # | |
| # Default excluded directories: | |
| # .git, node_modules, __pycache__, venv, .env | |
| # Usage function | |
#######################################
# Print the command-line help text and terminate the current shell.
# Arguments:
#   $1 - exit status to terminate with (optional; defaults to 1, so a call
#        without arguments keeps signalling a usage error)
# Outputs:
#   Writes the help text to stdout.
# Returns:
#   Never returns; exits with the requested status.
#######################################
usage() {
  local status="${1:-1}"

  # Unquoted here-document delimiter on purpose: $0 must expand to the name
  # the script was invoked with.
  cat <<EOF
Usage: $0 [options] directory output_file
Options:
 -e 'ext1 ext2 ...' Only include files with these extensions (space-separated)
 -x 'dir1 dir2 ...' Exclude these directories (space-separated)
 -h Show this help message

Example:
 $0 -e 'py js txt' -x 'node_modules .git' /path/to/source output.txt
EOF
  exit "$status"
}
# Default excluded directories; values given with -x are appended to this
# space-separated list.
EXCLUDE_DIRS=".git node_modules __pycache__ venv .env"
# Space-separated extensions given with -e; empty means no extension filter.
EXTENSIONS=""

# Parse command line options
while getopts "e:x:h" opt; do
  case "$opt" in
    e) EXTENSIONS="$OPTARG" ;;
    x) EXCLUDE_DIRS="$EXCLUDE_DIRS $OPTARG" ;;
    h)
      # usage ends the shell it runs in with a failure status when called
      # without arguments. Asking for help is not an error, so print the
      # help from a subshell and report success ourselves.
      (usage)
      exit 0
      ;;
    # getopts reports unknown options and missing option arguments as "?".
    *) usage ;;
  esac
done

# Shift past the options so only the positional arguments remain
shift $((OPTIND - 1))

# Exactly two positional arguments are required: source directory and
# output file.
if [[ $# -ne 2 ]]; then
  usage
fi

SOURCE_DIR="$1"
OUTPUT_FILE="$2"
#######################################
# Report whether file(1) describes a path as text.
# Arguments:
#   $1 - path to inspect
# Returns:
#   0 when the description contains "text", non-zero otherwise
#######################################
is_text_file() {
  local description
  # -b leaves the file name out of the report, so a path that merely
  # contains the word "text" cannot make a binary file look like text.
  description=$(file -b -- "$1") || return 1
  [[ "$description" == *text* ]]
}

#######################################
# Concatenate every text file below a directory into one output file. Each
# file is preceded by a "# relative/path" line and followed by an empty line.
# Arguments:
#   $1 - source directory
#   $2 - output file (truncated first)
#   $3 - whitespace-separated directory names whose contents are skipped
#   $4 - whitespace-separated extensions to include; empty includes all
# Outputs:
#   Combined content goes to the output file; "Skipping binary file" notes
#   and error messages go to stderr.
# Returns:
#   0 on success, 1 when the source directory does not exist or the output
#   file cannot be written. The exit status of find itself is not checked.
#######################################
combine_files() {
  local source_dir=$1 output_file=$2
  local -a exclude_dirs=() extensions=() find_args=()
  local dir ext path rel_path prefix

  # Drop trailing slashes (but keep a bare "/") so that the prefix removed
  # from each found path matches what find prints.
  while [[ "$source_dir" == */ && "$source_dir" != / ]]; do
    source_dir=${source_dir%/}
  done

  if [[ ! -d "$source_dir" ]]; then
    printf 'Error: source directory not found: %s\n' "$1" >&2
    return 1
  fi

  # A leading "-" would be parsed by find as an option.
  if [[ "$source_dir" == -* ]]; then
    source_dir="./$source_dir"
  fi
  prefix="${source_dir%/}/"

  # Split the lists on whitespace without glob-expanding the entries.
  # read returns non-zero at end of input because no NUL delimiter is found;
  # the arrays are filled regardless.
  IFS=$' \t\n' read -r -d '' -a exclude_dirs <<<"$3" || true
  IFS=$' \t\n' read -r -d '' -a extensions <<<"$4" || true

  # Build the find arguments as an array: every value stays a single
  # argument, so quotes or spaces in names cannot alter the command.
  find_args=("$source_dir" -type f)
  for dir in "${exclude_dirs[@]}"; do
    find_args+=('!' -path "*/$dir/*")
  done
  if ((${#extensions[@]} > 0)); then
    find_args+=('(' -name "*.${extensions[0]}")
    for ext in "${extensions[@]:1}"; do
      find_args+=(-o -name "*.$ext")
    done
    find_args+=(')')
  fi
  find_args+=(-print0)

  # Clear or create the output file, and stop early when that is impossible.
  if ! { true >"$output_file"; } 2>/dev/null; then
    printf 'Error: cannot write to output file: %s\n' "$output_file" >&2
    return 1
  fi

  # NUL-delimited names survive any character a file name may contain. The
  # list arrives on fd 3 so commands in the loop keep their own stdin, and
  # the output file is opened once for the whole loop.
  while IFS= read -r -d '' path <&3; do
    # The output file may live inside the source tree; never append it to
    # itself.
    if [[ "$path" -ef "$output_file" ]]; then
      continue
    fi

    rel_path=${path#"$prefix"}

    if is_text_file "$path"; then
      printf '# %s\n' "$rel_path"
      cat -- "$path"
      echo ""
    else
      echo "Skipping binary file: $rel_path" >&2
    fi
  done 3< <(find "${find_args[@]}") >>"$output_file"

  return 0
}

# Main processing. This is the last command of the script, so its status
# becomes the script's exit status: a failed run exits non-zero and does not
# print the success message.
combine_files "$SOURCE_DIR" "$OUTPUT_FILE" "$EXCLUDE_DIRS" "$EXTENSIONS" \
  && echo "Files combined successfully into $OUTPUT_FILE"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment