Skip to content

Instantly share code, notes, and snippets.

@lsemenenko
Created November 24, 2024 23:50
Show Gist options
  • Select an option

  • Save lsemenenko/aa17b6eed0e2e863c8cbdbd68160170e to your computer and use it in GitHub Desktop.

Select an option

Save lsemenenko/aa17b6eed0e2e863c8cbdbd68160170e to your computer and use it in GitHub Desktop.
Combine files, with filename/dir hints, to load into AI
#!/bin/bash
# ===========================================================================
# File Combiner - Combines multiple files into a single file with path headers
# ===========================================================================
#
# Description:
# This script recursively combines files from a directory into a single file,
# adding the file path as a comment before each file's content. Useful for
# preparing codebases for AI analysis or documentation.
#
# Features:
# - Adds file paths as comments (# path/to/file.ext)
# - Skips binary files automatically
# - Can filter by file extensions
# - Excludes common dirs (.git, node_modules, etc.) by default
# - Handles spaces in filenames
# - Maintains relative paths
#
# Usage:
# ./combine_files.sh [options] source_directory output_file
#
# Options:
# -e 'ext1 ext2 ...' Only include files with these extensions
# -x 'dir1 dir2 ...' Additional directories to exclude
# -h Show help message
#
# Examples:
# # Basic usage - combine all text files:
# ./combine_files.sh ./my_project combined_files.txt
#
# # Only combine Python and JavaScript files:
# ./combine_files.sh -e "py js" ./my_project combined_files.txt
#
# # Exclude additional directories:
# ./combine_files.sh -x "build dist" ./my_project combined_files.txt
#
# # Combine specific files and exclude dirs:
# ./combine_files.sh -e "py js jsx" -x "build dist tests" ./my_project combined_files.txt
#
# Default excluded directories:
# .git, node_modules, __pycache__, venv, .env
# Usage function
# Print the command-line help to stdout and terminate the script with status 1.
usage() {
  # One printf argument per output line; the empty string yields the blank line.
  printf '%s\n' \
    "Usage: $0 [options] directory output_file" \
    "Options:" \
    " -e 'ext1 ext2 ...' Only include files with these extensions (space-separated)" \
    " -x 'dir1 dir2 ...' Exclude these directories (space-separated)" \
    " -h Show this help message" \
    "" \
    "Example:" \
    " $0 -e 'py js txt' -x 'node_modules .git' /path/to/source output.txt"
  exit 1
}
# Built-in directory skip list; values passed with -x are appended to it.
EXCLUDE_DIRS=".git node_modules __pycache__ venv .env"
# Extension allow-list; left empty means no extension filtering.
EXTENSIONS=""

# Consume the flags. getopts reports -h and any unrecognised flag (as "?")
# through the same variable, and both of those end in the help text.
while getopts "e:x:h" flag; do
  if [ "$flag" = "e" ]; then
    EXTENSIONS="$OPTARG"
  elif [ "$flag" = "x" ]; then
    EXCLUDE_DIRS="$EXCLUDE_DIRS $OPTARG"
  else
    usage
  fi
done

# Discard the flags that were just processed so only positionals remain.
shift "$((OPTIND - 1))"

# Exactly two positionals are expected: the source directory and the output file.
[ "$#" -eq 2 ] || usage

SOURCE_DIR="$1"
OUTPUT_FILE="$2"
#######################################
# Combine every text file below a directory into a single output file.
# Each file is written as a "# <path relative to the source dir>" header line,
# the file's content, and one trailing blank line.
# Globals:
#   EXCLUDE_DIRS (read) - whitespace-separated directory names to skip
#   EXTENSIONS   (read) - whitespace-separated extensions to keep;
#                         empty means every extension is accepted
# Arguments:
#   $1 - source directory, scanned recursively
#   $2 - output file; truncated (or created) before anything is written
# Outputs:
#   "Skipping binary file: ..." notes and error messages on stderr.
# Returns:
#   0 on success; 1 if the source is not a directory or the output file
#   cannot be created.
#######################################
combine_files() {
  local src=$1 out=$2
  local -a skip_dirs=() wanted_exts=() find_args=()
  local name path rel prefix

  # Validate before truncating the output so a typo cannot wipe an old result.
  if [[ ! -d "$src" ]]; then
    printf "Error: source directory '%s' does not exist\n" "$src" >&2
    return 1
  fi

  # Normalise "dir/" to "dir" so the prefix below matches the paths find
  # prints; a bare "/" is kept as is.
  while [[ "$src" == */ && "$src" != / ]]; do
    src=${src%/}
  done
  prefix="${src%/}/"

  # Split the space-separated lists into arrays. Using read avoids both eval
  # and the pathname expansion an unquoted $EXCLUDE_DIRS would trigger.
  IFS=$' \t\n' read -r -a skip_dirs <<< "$EXCLUDE_DIRS"
  IFS=$' \t\n' read -r -a wanted_exts <<< "$EXTENSIONS"

  # Build the find invocation as an argument array: every element reaches
  # find verbatim, so quotes or spaces in names cannot break the command.
  find_args=("$src" -type f)
  for name in "${skip_dirs[@]}"; do
    find_args+=('!' -path "*/${name}/*")
  done
  if (( ${#wanted_exts[@]} > 0 )); then
    # ( -name '*.a' -o -name '*.b' ... )
    find_args+=('(' -name "*.${wanted_exts[0]}")
    for name in "${wanted_exts[@]:1}"; do
      find_args+=(-o -name "*.${name}")
    done
    find_args+=(')')
  fi

  # Clear or create the output file.
  if ! : > "$out"; then
    printf "Error: cannot write to output file '%s'\n" "$out" >&2
    return 1
  fi

  # NUL-delimited paths keep leading/trailing blanks and newlines intact.
  while IFS= read -r -d '' path; do
    # The output file may sit inside the source tree; never feed it to itself.
    [[ "$path" -ef "$out" ]] && continue

    # Quoted prefix: stripped literally, not interpreted as a glob pattern.
    rel=${path#"$prefix"}

    # -b drops the file name from file(1)'s answer, so the word "text" in a
    # path (e.g. "textures/logo.png") is not mistaken for a text file.
    if [[ "$(file -b -- "$path")" == *text* ]]; then
      printf '# %s\n' "$rel"
      cat -- "$path"
      printf '\n'
    else
      printf 'Skipping binary file: %s\n' "$rel" >&2
    fi
  done < <(find "${find_args[@]}" -print0) >> "$out"

  return 0
}

# Main processing. The success message is printed only when combining worked;
# on failure the non-zero status of combine_files is left as the status of
# this final command.
combine_files "$SOURCE_DIR" "$OUTPUT_FILE" \
  && echo "Files combined successfully into $OUTPUT_FILE"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment