Skip to content

Instantly share code, notes, and snippets.

@mrsimpson
Last active January 13, 2025 07:45
Show Gist options
  • Save mrsimpson/b329bddb9005629762c13beb437dc1af to your computer and use it in GitHub Desktop.
Save mrsimpson/b329bddb9005629762c13beb437dc1af to your computer and use it in GitHub Desktop.
Serialization of files from multiple folders

Project Serializer

Purpose

A flexible shell function to serialize source code directories into a single text stream, useful for preprocessing code for LLM analysis or code review.

Features

  • Serialize multiple project directories
  • Include/exclude file patterns
  • Skip binary files
  • Verbose mode for debugging

Installation

Add to ~/.bashrc or ~/.zshrc:

source /path/to/this/script

Usage

Basic Serialization

# Serialize entire directory
serialize_folder /path/to/project

# Multiple directories
serialize_folder /path/project1 /path/project2

Filtering

# Include only Python and JavaScript files
serialize_folder -i "*.py,*.js" /path/to/project

# Exclude test and node_modules directories
serialize_folder -e "test,node_modules" /path/to/project

# Combine include and exclude
serialize_folder -i "*.py" -e "*test*" /path/to/project

Output Options

# Save to file
serialize_folder /path/to/project > project_contents.txt

# Copy to clipboard
serialize_folder /path/to/project | xclip -selection clipboard

# Verbose debugging
serialize_folder -v /path/to/project

Options

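-i, --include PATTERN: Include only files whose name matches PATTERN (a glob, or a comma-separated list of globs such as "*.py,*.js")
-e, --exclude PATTERN: Exclude files whose name matches PATTERN (a glob, or a comma-separated list of globs) and directories whose name equals one of the listed items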
-v, --verbose: Enable verbose output
-h, --help: Show help message
#!/usr/bin/env bash
# shellcheck shell=bash
#
# serialize_folder: concatenate the text files of one or more directory trees
# into a single stream on stdout. Every file is framed by a
# "/// file <path>" line before and a "/// end of file <path>" line after.
#
# Meant to be sourced from an interactive bash or zsh session. Only the file
# stream is written to stdout; help, errors, verbose diagnostics and the final
# summary go to stderr so the output can be redirected or piped safely.

#######################################
# Test a string against a comma-separated list of glob patterns.
# Arguments:
#   $1 - string to test (callers pass a file's base name)
#   $2 - comma-separated glob patterns; empty items are ignored
# Returns:
#   0 if at least one pattern matches, 1 otherwise
#######################################
_serialize_matches_any() {
  # zsh only treats an expanded parameter as a pattern when GLOB_SUBST is on;
  # bash does so by default. LOCAL_OPTIONS restores the setting on return.
  if [[ -n "${ZSH_VERSION:-}" ]]; then
    setopt local_options glob_subst
  fi

  local candidate="$1" remaining="$2," glob
  # Peel one item off the front per iteration; portable to bash and zsh
  # (avoids `read -a` / `read -A`, which differ between the two shells).
  while [[ -n "$remaining" ]]; do
    glob="${remaining%%,*}"
    remaining="${remaining#*,}"
    [[ -n "$glob" ]] || continue
    # shellcheck disable=SC2053  # right-hand side is unquoted on purpose
    if [[ "$candidate" == $glob ]]; then
      return 0
    fi
  done
  return 1
}

#######################################
# Decide whether a directory is covered by the exclude list. Items are
# compared literally (no globbing) against whole path components.
# Arguments:
#   $1 - directory path
#   $2 - comma-separated exclude items; empty items are ignored
# Returns:
#   0 if the directory should be skipped, 1 otherwise
#######################################
_serialize_dir_excluded() {
  # NB: the local must not be called `path` - in zsh that name is tied to PATH.
  local target="$1" remaining="$2," item
  local leaf="${target##*/}"

  while [[ -n "$remaining" ]]; do
    item="${remaining%%,*}"
    remaining="${remaining#*,}"
    # An empty item would turn "$item/"* into "/"* and exclude every
    # absolute path, so skip it.
    [[ -n "$item" ]] || continue
    if [[ "$target" == *"/$item/"* ]] ||
      [[ "$target" == *"/$item" ]] ||
      [[ "$target" == "$item/"* ]] ||
      [[ "$leaf" == "$item" ]]; then
      return 0
    fi
  done
  return 1
}

#######################################
# Decide whether a file holds binary data.
# Arguments:
#   $1 - file path
# Returns:
#   0 if the file looks binary, 1 if it looks like text (or is empty)
#######################################
_serialize_is_binary() {
  local target="$1" encoding

  # file(1) reports empty files as charset "binary"; treat them as text.
  [[ -s "$target" ]] || return 1

  if command -v file >/dev/null 2>&1; then
    # --mime-encoding yields the literal word "binary" for non-text data,
    # whereas --mime-type yields types such as image/png that never contain it.
    encoding=$(file -b --mime-encoding -- "$target" 2>/dev/null) || return 1
    [[ "$encoding" == "binary" ]]
    return
  fi

  # Fallback without file(1): with -I grep reports no match for binary files.
  ! LC_ALL=C grep -qI '' -- "$target"
}

#######################################
# Serialize every eligible file below one directory to stdout.
# Arguments:
#   $1 - directory to process
#   $2 - comma-separated include globs (empty means include everything)
#   $3 - comma-separated exclude items (may be empty)
#   $4 - 1 for verbose diagnostics on stderr, anything else for quiet
# Outputs:
#   framed file contents on stdout; diagnostics on stderr
# Returns:
#   0 on success (including a fully excluded directory), 1 if $1 is not a
#   directory or cannot be resolved
#######################################
_serialize_process_directory() {
  local dir="$1" include_pattern="$2" exclude_pattern="$3" verbose="$4"
  local processed_files=0 current_dir file
  # NOTE: matched against the whole path as a prefix of a component, so it
  # also skips names such as .gitignore or .github (kept as-is on purpose).
  local hidden_regex='/\.(git|svn|hg|tmp)'

  if [[ ! -d "$dir" ]]; then
    echo "Error: $dir is not a valid directory" >&2
    return 1
  fi

  # The path as typed by the user is checked before it is made absolute.
  if _serialize_dir_excluded "$dir" "$exclude_pattern"; then
    if [[ "$verbose" -eq 1 ]]; then
      echo "Skipping entire directory: $dir" >&2
    fi
    return 0
  fi

  # Resolve to an absolute, symlink-free path without relying on realpath(1).
  if ! dir=$(CDPATH='' cd -- "$dir" && pwd -P); then
    echo "Error: $1 is not a valid directory" >&2
    return 1
  fi

  if [[ "$verbose" -eq 1 ]]; then
    echo "Processing directory: $dir" >&2
    echo "Include pattern: $include_pattern" >&2
    echo "Exclude pattern: $exclude_pattern" >&2
  fi

  # Both loops read from process substitutions rather than pipelines so they
  # run in the current shell and the file counter survives. NUL delimiters
  # plus `IFS= read -r` keep unusual file names intact.
  while IFS= read -r -d '' current_dir; do
    if _serialize_dir_excluded "$current_dir" "$exclude_pattern"; then
      if [[ "$verbose" -eq 1 ]]; then
        echo "Skipping entire directory: $current_dir" >&2
      fi
      continue
    fi

    while IFS= read -r -d '' file; do
      # Cheap name-based filters run first; the binary probe spawns a process.
      if [[ "$file" =~ $hidden_regex ]]; then
        if [[ "$verbose" -eq 1 ]]; then
          echo "Skipping version control dir: $file" >&2
        fi
        continue
      fi

      if [[ -n "$include_pattern" ]] &&
        ! _serialize_matches_any "${file##*/}" "$include_pattern"; then
        if [[ "$verbose" -eq 1 ]]; then
          echo "Skipping (not matching include): $file" >&2
        fi
        continue
      fi

      if [[ -n "$exclude_pattern" ]] &&
        _serialize_matches_any "${file##*/}" "$exclude_pattern"; then
        if [[ "$verbose" -eq 1 ]]; then
          echo "Skipping (matching exclude): $file" >&2
        fi
        continue
      fi

      if _serialize_is_binary "$file"; then
        if [[ "$verbose" -eq 1 ]]; then
          echo "Skipping binary file: $file" >&2
        fi
        continue
      fi

      printf "/// file %s\n" "$file"
      cat -- "$file"
      printf "\n/// end of file %s\n\n" "$file"
      processed_files=$((processed_files + 1))
    done < <(find "$current_dir" -maxdepth 1 -type f -print0)
  done < <(find "$dir" -type d -print0)

  if [[ "$verbose" -eq 1 ]]; then
    echo "Processed $processed_files files in $dir" >&2
  fi
  return 0
}

#######################################
# Serialize one or more directory trees into a single text stream.
# Usage:
#   serialize_folder [options] <directory1> [directory2] ...
# Options:
#   -i, --include PATTERN  comma-separated globs matched against file names
#                          (default "*")
#   -e, --exclude PATTERN  comma-separated items: globs for file names,
#                          literal names for directories
#   -v, --verbose          report skipped and processed files on stderr
#   -h, --help             print usage on stderr
#   --                     treat all remaining arguments as directories
# Outputs:
#   framed file contents on stdout; everything else on stderr
# Returns:
#   0 if every directory was processed, 1 on a usage error or if any
#   directory failed
#######################################
serialize_folder() {
  local -a dirs
  dirs=()
  local include_pattern="*" exclude_pattern="" verbose=0
  local dir failed=0 processed=0

  while [[ $# -gt 0 ]]; do
    case "$1" in
      -i | --include)
        # Without this guard `shift 2` fails and the loop never terminates.
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires a pattern. Use -h for help." >&2
          return 1
        fi
        include_pattern="$2"
        shift 2
        ;;
      -e | --exclude)
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires a pattern. Use -h for help." >&2
          return 1
        fi
        exclude_pattern="$2"
        shift 2
        ;;
      -v | --verbose)
        verbose=1
        shift
        ;;
      -h | --help)
        printf '%s\n' \
          "Usage: serialize_folder [options] <directory1> [directory2] ..." \
          "Options:" \
          " -i, --include PATTERN Include files matching pattern (e.g., '*.py,*.js')" \
          " -e, --exclude PATTERN Exclude files/directories matching pattern" \
          " -v, --verbose Enable verbose output" \
          " -h, --help Show this help message" >&2
        return 0
        ;;
      --)
        shift
        dirs+=("$@")
        break
        ;;
      -?*)
        echo "Error: Unknown option: $1. Use -h for help." >&2
        return 1
        ;;
      *)
        dirs+=("$1")
        shift
        ;;
    esac
  done

  if [[ ${#dirs[@]} -eq 0 ]]; then
    echo "Error: No directories specified. Use -h for help." >&2
    return 1
  fi

  for dir in "${dirs[@]}"; do
    if _serialize_process_directory \
      "$dir" "$include_pattern" "$exclude_pattern" "$verbose"; then
      processed=$((processed + 1))
    else
      failed=$((failed + 1))
    fi
  done

  if [[ "$failed" -gt 0 ]]; then
    echo "Serialization completed with $processed successful and $failed failed directories" >&2
    return 1
  fi

  echo "Successfully serialized $processed director(y/ies)" >&2
  return 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment