Skip to content

Instantly share code, notes, and snippets.

@nordinrahman
Last active March 2, 2025 09:17
Show Gist options
  • Save nordinrahman/fd4155749ed9688f943e7fccd3388bc6 to your computer and use it in GitHub Desktop.
Save nordinrahman/fd4155749ed9688f943e7fccd3388bc6 to your computer and use it in GitHub Desktop.
This is a script to clean up a folder and its subdirectories so that they can be compressed.
#!/bin/bash
# Built using gen AI
# https://x.com/i/grok/share/ECyhfrv2uFRV39tDT9izzKnop
# Command-line parsing.
# Accepts exactly one target directory plus an optional --dry-run flag, in any
# order. Results:
#   DRY_RUN    - "true" when --dry-run was given, otherwise "false"
#   TARGET_DIR - the target directory, resolved to an absolute path
DRY_RUN=false
TARGET_DIR=""
while [[ $# -gt 0 ]]; do
  case "$1" in
    --dry-run)
      DRY_RUN=true
      ;;
    -*)
      # This script deletes files: a mistyped flag (e.g. --dryrun) must stop
      # the run instead of being ignored and falling through to a real cleanup.
      echo "Error: Unknown option: $1" >&2
      echo "Usage: $0 /path/to/directory [--dry-run]" >&2
      exit 1
      ;;
    *)
      # A second positional argument usually means an unquoted path containing
      # spaces; cleaning only the first word would hit the wrong directory.
      if [ -n "$TARGET_DIR" ]; then
        echo "Error: Unexpected extra argument: $1" >&2
        echo "Usage: $0 /path/to/directory [--dry-run]" >&2
        exit 1
      fi
      TARGET_DIR="$1"
      ;;
  esac
  shift
done

if [ -z "$TARGET_DIR" ]; then
  echo "Error: Please provide a directory path" >&2
  echo "Usage: $0 /path/to/directory [--dry-run]" >&2
  exit 1
fi

# Resolve to an absolute path; every later step works with absolute paths.
# If realpath fails it prints nothing, and the directory check below rejects
# the resulting empty value.
TARGET_DIR=$(realpath "$TARGET_DIR")
if [ ! -d "$TARGET_DIR" ]; then
  echo "Error: Resolved path does not exist or is not a directory: $TARGET_DIR" >&2
  exit 1
fi
# ANSI colour escape sequences, stored as literal backslash sequences.
# NC restores the terminal's default colour.
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'

# DOTNET_AVAILABLE holds the exit status of the dotnet lookup:
# 0 when the dotnet CLI is on PATH, non-zero otherwise.
if command -v dotnet >/dev/null 2>&1; then
  DOTNET_AVAILABLE=0
else
  DOTNET_AVAILABLE=$?
fi

# Terminal directories (build artifacts, caches, and IDE folders).
# Space-separated list of directory names.
TERMINAL_DIRS="node_modules bin obj packages target TestResults __pycache__ build dist"
TERMINAL_DIRS+=" .vs .npm .yarn .pytest_cache venv .venv .mvn .vscode .idea .eclipse .settings .cache"

# Terminal files (common ignored files, expanded for .NET user-specific files).
# Space-separated list of file-name patterns.
TERMINAL_FILES="*.user *.suo *.csproj.user *.sln.cache *.dbmdl *.pyc *.pyo *.cache"
TERMINAL_FILES+=" project.lock.json npm-debug.log yarn-error.log .coverage coverage.xml *.egg *.log"
TERMINAL_FILES+=" dependency-reduced-pom.xml .project .classpath"
# Succeeds when the newest stash (stash@{0}) carries the
# "force-stash-before-backup" message and is based on the current HEAD.
# Must be called with the repository as the working directory.
_backup_stash_matches_head() {
  local stash_message current_head stash_base
  stash_message=$(git stash list --format="%gs" 2>/dev/null | head -n 1)
  current_head=$(git rev-parse HEAD 2>/dev/null) || return 1
  stash_base=$(git rev-parse "stash@{0}^1" 2>/dev/null) || return 1
  [[ "$stash_message" == *force-stash-before-backup* && "$stash_base" == "$current_head" ]]
}

# Removes untracked and ignored files (git clean -x -d -f) when git reports
# there is something to remove.
# Arguments: $1 - text for the dry-run line, $2 - text printed after cleaning.
_clean_untracked_and_ignored() {
  local dry_run_message="$1"
  local done_message="$2"
  local pending
  # `git status --porcelain` does not list ignored files, so ask git clean
  # itself (-n = report only) whether anything is left to delete.
  pending=$(git clean -x -d -n 2>/dev/null)
  [ -n "$pending" ] || return 0
  if [ "$DRY_RUN" = true ]; then
    echo -e "${YELLOW}[DRY-RUN] ${dry_run_message}${NC}"
  else
    git clean -x -d -f
    echo "$done_message"
  fi
}

# Prepares one Git working tree for backup: pending work is stashed, untracked
# and ignored files are cleaned, and the tracked files are removed from disk.
# Nothing in this function touches the .git entry itself.
# Globals:   DRY_RUN, RED, GREEN, YELLOW, NC (read)
# Arguments: $1 - path of the repository root
# Returns:   non-zero when the directory cannot be entered or stashing fails
# Note:      changes the current directory to $1.
process_git_repo() {
  local dir="$1"
  echo -e "${GREEN}Processing Git repository at: $dir${NC}"
  cd "$dir" || return

  # Submodules are handled first: .gitmodules and the submodule directories
  # are tracked entries and are removed further down, after which there would
  # be nothing left to visit. Each one is processed in a subshell, by absolute
  # path, so this shell's working directory is unaffected.
  if [ -f ".gitmodules" ]; then
    echo "Found submodules, processing..."
    local submodule_dir
    local -a submodule_dirs=()
    # git runs the foreach command inside each checked-out submodule, so $PWD
    # there is the submodule's absolute path.
    while IFS= read -r -d '' submodule_dir; do
      submodule_dirs+=("$submodule_dir")
    done < <(git submodule --quiet foreach 'printf "%s\0" "$PWD"' 2>/dev/null)
    for submodule_dir in "${submodule_dirs[@]}"; do
      (process_git_repo "$submodule_dir")
    done
  fi

  # Read the index NUL-delimited so names containing spaces, quotes or
  # non-ASCII characters arrive exactly as they are on disk.
  local entry
  local -a tracked_files=()
  while IFS= read -r -d '' entry; do
    tracked_files+=("$entry")
  done < <(git ls-files -z)

  if [ "${#tracked_files[@]}" -eq 0 ]; then
    echo "No tracked files exist in index; repository is already prepared for backup."
    _clean_untracked_and_ignored \
      "Would clean untracked files with: git clean -x -d -f" \
      "Cleaned untracked files (no stash created as no tracked files present)"
    return 0
  fi

  # Check if any tracked files physically exist (broken symlinks count too).
  local tracked_files_exist=false
  for entry in "${tracked_files[@]}"; do
    if [ -e "$entry" ] || [ -L "$entry" ]; then
      tracked_files_exist=true
      break
    fi
  done

  if [ "$tracked_files_exist" != true ]; then
    echo "All tracked files are physically deleted; repository is already prepared for backup."
    if _backup_stash_matches_head; then
      echo "Confirmed existing stash 'force-stash-before-backup' at index 0 matches current HEAD."
    else
      echo "Warning: No valid stash found for current HEAD despite all tracked files being deleted."
    fi
    _clean_untracked_and_ignored \
      "Would clean untracked files with: git clean -x -d -f" \
      "Cleaned untracked files (no stash created as all tracked files are deleted)"
    return 0
  fi

  if _backup_stash_matches_head; then
    echo "Existing stash 'force-stash-before-backup' found at index 0, based on current HEAD, but tracked files still exist physically."
  fi
  echo "Tracked files exist physically, processing for backup..."

  if [ -n "$(git status --porcelain)" ]; then
    echo "Found pending changes or untracked files, staging and stashing them..."
    if [ "$DRY_RUN" = true ]; then
      echo -e "${YELLOW}[DRY-RUN] Would stage all changes with: git add .${NC}"
      echo -e "${YELLOW}[DRY-RUN] Would stash changes with: git stash push -m 'force-stash-before-backup' --include-untracked${NC}"
    else
      git add .
      # The stash is the only copy of uncommitted work once the files are
      # deleted below, so a failed stash must stop everything.
      if ! git stash push -m "force-stash-before-backup" --include-untracked; then
        echo -e "${RED}Error: git stash failed in $dir; leaving the working tree untouched${NC}" >&2
        return 1
      fi
      echo "Stashed changes"
    fi
  fi

  # Runs even when there was nothing to stash: a repository with a clean
  # status can still hold large ignored build output.
  _clean_untracked_and_ignored \
    "Would clean with: git clean -x -d -f" \
    "Cleaned untracked/ignored files"

  # Remove tracked files
  echo "Removing tracked files..."
  if [ "$DRY_RUN" = true ]; then
    echo -e "${YELLOW}[DRY-RUN] Would remove tracked files${NC}"
  else
    printf '%s\0' "${tracked_files[@]}" | xargs -0 -n 100 rm -rf --
    echo "Removed tracked files"
  fi
}
# Cleans .NET build output below a directory that directly contains a project
# file (*.csproj, *.fsproj or *.vbproj). Directories without one are left alone.
# Globals:   DRY_RUN, DOTNET_AVAILABLE, YELLOW, NC (read)
# Arguments: $1 - directory to inspect
# Outputs:   progress lines on stdout; in dry-run mode one line per artifact
# Note:      changes the current directory to $1.
cleanup_dotnet() {
  local dir="$1"
  echo -e "${YELLOW}Processing .NET project at: $dir${NC}"
  cd "$dir" || return

  # One matching project file of any kind is enough. A glob that matches
  # nothing stays literal and fails the -e test.
  local candidate
  local has_project=false
  for candidate in *.csproj *.fsproj *.vbproj; do
    if [ -e "$candidate" ]; then
      has_project=true
      break
    fi
  done
  [ "$has_project" = true ] || return 0

  if [ "$DOTNET_AVAILABLE" -eq 0 ]; then
    if [ "$DRY_RUN" = true ]; then
      echo -e "${YELLOW}[DRY-RUN] Would run: dotnet clean${NC}"
    else
      dotnet clean
      echo "Ran dotnet clean"
    fi
  fi

  # bin/obj/TestResults are directories; *.user and *.suo are files.
  # -prune stops find from descending into a directory that is about to go.
  local -a artifact_query=(
    . '(' -type d '(' -name bin -o -name obj -o -name TestResults ')' -prune
    -o -type f '(' -name '*.user' -o -name '*.suo' ')' ')' -print0
  )

  if [ "$DRY_RUN" = true ]; then
    local artifact
    while IFS= read -r -d '' artifact; do
      printf '%b[DRY-RUN] Would remove .NET artifact: %s%b\n' "$YELLOW" "$artifact" "$NC"
    done < <(find "${artifact_query[@]}")
    if [ -d "packages" ]; then
      echo -e "${YELLOW}[DRY-RUN] Would remove NuGet packages folder${NC}"
    fi
  else
    find "${artifact_query[@]}" | xargs -0 -P 4 rm -rf --
    if [ -d "packages" ]; then
      rm -rf "packages" && echo "Removed NuGet packages folder"
    fi
  fi
  return 0
}
# Cleans one directory that is not a Git repository root:
#   1. removes the terminal directories found directly inside it, keeping any
#      path that a local .gitignore re-includes with a "!<dir>/<path>" line;
#   2. removes terminal files found directly inside it;
#   3. runs the .NET cleanup and removes *.egg-info directories at any depth.
# Only literal "!<dir>/<path>" lines are honoured; glob exceptions are not
# expanded.
# Globals:   DRY_RUN, TERMINAL_DIRS, TERMINAL_FILES, YELLOW, NC (read)
# Arguments: $1 - directory to clean
# Note:      changes the current directory to $1.
cleanup_non_git() {
  local dir="$1"
  echo -e "${YELLOW}Processing non-Git directory at: $dir${NC}"
  cd "$dir" || return

  # read -a splits on whitespace without pathname expansion, so patterns such
  # as *.log are not expanded against the current directory.
  local -a terminal_dirs=() terminal_patterns=()
  read -r -a terminal_dirs <<< "$TERMINAL_DIRS"
  read -r -a terminal_patterns <<< "$TERMINAL_FILES"

  local terminal_dir line prefix exception keep_dir preserve_ok restore_ok
  local -a exceptions=() preserved=()

  for terminal_dir in "${terminal_dirs[@]}"; do
    [ -d "$terminal_dir" ] || continue

    # Collect the .gitignore exceptions that apply to this directory.
    exceptions=()
    if [ -f ".gitignore" ]; then
      prefix="!$terminal_dir/"
      while IFS= read -r line || [ -n "$line" ]; do
        line=${line%$'\r'} # tolerate CRLF line endings
        case "$line" in
          "$prefix"?*) ;;
          *) continue ;;
        esac
        exception=${line#"$prefix"}
        exception=${exception%/}
        # An exception must stay inside the terminal directory.
        case "/$exception/" in
          */../*) continue ;;
        esac
        [ -n "$exception" ] && exceptions+=("$exception")
      done < ".gitignore"
    fi

    if [ "${#exceptions[@]}" -eq 0 ]; then
      if [ "$DRY_RUN" = true ]; then
        echo -e "${YELLOW}[DRY-RUN] Would remove terminal directory: $terminal_dir${NC}"
      else
        # Nothing to restore afterwards, so the deletion can run in the
        # background; the wait below collects it.
        rm -rf -- "$terminal_dir" &
        echo "Removed terminal directory: $terminal_dir"
      fi
      continue
    fi

    if [ "$DRY_RUN" = true ]; then
      echo -e "${YELLOW}[DRY-RUN] Would preserve exceptions in $terminal_dir: ${exceptions[*]}${NC}"
      echo -e "${YELLOW}[DRY-RUN] Would remove $terminal_dir except exceptions${NC}"
      continue
    fi

    # Park the exceptions in a holding directory, delete, then put them back
    # at their original relative paths.
    if ! keep_dir=$(mktemp -d "./.tmp_preserve.XXXXXX"); then
      echo "Error: could not create a holding directory; leaving $terminal_dir untouched" >&2
      continue
    fi
    preserved=()
    preserve_ok=true
    for exception in "${exceptions[@]}"; do
      [ -e "$terminal_dir/$exception" ] || continue
      if mkdir -p "$keep_dir/$(dirname "$exception")" \
        && mv -- "$terminal_dir/$exception" "$keep_dir/$exception"; then
        preserved+=("$exception")
        echo "Preserved $terminal_dir/$exception"
      else
        preserve_ok=false
        break
      fi
    done

    if [ "$preserve_ok" = true ]; then
      # Must finish before the restore below, so this one is not backgrounded.
      rm -rf -- "$terminal_dir"
      echo "Removed terminal directory: $terminal_dir"
    else
      echo "Error: could not preserve exceptions; leaving $terminal_dir in place" >&2
    fi

    restore_ok=true
    for exception in "${preserved[@]}"; do
      if ! { mkdir -p "$(dirname "$terminal_dir/$exception")" \
        && mv -- "$keep_dir/$exception" "$terminal_dir/$exception"; }; then
        restore_ok=false
      fi
    done
    if [ "$restore_ok" = true ]; then
      rm -rf -- "$keep_dir"
    else
      echo "Warning: some preserved files could not be restored; they remain in $dir/${keep_dir#./}" >&2
    fi
  done

  # Handle terminal files (directly inside this directory only).
  local pattern file
  for pattern in "${terminal_patterns[@]}"; do
    while IFS= read -r -d '' file; do
      if [ "$DRY_RUN" = true ]; then
        echo -e "${YELLOW}[DRY-RUN] Would remove terminal file: $file${NC}"
      else
        rm -f -- "$file"
        echo "Removed terminal file: $file"
      fi
    done < <(find . -maxdepth 1 -type f -name "$pattern" -print0)
  done

  # Collect the background directory removals before anything walks the tree.
  wait

  # .NET cleanup
  cleanup_dotnet "$dir"

  # Non-terminal cleanup: Python egg-info folders at any depth.
  local egg_dir
  while IFS= read -r -d '' egg_dir; do
    if [ "$DRY_RUN" = true ]; then
      printf '%b[DRY-RUN] Would remove Python egg-info folder: %s%b\n' "$YELLOW" "$egg_dir" "$NC"
    else
      rm -rf -- "$egg_dir"
    fi
  done < <(find . -type d -name '*.egg-info' -prune -print0)
}
# Entry point for one directory. Decides, in this order, whether it is
#   - a terminal directory (name listed in TERMINAL_DIRS): removed outright;
#   - a Git repository root (has a .git entry): handed to process_git_repo;
#   - an untracked directory inside some Git work tree: removed outright;
#   - anything else: cleaned with cleanup_non_git, then each child directory
#     is processed recursively.
# Globals:   DRY_RUN, TERMINAL_DIRS, RED, YELLOW, NC (read)
# Arguments: $1 - directory to process (callers in this file pass absolute paths)
# Note:      changes the current directory.
process_directory() {
  local dir="$1"
  if [ ! -d "$dir" ]; then
    echo -e "${RED}Error: Directory not found: $dir${NC}"
    return
  fi
  cd "$dir" || return

  # Check if this is a terminal directory. The name must equal a list entry
  # exactly: a regex or word match would also hit "settings", "cache" or
  # "idea" through the dotted entries (.settings, .cache, .idea).
  local dir_name candidate
  local -a terminal_dirs=()
  dir_name=$(basename "$dir")
  read -r -a terminal_dirs <<< "$TERMINAL_DIRS"
  for candidate in "${terminal_dirs[@]}"; do
    if [ "$candidate" = "$dir_name" ]; then
      if [ "$DRY_RUN" = true ]; then
        echo -e "${YELLOW}[DRY-RUN] Would remove terminal directory: $dir${NC}"
      else
        rm -rf "$dir" && echo "Removed terminal directory: $dir"
      fi
      return
    fi
  done

  # A directory with its own .git entry is a repository root. This is checked
  # before the untracked test below so that a repository with an empty index
  # is processed rather than deleted. -e is used because submodules and linked
  # worktrees have a .git *file*, not a directory.
  if [ -e ".git" ]; then
    process_git_repo "$dir"
    return
  fi

  # Inside somebody else's work tree with nothing tracked below this point:
  # the whole directory is untracked content.
  if git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
    local tracked_files
    # Only treat the directory as untracked when git ls-files itself
    # succeeded; a failing git must never lead to deletion.
    if tracked_files=$(git ls-files -- "$dir") && [ -z "$tracked_files" ]; then
      if [ "$DRY_RUN" = true ]; then
        echo -e "${YELLOW}[DRY-RUN] Would remove untracked Git subdirectory: $dir${NC}"
      else
        rm -rf "$dir" && echo "Removed untracked Git subdirectory: $dir"
      fi
      return
    fi
  fi

  cleanup_non_git "$dir"

  # Snapshot the child directories first (NUL-delimited), then recurse; the
  # recursive calls change the working directory.
  local subdir
  local -a subdirs=()
  while IFS= read -r -d '' subdir; do
    subdirs+=("$subdir")
  done < <(find . -mindepth 1 -maxdepth 1 -type d -print0)
  for subdir in "${subdirs[@]}"; do
    process_directory "$dir/${subdir#./}"
  done
}
# DRY_RUN_FLAG is the command-line spelling of the dry-run setting:
# "--dry-run" when DRY_RUN is true, empty otherwise.
case "$DRY_RUN" in
  true) DRY_RUN_FLAG="--dry-run" ;;
  *) DRY_RUN_FLAG="" ;;
esac

# Banner summarising the run, then the cleanup itself, then a closing banner.
printf '%s\n' "Starting cleanup process..."
printf '%s\n' "Target directory: $TARGET_DIR"
printf '%s\n' "Dry run mode: $DRY_RUN"
printf '%s\n' "Dotnet CLI available: $(if [ "$DOTNET_AVAILABLE" -eq 0 ]; then echo "Yes"; else echo "No"; fi)"
printf '%s\n' "-----------------------------"

process_directory "$TARGET_DIR"

printf '%s\n' "-----------------------------"
printf '%s\n' "Cleanup process completed"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment