Last active
May 5, 2025 04:58
-
-
Save nordinrahman/fd4155749ed9688f943e7fccd3388bc6 to your computer and use it in GitHub Desktop.
This script cleans up a folder and its subdirectories so that they can be compressed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Clean up a directory tree so that it can be compressed.
#
# Usage: <script> /path/to/directory [--dry-run] [--max-jobs N] [--silent]

# Absolute path of this script; it is re-invoked for subdirectories and
# submodules.
SCRIPT_PATH=$(realpath "$0")

DRY_RUN=false
TARGET_DIR=""

# Degree of parallelism handed to `xargs -P`: MAX_JOBS from the environment if
# set, otherwise the number of online processors, otherwise 4.
MAX_JOBS=${MAX_JOBS:-$(getconf _NPROCESSORS_ONLN 2>/dev/null || echo 4)}

# Set by recursive invocations to suppress the start/end banner.
SILENT=false

while [[ $# -gt 0 ]]; do
  case "$1" in
    --dry-run)
      DRY_RUN=true
      shift
      ;;
    --max-jobs)
      # Without this guard `shift 2` fails when the value is missing, consumes
      # nothing, and the loop spins forever.
      if [[ $# -lt 2 ]]; then
        echo "Error: --max-jobs requires a value" >&2
        exit 1
      fi
      # The value goes straight to `xargs -P`, which only accepts integers.
      if ! [[ "$2" =~ ^[0-9]+$ ]]; then
        echo "Error: --max-jobs expects a non-negative integer, got: $2" >&2
        exit 1
      fi
      MAX_JOBS="$2"
      shift 2
      ;;
    --silent)
      SILENT=true
      shift
      ;;
    *)
      # The first positional argument is the target; later ones are ignored.
      if [ -z "$TARGET_DIR" ]; then
        TARGET_DIR="$1"
      fi
      shift
      ;;
  esac
done

# Recursive invocations do not receive --max-jobs on their command line; they
# read MAX_JOBS from the environment, so it has to be exported.
export MAX_JOBS
# A target directory is mandatory; usage errors are reported on stderr.
if [ -z "$TARGET_DIR" ]; then
  echo "Error: Please provide a directory path" >&2
  echo "Usage: $0 /path/to/directory [--dry-run] [--max-jobs N] [--silent]" >&2
  exit 1
fi

# Remember the path as given: when realpath fails it yields nothing usable, and
# the error message would otherwise show an empty path.
REQUESTED_DIR="$TARGET_DIR"
TARGET_DIR=$(realpath -- "$REQUESTED_DIR" 2>/dev/null) || TARGET_DIR=""
if [ -z "$TARGET_DIR" ] || [ ! -d "$TARGET_DIR" ]; then
  echo "Error: Resolved path does not exist or is not a directory: ${TARGET_DIR:-$REQUESTED_DIR}" >&2
  exit 1
fi
# ANSI colour sequences; they are expanded by `echo -e` at the point of use.
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'

# DOTNET_AVAILABLE holds the exit status of the lookup: 0 when the dotnet CLI
# is on PATH, non-zero otherwise.
if command -v dotnet >/dev/null 2>&1; then
  DOTNET_AVAILABLE=0
else
  DOTNET_AVAILABLE=$?
fi

# Terminal directories (build artifacts, caches, and IDE folders), as one
# space-separated list.
TERMINAL_DIRS="node_modules bin obj packages target TestResults"
TERMINAL_DIRS+=" __pycache__ build dist"
TERMINAL_DIRS+=" .vs .npm .yarn .pytest_cache venv .venv .mvn"
TERMINAL_DIRS+=" .vscode .idea .eclipse .settings .cache"

# Terminal files (commonly ignored files, including .NET user-specific files),
# as one space-separated list of `find -name` patterns.
TERMINAL_FILES="*.user *.suo *.csproj.user *.sln.cache *.dbmdl"
TERMINAL_FILES+=" *.pyc *.pyo *.cache project.lock.json"
TERMINAL_FILES+=" npm-debug.log yarn-error.log .coverage coverage.xml"
TERMINAL_FILES+=" *.egg *.log dependency-reduced-pom.xml .project .classpath"
# Re-invoke this script on every submodule of the repository in the current
# directory. Does nothing when there is no .gitmodules file.
_process_git_submodules() {
  [ -f ".gitmodules" ] || return 0
  echo "Found submodules, processing in parallel..."
  # DRY_RUN_FLAG is deliberately unquoted so an empty value adds no argument.
  # NOTE(review): awk's $2 truncates submodule paths that contain whitespace.
  # shellcheck disable=SC2086
  git submodule status --recursive 2>/dev/null \
    | awk '{print $2}' \
    | xargs -P "$MAX_JOBS" -I {} bash "$SCRIPT_PATH" "{}" ${DRY_RUN_FLAG:-} --silent
}

# Run `git clean -x -d -f` in the current repository when `git status` reports
# anything, or announce it in dry-run mode.
# Arguments: $1 - message printed after cleaning
#            $2 - "quiet" to discard git clean's own output (optional)
_git_clean_if_dirty() {
  local done_message="$1" verbosity="${2:-}"
  local pending
  pending=$(git status --porcelain 2>/dev/null)
  [ -n "$pending" ] || return 0
  if [ "$DRY_RUN" = true ]; then
    echo -e "${YELLOW}[DRY-RUN] Would clean untracked files with: git clean -x -d -f${NC}"
    return 0
  fi
  if [ "$verbosity" = quiet ]; then
    git clean -x -d -f >/dev/null 2>&1
  else
    git clean -x -d -f
  fi
  echo "$done_message"
}

# Succeed when stash@{0} is a 'force-stash-before-backup' stash whose base
# commit is the current HEAD.
_has_backup_stash_on_head() {
  local top_message head_commit stash_base
  top_message=$(git stash list --format="%gs" 2>/dev/null | head -n 1)
  # On failure `git rev-parse` echoes its argument; reset to empty instead.
  head_commit=$(git rev-parse HEAD 2>/dev/null) || head_commit=""
  stash_base=$(git rev-parse "stash@{0}^1" 2>/dev/null) || stash_base=""
  [[ "$top_message" == *force-stash-before-backup* ]] \
    && [ "$stash_base" = "$head_commit" ]
}

#######################################
# Prepare a Git repository for backup: stash pending work, purge untracked and
# ignored files, then delete the tracked files from the work tree (they can be
# restored from the repository data later).
# Globals:   DRY_RUN, DRY_RUN_FLAG, MAX_JOBS, SCRIPT_PATH, GREEN, YELLOW, RED,
#            NC (all read)
# Arguments: $1 - repository root; the function changes into it
# Outputs:   progress messages on stdout, failures on stderr
# Returns:   non-zero when the directory cannot be entered or when stashing
#            fails (nothing is deleted in that case)
#######################################
process_git_repo() {
  local dir="$1"
  echo -e "${GREEN}Processing Git repository at: $dir${NC}"
  cd "$dir" || return

  # A repository without any commit has nothing to stash against.
  if ! git rev-parse --verify HEAD >/dev/null 2>&1; then
    echo -e "${YELLOW}Warning: No valid HEAD found in repository (possibly empty or uninitialized). Treating as empty repository.${NC}"
    _git_clean_if_dirty "Cleaned untracked files (no stash created as repository is empty)" quiet
    _process_git_submodules
    return
  fi

  # Walk the index NUL-delimited so names with spaces, quotes or newlines are
  # handled; stop at the first entry that still exists on disk.
  local index_has_entries=false tracked_on_disk=false tracked_path
  while IFS= read -r -d '' tracked_path; do
    index_has_entries=true
    if [ -e "$tracked_path" ] || [ -L "$tracked_path" ]; then
      tracked_on_disk=true
      break
    fi
  done < <(git ls-files -z)

  if [ "$index_has_entries" = false ]; then
    echo "No tracked files exist in index; repository is already prepared for backup."
    _git_clean_if_dirty "Cleaned untracked files (no stash created as no tracked files present)"
  elif [ "$tracked_on_disk" = false ]; then
    echo "All tracked files are physically deleted; repository is already prepared for backup."
    if _has_backup_stash_on_head; then
      echo "Confirmed existing stash 'force-stash-before-backup' at index 0 matches current HEAD."
    else
      echo "Note: No valid stash found for current HEAD, but all tracked files are deleted."
    fi
    _git_clean_if_dirty "Cleaned untracked files (no stash created as all tracked files are deleted)"
  else
    local git_status status_line all_deletions=true
    git_status=$(git status --porcelain)
    # all_deletions stays true only if every status line starts with "D ".
    if [ -n "$git_status" ]; then
      while IFS= read -r status_line; do
        if [[ "$status_line" != "D "* ]]; then
          all_deletions=false
          break
        fi
      done <<< "$git_status"
    fi

    # Skip stashing if a stash exists, matches HEAD, and all changes are deletions.
    if [ "$all_deletions" = true ] && _has_backup_stash_on_head; then
      echo "Existing stash 'force-stash-before-backup' found at index 0, based on current HEAD with all tracked files pending deletion. Skipping stash creation."
    else
      echo "Tracked files exist physically or non-deletion changes detected, processing for backup..."
      if [ -n "$git_status" ]; then
        echo "Found pending changes or untracked files, staging and stashing them..."
        if [ "$DRY_RUN" = true ]; then
          echo -e "${YELLOW}[DRY-RUN] Would stage all changes with: git add .${NC}"
          echo -e "${YELLOW}[DRY-RUN] Would stash changes with: git stash push -m 'force-stash-before-backup' --include-untracked${NC}"
          echo -e "${YELLOW}[DRY-RUN] Would clean with: git clean -x -d -f${NC}"
        else
          git add .
          # The stash is the only copy of uncommitted work: never go on to the
          # destructive steps if it could not be created.
          if ! git stash push -m "force-stash-before-backup" --include-untracked; then
            echo -e "${RED}Error: git stash failed in $dir; leaving the work tree untouched${NC}" >&2
            return 1
          fi
          git clean -x -d -f
          echo "Stashed changes and cleaned untracked/ignored files"
        fi
      fi

      echo "Removing tracked files..."
      if [ "$DRY_RUN" = true ]; then
        echo -e "${YELLOW}[DRY-RUN] Would remove tracked files${NC}"
      else
        # NUL-delimited hand-off keeps every path intact; `--` protects names
        # that start with a dash.
        git ls-files -z | xargs -0 -P "$MAX_JOBS" -n 100 rm -rf --
        echo "Removed tracked files"
      fi
    fi
  fi

  _process_git_submodules
}
#######################################
# Remove .NET build output below a project directory.
# Only acts when the directory itself contains a *.csproj, *.fsproj or
# *.vbproj file. Runs `dotnet clean` when the CLI is available, then deletes
# every bin/obj/TestResults directory below it and a top-level packages folder.
# Globals:   DRY_RUN, DOTNET_AVAILABLE, MAX_JOBS, YELLOW, NC (all read)
# Arguments: $1 - directory to inspect; the function changes into it
# Outputs:   progress messages on stdout
# Returns:   non-zero only when the directory cannot be entered
#######################################
cleanup_dotnet() {
  local dir="$1"
  echo -e "${YELLOW}Processing .NET project at: $dir${NC}"
  cd "$dir" || return

  # `ls a b c` fails as soon as ONE pattern has no match, which hid every
  # single-language project; `compgen -G` succeeds per pattern instead.
  if ! { compgen -G '*.csproj' || compgen -G '*.fsproj' || compgen -G '*.vbproj'; } >/dev/null; then
    return 0
  fi

  if [ "${DOTNET_AVAILABLE:-1}" -eq 0 ]; then
    if [ "$DRY_RUN" = true ]; then
      echo -e "${YELLOW}[DRY-RUN] Would run: dotnet clean${NC}"
    else
      dotnet clean
      echo "Ran dotnet clean"
    fi
  fi

  if [ "$DRY_RUN" = true ]; then
    local artifact
    # -prune: report a matching directory once and do not look inside it.
    while IFS= read -r -d '' artifact; do
      printf '%b[DRY-RUN] Would remove .NET artifact: %s%b\n' "$YELLOW" "$artifact" "$NC"
    done < <(find . -type d \( -name bin -o -name obj -o -name TestResults \) -prune -print0)
    if [ -d "packages" ]; then
      echo -e "${YELLOW}[DRY-RUN] Would remove NuGet packages folder${NC}"
    fi
  else
    # -prune keeps find from descending into directories being deleted.
    find . -type d \( -name bin -o -name obj -o -name TestResults \) -prune -print0 \
      | xargs -0 -P "$MAX_JOBS" rm -rf --
    if [ -d "packages" ]; then
      rm -rf -- "packages" && echo "Removed NuGet packages folder"
    fi
  fi
  return 0
}
#######################################
# Clean a directory that is not a Git repository root: remove the terminal
# directories and terminal files found directly inside it, run the .NET
# cleanup, and delete Python *.egg-info folders below it.
# When a .gitignore is present, lines of the form "!<terminal_dir>/<path>" are
# treated as literal paths to keep inside that terminal directory.
# Globals:   TERMINAL_DIRS, TERMINAL_FILES, DRY_RUN, MAX_JOBS, YELLOW, NC
#            (all read)
# Arguments: $1 - directory to clean; the function changes into it
# Outputs:   progress messages on stdout, failures on stderr
# Returns:   non-zero when the directory cannot be entered
#######################################
cleanup_non_git() {
  local dir="$1"
  echo -e "${YELLOW}Processing non-Git directory at: $dir${NC}"
  cd "$dir" || return

  # `read -a` splits the lists without glob-expanding patterns such as *.log
  # against the current directory.
  local -a terminal_dirs=() terminal_files=()
  read -r -a terminal_dirs <<< "$TERMINAL_DIRS"
  read -r -a terminal_files <<< "$TERMINAL_FILES"

  local terminal_dir
  for terminal_dir in "${terminal_dirs[@]}"; do
    [ -d "$terminal_dir" ] || continue

    # Collect the .gitignore exceptions that point into this directory.
    local -a exceptions=()
    if [ -f ".gitignore" ]; then
      local ignore_line prefix="!${terminal_dir}/"
      while IFS= read -r ignore_line || [ -n "$ignore_line" ]; do
        ignore_line=${ignore_line%$'\r'}
        # The quoted prefix is matched literally, so ".vs" cannot match "xvs".
        [[ "$ignore_line" == "$prefix"?* ]] || continue
        exceptions+=("${ignore_line#"$prefix"}")
      done < ".gitignore"
    fi

    if [ "${#exceptions[@]}" -eq 0 ]; then
      if [ "$DRY_RUN" = true ]; then
        echo -e "${YELLOW}[DRY-RUN] Would remove terminal directory: $terminal_dir${NC}"
      else
        rm -rf -- "$terminal_dir"
        echo "Removed terminal directory: $terminal_dir"
      fi
      continue
    fi

    if [ "$DRY_RUN" = true ]; then
      echo -e "${YELLOW}[DRY-RUN] Would preserve exceptions in $terminal_dir: ${exceptions[*]}${NC}"
      echo -e "${YELLOW}[DRY-RUN] Would remove $terminal_dir except exceptions${NC}"
      continue
    fi

    # Set the exceptions aside under their relative paths, delete the
    # directory, then rename the holding directory into its place. This also
    # restores nested paths and dotfiles.
    local holding_dir=".tmp_preserve.$$" rel kept_any=false preserve_ok=true
    if ! mkdir -- "$holding_dir"; then
      echo "Error: cannot create $holding_dir; skipping $terminal_dir" >&2
      continue
    fi
    for rel in "${exceptions[@]}"; do
      rel=${rel%/}
      # Never follow an exception out of the terminal directory.
      case "/$rel/" in
        */../*) continue ;;
      esac
      [ -e "$terminal_dir/$rel" ] || continue
      if mkdir -p -- "$holding_dir/$(dirname -- "$rel")" \
        && mv -- "$terminal_dir/$rel" "$holding_dir/$rel"; then
        kept_any=true
        echo "Preserved $terminal_dir/$rel"
      else
        preserve_ok=false
        break
      fi
    done
    if [ "$preserve_ok" = false ]; then
      echo "Error: could not preserve $terminal_dir/$rel; skipping removal of $terminal_dir (items already set aside are in $holding_dir)" >&2
      continue
    fi

    rm -rf -- "$terminal_dir"
    echo "Removed terminal directory: $terminal_dir"
    if [ "$kept_any" = true ]; then
      mv -- "$holding_dir" "$terminal_dir"
    else
      rmdir -- "$holding_dir"
    fi
  done

  # Terminal files directly inside this directory.
  local pattern doomed
  for pattern in "${terminal_files[@]}"; do
    if [ "$DRY_RUN" = true ]; then
      while IFS= read -r -d '' doomed; do
        printf '%b[DRY-RUN] Would remove terminal file: %s%b\n' "$YELLOW" "$doomed" "$NC"
      done < <(find . -maxdepth 1 -type f -name "$pattern" -print0)
    else
      find . -maxdepth 1 -type f -name "$pattern" -exec rm -f -- {} +
    fi
  done

  # .NET cleanup
  cleanup_dotnet "$dir"

  # Python packaging metadata anywhere below this directory.
  if [ "$DRY_RUN" = true ]; then
    while IFS= read -r -d '' doomed; do
      printf '%b[DRY-RUN] Would remove Python egg-info folder: %s%b\n' "$YELLOW" "$doomed" "$NC"
    done < <(find . -type d -name '*.egg-info' -prune -print0)
  else
    find . -type d -name '*.egg-info' -prune -print0 \
      | xargs -0 -P "$MAX_JOBS" rm -rf --
  fi
}
#######################################
# Entry point for one directory. Depending on what the directory is, it is
#   - removed outright (its name is listed in TERMINAL_DIRS, or it lies inside
#     a Git work tree but contains no tracked files),
#   - handled as a Git repository (it has its own .git entry), or
#   - cleaned as a plain directory, after which this script is re-invoked in
#     parallel for each immediate subdirectory.
# Globals:   TERMINAL_DIRS, DRY_RUN, DRY_RUN_FLAG, MAX_JOBS, SCRIPT_PATH, RED,
#            YELLOW, NC (all read)
# Arguments: $1 - directory to process; the function changes into it
# Outputs:   progress messages on stdout, failures on stderr
# Returns:   non-zero when the directory is missing or cannot be entered
#######################################
process_directory() {
  local dir="$1"
  if [ ! -d "$dir" ]; then
    echo -e "${RED}Error: Directory not found: $dir${NC}" >&2
    return 1
  fi
  cd "$dir" || return

  # Exact name comparison. A regex word match (`grep -w`) treated "cache",
  # "npm", "vs" or "idea" as hits for ".cache", ".npm", ".vs" and ".idea".
  local dir_name terminal_name
  local -a terminal_names=()
  dir_name=$(basename -- "$dir")
  read -r -a terminal_names <<< "$TERMINAL_DIRS"
  for terminal_name in "${terminal_names[@]}"; do
    [ "$dir_name" = "$terminal_name" ] || continue
    if [ "$DRY_RUN" = true ]; then
      echo -e "${YELLOW}[DRY-RUN] Would remove terminal directory: $dir${NC}"
    else
      rm -rf -- "$dir" && echo "Removed terminal directory: $dir"
    fi
    return
  done

  # A directory inside someone else's work tree that holds no tracked file is
  # disposable. A directory with its own .git entry is a repository root and
  # must never take this shortcut, even when its index is empty; otherwise the
  # repository itself would be deleted.
  if [ ! -e ".git" ] && git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
    local first_tracked
    first_tracked=$(git ls-files -- "$dir" 2>/dev/null | head -n 1)
    if [ -z "$first_tracked" ]; then
      if [ "$DRY_RUN" = true ]; then
        echo -e "${YELLOW}[DRY-RUN] Would remove untracked Git subdirectory: $dir${NC}"
      else
        rm -rf -- "$dir" && echo "Removed untracked Git subdirectory: $dir"
      fi
      return
    fi
  fi

  # `.git` is a directory in a normal clone and a plain file in submodules and
  # linked worktrees; both mark a repository root.
  if [ -e ".git" ]; then
    process_git_repo "$dir"
  else
    cleanup_non_git "$dir"
    # NUL-delimited hand-off so names with quotes or spaces survive xargs.
    # DRY_RUN_FLAG is deliberately unquoted so an empty value adds no argument.
    # shellcheck disable=SC2086
    find . -maxdepth 1 -type d ! -path . -print0 \
      | xargs -0 -P "$MAX_JOBS" -I {} bash "$SCRIPT_PATH" "{}" ${DRY_RUN_FLAG:-} --silent
  fi
}
# Flag forwarded to recursive invocations; it stays empty outside dry-run mode.
if [[ "$DRY_RUN" == true ]]; then
  DRY_RUN_FLAG="--dry-run"
else
  DRY_RUN_FLAG=""
fi

# Banner for the top-level run; recursive invocations pass --silent and skip it.
if [[ "$SILENT" == false ]]; then
  printf '%s\n' \
    "Starting cleanup process..." \
    "Target directory: $TARGET_DIR" \
    "Dry run mode: $DRY_RUN" \
    "Max parallel jobs: $MAX_JOBS" \
    "Dotnet CLI available: $([ "$DOTNET_AVAILABLE" -eq 0 ] && echo "Yes" || echo "No")" \
    "-----------------------------"
fi

process_directory "$TARGET_DIR"

# Closing banner, again only for the top-level run.
if [[ "$SILENT" == false ]]; then
  printf '%s\n' \
    "-----------------------------" \
    "Cleanup process completed"
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment