Last active
March 2, 2025 09:17
-
-
Save nordinrahman/fd4155749ed9688f943e7fccd3388bc6 to your computer and use it in GitHub Desktop.
This is a script to clean up a folder and its subdirectories so that they can be compressed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Built using gen AI
# https://x.com/i/grok/share/ECyhfrv2uFRV39tDT9izzKnop
#
# Usage: <script> /path/to/directory [--dry-run]
#
# Parses the command line into two globals:
#   DRY_RUN    - "true" when --dry-run was given, otherwise "false"
#   TARGET_DIR - resolved (realpath) form of the single directory argument
# Exits with status 1 on any usage error.

DRY_RUN=false
TARGET_DIR=""

while [[ $# -gt 0 ]]; do
  case "$1" in
    --dry-run)
      DRY_RUN=true
      ;;
    -*)
      # This script deletes files: a mistyped flag (e.g. --dryrun) must stop
      # the run instead of being silently ignored.
      echo "Error: Unknown option: $1" >&2
      echo "Usage: $0 /path/to/directory [--dry-run]" >&2
      exit 1
      ;;
    *)
      if [ -n "$TARGET_DIR" ]; then
        # Only one directory is processed; refuse extras rather than drop them.
        echo "Error: Unexpected extra argument: $1" >&2
        echo "Usage: $0 /path/to/directory [--dry-run]" >&2
        exit 1
      fi
      TARGET_DIR="$1"
      ;;
  esac
  shift
done

if [ -z "$TARGET_DIR" ]; then
  echo "Error: Please provide a directory path" >&2
  echo "Usage: $0 /path/to/directory [--dry-run]" >&2
  exit 1
fi

# Resolve to an absolute path; keep the user's input for the error message
# when resolution itself fails.
RESOLVED_TARGET_DIR=$(realpath "$TARGET_DIR" 2>/dev/null) || RESOLVED_TARGET_DIR=""
if [ -z "$RESOLVED_TARGET_DIR" ] || [ ! -d "$RESOLVED_TARGET_DIR" ]; then
  echo "Error: Resolved path does not exist or is not a directory: ${RESOLVED_TARGET_DIR:-$TARGET_DIR}" >&2
  exit 1
fi
TARGET_DIR="$RESOLVED_TARGET_DIR"
# Escape sequences used with `echo -e` to colour status messages;
# NC resets the colour.
NC='\033[0m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
RED='\033[0;31m'

# DOTNET_AVAILABLE holds the exit status of the lookup: 0 when a `dotnet`
# command is found, non-zero otherwise.
DOTNET_AVAILABLE=0
command -v dotnet >/dev/null 2>&1 || DOTNET_AVAILABLE=$?

# Terminal directories (build artifacts, caches, and IDE folders),
# stored as one space-separated string.
TERMINAL_DIRS="node_modules bin obj packages target TestResults __pycache__ build dist"
TERMINAL_DIRS+=" .vs .npm .yarn .pytest_cache venv .venv .mvn .vscode .idea .eclipse .settings .cache"

# Terminal files (common ignored files, expanded for .NET user-specific
# files), stored as one space-separated string of `find -name` patterns.
TERMINAL_FILES="*.user *.suo *.csproj.user *.sln.cache *.dbmdl *.pyc *.pyo *.cache"
TERMINAL_FILES+=" project.lock.json npm-debug.log yarn-error.log .coverage coverage.xml *.egg *.log"
TERMINAL_FILES+=" dependency-reduced-pom.xml .project .classpath"
# Succeeds when at least one path in the Git index of the current repository
# is present on disk (dangling symlinks count as present).
_git_any_tracked_file_on_disk() {
  local file
  # -z yields NUL-terminated, unquoted names, so any filename is read intact.
  while IFS= read -r -d '' file; do
    if [ -e "$file" ] || [ -L "$file" ]; then
      return 0
    fi
  done < <(git ls-files -z)
  return 1
}

# Succeeds when the newest stash entry mentions "force-stash-before-backup"
# and its first parent is the current HEAD.
_git_backup_stash_matches_head() {
  local stash_message current_head stash_base
  stash_message=$(git stash list --format="%gs" | head -n 1)
  current_head=$(git rev-parse HEAD)
  stash_base=$(git rev-parse "stash@{0}^1" 2>/dev/null || echo "")
  [[ "$stash_message" == *force-stash-before-backup* && "$stash_base" == "$current_head" ]]
}

# Runs `git clean -x -d -f` (or prints the dry-run notice) when
# `git status --porcelain` reports anything; never creates a stash.
# Arguments: $1 - message echoed after a real clean
_git_clean_leftovers() {
  local done_message="$1"
  local git_status
  git_status=$(git status --porcelain)
  [ -n "$git_status" ] || return 0
  if [ "$DRY_RUN" = true ]; then
    echo -e "${YELLOW}[DRY-RUN] Would clean untracked files with: git clean -x -d -f${NC}"
  else
    git clean -x -d -f
    echo "$done_message"
  fi
}

# Prepares the Git repository at $1 for backup: pending work is stashed,
# untracked/ignored files are cleaned, and the working-tree copies of tracked
# files are removed. The script is then re-invoked for each submodule.
# Globals:   DRY_RUN, DRY_RUN_FLAG, RED, GREEN, YELLOW, NC (read)
# Arguments: $1 - path of the repository root
# Returns:   non-zero if the directory cannot be entered or if stashing fails
#            (in which case nothing is deleted)
# Side effect: changes the current directory to $1.
process_git_repo() {
  local dir="$1"
  echo -e "${GREEN}Processing Git repository at: $dir${NC}"
  cd "$dir" || return

  # Get all tracked files from the Git index
  local tracked_files
  tracked_files=$(git ls-files)

  if [ -z "$tracked_files" ]; then
    echo "No tracked files exist in index; repository is already prepared for backup."
    _git_clean_leftovers "Cleaned untracked files (no stash created as no tracked files present)"
  elif ! _git_any_tracked_file_on_disk; then
    echo "All tracked files are physically deleted; repository is already prepared for backup."
    # Check for an existing stash to confirm
    if _git_backup_stash_matches_head; then
      echo "Confirmed existing stash 'force-stash-before-backup' at index 0 matches current HEAD."
    else
      echo "Warning: No valid stash found for current HEAD despite all tracked files being deleted."
    fi
    _git_clean_leftovers "Cleaned untracked files (no stash created as all tracked files are deleted)"
  else
    # Informational only: a stash from a prior run does not stop processing.
    if _git_backup_stash_matches_head; then
      echo "Existing stash 'force-stash-before-backup' found at index 0, based on current HEAD, but tracked files still exist physically."
    fi

    echo "Tracked files exist physically, processing for backup..."
    local git_status
    git_status=$(git status --porcelain)
    if [ -n "$git_status" ]; then
      echo "Found pending changes or untracked files, staging and stashing them..."
      if [ "$DRY_RUN" = true ]; then
        echo -e "${YELLOW}[DRY-RUN] Would stage all changes with: git add .${NC}"
        echo -e "${YELLOW}[DRY-RUN] Would stash changes with: git stash push -m 'force-stash-before-backup' --include-untracked${NC}"
        echo -e "${YELLOW}[DRY-RUN] Would clean with: git clean -x -d -f${NC}"
      else
        # The stash is the only copy of uncommitted work once the clean and
        # removal below have run, so stop before deleting if it was not made.
        if ! git add . ||
          ! git stash push -m "force-stash-before-backup" --include-untracked; then
          echo -e "${RED}Error: Failed to stash changes in $dir; leaving repository untouched.${NC}" >&2
          return 1
        fi
        git clean -x -d -f
        echo "Stashed changes and cleaned untracked/ignored files"
      fi
    fi

    # Remove tracked files
    echo "Removing tracked files..."
    if [ "$DRY_RUN" = true ]; then
      echo -e "${YELLOW}[DRY-RUN] Would remove tracked files${NC}"
    else
      # NUL-delimited hand-off keeps names with spaces or quotes intact.
      # NOTE(review): -r is retained from the original; index entries may be
      # directories (e.g. submodule paths) - confirm that is intended.
      git ls-files -z | xargs -0 -n 100 rm -rf --
      echo "Removed tracked files"
    fi
  fi

  # Process submodules
  # NOTE(review): this runs after tracked entries were removed above, so in a
  # real run submodule directories may already be gone - confirm the ordering.
  if [ -f ".gitmodules" ]; then
    echo "Found submodules, processing..."
    # NOTE(review): $0 is resolved after the cd above; a relative $0 would be
    # resolved against the repository directory - confirm how this is invoked.
    local script_path
    script_path=$(realpath "$0")
    # foreach runs the command inside each submodule, so the submodule must be
    # passed as an absolute path: $toplevel/$sm_path.
    git submodule foreach --recursive \
      "'$script_path' \"\$toplevel/\$sm_path\" $DRY_RUN_FLAG"
  fi
}
# Cleans .NET build output under $1 when that directory directly contains a
# C#, F# or VB project file.
# Globals:   DOTNET_AVAILABLE (0 means the dotnet CLI was found),
#            DRY_RUN, YELLOW, NC (read)
# Arguments: $1 - directory to inspect
# Behaviour: optionally runs `dotnet clean`, then removes every directory
#            named bin, obj, TestResults, *.user or *.suo below $1, plus a
#            top-level "packages" folder. In dry-run mode it only reports.
# Side effect: changes the current directory to $1.
cleanup_dotnet() {
  local dir="$1"
  echo -e "${YELLOW}Processing .NET project at: $dir${NC}"
  cd "$dir" || return

  # `compgen -G` succeeds when its glob matches at least one path. Each
  # pattern is tested on its own so that one project type is enough.
  if ! compgen -G '*.csproj' >/dev/null &&
    ! compgen -G '*.fsproj' >/dev/null &&
    ! compgen -G '*.vbproj' >/dev/null; then
    return 0
  fi

  if [ "$DOTNET_AVAILABLE" -eq 0 ]; then
    if [ "$DRY_RUN" = true ]; then
      echo -e "${YELLOW}[DRY-RUN] Would run: dotnet clean${NC}"
    else
      dotnet clean
      echo "Ran dotnet clean"
    fi
  fi

  # -prune: a matched directory goes away as a whole, so find must not
  # descend into it while it is being deleted.
  local -a artifact_dirs=(
    -type d
    \( -name bin -o -name obj -o -name TestResults -o -name '*.user' -o -name '*.suo' \)
    -prune
  )

  if [ "$DRY_RUN" = true ]; then
    local artifact
    while IFS= read -r -d '' artifact; do
      echo -e "${YELLOW}[DRY-RUN] Would remove .NET artifact: ${artifact}${NC}"
    done < <(find . "${artifact_dirs[@]}" -print0)
    if [ -d "packages" ]; then
      echo -e "${YELLOW}[DRY-RUN] Would remove NuGet packages folder${NC}"
    fi
  else
    find . "${artifact_dirs[@]}" -exec rm -rf -- {} +
    if [ -d "packages" ]; then
      rm -rf "packages" && echo "Removed NuGet packages folder"
    fi
  fi
  return 0
}
# Cleans one non-Git directory level: removes the directories named in
# TERMINAL_DIRS and the files matching TERMINAL_FILES that sit directly in
# $1, then runs the .NET cleanup and removes *.egg-info folders below $1.
# Globals:   TERMINAL_DIRS, TERMINAL_FILES (space-separated lists),
#            DRY_RUN, YELLOW, NC (read)
# Arguments: $1 - directory to clean
# .gitignore handling: a line of the form "!<terminal_dir>/<path>" in $1's
# .gitignore marks <path> as an exception; it is set aside before the
# terminal directory is removed and put back afterwards.
# Side effect: changes the current directory to $1.
cleanup_non_git() {
  local dir="$1"
  echo -e "${YELLOW}Processing non-Git directory at: $dir${NC}"
  cd "$dir" || return

  # `read -a` splits the lists on whitespace without glob-expanding entries
  # such as *.log against the current directory.
  local -a terminal_dirs terminal_files
  read -r -a terminal_dirs <<< "$TERMINAL_DIRS"
  read -r -a terminal_files <<< "$TERMINAL_FILES"

  local terminal_dir line exception preserve_dir
  local -a exceptions preserved

  # Handle terminal directories with .gitignore exceptions
  for terminal_dir in "${terminal_dirs[@]}"; do
    [ -d "$terminal_dir" ] || continue

    # Collect "!<terminal_dir>/<path>" lines as literal exception paths.
    exceptions=()
    if [ -f ".gitignore" ]; then
      while IFS= read -r line || [ -n "$line" ]; do
        line=${line%$'\r'}
        if [[ "$line" == "!$terminal_dir/"?* ]]; then
          exception=${line#"!$terminal_dir/"}
          exceptions+=("${exception%/}")
        fi
      done < ".gitignore"
    fi

    if [ "${#exceptions[@]}" -eq 0 ]; then
      if [ "$DRY_RUN" = true ]; then
        echo -e "${YELLOW}[DRY-RUN] Would remove terminal directory: $terminal_dir${NC}"
      else
        rm -rf -- "$terminal_dir"
        echo "Removed terminal directory: $terminal_dir"
      fi
      continue
    fi

    if [ "$DRY_RUN" = true ]; then
      echo -e "${YELLOW}[DRY-RUN] Would preserve exceptions in $terminal_dir: ${exceptions[*]}${NC}"
      echo -e "${YELLOW}[DRY-RUN] Would remove $terminal_dir except exceptions${NC}"
      continue
    fi

    # Holding area created inside $dir so the moves stay on one filesystem.
    if ! preserve_dir=$(mktemp -d ".tmp_preserve.XXXXXX"); then
      echo "Error: Could not create a holding directory; skipping $terminal_dir" >&2
      continue
    fi

    preserved=()
    for exception in "${exceptions[@]}"; do
      if [ -e "$terminal_dir/$exception" ]; then
        # Keep the relative layout so nested exceptions do not collide.
        mkdir -p "$preserve_dir/$(dirname "$exception")"
        if mv "$terminal_dir/$exception" "$preserve_dir/$exception"; then
          preserved+=("$exception")
          echo "Preserved $terminal_dir/$exception"
        fi
      fi
    done

    # Removal is synchronous: the exceptions are moved back into a freshly
    # created directory right after, which must not race with the delete.
    rm -rf -- "$terminal_dir"
    echo "Removed terminal directory: $terminal_dir"

    for exception in "${preserved[@]}"; do
      mkdir -p "$terminal_dir/$(dirname "$exception")"
      mv "$preserve_dir/$exception" "$terminal_dir/$exception"
    done
    rm -rf -- "$preserve_dir"
  done

  # Handle terminal files
  local pattern file
  for pattern in "${terminal_files[@]}"; do
    while IFS= read -r -d '' file; do
      if [ "$DRY_RUN" = true ]; then
        echo -e "${YELLOW}[DRY-RUN] Would remove terminal file: $file${NC}"
      else
        rm -f -- "$file"
        echo "Removed terminal file: $file"
      fi
    done < <(find . -maxdepth 1 -type f -name "$pattern" -print0)
  done

  # .NET cleanup
  cleanup_dotnet "$dir"

  # Non-terminal cleanup
  if [ "$DRY_RUN" = true ]; then
    while IFS= read -r -d '' file; do
      echo -e "${YELLOW}[DRY-RUN] Would remove Python egg-info folder: ${file}${NC}"
    done < <(find . -type d -name '*.egg-info' -prune -print0)
  else
    find . -type d -name '*.egg-info' -prune -exec rm -rf -- {} +
  fi
}
# Recursively prepares $1 for backup.
# Globals:   TERMINAL_DIRS (space-separated names), DRY_RUN,
#            RED, YELLOW, NC (read)
# Arguments: $1 - directory to process (an absolute path is expected, since
#                 recursion changes the current directory)
# Decision order:
#   1. a directory whose name is listed in TERMINAL_DIRS is removed;
#   2. a directory containing a ".git" entry is handed to process_git_repo;
#   3. a directory inside a Git work tree with no tracked files is removed;
#   4. anything else gets cleanup_non_git and each immediate subdirectory is
#      processed the same way.
# Side effect: changes the current directory.
process_directory() {
  local dir="$1"
  if [ ! -d "$dir" ]; then
    echo -e "${RED}Error: Directory not found: $dir${NC}"
    return
  fi
  cd "$dir" || return

  # Check if this is a terminal directory. The name is compared for exact
  # equality so that e.g. "cache" is not confused with ".cache".
  local dir_name candidate
  local -a terminal_dirs
  dir_name=$(basename "$dir")
  read -r -a terminal_dirs <<< "$TERMINAL_DIRS"
  for candidate in "${terminal_dirs[@]}"; do
    if [ "$candidate" = "$dir_name" ]; then
      if [ "$DRY_RUN" = true ]; then
        echo -e "${YELLOW}[DRY-RUN] Would remove terminal directory: $dir${NC}"
      else
        rm -rf "$dir" && echo "Removed terminal directory: $dir"
      fi
      return
    fi
  done

  # A repository root is never treated as an "untracked subdirectory", even
  # when its index is empty. -e rather than -d because ".git" is a plain file
  # in submodules and linked worktrees.
  if [ -e ".git" ]; then
    process_git_repo "$dir"
    return
  fi

  # Check if we're within a Git repository
  if git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
    local tracked_files
    tracked_files=$(git ls-files -- "$dir")
    if [ -z "$tracked_files" ]; then
      if [ "$DRY_RUN" = true ]; then
        echo -e "${YELLOW}[DRY-RUN] Would remove untracked Git subdirectory: $dir${NC}"
      else
        rm -rf "$dir" && echo "Removed untracked Git subdirectory: $dir"
      fi
      return
    fi
  fi

  # Plain directory: clean this level, then recurse into each child.
  cleanup_non_git "$dir"
  local subdir
  # NUL-delimited listing keeps unusual directory names intact.
  while IFS= read -r -d '' subdir; do
    process_directory "$subdir"
  done < <(find "$dir" -mindepth 1 -maxdepth 1 -type d -print0)
}
# Entry point: derive the flag that is forwarded to nested invocations,
# print a summary of the run, then process the target directory.
if [ "$DRY_RUN" = true ]; then
  DRY_RUN_FLAG="--dry-run"
else
  DRY_RUN_FLAG=""
fi

# DOTNET_AVAILABLE is the exit status of the dotnet lookup; 0 means found.
if [ "$DOTNET_AVAILABLE" -eq 0 ]; then
  dotnet_cli_answer="Yes"
else
  dotnet_cli_answer="No"
fi

echo "Starting cleanup process..."
echo "Target directory: $TARGET_DIR"
echo "Dry run mode: $DRY_RUN"
echo "Dotnet CLI available: $dotnet_cli_answer"
echo "-----------------------------"

process_directory "$TARGET_DIR"

echo "-----------------------------"
echo "Cleanup process completed"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment