Skip to content

Instantly share code, notes, and snippets.

@pythoninthegrass
Last active January 16, 2025 19:51
Show Gist options
  • Save pythoninthegrass/8027a1f65b9c52f9817347550c0087c0 to your computer and use it in GitHub Desktop.
Save pythoninthegrass/8027a1f65b9c52f9817347550c0087c0 to your computer and use it in GitHub Desktop.
Clean up generated directories in git repos (.devbox, .terraform, .venv, node_modules, and target). Then archive the repos.
#!/usr/bin/env bash
# shellcheck disable=SC2155
#
# Environment variable overrides with defaults:
#   OUTPUT_DIR      directory archives are written to
#                   (default: ~/Downloads/git_archive)
#   FORMAT          archive format: zip | tar | tar.gz | tgz (default: zip)
#   ARCHIVE_FORMAT  same meaning as FORMAT; used when FORMAT is unset or empty
#   MAX_DEPTH       value passed to find's -maxdepth (default: 2)
OUTPUT_DIR="${OUTPUT_DIR:-${HOME}/Downloads/git_archive}"
# FORMAT keeps precedence so existing callers behave as before; ARCHIVE_FORMAT
# is honoured as well because that is the name the rest of the script uses.
ARCHIVE_FORMAT="${FORMAT:-${ARCHIVE_FORMAT:-zip}}"
MAX_DEPTH="${MAX_DEPTH:-2}"

# Create output directory if it doesn't exist
mkdir -p "$OUTPUT_DIR"

# Colors for output (ANSI escape sequences, rendered by `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Translate a requested archive format into the output file extension.
# Arguments: $1 - format name (zip, tar, tar.gz or tgz)
# Outputs:   the extension on stdout; anything unrecognised yields "zip"
get_extension() {
  local requested="$1"
  local suffix="zip"

  if [[ "$requested" == "tar" ]]; then
    suffix="tar"
  elif [[ "$requested" == "tar.gz" || "$requested" == "tgz" ]]; then
    suffix="tar.gz"
  fi

  echo "$suffix"
}
# Check whether an archive exists and was modified within the last 30 minutes.
# Arguments: $1 - path to the archive file
# Returns:   0 if the file exists and is younger than 30 minutes;
#            1 if it is missing, older, or its mtime cannot be determined
is_recent_archive() {
  local archive_file="$1"
  local max_age=1800 # 30 minutes in seconds
  local mtime file_age

  if [ ! -f "$archive_file" ]; then
    return 1
  fi

  # GNU stat spells "mtime as epoch seconds" -c %Y, BSD/macOS stat -f %m.
  # GNU goes first: BSD stat rejects -c outright, whereas GNU stat would
  # interpret -f as "filesystem info" and print unrelated text.
  if ! mtime=$(stat -c %Y -- "$archive_file" 2>/dev/null); then
    mtime=$(stat -f %m -- "$archive_file" 2>/dev/null) || return 1
  fi

  # Anything that is not a plain non-negative integer cannot be used below.
  case "$mtime" in
    '' | *[!0-9]*) return 1 ;;
  esac

  file_age=$(($(date +%s) - mtime))
  [ "$file_age" -lt "$max_age" ]
}
# Archive the HEAD commit of a single git repository into OUTPUT_DIR.
# Globals:   OUTPUT_DIR, ARCHIVE_FORMAT, RED, GREEN, YELLOW, BLUE, NC (read)
# Arguments: $1 - path to the repository's .git directory
# Outputs:   progress messages on stdout, warnings and errors on stderr
# Returns:   0 if the repository was archived or a recent archive already
#            exists; 1 if it was skipped for another reason or archiving failed
# Note:      changes the working directory to the repository root.
process_git_dir() {
  local dir="$1"
  local repo_root out_dir base_name extension output_file tmp_file git_format

  repo_root="$(dirname "$dir")"

  # Pin a relative OUTPUT_DIR to the caller's directory before cd moves us,
  # otherwise the archive would be written relative to the repository.
  out_dir="$OUTPUT_DIR"
  case "$out_dir" in
    /*) ;;
    *) out_dir="${PWD}/${out_dir}" ;;
  esac

  cd "$repo_root" || {
    echo -e "${RED}Error: Cannot access directory $repo_root${NC}" >&2
    return 1
  }

  # Name the archive after the directory we are now in, so a repository
  # passed as "./.git" is not archived as "..zip".
  base_name="$(basename "$PWD")"
  extension="$(get_extension "$ARCHIVE_FORMAT")"
  output_file="${out_dir}/${base_name}.${extension}"

  # Check if recent archive exists
  if is_recent_archive "$output_file"; then
    echo -e "${BLUE}Skipping $repo_root: Recent archive exists ($output_file)${NC}"
    return 0
  fi

  # Check if it's a git repository
  if ! git rev-parse --git-dir > /dev/null 2>&1; then
    echo -e "${YELLOW}Warning: $repo_root is not a valid git repository${NC}" >&2
    return 1
  fi

  # Check if repository has any commits
  if ! git rev-parse --verify HEAD > /dev/null 2>&1; then
    echo -e "${YELLOW}Skipping $repo_root: Empty repository (no commits)${NC}"
    return 1
  fi

  # Check if repository has any tracked files
  if ! git ls-files --error-unmatch . > /dev/null 2>&1; then
    echo -e "${YELLOW}Skipping $repo_root: No tracked files${NC}"
    return 1
  fi

  # Let git produce the compressed tarball itself: piping through gzip would
  # report gzip's exit status and hide a failing `git archive`.
  case "$ARCHIVE_FORMAT" in
    tar) git_format="tar" ;;
    tar.gz | tgz) git_format="tar.gz" ;;
    *) git_format="zip" ;;
  esac

  # Write to a scratch name and rename on success, so a failed run never
  # leaves a truncated file that the next run would treat as a recent archive.
  tmp_file="${output_file}.partial.$$"
  if git archive --format="$git_format" --output="$tmp_file" HEAD \
    && mv -f -- "$tmp_file" "$output_file"; then
    echo -e "${GREEN}Successfully archived $repo_root to $output_file${NC}"
  else
    rm -f -- "$tmp_file"
    echo -e "${RED}Failed to archive $repo_root${NC}" >&2
    return 1
  fi
}
# Find git repositories under a directory and archive them in parallel.
# Globals:   MAX_DEPTH, OUTPUT_DIR, ARCHIVE_FORMAT, RED, GREEN, YELLOW, BLUE,
#            NC (read); REPO_DIR, TOTAL_REPOS (written)
# Arguments: $1 - directory to search (default: current directory)
# Returns:   the exit status of xargs; exits 0 when no repository is found
main() {
  local dir="${1:-.}"
  local jobs git_dir
  local -a git_dirs=()
  REPO_DIR="$dir"

  # Collect the .git directories once (NUL-delimited) so the count shown and
  # the list processed cannot disagree.
  while IFS= read -r -d '' git_dir; do
    git_dirs+=("$git_dir")
  done < <(find "$REPO_DIR" -maxdepth "$MAX_DEPTH" -name ".git" -type d -print0)

  # Count total repositories
  TOTAL_REPOS=${#git_dirs[@]}
  if [ "$TOTAL_REPOS" -eq 0 ]; then
    echo -e "${YELLOW}No git repositories found in $REPO_DIR${NC}"
    exit 0
  fi

  echo "Found $TOTAL_REPOS git repositories in $REPO_DIR"
  echo "Output directory: $OUTPUT_DIR"
  echo "Archive format: $ARCHIVE_FORMAT"

  # Number of parallel workers: `sysctl -n hw.ncpu` only exists on macOS/BSD,
  # so try the portable getconf first, then nproc, then fall back to 1.
  jobs=$(getconf _NPROCESSORS_ONLN 2> /dev/null) \
    || jobs=$(nproc 2> /dev/null) \
    || jobs=$(sysctl -n hw.ncpu 2> /dev/null) \
    || jobs=1
  case "$jobs" in
    '' | *[!0-9]*) jobs=1 ;;
  esac
  if [ "$jobs" -lt 1 ]; then
    jobs=1
  fi

  # Export functions and variables for the bash processes xargs starts
  export -f process_git_dir
  export -f get_extension
  export -f is_recent_archive
  export ARCHIVE_FORMAT OUTPUT_DIR RED GREEN YELLOW BLUE NC

  # Process repositories in parallel; each path is handed over as a
  # positional argument, never spliced into the command string.
  printf '%s\0' "${git_dirs[@]}" \
    | xargs -0 -P "$jobs" -I {} \
      bash -c 'process_git_dir "$@"' _ {}
}
# Entry point: run main with the script's command-line arguments.
main "$@"
#!/usr/bin/env bash
# Directory names that cleanup removes wherever it finds them.
CLEAN_DIRS=(
  ".devbox"
  ".terraform"
  ".venv"
  "node_modules"
  "target"
)
# Delete (or, in dry-run mode, list) directories named in CLEAN_DIRS that
# sit at most two levels below a starting directory.
# Globals:   CLEAN_DIRS (read) - array of directory names to remove
# Arguments: $1 - directory to search
#            -n | --dry-run - only print what would be deleted
# Outputs:   one line per matched directory on stdout; errors on stderr
# Returns:   0 on success; 1 on a missing directory argument, an unknown
#            option, or a failed deletion
cleanup() {
  if [[ -z "${1:-}" ]]; then
    echo "cleanup: missing directory argument" >&2
    return 1
  fi
  local dir="$1"
  shift

  local dry_run=false
  local name path
  local status=0
  local -a name_tests=()
  local -a results=()

  while [[ "$#" -gt 0 ]]; do
    case "$1" in
      -n | --dry-run)
        dry_run=true
        ;;
      *)
        echo "Unknown option: $1" >&2
        return 1
        ;;
    esac
    shift
  done

  # Build "-name A -o -name B ..." as an array so names containing spaces or
  # glob characters reach find intact instead of being word-split.
  for name in "${CLEAN_DIRS[@]}"; do
    [[ -n "$name" ]] || continue
    if [[ "${#name_tests[@]}" -gt 0 ]]; then
      name_tests+=(-o)
    fi
    name_tests+=(-name "$name")
  done
  # An empty \( \) group is a find syntax error; nothing to look for anyway.
  if [[ "${#name_tests[@]}" -eq 0 ]]; then
    return 0
  fi

  # NUL-delimited so unusual path names survive; -prune stops find from
  # descending into a directory that is about to be removed as a whole.
  while IFS= read -r -d '' path; do
    results+=("$path")
  done < <(find "$dir" -maxdepth 2 -type d \( "${name_tests[@]}" \) -prune -print0)

  if [[ "$dry_run" == true ]]; then
    echo "Dry run mode: No files will be deleted."
    # printf with an empty array would still emit one blank line.
    if [[ "${#results[@]}" -gt 0 ]]; then
      printf "%s\n" "${results[@]}"
    fi
    return 0
  fi

  for path in "${results[@]}"; do
    echo "Deleting: $path"
    rm -rf -- "$path" || status=1
  done
  return "$status"
}
# Parse the optional leading directory argument and run cleanup.
# Arguments: $1 - directory to clean (default: current directory); may be
#                 omitted even when options follow
#            remaining arguments are passed to cleanup unchanged
# Returns:   the status of cleanup
main() {
  local dir="."

  # Only consume $1 as the directory when it is not an option, so that
  # `script --dry-run` cleans "." in dry-run mode instead of treating
  # "--dry-run" as the directory.
  if [[ "$#" -gt 0 && "$1" != -* ]]; then
    dir="${1:-.}"
    shift
  fi

  cleanup "$dir" "$@"
}
# Entry point: run main with the script's command-line arguments.
main "$@"
@pythoninthegrass
Copy link
Author

git clone git@gist.github.com:8027a1f65b9c52f9817347550c0087c0.git git_archive
cd git_archive/
ln -s $(pwd)/archive.sh ~/.local/bin/git-archive
cd ~/git
export OUTPUT_DIR="$HOME/Downloads/git_backup"
export ARCHIVE_FORMAT="zip"
export MAX_DEPTH="2"
git-archive

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment