Last active
January 29, 2026 16:56
-
-
Save joshjohanning/dea45c25a0ba1a6abe81376d55f63481 to your computer and use it in GitHub Desktop.
Examples of finding files over 100 MB in Git repository history: one Bitbucket-specific script, and one generic script that takes a list of repository URLs.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Lists every blob of 100MB or more found anywhere in the history of each
# repository in a single Bitbucket Server project.
#
# For each repository returned by the project's REST endpoint, the script
# makes a temporary bare clone, walks all reachable objects, prints one
# "<size>MB <path>" line per oversized blob (largest first), and then
# deletes the clone.
#
# Prerequisites: curl, jq, git
#
# Configuration: BB_SERVER, PROJECT and BB_TOKEN may be supplied through the
# environment; the placeholder defaults below are used otherwise.

# Set your server details
BB_SERVER="${BB_SERVER:-https://bitbucket.yourcompany.com}"
PROJECT="${PROJECT:-YOUR_PROJECT}"
BB_TOKEN="${BB_TOKEN:-your-personal-access-token}"

# Size threshold in bytes (100 * 1024 * 1024).
readonly MIN_BYTES=104857600

# Clone into a private scratch directory instead of predictable /tmp names,
# and remove it on every exit path.
work_dir=$(mktemp -d) || {
  echo "Error: Failed to create temporary directory" >&2
  exit 1
}
cleanup() { rm -rf -- "$work_dir"; }
trap cleanup EXIT

# Get all repos in project
# -f makes curl fail on HTTP errors (bad token, unknown project) instead of
# handing an error document to jq.
# NOTE(review): only a single page of up to 1000 repositories is requested;
# larger projects would need to follow the API's paging.
response=$(curl -fsS -H "Authorization: Bearer $BB_TOKEN" \
  "$BB_SERVER/rest/api/1.0/projects/$PROJECT/repos?limit=1000") || {
  echo "Error: Failed to list repositories for project '$PROJECT'" >&2
  exit 1
}

repos=$(jq -r '.values[].links.clone[] | select(.name=="http") | .href' \
  <<< "$response") || {
  echo "Error: Unexpected response while listing repositories" >&2
  exit 1
}

# The list is read on file descriptor 3 so that commands inside the loop keep
# the script's own stdin.
while IFS= read -r -u 3 repo_url; do
  [[ -n "$repo_url" ]] || continue

  repo_name=$(basename "$repo_url" .git)
  clone_path="$work_dir/$repo_name.git"
  echo "=== Checking: $repo_name ==="

  # Skip the repository if it cannot be cloned; otherwise the scan below
  # would have nothing valid to inspect.
  if ! git clone --bare -- "$repo_url" "$clone_path" 2>/dev/null; then
    echo "Error: Failed to clone $repo_url" >&2
    continue
  fi

  # Find files over 100MB
  # Each cat-file line is "<type> <sha> <size> <path>". The path is taken as
  # everything after the third field so names containing spaces stay intact.
  git -C "$clone_path" rev-list --objects --all \
    | git -C "$clone_path" cat-file \
        --batch-check='%(objecttype) %(objectname) %(objectsize) %(rest)' \
    | awk -v min="$MIN_BYTES" '
        $1 == "blob" && $3 >= min {
          path = $0
          sub(/^[^ ]+ [^ ]+ [^ ]+ /, "", path)
          print $3 / 1048576 "MB", path
        }' \
    | sort -rn

  rm -rf -- "$clone_path"
done 3<<< "$repos"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Finds files over a specified size (default: 100MB) across multiple git repositories
# Reads repository URLs from a text file (one URL per line)
#
# Each repository is cloned bare into a temporary directory, every object
# reachable from any ref is inspected, and blobs at or above the threshold are
# printed largest first. The clone is removed before the next one starts.
#
# Prerequisites:
# - git must be installed
#
# Usage:
# ./find-large-files-in-repositories.sh <repos-file> [size-in-mb]
#
# Example:
# ./find-large-files-in-repositories.sh repos.txt 100
#
# repos.txt format (one repository URL per line; blank lines and lines
# starting with # are skipped):
# https://github.com/owner/repo1.git
# https://github.com/owner/repo2.git
# git@github.com:owner/repo3.git

if [[ -z "$1" ]]; then
  {
    echo "Usage: $0 <repos-file> [size-in-mb]"
    echo " repos-file: Path to a text file containing repository URLs (one per line)"
    echo " size-in-mb: Minimum file size in MB to report (default: 100)"
  } >&2
  exit 1
fi

REPOS_FILE="$1"
SIZE_MB="${2:-100}"

# Reject anything that is not a plain whole number before it reaches the
# arithmetic expansion, which would otherwise evaluate it as an expression.
if [[ ! "$SIZE_MB" =~ ^[0-9]+$ ]]; then
  echo "Error: size-in-mb must be a whole number (got '$SIZE_MB')" >&2
  exit 1
fi

# 10# forces base 10 so a value such as "0100" is not read as octal.
SIZE_BYTES=$((10#$SIZE_MB * 1048576))

if [[ ! -f "$REPOS_FILE" ]]; then
  echo "Error: File '$REPOS_FILE' not found" >&2
  exit 1
fi

# Create a temporary directory for clones
TEMP_DIR=$(mktemp -d) || {
  echo "Error: Failed to create temporary directory" >&2
  exit 1
}
# The path is expanded when the trap fires, and quoted, so a temp path with
# spaces or glob characters is removed correctly.
cleanup() { rm -rf -- "$TEMP_DIR"; }
trap cleanup EXIT

echo "Finding files >= ${SIZE_MB}MB in repositories listed in $REPOS_FILE"
echo "============================================================"
echo ""

# The list is read on file descriptor 3 so that commands inside the loop
# cannot consume the remaining URLs from stdin. The "|| [[ -n ... ]]" clause
# keeps a final line that has no trailing newline.
while IFS= read -r -u 3 repo_url || [[ -n "$repo_url" ]]; do
  # Tolerate lists saved with Windows (CRLF) line endings.
  repo_url=${repo_url%$'\r'}

  # Skip empty lines and comments
  [[ -z "$repo_url" || "$repo_url" =~ ^# ]] && continue

  repo_name=$(basename "$repo_url" .git)
  echo "=== Checking: $repo_name ==="
  echo " URL: $repo_url"

  clone_path="$TEMP_DIR/$repo_name.git"
  if ! git clone --bare -- "$repo_url" "$clone_path" 2>/dev/null; then
    echo " Error: Failed to clone repository"
    echo ""
    continue
  fi

  # Find files over the specified size
  # git -C runs against the clone without changing the script's working
  # directory. Each cat-file line is "<type> <sha> <size> <path>"; the path is
  # taken as everything after the third field so names containing spaces are
  # reported in full.
  large_files=$(git -C "$clone_path" rev-list --objects --all 2>/dev/null \
    | git -C "$clone_path" cat-file \
        --batch-check='%(objecttype) %(objectname) %(objectsize) %(rest)' 2>/dev/null \
    | awk -v size="$SIZE_BYTES" '
        $1 == "blob" && $3 >= size {
          path = $0
          sub(/^[^ ]+ [^ ]+ [^ ]+ /, "", path)
          printf " %.2fMB %s\n", $3 / 1048576, path
        }' \
    | sort -rn)

  if [[ -n "$large_files" ]]; then
    echo "$large_files"
  else
    echo " No files >= ${SIZE_MB}MB found"
  fi

  rm -rf -- "$clone_path"
  echo ""
done 3< "$REPOS_FILE"

echo "============================================================"
echo "Scan complete"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment