Created
January 19, 2013 05:32
-
-
Save aaronzirbes/4570924 to your computer and use it in GitHub Desktop.
This script will help you remove large files from your git repo history and shrink the size of your repository.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Find the largest files ever committed to a git repository so they can be
# purged from history.
#
# Run from anywhere inside a work tree. Four report files are written to the
# repository root; a report that already exists is reused, not regenerated:
#   _allfileshas.txt  "<sha> <path>" for every object that has a path
#   _uniquefiles.txt  every distinct path found in history
#   _bigobjects.txt   verify-pack blob lines (sha type size size-in-pack
#                     offset), largest first
#   _bigtosmall.txt   "<sha> <size> <path>", largest first

#######################################
# Pair each packed blob with the path(s) it was committed under.
# Arguments: $1 - file of `git verify-pack -v` blob lines (sha type size ...)
#            $2 - file of "<sha> <path>" lines
# Outputs:   "<sha> <size> <path>" lines on stdout, largest size first
#######################################
rank_blobs_by_size() {
  awk '
    # First file: remember the size (third column) of every blob.
    NR == FNR { size[$1] = $3; next }
    # Second file: everything after the first space is the path, so paths
    # that contain spaces are kept whole.
    $1 in size { print $1, size[$1], substr($0, index($0, " ") + 1) }
  ' "$1" "$2" | LC_ALL=C sort -k 2,2nr
}

#######################################
# Build the report files and print example clean-up commands.
# Outputs: progress messages and the example commands on stdout,
#          errors on stderr
# Returns: 0 on success, 1 when not run inside a git work tree
#######################################
main() (
  # The body runs in a subshell, so the cd below cannot leak into the caller
  # and an early exit only ends this function.
  echo "Finding and Purging Big Files From Git History"
  echo "=============================================="
  echo ""
  echo "http://naleid.com/blog/2012/01/17/finding-and-purging-big-files-from-git-history/"
  echo ""

  top_level=$(git rev-parse --show-toplevel) || {
    echo "error: must be run inside a git work tree" >&2
    exit 1
  }
  cd "$top_level" || exit 1

  echo "What object SHA is associated with each file in the Repo?"
  if [[ ! -e _allfileshas.txt ]]; then
    # Keep only lines that carry a path after the SHA; commit lines have none.
    git rev-list --objects --all \
      | LC_ALL=C sort -k 2 \
      | grep -E '^[0-9a-f]+ .' > _allfileshas.txt
  fi

  echo "What Unique Files Exist Throughout The History of My Git Repo?"
  if [[ ! -e _uniquefiles.txt ]]; then
    # "-f 2-" keeps the whole path even when it contains spaces; sort -u
    # removes duplicates regardless of how the cached input was ordered.
    cut -d ' ' -f 2- _allfileshas.txt | LC_ALL=C sort -u > _uniquefiles.txt
  fi

  echo "How Big Are The Files In My Repo?"
  if [[ ! -e _bigobjects.txt ]]; then
    # gc first so loose objects are packed and visible to verify-pack.
    git gc && git verify-pack -v .git/objects/pack/pack-*.idx \
      | grep -E '^[0-9a-f]+ blob +[0-9]+ [0-9]+ [0-9]+$' \
      | LC_ALL=C sort -k 3,3nr > _bigobjects.txt
  fi

  echo "Take that result and iterate through each line of it to find the SHA, file size in bytes, and real file name"
  if [[ ! -e _bigtosmall.txt ]]; then
    rank_blobs_by_size _bigobjects.txt _allfileshas.txt > _bigtosmall.txt
  fi

  echo "Done."
  echo ""
  echo "For example:"
  echo "To shrink your repo by removing all files matching '*.sql' and '*.sql.gz', run the following commands:"
  echo ""

  repo_path=$(pwd)
  repo_name=${repo_path##*/}

  # The suggested commands quote the paths so they can be pasted as-is even
  # when the repository lives under a directory with spaces in its name.
  echo "git filter-branch --prune-empty --index-filter 'git rm -rf --cached --ignore-unmatch *.sql.gz *.sql' --tag-name-filter cat -- --all"
  echo "cd .."
  echo "git clone --no-hardlinks \"file://${repo_path}\" \"${repo_name}.shrink\""
  echo "cd \"${repo_name}.shrink\""
)

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
There's a problem if files have spaces in their names; I have a fix in this fork: https://gist.github.com/CharlesB2/de3fdfb1b010120d4bb9defc3ac6122f