Skip to content

Instantly share code, notes, and snippets.

@bjornfor
Last active August 29, 2015 14:15
Show Gist options
  • Save bjornfor/bb96b09d4bd1a488cd01 to your computer and use it in GitHub Desktop.
Save bjornfor/bb96b09d4bd1a488cd01 to your computer and use it in GitHub Desktop.
Test if git clones (at specific revision) can be deterministic
#!/usr/bin/env bash
# A test to see if a deterministic fetchgit with leaveDotGit is possible.
# Short answer: yes.
#
# Q: Can .git/index be deterministic?
# A: No, impossible (it contains inode numbers). If you *must* have it, remove
# it before calculating checksums, then re-create it from the repo contents.
# Create an annotated tag on the current HEAD of a repository.
# Arguments: $1 - path to the repository
#            $2 - name of the tag to create
# Outputs:   a "# creating ..." progress line, followed by git's own output
# Returns:   status of 'cd' if the repository cannot be entered, otherwise
#            the status of 'git tag'
create_annotated_tag() {
  local repo="$1"
  local tag="$2"
  printf '%s\n' "# creating annotated tag $tag in $repo"
  # Sub-shell keeps the caller's working directory untouched.
  (
    cd "$repo" || exit
    git tag -a -m "Message for tag $tag" "$tag"
  )
}
# Create a lightweight (non-annotated) tag on the current HEAD of a
# repository.
# Arguments: $1 - path to the repository
#            $2 - name of the tag to create
# Outputs:   a "# creating ..." progress line, followed by git's own output
# Returns:   status of 'cd' if the repository cannot be entered, otherwise
#            the status of 'git tag'
create_lightweight_tag() {
  local repo="$1"
  local tag="$2"
  printf '%s\n' "# creating lightweight tag $tag in $repo"
  # Sub-shell keeps the caller's working directory untouched.
  (
    cd "$repo" || exit
    git tag "$tag"
  )
}
# (Re)create a git repository from scratch and give it one initial commit.
# Any existing directory at the given path is deleted first.
# Arguments: $1 - path of the repository to create
# Outputs:   whatever git prints
# Returns:   status of the last step of the initial commit sequence
initialize_repo() {
  local repo="$1"

  # Start from an empty directory.
  rm -rf "$repo"
  mkdir -p "$repo"

  # Each step runs in its own sub-shell so the caller's working directory
  # never changes.
  (
    cd "$repo" || exit
    git init .
  )

  # First commit: a single README.txt. Stop at the first failing step.
  (
    cd "$repo" || exit
    echo "hello" >README.txt || exit
    git add . || exit
    git commit -m "Initial commit"
  )
}
# Append N commits to a repository, printing one progress dot per line of
# output produced while doing so.
# Each commit appends the commit counter to "$repo/file".
# Arguments: $1 - path to the repository
#            $2 - number of commits to create
# Outputs:   "Creating N commits in REPO", a row of dots, then a newline
# Returns:   0 on success; non-zero if writing the file or any git command
#            failed (the loop stops at the first failure)
create_n_commits() {
  local repo="$1"
  local num_commits="$2"
  local i rc

  # Pass the text as an argument, not as the format string, so that a '%' or
  # backslash in the repo path is printed literally.
  printf '%s' "Creating $num_commits commits in $repo"

  # git rev-list HEAD --count
  for ((i = 1; i <= num_commits; i++)); do
    echo "# adding a commit to $repo"
    echo "$i" >>"$repo"/file || exit 1
    # The loop is a pipeline stage and therefore runs in a sub-shell; 'exit'
    # only leaves that stage and is picked up through PIPESTATUS below.
    (cd "$repo" && git add . && git commit -m "some change") || exit 1
  done | while read -r _; do printf .; done
  # The pipeline's own status is that of the dot printer, so the status of
  # the commit loop has to be fetched explicitly.
  rc=${PIPESTATUS[0]}
  echo
  return "$rc"
}
# Print the one-line, decorated commit log of a repository.
# Arguments: $1 - path to the repository
# Outputs:   a "# log of ..." header followed by the output of 'git log'
# Returns:   status of 'cd' if the repository cannot be entered, otherwise
#            the status of 'git log'
show_log() {
  local repo="$1"
  printf '%s\n' "# log of $repo:"
  # Sub-shell keeps the caller's working directory untouched.
  (
    cd "$repo" || exit
    git log --pretty=oneline --decorate
  )
}
# Assert that two strings are identical; terminate the shell otherwise.
# Arguments: $1, $2 - the strings to compare
#            $3     - optional text appended to the failure message
# Outputs:   a "FAIL: ..." line on stdout when the strings differ
# Returns:   0 when equal; exits the shell with status 1 when they differ
should_be_equal() {
  # Nothing to report when the values match.
  [[ "$1" == "$2" ]] && return 0
  echo "FAIL: $1 and $2 differ$3"
  exit 1
}
# Assert that two strings differ; terminate the shell otherwise.
# Arguments: $1, $2 - the strings to compare
#            $3     - optional text appended to the failure message
# Outputs:   a "FAIL: ..." line on stdout when the strings are identical
# Returns:   0 when different; exits the shell with status 1 when equal
should_not_be_equal() {
  # Nothing to report when the values differ.
  [[ "$1" != "$2" ]] && return 0
  echo "FAIL: $1 and $2 are equal$3"
  exit 1
}
# Clone a repository, move the clone to a given revision and then strip the
# clone down with make_deterministic_repo.
# Any existing directory at the destination is deleted first.
# Arguments: $1 - URL or path of the repository to clone
#            $2 - revision the clone's HEAD is reset to
#            $3 - destination directory of the clone
# Outputs:   progress lines prefixed with "#", plus git's own output
# Returns:   status of make_deterministic_repo
make_deterministic_clone() {
  local url="$1" rev="$2" dest="$3"

  rm -rf "$dest"
  printf '%s\n' "# cloning repo $url"
  git clone "$url" "$dest"

  # 'git describe' is evaluated inline on purpose: it only decorates the
  # message, so its exit status is not checked.
  printf '# resetting %s HEAD to %s (%s)\n' \
    "$dest" "$rev" "$(cd "$dest" && git describe "$rev")"
  (
    cd "$dest" || exit
    git reset --hard "$rev"
  )

  make_deterministic_repo "$dest"
  #show_log "$dest"
}
# Strip a clone down to what is determined by its HEAD: remove volatile
# files, all remote-tracking branches and every tag that is not reachable
# from HEAD, then do a full single-threaded repack and garbage collect
# unreferenced objects.
# Arguments: $1 - path to the repository (modified in place)
# Outputs:   git's output; branch and tag deletion messages go to stderr
# Returns:   1 if the repository cannot be entered, otherwise the status of
#            the final 'git gc'
make_deterministic_repo() {
  local repo="$1"
  # Run in a sub-shell to not touch the caller's working directory.
  (
    # Never continue if the repository cannot be entered: the 'rm -rf' below
    # would otherwise operate on whatever directory the caller is in.
    cd "$repo" || exit 1

    # Remove files that contain timestamps or otherwise have
    # non-deterministic properties.
    rm -rf .git/logs/ .git/hooks/ .git/index .git/FETCH_HEAD .git/ORIG_HEAD \
      .git/refs/remotes/origin/HEAD .git/config

    # Remove all remote branches.
    git branch -r | while read -r branch; do
      git branch -rD "$branch" >&2
    done

    # Remove tags not reachable from HEAD. '--no-merged HEAD' lists exactly
    # those: tags on newer commits as well as tags on diverged history. Tags
    # pointing at HEAD itself (there may be several) or at one of its
    # ancestors are never listed and are therefore kept.
    git tag --no-merged HEAD | while read -r tag; do
      git tag -d "$tag" >&2
    done

    # Do a full repack. Must run single-threaded, or else we lose determinism.
    git config pack.threads 1
    git repack -A -d -f
    # Drop the config file that 'git config' just re-created.
    rm -f .git/config

    # Garbage collect unreferenced objects.
    git gc --prune=all
  )
}
# End-to-end check that two clones of the same revision hash identically,
# even when the source repository gained commits and tags in between.
# Scratch directories "<script name>-output-{repo,clone1,clone2}" are
# (re)created in the current directory.
# Arguments: none
# Outputs:   progress, the nix-hash of both clones, a recursive diff of the
#            clones and a final PASS or FAIL line
# Returns:   0 on PASS; exits the shell with status 1 on FAIL (through
#            should_be_equal)
can_have_deterministic_git_clones() {
  local out repo clone1 clone2 rev clone_hash1 clone_hash2

  out="${0##*/}-output"
  repo="$out-repo"
  clone1="$out-clone1"
  clone2="$out-clone2"
  rm -rf "$repo" "$clone1" "$clone2"

  # create repo with N commits and a mix of lightweight and annotated tags
  initialize_repo "$repo"
  create_annotated_tag "$repo" v0.1a
  create_n_commits "$repo" 1000
  create_lightweight_tag "$repo" v0.2l
  create_n_commits "$repo" 100
  #show_log "$repo"

  # grab the current HEAD id
  rev=$(cd "$repo" && git rev-parse HEAD)
  echo "# target commit id: rev=$rev ($(cd "$repo" && git describe "$rev"))"

  # make first repo clone and check its hash
  make_deterministic_clone "$repo" "$rev" "$clone1"
  clone_hash1=$(nix-hash --base32 --type sha256 "$clone1")
  echo "nix-hash of $clone1: $clone_hash1"

  # add extra commits and tags (both annotated and lightweight)
  # AND switch branch (the branch switch is currently disabled)
  create_n_commits "$repo" 1000
  create_annotated_tag "$repo" v0.3a
  create_n_commits "$repo" 100
  create_lightweight_tag "$repo" v0.4l
  #(cd "$repo" && git co -b branch-foo)
  #show_log "$repo"

  # make second repo clone and check its hash. This clone starts out with
  # extra commits on the master branch and a newer git tag. Everything
  # not reachable from given $rev must be removed for the clone to be
  # deterministic (arguably, it's not a "clone" anymore).
  make_deterministic_clone "$repo" "$rev" "$clone2"
  clone_hash2=$(nix-hash --base32 --type sha256 "$clone2")
  echo "nix-hash of $clone2: $clone_hash2"

  echo "# diff of cloned repos"
  # diff exits non-zero when the trees differ. That is informational only and
  # must not abort the script under 'set -e' before the verdict is printed.
  diff -uNr "$clone1" "$clone2" || true

  should_be_equal "$clone_hash1" "$clone_hash2" ": non-deterministic repo"
  echo "PASS: cloned git repo with .git directory has deterministic hash"
}
# exit if any command returns non-zero
set -e
# stop 'cd' from printing the directory it entered
unset CDPATH
# create deterministic commit ids (not strictly needed to prove deterministic
# clone hashes, but makes it easier to compare test runs)
export GIT_AUTHOR_DATE="Fri Oct 31 21:00:00 2014 +0100"
export GIT_COMMITTER_DATE="$GIT_AUTHOR_DATE"
# A commit id also covers the author and committer identity. Pin those too,
# so the ids do not depend on the local git configuration and 'git commit'
# works on machines where no identity is configured.
export GIT_AUTHOR_NAME="Deterministic Tester"
export GIT_AUTHOR_EMAIL="tester@example.invalid"
export GIT_COMMITTER_NAME="$GIT_AUTHOR_NAME"
export GIT_COMMITTER_EMAIL="$GIT_AUTHOR_EMAIL"
can_have_deterministic_git_clones
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment