Last active
August 29, 2015 14:15
-
-
Save bjornfor/bb96b09d4bd1a488cd01 to your computer and use it in GitHub Desktop.
Test if git clones (at specific revision) can be deterministic
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# A test to see if a deterministic fetchgit with leaveDotGit is possible.
# Short answer: yes.
#
# Q: Can .git/index be deterministic?
# A: No, impossible (it contains inode numbers). If you *must* have it, remove
# it before calculating checksums, then re-create it from the repo contents.
# Create an annotated tag at the current HEAD of a repository.
# Arguments: $1 - path to the git repository
#            $2 - name of the tag to create
# Outputs:   one progress line on stdout, plus whatever git prints
# Returns:   exit status of the cd/git-tag sub-shell
create_annotated_tag() {
  local repo_dir=$1
  local tag_name=$2

  printf '%s\n' "# creating annotated tag $tag_name in $repo_dir"
  # Sub-shell keeps the caller's working directory untouched.
  (
    cd "$repo_dir" &&
      git tag -a -m "Message for tag $tag_name" "$tag_name"
  )
}
# Create a lightweight (non-annotated) tag at the current HEAD of a repository.
# Arguments: $1 - path to the git repository
#            $2 - name of the tag to create
# Outputs:   one progress line on stdout, plus whatever git prints
# Returns:   exit status of the cd/git-tag sub-shell
create_lightweight_tag() {
  local repo_dir=$1
  local tag_name=$2

  printf '%s\n' "# creating lightweight tag $tag_name in $repo_dir"
  # Sub-shell keeps the caller's working directory untouched.
  (
    cd "$repo_dir" &&
      git tag "$tag_name"
  )
}
# Create a fresh git repository containing a single commit.
# Any existing directory at the given path is deleted first.
# Arguments: $1 - path where the repository is (re)created
# Returns:   exit status of the sub-shell that makes the first commit
initialize_repo() {
  local repo_dir=$1

  # Start from a clean slate.
  rm -rf "$repo_dir"
  mkdir -p "$repo_dir"

  (
    cd "$repo_dir" &&
      git init .
  )

  # First commit: a README holding a single line.
  (
    cd "$repo_dir" &&
      printf 'hello\n' >README.txt &&
      git add . &&
      git commit -m "Initial commit"
  )
}
# Add a number of commits to a repository.
# For each commit the loop counter is appended to <repo>/file and the change
# is committed with the message "some change".
# Arguments: $1 - path to the git repository
#            $2 - number of commits to create
# Outputs:   a header, then one dot per line of output produced while
#            committing, then a newline (all on stdout)
# Returns:   0 on success; non-zero if writing the file or any git command
#            failed (commit creation stops at the first failure)
create_n_commits() {
  local repo="$1"
  local num_commits="$2"
  local i status

  # Values are passed as printf arguments, never as part of the format
  # string, so a '%' or backslash in the path cannot corrupt the output.
  printf 'Creating %s commits in %s' "$num_commits" "$repo"

  # git rev-list HEAD --count
  for ((i = 1; i <= num_commits; i++)); do
    echo "# adding a commit to $repo"
    # This loop is a pipeline stage and therefore runs in its own sub-shell:
    # 'exit' only leaves the loop, its status is collected below.
    echo "$i" >>"$repo"/file || exit 1
    (cd "$repo" && git add . && git commit -m "some change") || exit 1
  done | while IFS= read -r _; do printf .; done
  # The pipeline's own status is that of the dot printer; fetch the status
  # of the commit loop explicitly so failures are not silently dropped.
  status=${PIPESTATUS[0]}
  echo
  return "$status"
}
# Print the commit history of a repository, one line per commit, with
# ref decorations.
# Arguments: $1 - path to the git repository
# Outputs:   a header line followed by the git log output on stdout
# Returns:   exit status of the cd/git-log sub-shell
show_log() {
  local repo_dir=$1

  printf '%s\n' "# log of $repo_dir:"
  (
    cd "$repo_dir" &&
      git log --pretty=oneline --decorate
  )
}
# Assert that two strings are identical.
# Arguments: $1, $2 - the strings to compare
#            $3     - text appended to the failure message (may be empty)
# Outputs:   a "FAIL: ..." line on stdout when the strings differ
# Returns:   0 when equal; otherwise terminates the shell with status 1
should_be_equal() {
  # Nothing to report when the values match.
  [[ "$1" == "$2" ]] && return 0

  printf '%s\n' "FAIL: $1 and $2 differ$3"
  exit 1
}
# Assert that two strings are different.
# Arguments: $1, $2 - the strings to compare
#            $3     - text appended to the failure message (may be empty)
# Outputs:   a "FAIL: ..." line on stdout when the strings are identical
# Returns:   0 when different; otherwise terminates the shell with status 1
should_not_be_equal() {
  # Nothing to report when the values differ.
  [[ "$1" != "$2" ]] && return 0

  printf '%s\n' "FAIL: $1 and $2 are equal$3"
  exit 1
}
# Clone a repository, hard-reset the clone to a given revision and then strip
# it down with make_deterministic_repo.
# Any existing directory at the destination is deleted first.
# Arguments: $1 - URL or path of the repository to clone
#            $2 - revision the clone's HEAD is reset to
#            $3 - destination directory of the clone
# Outputs:   progress lines plus the output of the git commands
# Returns:   exit status of make_deterministic_repo
make_deterministic_clone() {
  local source_url=$1
  local target_rev=$2
  local clone_dir=$3
  local description

  rm -rf "$clone_dir"

  printf '%s\n' "# cloning repo $source_url"
  git clone "$source_url" "$clone_dir"

  # The description is purely informational: a failing 'git describe' just
  # leaves the parentheses empty and must not abort the run.
  description=$(cd "$clone_dir" && git describe "$target_rev") || true
  printf '%s\n' "# resetting $clone_dir HEAD to $target_rev ($description)"
  (
    cd "$clone_dir" &&
      git reset --hard "$target_rev"
  )

  make_deterministic_repo "$clone_dir"
  #show_log "$clone_dir"
}
# Strip a cloned repository down so that its on-disk contents can be hashed
# reproducibly: remove volatile files, remove all remote branches, remove the
# tags reported by 'git tag --contains HEAD' (except those pointing exactly at
# HEAD), do a full repack and then garbage collect unreferenced objects.
# Arguments: $1 - path to the repository (must contain a .git directory)
# Outputs:   output of the git commands; branch/tag deletions go to stderr
# Returns:   non-zero if the repository directory cannot be entered,
#            otherwise the exit status of 'git gc'
# NOTE(review): tags on history that has diverged from HEAD are presumably
# not listed by '--contains HEAD' and are therefore left in place -- confirm
# whether that matters for the intended use.
make_deterministic_repo() {
  local repo="$1"
  local branch tag tags_at_head

  # run in sub-shell to not touch current working directory
  (
    # Stop right here if the directory is unusable; otherwise the rm below
    # would run against whatever directory the caller happens to be in.
    cd "$repo" || exit 1

    # Remove files that contain timestamps or otherwise have non-deterministic
    # properties.
    rm -rf .git/logs/ .git/hooks/ .git/index .git/FETCH_HEAD .git/ORIG_HEAD \
      .git/refs/remotes/origin/HEAD .git/config

    # Remove all remote branches. Default IFS is kept on purpose so that the
    # indentation of 'git branch' output is trimmed from the name.
    git branch -r | while read -r branch; do
      git branch -rD "$branch" >&2
    done

    # Remove tags that contain HEAD, but keep every tag that points exactly
    # at HEAD. Several tags may point at HEAD, so membership is tested against
    # the whole newline-separated list instead of a single string comparison.
    tags_at_head=$(git tag --points-at HEAD)
    git tag --contains HEAD | while read -r tag; do
      case $'\n'"$tags_at_head"$'\n' in
        *$'\n'"$tag"$'\n'*) ;; # points at HEAD: keep it
        *) git tag -d "$tag" >&2 ;;
      esac
    done

    # Do a full repack. Must run single-threaded, or else we lose determinism.
    # The config file only exists to carry that setting and is removed again
    # so that it does not end up in the hashed tree.
    git config pack.threads 1
    git repack -A -d -f
    rm -f .git/config

    # Garbage collect unreferenced objects.
    git gc --prune=all
  )
}
# End-to-end test: clone the same revision of a repository twice -- the second
# time after the source repository has gained extra commits and tags -- and
# check that both stripped-down clones produce the same nix-hash.
# Works in three directories named after this script, created in the current
# working directory: <script>-output-repo, -clone1 and -clone2.
# Outputs:   progress information, a recursive diff of the two clones and a
#            final PASS or FAIL line on stdout
# Returns:   0 on PASS; on FAIL the shell is terminated with status 1 by
#            should_be_equal
can_have_deterministic_git_clones() {
  local out repo clone1 clone2 rev clone_hash1 clone_hash2

  out="${0##*/}-output"
  repo="$out-repo"
  clone1="$out-clone1"
  clone2="$out-clone2"
  rm -rf "$repo" "$clone1" "$clone2"

  # create repo with N commits and a mix of lightweight and annotated tags
  initialize_repo "$repo"
  create_annotated_tag "$repo" v0.1a
  create_n_commits "$repo" 1000
  create_lightweight_tag "$repo" v0.2l
  create_n_commits "$repo" 100
  #show_log "$repo"

  # grab the current HEAD id
  rev=$(cd "$repo" && git rev-parse HEAD)
  echo "# target commit id: rev=$rev ($(cd "$repo" && git describe "$rev"))"

  # make first repo clone and check its hash
  make_deterministic_clone "$repo" "$rev" "$clone1"
  clone_hash1=$(nix-hash --base32 --type sha256 "$clone1")
  echo "nix-hash of $clone1: $clone_hash1"

  # add extra commits and tags (both annotated and lightweight)
  # AND switch branch
  create_n_commits "$repo" 1000
  create_annotated_tag "$repo" v0.3a
  create_n_commits "$repo" 100
  create_lightweight_tag "$repo" v0.4l
  #(cd "$repo" && git co -b branch-foo)
  #show_log "$repo"

  # make second repo clone and check its hash. This clone starts out with
  # extra commits on the master branch and a newer git tag. Everything
  # not reachable from given $rev must be removed for the clone to be
  # deterministic (arguably, it's not a "clone" anymore).
  make_deterministic_clone "$repo" "$rev" "$clone2"
  clone_hash2=$(nix-hash --base32 --type sha256 "$clone2")
  echo "nix-hash of $clone2: $clone_hash2"

  echo "# diff of cloned repos"
  # The diff is informational only. Its non-zero status on differences must
  # not trip 'set -e', or the FAIL verdict below would never be printed.
  diff -uNr "$clone1" "$clone2" || true

  should_be_equal "$clone_hash1" "$clone_hash2" ": non-deterministic repo"
  echo "PASS: cloned git repo with .git directory has deterministic hash"
}
# Abort the run as soon as any command returns a non-zero status.
set -o errexit

# With CDPATH set, 'cd' may print the directory it entered; drop it so that
# nothing unexpected ends up in the output.
unset CDPATH

# Pin author and committer dates so that commit ids are reproducible (not
# strictly needed to prove deterministic clone hashes, but it makes test runs
# easier to compare).
GIT_AUTHOR_DATE="Fri Oct 31 21:00:00 2014 +0100"
GIT_COMMITTER_DATE="$GIT_AUTHOR_DATE"
export GIT_AUTHOR_DATE GIT_COMMITTER_DATE

can_have_deterministic_git_clones
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment