Created
June 11, 2016 19:50
-
-
Save sourcesimian/b818dc6c3bf15705abd558ed551caac9 to your computer and use it in GitHub Desktop.
Filesystem tree content comparison using md5sum
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Filesystem tree content comparison using md5sum
#
# Usage:
#   $ md5tree-cmp hash <folder 1> > my_hash_file_1
#   $ md5tree-cmp hash <folder 2> > my_hash_file_2
#
#   $ md5tree-cmp only <hash_file_1> <hash_file_2>
#
# "hash" prints one md5sum line per regular file found under the folder.
# "only" compares two such listings by hash and writes the lines whose hash
# is missing from the other listing to <hash_file_1>-ONLY and
# <hash_file_2>-ONLY.

# Abort on the first unhandled command failure.
set -e

# Sub-command selector ("hash" or "only"); empty when no argument is given.
cmd=${1:-}
# Print an md5sum line for every regular file below a folder.
#
# NOTE: defining this function shadows bash's `hash` builtin for the rest of
# the script.
#
# Arguments:
#   $1 - folder to scan. An absolute path is used as-is; a relative path (or
#        an empty value) is resolved against $PWD so that the listed paths
#        are always absolute.
# Outputs:
#   md5sum's output lines on stdout, one per regular file.
function hash() {
  local folder=${1:-}
  local root

  case "$folder" in
    /*) root=$folder ;;           # already absolute: do not prepend $PWD
    *)  root="$PWD/$folder" ;;
  esac

  find "$root" -type f -exec md5sum "{}" +
}
# Print the lines of a hash listing whose hash (the text before the first
# space) appears in a file of wanted hashes. The comparison is an exact string
# match on that first field, so hashes are never treated as regex patterns.
#
# Arguments:
#   $1 - file holding one wanted hash per line
#   $2 - hash listing to filter
function _only_select() {
  WANTED="$1" awk -F'[ ]' '
    BEGIN { while ((getline h < ENVIRON["WANTED"]) > 0) want[h] }
    ($1 in want)
  ' < "$2"
}

# Compare two hash listings and report the entries unique to each.
#
# Arguments:
#   $1 - hash listing A ("<md5>  <path>" lines, as printed by md5sum)
#   $2 - hash listing B (same format)
# Outputs:
#   Progress messages on stdout. Writes "<A>-ONLY" and "<B>-ONLY", holding the
#   full lines of each listing whose hash has no counterpart in the other.
# Returns:
#   0 on success; 1 when a listing is missing or unreadable.
function only() {
  local A=${1:-}
  local B=${2:-}
  local tmp Ahash Bhash Aonly Bonly listing

  for listing in "$A" "$B"; do
    if [[ -z "$listing" || ! -r "$listing" ]]; then
      echo "only: cannot read hash file: '$listing'" >&2
      return 1
    fi
  done

  # An explicit XXXXXX template works with both GNU and BSD mktemp
  # (`mktemp -t name` without X's is rejected by GNU mktemp).
  tmp=$(mktemp "${TMPDIR:-/tmp}/md5tree-cmp.XXXXXX")
  echo "* tmp: $tmp"

  Ahash="${tmp}-Ahash"  # Just the hashes in A, sorted
  Bhash="${tmp}-Bhash"  # Just the hashes in B, sorted
  Aonly="${tmp}-Aonly"  # Hashes with no counterpart in B
  Bonly="${tmp}-Bonly"  # Hashes with no counterpart in A

  # Remove every "${tmp}*" file on exit. The path is expanded (and quoted)
  # now because $tmp is local and will be out of scope when the trap fires.
  trap "tmp_cleanup $(printf '%q' "$tmp")" EXIT
  # Make signals terminate the script (running the EXIT trap) instead of
  # deleting the temp files and carrying on.
  trap 'exit 130' INT
  trap 'exit 143' TERM

  echo "* A: $A"
  echo "* B: $B"

  echo "* Collect hashes from files"
  # LC_ALL=C keeps sort and comm in agreement about ordering.
  cut -d' ' -f1 < "$A" | LC_ALL=C sort > "$Ahash"
  cut -d' ' -f1 < "$B" | LC_ALL=C sort > "$Bhash"

  echo "* Diff hashes"
  LC_ALL=C comm -23 "$Ahash" "$Bhash" > "$Aonly"
  LC_ALL=C comm -13 "$Ahash" "$Bhash" > "$Bonly"

  # $(( ... )) strips the padding some wc implementations add.
  echo " - Files only in A: $(( $(wc -l < "$Aonly") ))"
  echo " - Files only in B: $(( $(wc -l < "$Bonly") ))"

  echo "* Writing all lines only in A to: ${A}-ONLY ..."
  _only_select "$Aonly" "$A" > "${A}-ONLY"

  echo "* Writing all lines only in B to: ${B}-ONLY ..."
  _only_select "$Bonly" "$B" > "${B}-ONLY"

  echo "* Done"
}
# Delete every file whose path starts with the given prefix.
#
# Arguments:
#   $1 - path prefix of the temporary files to remove
# Returns:
#   1 (without deleting anything) when the prefix is empty, because an empty
#   prefix would expand to every file in the current directory.
function tmp_cleanup() {
  local prefix=${1:-}

  if [[ -z "$prefix" ]]; then
    echo "tmp_cleanup: refusing to run without a path prefix" >&2
    return 1
  fi

  # Quote the prefix so spaces survive; keep the trailing * outside the
  # quotes so it still globs. -f tolerates an already-clean prefix.
  rm -f -- "$prefix"*
}
# Dispatch on the sub-command held in $cmd. Arguments are passed quoted so
# paths containing spaces reach the handlers intact.
case "${cmd:-}" in
  hash)
    hash "${2:-}"
    ;;
  only)
    only "${2:-}" "${3:-}"
    ;;
  *)
    echo "md5tree-cmp"
    echo "Usage:"
    echo " $ md5tree-cmp hash <folder> > <output>"
    echo " $ md5tree-cmp only <output1> <output2>"
    # Plain invocation just shows the usage; an unrecognised sub-command is
    # reported as a failure so callers can detect the typo.
    if [[ -n "${cmd:-}" ]]; then
      exit 1
    fi
    ;;
esac
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment