Skip to content

Instantly share code, notes, and snippets.

@xaizek
Last active June 17, 2016 16:25
Show Gist options
  • Save xaizek/eaf79a2d36bd1f34a3822e18bc3a6472 to your computer and use it in GitHub Desktop.
Save xaizek/eaf79a2d36bd1f34a3822e18bc3a6472 to your computer and use it in GitHub Desktop.
Compares the files in two directories to find ones which are present in the source but not in the destination and vice versa. Useful for data migrations.
#!/bin/bash
# Based on
# https://github.com/capncodewash/Misc-shell-scripts/blob/master/find_missing_files.sh
# by Graeme West which is in turn based on the answer kindly supplied by
# 'Unknown' at StackOverflow:
# http://stackoverflow.com/questions/3006014/finding-missing-files-by-checksum
# with additions from aleksejrs.
#
# Compares the files in two directories to find ones which are present in the
# source but not in the destination and vice versa. Useful for data migrations.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details,
# which is available at www.gnu.org
# Treat expansion of an unset variable as an error.
set -o nounset

# Exactly two arguments are required: the two directories to compare.
if [ $# -ne 2 ]; then
    # The usage text is a diagnostic, so it is written to stderr.
    echo "Usage: $(basename "$0") path/to/a/dir path/to/b/dir" >&2
    exit 1
fi

# Reject anything that is not an existing directory. In particular an empty
# argument would otherwise turn into "/" below and the whole filesystem would
# be checksummed.
for arg in "$1" "$2"; do
    if [ ! -d "$arg" ]; then
        echo "$(basename "$0"): not a directory: $arg" >&2
        exit 1
    fi
done
unset arg

# Both directories are stored with a trailing slash appended.
src_dir="$1/"
dst_dir="$2/"

# Private scratch directory for the intermediate checksum lists. mktemp
# reports its own error; there is nothing to clean up yet if it fails.
work_dir="$(mktemp -dt fmfXXXXXXX)" || exit 1
# Removes the scratch directory named by the global $work_dir.
#
# Globals:   work_dir (read)
# Returns:   0 when there is nothing to remove, otherwise the status of rm
function cleanup()
{
    # Nothing to do when the directory was never created (empty or unset
    # variable); this also keeps rm from being called with an empty path.
    if [ -n "${work_dir:-}" ]; then
        # -f: never prompt and ignore already-missing entries.
        # --: the path is never parsed as an option.
        rm -rf -- "$work_dir"
    fi
}
# Run cleanup whenever the script exits.
trap cleanup EXIT

# Temporary files into which to write sorted checksums of all files, they are
# used to make the comparisons.
alist="$work_dir/cmp_source_files_sorted.txt"
blist="$work_dir/cmp_dest_files_sorted.txt"

# The files which will contain the final output.
# The names are relative, so they are created in the current directory.
a_only="vifm-dircompare-a-only.lst"
b_only="vifm-dircompare-b-only.lst"

# Checksum every regular file below each directory and store the sorted
# "checksum  path" lines. "{} +" hands many paths to each sha512sum process
# instead of starting one process per file; the sorted result is the same.
find "$src_dir" -type f -exec sha512sum {} + | sort > "$alist"
find "$dst_dir" -type f -exec sha512sum {} + | sort > "$blist"
# Lists the files whose content exists on only one side of the comparison.
#
# Globals:   alist, blist (read) - paths of the "checksum  path" list files
#            of the two sides
# Arguments: $1 - comm(1) columns to suppress: 23 keeps the checksums found
#                 only in $alist, 13 keeps those found only in $blist
#            $2 - the checksum list from which the file paths are taken
# Outputs:   sorted, de-duplicated paths, one per line, on stdout
function find_missing()
{
    # Unique checksums (first space-separated field) of the given list(s).
    # Collapsing duplicates keeps comm from reporting a checksum as one-sided
    # merely because the two sides hold a different number of copies of the
    # same content.
    function 1st()
    {
        cut -f1 -d' ' "$@" | sort -u
    }
    # Path part of each line: drops the checksum and the two-character
    # separator that follows it, so spaces inside the path are preserved.
    function 2nd()
    {
        sed 's/^[^ ]* [ *]//' "$@"
    }
    # comm yields the one-sided checksums; grep -F (fixed strings, patterns
    # read from the process substitution) selects the matching list lines.
    grep -F -f <(comm "-$1" <(1st "$alist") <(1st "$blist")) "$2" | 2nd | sort -u
}
# Selector 23 takes the paths for the first output file from the source
# list; selector 13 takes the paths for the second one from the destination
# list. Each redirection truncates its output file before writing.
find_missing 23 "${alist}" >"${a_only}"
find_missing 13 "${blist}" >"${b_only}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment