Skip to content

Instantly share code, notes, and snippets.

@Saruspete
Last active May 12, 2020 00:27
Show Gist options
  • Save Saruspete/f5dbe749f6cf4bc655f4365a3e4a436e to your computer and use it in GitHub Desktop.
Save Saruspete/f5dbe749f6cf4bc655f4365a3e4a436e to your computer and use it in GitHub Desktop.
Hash per block for large files
#!/usr/bin/env bash
# Strict-ish mode: unset variables are errors, ">" never overwrites a file.
set -o nounset
set -o noclobber
export LC_ALL=C
export PATH="/bin:/sbin:/usr/bin:/usr/sbin:$PATH"
# Absolute path of this script and of the directory containing it.
# "$0" is quoted so a script path containing whitespace stays one argument,
# and the assignment is kept apart from readonly so that readlink's exit
# status is not masked by the declaration.
MYSELF="$(readlink -f -- "$0")"
readonly MYSELF
readonly MYPATH="${MYSELF%/*}"
# Default to 1G block + MD5
typeset -i BLOCKSIZE="$((1024*1024*1024))"
typeset HASHFUNC="md5sum"
typeset -i STARTSIZE=0
typeset -i STOPSIZE=0
# Duplicate stdout onto fd 3: the hash report lines are written to fd 3, so
# they still reach the original stdout from inside pipelines and command
# substitutions whose own stdout is redirected.
exec 3>&1
# Hash everything arriving on stdin and report the result on fd 3.
#
# Globals:   HASHFUNC (read) - hash command, invoked as "$HASHFUNC -"
# Arguments: $@ - label printed in front of the hash output
# Outputs:   "<label>: <hash output>" on fd 3 (fd 3 must already be open)
# Returns:   0 on success. If the hash command fails, its exit status is
#            returned, an error is printed on stderr and nothing is
#            written to fd 3 (instead of a line with an empty digest).
function hashstdin {
  typeset h
  typeset -i rc=0
  # Consume stdin. The assignment is separate from the declaration so the
  # hash command's exit status is not masked by typeset.
  h="$($HASHFUNC -)" || rc=$?
  if [[ $rc -ne 0 ]]; then
    echo >&2 "Error: $HASHFUNC failed with status $rc for: $*"
    return "$rc"
  fi
  # "$*" joins all label arguments with a space, which is what echo did
  # with the former "$@: $h" expansion
  printf >&3 '%s: %s\n' "$*" "$h"
}
# Very basic option parsing
#
# parse_options ARG...
#   Reads leading "--option value" / "--option=value" arguments into the
#   globals BLOCKSIZE, HASHFUNC, STARTSIZE and STOPSIZE.
#   Parsing stops at the first empty argument or at the first argument that
#   does not start with "--".
#   OPTS_CONSUMED receives the number of arguments that were consumed, so
#   the caller can shift them away.
#   Exits with status 1 on an unknown option, or when an option given in the
#   "--option value" form is the last argument and so has no value.
typeset -i OPTS_CONSUMED=0
function parse_options {
  typeset name value
  OPTS_CONSUMED=0
  while [[ -n "${1:-}" ]]; do
    # Stop if option does not start with --
    [[ "$1" == --* ]] || break
    case "$1" in
      --blocksize|--hash|--startsize|--stopsize)
        # Without this check "$2" is either an unbound-variable abort
        # under nounset or a silently empty value
        if [[ $# -lt 2 ]]; then
          echo >&2 "Error: Option $1 requires a value"
          exit 1
        fi
        name="$1"
        value="$2"
        shift 2
        OPTS_CONSUMED+=2
        ;;
      --blocksize=*|--hash=*|--startsize=*|--stopsize=*)
        name="${1%%=*}"
        value="${1#*=}"
        shift
        OPTS_CONSUMED+=1
        ;;
      *)
        echo >&2 "Error: Unknown option: $1"
        exit 1
        ;;
    esac
    case "$name" in
      --blocksize) BLOCKSIZE="$value" ;;
      --hash)      HASHFUNC="$value" ;;
      --startsize) STARTSIZE="$value" ;;
      --stopsize)  STOPSIZE="$value" ;;
    esac
  done
}
parse_options "$@"
# Drop the parsed options so that $1 is the first non-option argument
shift "$OPTS_CONSUMED"
#
# Check user values
#
# The hash command must resolve before any data is read. It is quoted so the
# name is looked up as one word, without word splitting or globbing.
if [[ -z "$HASHFUNC" ]] || ! type -p "$HASHFUNC" >/dev/null; then
  echo >&2 "Error: Unknown hash function: $HASHFUNC"
  exit 1
fi
# Negative sizes cannot describe a block or an offset in the file
if [[ $BLOCKSIZE -lt 0 || $STARTSIZE -lt 0 || $STOPSIZE -lt 0 ]]; then
  echo >&2 "Error: --blocksize, --startsize and --stopsize must not be negative"
  exit 1
fi
# First remaining argument is the file to hash
typeset FILE="${1:-}"
if ! [[ -s "$FILE" ]]; then
  echo >&2 "Error: non-existant or empty file '$FILE'"
  exit 1
fi
# -s can also succeed for a directory or for a file we may not read. Reading
# those fails later, where the reader's error output is discarded, so they
# are rejected here with an explicit message.
if [[ -d "$FILE" || ! -r "$FILE" ]]; then
  echo >&2 "Error: not a readable file '$FILE'"
  exit 1
fi
#
# Calculated values
#
# Declared first so that a failing stat is not masked by typeset; an empty
# result is stored as 0 and rejected just below.
typeset -i FILESIZE=0
FILESIZE="$(stat -c %s "$FILE")"
if [[ $FILESIZE -le 0 ]]; then
  echo >&2 "Error: cannot determine the size of '$FILE'"
  exit 1
fi
typeset -i gb="$(( 1024 * 1024 * 1024 ))"
# A block size of 0 asks for automatic sizing
if [[ $BLOCKSIZE == 0 ]]; then
  if [[ $FILESIZE -lt $(( 10 * gb )) ]]; then
    # Less than 10G, use 1G chunks (one chunk if the file is smaller than 1G)
    BLOCKSIZE="$gb"
    [[ $BLOCKSIZE -gt $FILESIZE ]] && BLOCKSIZE=$FILESIZE
  else
    # 10G or more: each block is a tenth of the file
    BLOCKSIZE=$(( FILESIZE / 10 ))
  fi
fi
# Guards the divisions below
if [[ $BLOCKSIZE -le 0 ]]; then
  echo >&2 "Error: invalid block size: $BLOCKSIZE"
  exit 1
fi
# End of the byte range to hash: STOPSIZE when set, but never past the end of
# the file (blocks past EOF would only produce hashes of empty data).
typeset -i endsize="$FILESIZE"
[[ $STOPSIZE -gt 0 && $STOPSIZE -lt $FILESIZE ]] && endsize="$STOPSIZE"
# Exclusive upper bound for block numbers. Rounding up keeps a trailing
# partial block, while an exact multiple of BLOCKSIZE no longer yields an
# extra, empty block.
typeset -i BLOCKCNT="$(( (endsize + BLOCKSIZE - 1) / BLOCKSIZE ))"
# First block: the one containing byte offset STARTSIZE
typeset -i blocknum=0
[[ $STARTSIZE -gt 0 ]] && blocknum="$(( STARTSIZE / BLOCKSIZE ))"
# Each block is copied to stdout of the loop and, through tee, to its own
# hashstdin. The loop output as a whole feeds the final hashstdin, so the
# "Whole file" line covers exactly the blocks emitted here.
while [[ $blocknum -lt $BLOCKCNT ]]; do
  typeset -i offset="$(( BLOCKSIZE * blocknum ))"
  # Read the block in 1M pieces: skip= and count= are byte counts here, and
  # fullblock retries short reads, so a large block is never cut short by a
  # single partial read. status=none silences the statistics only; real
  # errors still reach stderr.
  dd if="$FILE" bs=1M iflag=skip_bytes,count_bytes,fullblock \
    skip="$offset" count="$BLOCKSIZE" status=none \
    | tee >(hashstdin "Block $blocknum ($offset - $(( offset + BLOCKSIZE )))")
  blocknum+=1
done | hashstdin "Whole file: ($FILESIZE)"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment