Last active
May 12, 2020 00:27
-
-
Save Saruspete/f5dbe749f6cf4bc655f4365a3e4a436e to your computer and use it in GitHub Desktop.
Hash per block for large files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Hash a file block by block and as a whole with a configurable hash command.
#

# Abort on use of an unset variable; refuse to overwrite files with ">".
set -o nounset
set -o noclobber

# Fixed locale and a PATH that prefers the system directories.
export LC_ALL=C
export PATH="/bin:/sbin:/usr/bin:/usr/sbin:$PATH"

# Absolute path of this script and of the directory containing it.
# MYSELF is assigned before being marked readonly so that the status of
# readlink is not masked by the readonly builtin.
MYSELF="$(readlink -f -- "$0")"
readonly MYSELF
readonly MYPATH="${MYSELF%/*}"

# Default to 1G block + MD5
typeset -i BLOCKSIZE="$((1024*1024*1024))"
typeset    HASHFUNC="md5sum"
# Byte offsets; 0 means "not set"
typeset -i STARTSIZE=0
typeset -i STOPSIZE=0

# Duplicate stdout on fd 3: stdout itself carries the file data through the
# hashing pipeline, so the hash reports are written to fd 3 instead.
exec 3>&1
# Hash everything arriving on stdin and report the result on fd 3.
# Globals:   HASHFUNC (read) - hash command, invoked with "-" as its argument
# Arguments: $@ - label printed in front of the hash
# Outputs:   "<label>: <hash command output>" on fd 3 (must already be open)
# Returns:   0 on success, 1 if the hash command failed
function hashstdin {
  typeset h

  # Consume stdin. The assignment is kept apart from the declaration so the
  # exit status of the hash command is not masked by typeset.
  if ! h="$("$HASHFUNC" -)"; then
    echo >&2 "Error: $HASHFUNC failed for $*"
    return 1
  fi

  # "$*" joins all label words into the single string being printed
  echo >&3 "$*: $h"
}
# Validate a numeric option value and print it in canonical base 10.
# Arguments: $1 - option name, used in the error message
#            $2 - raw value given by the user
# Outputs:   the value on stdout; an error message on stderr when invalid
# Returns:   0 if $2 is a non-empty string of decimal digits, 1 otherwise
function parse_uint {
  typeset opt="$1" val="${2:-}"
  if ! [[ "$val" =~ ^[0-9]+$ ]]; then
    echo >&2 "Error: Option $opt expects a non-negative integer, got '$val'"
    return 1
  fi
  # Force base 10 so that leading zeros are not read as octal
  echo "$(( 10#$val ))"
}

# Very basic option parsing
#
# BLOCKSIZE, STARTSIZE and STOPSIZE are integer variables: bash evaluates
# anything assigned to them as an arithmetic expression. Their values are
# therefore validated as plain digits before the assignment.
while [[ -n "${1:-}" ]]; do
  # Stop if option does not start with --
  [[ "${1#--}" == "${1}" ]] && break

  case "$1" in
    # Explicit end of options; whatever follows is the file name
    --)             shift; break ;;
    --blocksize)    BLOCKSIZE="$(parse_uint "$1" "${2:-}")" || exit 1; shift ;;
    --blocksize=*)  BLOCKSIZE="$(parse_uint "${1%%=*}" "${1#*=}")" || exit 1 ;;
    # An empty hash command is rejected by the later user-value check
    --hash)         HASHFUNC="${2:-}"; shift ;;
    --hash=*)       HASHFUNC="${1#*=}" ;;
    --startsize)    STARTSIZE="$(parse_uint "$1" "${2:-}")" || exit 1; shift ;;
    --startsize=*)  STARTSIZE="$(parse_uint "${1%%=*}" "${1#*=}")" || exit 1 ;;
    --stopsize)     STOPSIZE="$(parse_uint "$1" "${2:-}")" || exit 1; shift ;;
    --stopsize=*)   STOPSIZE="$(parse_uint "${1%%=*}" "${1#*=}")" || exit 1 ;;
    *)              echo >&2 "Error: Unknown option: $1"; exit 1 ;;
  esac
  shift
done
#
# Check user values
#

# The hash command must be non-empty and resolvable by the shell.
# Quoted so that it is looked up as one name, without splitting or globbing.
if [[ -z "$HASHFUNC" ]] || ! type -p "$HASHFUNC" >/dev/null; then
  echo >&2 "Error: Unknown hash function: $HASHFUNC"
  exit 1
fi

# First remaining argument is the file to hash
typeset FILE="${1:-}"
if ! [[ -s "$FILE" ]]; then
  echo >&2 "Error: non-existant or empty file '$FILE'"
  exit 1
fi

# -s is also true for a non-empty directory, which cannot be read as data
if ! [[ -f "$FILE" && -r "$FILE" ]]; then
  echo >&2 "Error: not a readable regular file '$FILE'"
  exit 1
fi
#
# Calculated values
#

# Size of the file in bytes.
# -L follows symbolic links, so the size is that of the data actually read
# and not of the link itself. The assignment is kept apart from typeset so a
# stat failure is detected instead of silently yielding 0.
typeset -i FILESIZE
if ! FILESIZE="$(stat -L -c %s -- "$FILE")" || [[ $FILESIZE -le 0 ]]; then
  echo >&2 "Error: cannot determine size of '$FILE'"
  exit 1
fi
typeset -i gb="$(( 1024 * 1024 * 1024 ))"

if [[ $BLOCKSIZE -lt 0 ]]; then
  echo >&2 "Error: invalid block size: $BLOCKSIZE"
  exit 1
fi

# A block size of 0 requests automatic sizing
if [[ $BLOCKSIZE -eq 0 ]]; then
  if (( FILESIZE < 10 * gb )); then
    # Less than 10G, use 1G chunks (or one single chunk for files below 1G)
    BLOCKSIZE=$gb
    if (( BLOCKSIZE > FILESIZE )); then
      BLOCKSIZE=$FILESIZE
    fi
  else
    # 10G or more, do 10 blocks
    BLOCKSIZE=$(( FILESIZE / 10 ))
  fi
fi
# Index of the last block to hash (inclusive).
# (FILESIZE - 1) / BLOCKSIZE is the block holding the last byte of the file;
# plain FILESIZE / BLOCKSIZE would add an empty trailing block whenever the
# size is an exact multiple of the block size.
typeset BLOCKCNT="$(( (FILESIZE - 1) / BLOCKSIZE ))"
typeset -i blocknum=0

# Start at the block containing byte STARTSIZE
if [[ $STARTSIZE -ne 0 ]]; then
  blocknum="$(( STARTSIZE / BLOCKSIZE ))"
fi
# Stop after the block containing byte STOPSIZE, but never past end of file
if [[ $STOPSIZE -ne 0 ]] && (( STOPSIZE / BLOCKSIZE < BLOCKCNT )); then
  BLOCKCNT="$(( STOPSIZE / BLOCKSIZE ))"
fi

# Each selected block is streamed once: tee hands a copy to a per-block
# hashstdin and forwards the data to the hashstdin at the end of the loop.
# With --startsize/--stopsize that final hash covers only the selected blocks,
# although its label still reads "Whole file".
while (( blocknum <= BLOCKCNT )); do
  typeset -i offset="$(( BLOCKSIZE * blocknum ))"
  # tail/head stream the block with small buffers. "dd bs=BLOCKSIZE count=1"
  # needs a buffer as large as the block and treats a short read as the whole
  # block, which truncates very large blocks.
  # tail -c +N is 1-based, hence offset + 1.
  tail -c "+$(( offset + 1 ))" -- "$FILE" \
    | head -c "$BLOCKSIZE" \
    | tee >(hashstdin "Block $blocknum ($offset - $(( offset + BLOCKSIZE )))")
  blocknum="$(( blocknum + 1 ))"
done | hashstdin "Whole file: ($FILESIZE)"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment