Last active
December 16, 2015 06:48
-
-
Save douglascodes/5393571 to your computer and use it in GitHub Desktop.
Bash script that reports the percentage of matching bytes between two files, using standard command-line tools such as wc, cmp and bc.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Reports the percentage of matching bytes between two files.
#
# Usage: <script> FILE_A FILE_B
#
# The two files are compared byte by byte (cmp -l) over the length of the
# shorter file. The similarity is printed relative to the shorter file and,
# when the lengths differ, also relative to the longer file.
#
# Requires: wc, cmp, bc.
# Exit status: 0 on success, 2 on wrong usage, 1 on any other error.

# Prints a diagnostic message to stderr.
err() {
  printf '%s\n' "$*" >&2
}

# Prints the size of file $1 in bytes.
# The file is fed to wc on stdin so that wc prints only the count; the file
# name (which may itself contain digits) never appears in the output.
byte_length() {
  local size
  size=$(wc -c < "$1") || return
  # Arithmetic expansion strips the padding some wc implementations emit.
  printf '%d\n' "$((size))"
}

# Prints the number of byte positions at which files $1 and $2 differ,
# looking only at the part both files have in common.
count_mismatches() {
  local lines status
  # 'cmp -l' prints one line per differing byte. It exits with 1 when the
  # files differ and reports a premature EOF on stderr; both are expected
  # here. pipefail (scoped to this subshell) surfaces real failures (>1).
  lines=$(set -o pipefail; cmp -l -- "$1" "$2" 2> /dev/null | wc -l)
  status=$?
  if ((status > 1)); then
    return "$status"
  fi
  printf '%d\n' "$((lines))"
}

# Prints $1 as a percentage of $2, with nine decimal places.
percent_of() {
  if (($2 == 0)); then
    # Nothing to compare: every one of the zero bytes matches. Reporting
    # 100 avoids a division by zero for empty files.
    printf '100.000000000\n'
    return 0
  fi
  # Multiply before dividing so bc's truncation to 'scale' digits does not
  # discard precision (2/3*100 would give 66.666666600).
  bc <<< "scale=9; $1 * 100 / $2"
}

# Entry point.
# Arguments: $1, $2 - paths of the two files to compare.
# Outputs:   the report on stdout, diagnostics on stderr.
# Returns:   0 on success, 2 on wrong usage, 1 on any other error.
main() {
  if (($# != 2)); then
    err "Usage: ${0##*/} FILE_A FILE_B"
    return 2
  fi

  local file
  for file in "$1" "$2"; do
    if [[ ! -r "$file" || -d "$file" ]]; then
      err "Cannot read file: $file"
      return 1
    fi
  done

  if ! command -v bc > /dev/null 2>&1; then
    err "bc is required but was not found in PATH"
    return 1
  fi

  echo "Reports the percentage of matching bytes between two files."

  local length_of_a length_of_b
  length_of_a=$(byte_length "$1") || return 1
  length_of_b=$(byte_length "$2") || return 1

  # Order the files by size; cmp only compares the common prefix.
  local longer shorter longer_bytes shorter_bytes
  if ((length_of_b > length_of_a)); then
    longer=$2
    shorter=$1
    longer_bytes=$length_of_b
    shorter_bytes=$length_of_a
  else
    longer=$1
    shorter=$2
    longer_bytes=$length_of_a
    shorter_bytes=$length_of_b
  fi

  local same_size=false
  if ((length_of_a == length_of_b)); then
    # Plain string assignment: 'let' would evaluate "true" arithmetically
    # and store 0, so the same-size check below would never match.
    same_size=true
    echo "The files lengths were equal."
  else
    echo "Difference in file lengths: $((longer_bytes - shorter_bytes)) byte(s)"
  fi

  local wrong correct answer
  wrong=$(count_mismatches "$longer" "$shorter") || return 1
  correct=$((shorter_bytes - wrong))
  answer=$(percent_of "$correct" "$shorter_bytes") || return 1

  printf '%s length: %s\n' "$1" "$length_of_a"
  printf '%s length: %s\n' "$2" "$length_of_b"
  printf 'Unmatched bytes: %s\n' "$wrong"
  printf 'Matched bytes: %s\n' "$correct"
  printf 'Similarity is %s %% of %s bytes.\n' "$answer" "$shorter_bytes"

  # With equal lengths the "longer" figure would just repeat the line above.
  if [[ "$same_size" == true ]]; then
    return 0
  fi

  answer=$(percent_of "$correct" "$longer_bytes") || return 1
  printf 'Similarity is %s %% of the longer %s bytes.\n' "$answer" "$longer_bytes"
}

# main is the last command, so its return value is the script's exit status.
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment