Created
January 4, 2012 21:09
-
-
Save uid0/1562153 to your computer and use it in GitHub Desktop.
Bash script to check PDFs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Iterate through the PDF pairs that diff(1) reported as differing and decide,
# by rendering them, which pairs are visually identical and which are not.
#
# Usage: bash <this-script> DIFF_OUTPUT_FILE
#
# DIFF_OUTPUT_FILE contains lines as printed by diff for binary files, e.g.:
#
#   Binary files /contracts/prod/attachments/offers_2713-2812/271233/618932.pdf and /contracts2/prod/attachments/offers_2713-2812/271233/618932.pdf differ
#
# For every listed pair whose two files both exist:
#   * the visual difference is rendered with ImageMagick `compare`,
#   * that difference and a blank page of the same size are rasterised with
#     Ghostscript,
#   * if the two rasters are byte-identical, a `cp FILE1 FILE2` command is
#     appended to /root/runme.sh; otherwise "FILE1 FILE2" is appended to
#     /root/imagediff.out.
#
# Lines that do not look like a "Binary files ... differ" report, and pairs
# where either file is missing, are skipped silently. Pairs that cannot be
# rendered are skipped with a warning on stderr (they are never reported as
# identical).
#
# External tools required: compare, identify (ImageMagick), gs (Ghostscript),
# cmp, mktemp.

# -e is deliberately not set: ImageMagick's `compare` exits non-zero when the
# images differ, which is an expected outcome here, so every step is checked
# explicitly instead.
set -u -o pipefail

readonly RUNME_FILE=/root/runme.sh
readonly IMAGEDIFF_FILE=/root/imagediff.out

# Shape of a diff report line. With paths that themselves contain " and " the
# split is ambiguous; the greedy first group then splits at the last " and ".
readonly DIFF_LINE_RE='^Binary files (.+) and (.+) differ$'

# Print a message to stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print a non-fatal diagnostic to stderr.
warn() {
  printf 'warning: %s\n' "$*" >&2
}

# Check to see that yes, we actually got an argument...
[[ "$#" -eq 1 ]] || die "1 argument required, $# provided"
[[ -r "$1" ]] || die "cannot read diff output file: $1"

# Scratch files live in a private directory so that concurrent runs, or stale
# files left in the current directory, cannot influence the result.
workdir=$(mktemp -d) || die "mktemp failed"
cleanup() { rm -rf -- "${workdir:?}"; }
trap cleanup EXIT

readonly diff_pdf="${workdir}/diff.pdf"
readonly diff_ppm="${workdir}/diff.ppm"
readonly white_ppm="${workdir}/white.ppm"

# The list is read on fd 3 so that no child command can consume it via stdin.
# The `|| [[ -n ... ]]` clause keeps a final line that lacks a newline.
while IFS= read -r line <&3 || [[ -n "$line" ]]; do
  [[ "$line" =~ $DIFF_LINE_RE ]] || continue
  f1=${BASH_REMATCH[1]}
  f2=${BASH_REMATCH[2]}

  # Check to make sure both files exist.
  [[ -f "$f1" && -f "$f2" ]] || continue

  # Never let the result of a previous pair leak into this one.
  rm -f -- "$diff_pdf" "$diff_ppm" "$white_ppm"

  # Exit status is not checked (see the note on `set` above); success is
  # judged by whether a difference image was actually produced.
  compare "$f1" "$f2" -compose src "$diff_pdf"
  if [[ ! -s "$diff_pdf" ]]; then
    warn "compare produced no output for: $f1 $f2"
    continue
  fi

  # Page size of the difference image, formatted as WIDTHxHEIGHT for gs -g.
  geometry=$(identify -format '%[fx:(w)]x%[fx:(h)]' "$diff_pdf")
  if [[ -z "$geometry" ]]; then
    warn "could not determine page geometry for: $f1 $f2"
    continue
  fi

  # Rasterise the difference, and a blank page of the same size to compare
  # it against.
  gs -o "$diff_ppm" -sDEVICE=ppmraw -r72 "-g${geometry}" "$diff_pdf"
  gs -o "$white_ppm" -sDEVICE=ppmraw -r72 "-g${geometry}" -c "showpage"
  if [[ ! -s "$diff_ppm" || ! -s "$white_ppm" ]]; then
    warn "ghostscript rendering failed for: $f1 $f2"
    continue
  fi

  cmp -s -- "$diff_ppm" "$white_ppm"
  case $? in
    0)
      # Difference image is blank: the PDFs render identically.
      # %q leaves ordinary paths untouched and shell-quotes unusual ones, so
      # the generated script stays safe to run.
      printf 'cp %q %q\n' "$f1" "$f2" >> "$RUNME_FILE"
      ;;
    1)
      # The files are different...
      printf '%s %s\n' "$f1" "$f2" >> "$IMAGEDIFF_FILE"
      ;;
    *)
      warn "could not compare rendered images for: $f1 $f2"
      ;;
  esac
done 3< "$1"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment