Created
October 5, 2014 03:25
-
-
Save cmbaughman/8f7e63516649d98db2fe to your computer and use it in GitHub Desktop.
eBook Open Dir Finder 2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Rename PDF e-books after the ISBN-13 printed inside them.
#
# Usage: <script> SOURCE_DIR DEST_DIR
#
# Every non-empty regular file under SOURCE_DIR that `file` identifies as a
# PDF and that has more than MIN_PAGES pages is probed, one page at a time,
# for a 978-prefixed ISBN-13 with a correct check digit. On success the file
# is moved to DEST_DIR/<isbn>.pdf, or DEST_DIR/<isbn>-N.pdf (N = 2, 3, ...)
# when that name is already taken. DEST_DIR is created when missing.
# Files in which no trustworthy ISBN is found are left where they are.
#
# Requires: file, pdfinfo and pdftotext on PATH.
#
# Exit status: 0 after a scan, 1 when a required tool is missing or DEST_DIR
# cannot be created, 2 on a usage error.

# errexit/pipefail are deliberately not enabled: grep exiting 1 on "no match"
# is a normal outcome in the pipelines below.
set -u

# Only PDFs with more pages than this are considered to be books.
readonly MIN_PAGES=100

# Print a diagnostic on stderr, prefixed with the script name.
warn() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
}

# Succeed when $1 is exactly 13 digits, starts with 978 and its last digit is
# the ISBN-13 check digit of the first twelve (weights 1,3,1,3,...).
isbn13_is_valid() {
  local isbn=$1 sum=0 i digit
  [[ $isbn =~ ^978[0-9]{10}$ ]] || return 1
  for (( i = 0; i < 12; i++ )); do
    digit=${isbn:i:1}
    if (( i % 2 )); then
      sum=$(( sum + 3 * digit ))
    else
      sum=$(( sum + digit ))
    fi
  done
  (( (10 - sum % 10) % 10 == ${isbn:12:1} ))
}

# Print the page count of PDF $1 when it exceeds MIN_PAGES; print nothing
# otherwise (including when pdfinfo cannot read the file).
pdf_page_count() {
  pdfinfo "$1" 2>/dev/null |
    awk -v min="$MIN_PAGES" '/^Pages/ { if ($2 > min) print $2; exit }'
}

# Print the 7-bit ASCII text of page $2 of PDF $1.
page_text() {
  # NUL bytes are dropped so the text can be held in a shell variable.
  pdftotext -enc ASCII7 -f "$2" -l "$2" "$1" - 2>/dev/null | tr -d '\000'
}

# Read text on stdin and print every checksum-valid ISBN-13 found in it, one
# per line, digits only, with adjacent duplicates collapsed.
valid_isbns() {
  local candidate
  grep -E -o '\<978[^A-Za-z:)(,/\\.]{9,13}[[:digit:]]\>' |
    sed 's/[^0-9]//g' |
    uniq |
    while IFS= read -r candidate; do
      # The pattern above allows 13 to 17 characters, so after stripping the
      # separators a candidate may be too short or too long; the validator
      # rejects anything that is not exactly 13 digits.
      if isbn13_is_valid "$candidate"; then
        printf '%s\n' "$candidate"
      fi
    done
}

# Probe PDF $1 (which has $2 pages) for its ISBN. Prints the ISBN and returns
# 0 on success; returns 1 when no page yields a trustworthy ISBN.
find_isbn() {
  local pdf=$1 pages=$2 page text found
  # Pages are tried in order of how likely they are to carry the book's own
  # ISBN: the front matter first, then the last six pages.
  for page in 5 6 4 3 2 1 7 8 9 10 12 14 11 13 \
      $(( pages - 5 )) $(( pages - 4 )) $(( pages - 3 )) \
      $(( pages - 2 )) $(( pages - 1 )) "$pages"; do
    text=$(page_text "$pdf" "$page")
    found=$(valid_isbns <<<"$text")
    [[ -n $found ]] || continue

    # Exactly one valid ISBN on the page: accept it.
    if [[ $found != *$'\n'* ]]; then
      printf '%s\n' "$found"
      return 0
    fi

    # Several ISBNs on one page: trust the first one only when the page also
    # reads like a copyright/CIP page; otherwise keep looking.
    if grep -q -w -E '\(c\)|©|Catalog[u]?ing[- ]in[- ]Publication[- ]Data|Congress|CIP|transmitted|[Cc]opyright|acid[- ]free' <<<"$text"; then
      printf '%s\n' "${found%%$'\n'*}"
      return 0
    fi
  done
  return 1
}

# Move PDF $1 into directory $2 under the name <$3>.pdf, appending -2, -3, ...
# to the ISBN when the name is already in use.
store_as_isbn() {
  local pdf=$1 dest=$2 isbn=$3 n=2
  local target=${dest}/${isbn}.pdf
  while [[ -e $target ]]; do
    target=${dest}/${isbn}-${n}.pdf
    n=$(( n + 1 ))
  done
  mv -- "$pdf" "$target" || warn "could not move '$pdf' to '$target'"
}

# Entry point: main SOURCE_DIR DEST_DIR
main() {
  if (( $# < 2 )); then
    printf 'Usage: %s SOURCE_DIR DEST_DIR\n' "${0##*/}" >&2
    return 2
  fi
  local src=$1 dest=$2 tool pdf pages isbn

  for tool in file pdfinfo pdftotext; do
    if ! command -v "$tool" >/dev/null; then
      warn "required tool not found: $tool"
      return 1
    fi
  done

  if ! mkdir -p -- "$dest"; then
    warn "cannot create destination directory '$dest'"
    return 1
  fi

  # File names arrive NUL-delimited on descriptor 3 so that any name (spaces,
  # backslashes, newlines) survives intact and stdin stays untouched for the
  # tools run inside the loop.
  while IFS= read -r -d '' -u 3 pdf; do
    [[ -s $pdf ]] || continue
    # -b keeps the file name out of the output, so only the detected type
    # can match.
    file -b -- "$pdf" | grep -q -w '^PDF' || continue
    pages=$(pdf_page_count "$pdf")
    [[ $pages =~ ^[0-9]+$ ]] || continue
    isbn=$(find_isbn "$pdf" "$pages") || continue
    store_as_isbn "$pdf" "$dest" "$isbn"
  done 3< <(find "$src" -type f -print0)
  return 0
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment