cmbaughman · October 5, 2014 03:25
diff --git a/find_ebooks.sh b/find_ebooks.sh
 #!/usr/bin/env bash
 #
 # find_ebooks.sh - rename PDF e-books after the ISBN-13 printed inside them.
 #
 # Usage: find_ebooks.sh SOURCE DEST_DIR
 #
 # Every non-empty PDF found below SOURCE that has more than 100 pages is
 # scanned one page at a time (front matter first, then the last six pages)
 # for a "978..." number that carries a valid ISBN-13 check digit.
 #   * A page with exactly one distinct ISBN settles the matter.
 #   * A page with several ISBNs is trusted only when it also contains
 #     wording typical of a copyright page; its first ISBN is then used.
 # The PDF is moved to DEST_DIR/<ISBN>.pdf, or to DEST_DIR/<ISBN>-<n>.pdf
 # (n = 2, 3, ...) when that name already exists.
 #
 # Needs file, pdfinfo and pdftotext on PATH.
 # Exit status: 0 on completion, 2 on a usage error, 1 when SOURCE does not
 # exist or DEST_DIR cannot be created.

 #######################################
 # Tests whether a string is a well-formed ISBN-13 with the 978 prefix.
 # Arguments: $1 - candidate string
 # Returns:   0 if $1 is exactly 13 digits, starts with 978 and its last
 #            digit is the correct check digit; 1 otherwise
 #######################################
 is_valid_isbn13() {
   local candidate=$1
   local -r isbn_re='^978[0-9]{10}$'
   [[ $candidate =~ $isbn_re ]] || return 1

   local -i sum=0 pos
   for (( pos = 0; pos < 12; pos++ )); do
     # Weights alternate 1,3,1,3,... starting with the leftmost digit.
     if (( pos % 2 )); then
       sum=$(( sum + ${candidate:pos:1} * 3 ))
     else
       sum=$(( sum + ${candidate:pos:1} ))
     fi
   done
   # The outer "% 10" maps a computed 10 onto the check digit 0.
   (( (10 - sum % 10) % 10 == ${candidate:12:1} ))
 }

 #######################################
 # Extracts ISBN-13 numbers from text.
 # Inputs:  page text on stdin
 # Outputs: every distinct valid ISBN-13 (digits only), one per line, in
 #          order of first appearance
 #######################################
 extract_isbns() {
   local raw digits
   grep -E -o '\<978[^A-Za-z:)(,/\\.]{9,13}[[:digit:]]\>' \
     | while IFS= read -r raw; do
         # Drop hyphens/spaces, then keep only the 13 digits that are
         # actually validated so trailing digits never leak into the name.
         digits=${raw//[!0-9]/}
         digits=${digits:0:13}
         if is_valid_isbn13 "$digits"; then
           printf '%s\n' "$digits"
         fi
       done \
     | awk '!seen[$0]++'
 }

 #######################################
 # Tests whether text looks like a copyright page.
 # Inputs:  page text on stdin
 # Returns: 0 if one of the marker words/phrases occurs as a whole word
 #######################################
 has_copyright_marker() {
   # "(c)" is escaped: as a bare ERE group it would match any lone word "c".
   grep -q -w -E '\(c\)|©|Catalog[u]?ing[- ]in[- ]Publication[- ]Data|Congress|CIP|transmitted|[Cc]opyright|acid[- ]free'
 }

 #######################################
 # Picks a destination path that is not taken yet.
 # Arguments: $1 - destination directory
 #            $2 - ISBN used as the base name
 # Outputs:   $1/$2.pdf if free, else the first free $1/$2-<n>.pdf, n >= 2
 #######################################
 unique_target() {
   local dir=$1 isbn=$2
   local target="${dir}/${isbn}.pdf"
   local -i n=2
   while [[ -e $target ]]; do
     target="${dir}/${isbn}-${n}.pdf"
     n=$(( n + 1 ))
   done
   printf '%s\n' "$target"
 }

 #######################################
 # Scans one file and moves it into the destination if an ISBN is found.
 # Arguments: $1 - path of the file to examine
 #            $2 - destination directory
 # Returns:   0 when the file is skipped or no ISBN is found, otherwise
 #            the exit status of mv
 #######################################
 process_pdf() {
   local pdf=$1 dest=$2
   local -r min_pages=100

   [[ -s $pdf ]] || return 0
   # -b keeps the file name out of the output, so only the detected type is
   # matched and never a name that happens to contain "PDF".
   file -b -- "$pdf" | grep -q -w '^PDF' || return 0

   local pages
   pages=$(pdfinfo "$pdf" 2>/dev/null \
     | awk -v min="$min_pages" '/^Pages/ && $2 + 0 > min + 0 { print $2; exit }')
   [[ -n $pages && $pages != *[!0-9]* ]] || return 0

   # Pages most likely to carry the ISBN come first: the front matter in
   # this fixed order, then the last six pages of the document.
   local -a order=(5 6 4 3 2 1 7 8 9 10 12 14 11 13)
   local -i back
   for (( back = 5; back >= 0; back-- )); do
     order+=( $(( pages - back )) )
   done

   local page text isbn
   local -a found
   for page in "${order[@]}"; do
     # Diagnostics are discarded; a page that yields no text has no ISBN.
     text=$(pdftotext -enc ASCII7 -f "$page" -l "$page" "$pdf" - 2>/dev/null)

     found=()
     while IFS= read -r isbn; do
       found+=("$isbn")
     done < <(extract_isbns <<<"$text")

     (( ${#found[@]} > 0 )) || continue
     if (( ${#found[@]} > 1 )) && ! has_copyright_marker <<<"$text"; then
       continue
     fi

     mv -- "$pdf" "$(unique_target "$dest" "${found[0]}")"
     return
   done
   return 0
 }

 #######################################
 # Entry point.
 # Arguments: $1 - file or directory to search
 #            $2 - destination directory (created if missing)
 # Returns:   0 on completion, 2 on usage error, 1 on missing source or
 #            uncreatable destination
 #######################################
 main() {
   if (( $# < 2 )) || [[ -z $1 || -z $2 ]]; then
     printf 'Usage: %s SOURCE DEST_DIR\n' "${0##*/}" >&2
     return 2
   fi
   local src=$1 dest=$2

   if [[ ! -e $src ]]; then
     printf '%s: no such file or directory: %s\n' "${0##*/}" "$src" >&2
     return 1
   fi
   mkdir -p -- "$dest" || return 1

   local pdf
   # NUL-delimited so names with spaces, newlines or backslashes survive.
   while IFS= read -r -d '' pdf; do
     process_pdf "$pdf" "$dest"
   done < <(find "$src" -type f -print0)
   return 0
 }

 main "$@"
	#!/usr/bin/env bash
	#
	# find_ebooks.sh - rename PDF e-books after the ISBN-13 printed inside them.
	#
	# Usage: find_ebooks.sh SOURCE DEST_DIR
	#
	# Every non-empty PDF found below SOURCE that has more than 100 pages is
	# scanned one page at a time (front matter first, then the last six pages)
	# for a "978..." number that carries a valid ISBN-13 check digit.
	#   * A page with exactly one distinct ISBN settles the matter.
	#   * A page with several ISBNs is trusted only when it also contains
	#     wording typical of a copyright page; its first ISBN is then used.
	# The PDF is moved to DEST_DIR/<ISBN>.pdf, or to DEST_DIR/<ISBN>-<n>.pdf
	# (n = 2, 3, ...) when that name already exists.
	#
	# Needs file, pdfinfo and pdftotext on PATH.
	# Exit status: 0 on completion, 2 on a usage error, 1 when SOURCE does not
	# exist or DEST_DIR cannot be created.

	#######################################
	# Tests whether a string is a well-formed ISBN-13 with the 978 prefix.
	# Arguments: $1 - candidate string
	# Returns:   0 if $1 is exactly 13 digits, starts with 978 and its last
	#            digit is the correct check digit; 1 otherwise
	#######################################
	is_valid_isbn13() {
	  local candidate=$1
	  local -r isbn_re='^978[0-9]{10}$'
	  [[ $candidate =~ $isbn_re ]] || return 1

	  local -i sum=0 pos
	  for (( pos = 0; pos < 12; pos++ )); do
	    # Weights alternate 1,3,1,3,... starting with the leftmost digit.
	    if (( pos % 2 )); then
	      sum=$(( sum + ${candidate:pos:1} * 3 ))
	    else
	      sum=$(( sum + ${candidate:pos:1} ))
	    fi
	  done
	  # The outer "% 10" maps a computed 10 onto the check digit 0.
	  (( (10 - sum % 10) % 10 == ${candidate:12:1} ))
	}

	#######################################
	# Extracts ISBN-13 numbers from text.
	# Inputs:  page text on stdin
	# Outputs: every distinct valid ISBN-13 (digits only), one per line, in
	#          order of first appearance
	#######################################
	extract_isbns() {
	  local raw digits
	  grep -E -o '\<978[^A-Za-z:)(,/\\.]{9,13}[[:digit:]]\>' \
	    | while IFS= read -r raw; do
	        # Drop hyphens/spaces, then keep only the 13 digits that are
	        # actually validated so trailing digits never leak into the name.
	        digits=${raw//[!0-9]/}
	        digits=${digits:0:13}
	        if is_valid_isbn13 "$digits"; then
	          printf '%s\n' "$digits"
	        fi
	      done \
	    | awk '!seen[$0]++'
	}

	#######################################
	# Tests whether text looks like a copyright page.
	# Inputs:  page text on stdin
	# Returns: 0 if one of the marker words/phrases occurs as a whole word
	#######################################
	has_copyright_marker() {
	  # "(c)" is escaped: as a bare ERE group it would match any lone word "c".
	  grep -q -w -E '\(c\)|©|Catalog[u]?ing[- ]in[- ]Publication[- ]Data|Congress|CIP|transmitted|[Cc]opyright|acid[- ]free'
	}

	#######################################
	# Picks a destination path that is not taken yet.
	# Arguments: $1 - destination directory
	#            $2 - ISBN used as the base name
	# Outputs:   $1/$2.pdf if free, else the first free $1/$2-<n>.pdf, n >= 2
	#######################################
	unique_target() {
	  local dir=$1 isbn=$2
	  local target="${dir}/${isbn}.pdf"
	  local -i n=2
	  while [[ -e $target ]]; do
	    target="${dir}/${isbn}-${n}.pdf"
	    n=$(( n + 1 ))
	  done
	  printf '%s\n' "$target"
	}

	#######################################
	# Scans one file and moves it into the destination if an ISBN is found.
	# Arguments: $1 - path of the file to examine
	#            $2 - destination directory
	# Returns:   0 when the file is skipped or no ISBN is found, otherwise
	#            the exit status of mv
	#######################################
	process_pdf() {
	  local pdf=$1 dest=$2
	  local -r min_pages=100

	  [[ -s $pdf ]] || return 0
	  # -b keeps the file name out of the output, so only the detected type is
	  # matched and never a name that happens to contain "PDF".
	  file -b -- "$pdf" | grep -q -w '^PDF' || return 0

	  local pages
	  pages=$(pdfinfo "$pdf" 2>/dev/null \
	    | awk -v min="$min_pages" '/^Pages/ && $2 + 0 > min + 0 { print $2; exit }')
	  [[ -n $pages && $pages != *[!0-9]* ]] || return 0

	  # Pages most likely to carry the ISBN come first: the front matter in
	  # this fixed order, then the last six pages of the document.
	  local -a order=(5 6 4 3 2 1 7 8 9 10 12 14 11 13)
	  local -i back
	  for (( back = 5; back >= 0; back-- )); do
	    order+=( $(( pages - back )) )
	  done

	  local page text isbn
	  local -a found
	  for page in "${order[@]}"; do
	    # Diagnostics are discarded; a page that yields no text has no ISBN.
	    text=$(pdftotext -enc ASCII7 -f "$page" -l "$page" "$pdf" - 2>/dev/null)

	    found=()
	    while IFS= read -r isbn; do
	      found+=("$isbn")
	    done < <(extract_isbns <<<"$text")

	    (( ${#found[@]} > 0 )) || continue
	    if (( ${#found[@]} > 1 )) && ! has_copyright_marker <<<"$text"; then
	      continue
	    fi

	    mv -- "$pdf" "$(unique_target "$dest" "${found[0]}")"
	    return
	  done
	  return 0
	}

	#######################################
	# Entry point.
	# Arguments: $1 - file or directory to search
	#            $2 - destination directory (created if missing)
	# Returns:   0 on completion, 2 on usage error, 1 on missing source or
	#            uncreatable destination
	#######################################
	main() {
	  if (( $# < 2 )) || [[ -z $1 || -z $2 ]]; then
	    printf 'Usage: %s SOURCE DEST_DIR\n' "${0##*/}" >&2
	    return 2
	  fi
	  local src=$1 dest=$2

	  if [[ ! -e $src ]]; then
	    printf '%s: no such file or directory: %s\n' "${0##*/}" "$src" >&2
	    return 1
	  fi
	  mkdir -p -- "$dest" || return 1

	  local pdf
	  # NUL-delimited so names with spaces, newlines or backslashes survive.
	  while IFS= read -r -d '' pdf; do
	    process_pdf "$pdf" "$dest"
	  done < <(find "$src" -type f -print0)
	  return 0
	}

	main "$@"