cimnine · November 26, 2021 16:31
diff --git a/ocr.sh b/ocr.sh
 #!/bin/bash

 # Enable errexit: the shell stops as soon as a command fails outside of a
 # condition.
 set -o errexit

 #######################################
 # OCR a single PDF from 'input/' into a searchable PDF in 'output/'.
 #
 # Steps: `convert` renders 'input/<name>.pdf' to 'output/<name>.tiff' at
 # density 300, `tesseract` (language 'deu', 300 dpi, pdf output) turns the
 # TIFF into 'output/<name>_deu.pdf', and the intermediate TIFF is removed.
 #
 # Arguments: $1 - name of the PDF without directory and without '.pdf'.
 # Outputs:   progress messages on stdout; stderr of both tools is merged
 #            into stdout.
 # Returns:   0 on success; the status of `convert`, `tesseract` or `rm`
 #            when one of them fails.
 #######################################
 analyze() {
 	local base="$1"
 	local tiff="output/$base.tiff"
 	local status=0

 	echo "Converting 'input/$base.pdf' to '$tiff'"
 	# Nothing to OCR when the conversion fails; hand its status to the caller.
 	convert -density 300 "input/$base.pdf" "$tiff" 2>&1 || return

 	echo "OCR of '$tiff' to 'output/${base}_deu.pdf'"
 	# Remember a failure instead of stopping, so the TIFF is still cleaned up.
 	tesseract "$tiff" "output/${base}_deu" -l deu --dpi 300 pdf 2>&1 || status=$?

 	echo "Removing '$tiff'"
 	rm -- "$tiff" || return

 	return "$status"
 }

 # Driver: run `analyze` for every PDF in 'input/', writing one log file per
 # document into 'output/'. Jobs run in the background in batches of
 # $parallel. The script exits with status 1 when at least one job failed.
 mkdir -p output

 i=0
 parallel=4
 failed=0
 pids=()

 # Waits for each job of the current batch and counts the ones that failed.
 # A bare `wait` always reports success, so every PID is waited on by itself.
 reap_batch() {
 	local pid
 	for pid in "${pids[@]}"; do
 		wait "$pid" || failed=$(( failed + 1 ))
 	done
 	pids=()
 }

 for PDF in input/*.pdf; do
 	# Without any match the glob stays literal; skip that non-existent path.
 	[[ -e "$PDF" ]] || continue

 	BASE="${PDF%.*}"
 	BASE="${BASE##*/}"

 	echo "Starting process for 'input/$BASE.pdf' in the background"
 	# Point stdout at the log first, then duplicate stderr onto it, so that
 	# both streams end up in the log file.
 	analyze "$BASE" > "output/$BASE.log" 2>&1 &
 	pids+=("$!")

 	# Pre-increment so the barrier falls after every $parallel-th job rather
 	# than after the very first one.
 	if (( ++i % parallel == 0 )); then
 		echo "Waiting on $parallel process to complete"
 		reap_batch
 	fi
 done

 echo "Waiting for completion"
 reap_batch

 if (( failed > 0 )); then
 	echo "$failed OCR job(s) failed; see the logs in 'output/'" >&2
 	exit 1
 fi
	#!/bin/bash

	# Enable errexit: the shell stops as soon as a command fails outside of a
	# condition.
	set -o errexit

	#######################################
	# OCR a single PDF from 'input/' into a searchable PDF in 'output/'.
	#
	# Steps: `convert` renders 'input/<name>.pdf' to 'output/<name>.tiff' at
	# density 300, `tesseract` (language 'deu', 300 dpi, pdf output) turns the
	# TIFF into 'output/<name>_deu.pdf', and the intermediate TIFF is removed.
	#
	# Arguments: $1 - name of the PDF without directory and without '.pdf'.
	# Outputs:   progress messages on stdout; stderr of both tools is merged
	#            into stdout.
	# Returns:   0 on success; the status of `convert`, `tesseract` or `rm`
	#            when one of them fails.
	#######################################
	analyze() {
		local base="$1"
		local tiff="output/$base.tiff"
		local status=0

		echo "Converting 'input/$base.pdf' to '$tiff'"
		# Nothing to OCR when the conversion fails; hand its status to the caller.
		convert -density 300 "input/$base.pdf" "$tiff" 2>&1 || return

		echo "OCR of '$tiff' to 'output/${base}_deu.pdf'"
		# Remember a failure instead of stopping, so the TIFF is still cleaned up.
		tesseract "$tiff" "output/${base}_deu" -l deu --dpi 300 pdf 2>&1 || status=$?

		echo "Removing '$tiff'"
		rm -- "$tiff" || return

		return "$status"
	}

	# Driver: run `analyze` for every PDF in 'input/', writing one log file per
	# document into 'output/'. Jobs run in the background in batches of
	# $parallel. The script exits with status 1 when at least one job failed.
	mkdir -p output

	i=0
	parallel=4
	failed=0
	pids=()

	# Waits for each job of the current batch and counts the ones that failed.
	# A bare `wait` always reports success, so every PID is waited on by itself.
	reap_batch() {
		local pid
		for pid in "${pids[@]}"; do
			wait "$pid" || failed=$(( failed + 1 ))
		done
		pids=()
	}

	for PDF in input/*.pdf; do
		# Without any match the glob stays literal; skip that non-existent path.
		[[ -e "$PDF" ]] || continue

		BASE="${PDF%.*}"
		BASE="${BASE##*/}"

		echo "Starting process for 'input/$BASE.pdf' in the background"
		# Point stdout at the log first, then duplicate stderr onto it, so that
		# both streams end up in the log file.
		analyze "$BASE" > "output/$BASE.log" 2>&1 &
		pids+=("$!")

		# Pre-increment so the barrier falls after every $parallel-th job rather
		# than after the very first one.
		if (( ++i % parallel == 0 )); then
			echo "Waiting on $parallel process to complete"
			reap_batch
		fi
	done

	echo "Waiting for completion"
	reap_batch

	if (( failed > 0 )); then
		echo "$failed OCR job(s) failed; see the logs in 'output/'" >&2
		exit 1
	fi
No results found