sepastian · June 18, 2025 12:13
diff --git a/img2pdf.sh b/img2pdf.sh
 # UPDATE: new version is a one-liner, using GNU parallel.
 #
 # For each image in img/, create a searchable PDF in pdf/.
 #
 # Requires tesseract and GNU parallel.
 #
 # Note: the CPU used had 12 cores;
 # specifying -j 4 runs 4 parallel processes;
 # not specifying -j would result in using all cores, which was very slow;
 # it may be possible to use between 4 and 12 cores, needs testing.
 # Create the output directory; -p keeps re-runs quiet when pdf/ already exists.
 mkdir -p pdf
 # Feed every matching image to tesseract, 4 jobs at a time.
 # The list is NUL-delimited (-print0 / -0) so that file names containing
 # newlines or other unusual characters reach parallel as a single argument.
 # {} is the input path; {/.} is its basename without extension, which
 # tesseract uses as the output base name (pdf/<name>.pdf).
 find img/ -type f -name 'page_*jpg' -print0 | parallel -0 -j 4 --verbose 'tesseract {} pdf/{/.} pdf'
	# UPDATE: new version is a one-liner, using GNU parallel.
	#
	# For each image in img/, create a searchable PDF in pdf/.
	#
	# Requires tesseract and GNU parallel.
	#
	# Note: the CPU used had 12 cores;
	# specifying -j 4 runs 4 parallel processes;
	# not specifying -j would result in using all cores, which was very slow;
	# it may be possible to use between 4 and 12 cores, needs testing.
	# Create the output directory; -p keeps re-runs quiet when pdf/ already exists.
	mkdir -p pdf
	# Feed every matching image to tesseract, 4 jobs at a time.
	# The pipe must be a real, unescaped '|': a backslash-escaped pipe is handed
	# to find as a literal argument and parallel never runs.
	# The list is NUL-delimited (-print0 / -0) so that file names containing
	# newlines or other unusual characters reach parallel as a single argument.
	# {} is the input path; {/.} is its basename without extension, which
	# tesseract uses as the output base name (pdf/<name>.pdf).
	find img/ -type f -name 'page_*jpg' -print0 | parallel -0 -j 4 --verbose 'tesseract {} pdf/{/.} pdf'