Created
May 13, 2013 20:11
-
-
Save pjaspers/5571082 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Ghetto OCR
#
# Requires:
# - brew install imagemagick
# - brew install tesseract --all-languages
#
# Takes PDFs and, for each one, writes a .txt file with whatever text it could OCR.
#######################################
# OCR one or more PDF files into plain-text files.
#
# For each input PDF: rasterize it to an intermediate monochrome TIFF with
# ImageMagick `convert` (-density 600), run `tesseract` on that TIFF with the
# "nld" language, then delete the TIFF. The tesseract output base is the
# input's basename without the ".pdf" suffix, so results land in the current
# directory (e.g. "dir/scan.pdf" -> "./scan.txt").
#
# Arguments: one or more paths to PDF files
# Outputs:   progress messages on stdout; usage and error messages on stderr
# Returns:   0 if every file was processed; 1 on a usage error or if
#            `convert` or `tesseract` failed for any file
#######################################
ocr() {
  if (( $# < 1 )); then
    echo "usage: ocr filename.pdf" >&2
    return 1
  fi

  # Keep everything local: this function is meant to be sourced into an
  # interactive shell and must not leak variables into it.
  local pdf file_name tif rc=0

  # "$@" (not $*) keeps paths that contain spaces or glob characters intact.
  for pdf in "$@"; do
    file_name="$(basename -- "$pdf" .pdf)"
    tif="${file_name}.tif"

    echo "Converting ${file_name} to tif"
    if ! convert -monochrome -density 600 "$pdf" "$tif"; then
      echo "ocr: failed to convert ${pdf}" >&2
      # Remove any partial output, then carry on with the remaining files.
      rm -f -- "$tif"
      rc=1
      continue
    fi

    echo "Starting OCR"
    # tesseract's stderr is deliberately discarded (as in the original);
    # failures are detected through its exit status instead.
    if ! tesseract -l nld "$tif" "$file_name" 2>/dev/null; then
      echo "ocr: tesseract failed on ${pdf}" >&2
      rc=1
    fi

    # The TIFF is only an intermediate artifact; always clean it up.
    rm -f -- "$tif"
  done

  echo "Done."
  return "$rc"
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment