Skip to content

Instantly share code, notes, and snippets.

@Turupawn
Last active November 13, 2019 21:24
Show Gist options
  • Save Turupawn/88fc893405ce6b2749b18715d14c47e7 to your computer and use it in GitHub Desktop.
Save Turupawn/88fc893405ce6b2749b18715d14c47e7 to your computer and use it in GitHub Desktop.
#!/bin/bash
# Stage 1: turn input.doc into input.pdf (when the .doc is present), then
# render the PDF pages as PNG files under image/ with the prefix "page".
# If only input.pdf exists, the conversion is skipped and the PDF is used as-is.
FILE=./input.doc
if [[ -f "$FILE" ]]; then
  # Convert the same path that was just tested instead of a second
  # hard-coded copy of the name. The next step expects the result at
  # ./input.pdf.
  if ! lowriter --convert-to pdf:writer_pdf_Export "$FILE"; then
    printf 'error: could not convert %s to PDF\n' "$FILE" >&2
    exit 1
  fi
fi

# Stop here rather than letting the later stages run against nothing.
if [[ ! -f input.pdf ]]; then
  printf 'error: input.pdf not found (and no %s to convert)\n' "$FILE" >&2
  exit 1
fi

# -p: a left-over image/ directory from an earlier run is not an error.
mkdir -p image
if ! pdftoppm input.pdf image/page -png; then
  printf 'error: pdftoppm could not render input.pdf\n' >&2
  exit 1
fi
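# DUPLICATE (disabled): save every page twice, as NNNA.png and NNNB.png, so
# that CROP EAST can trim the A copies and CROP WEST the B copies separately.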
#page=1
#for image_file in ./image/*.png; do
# additional_zeros=""
# if [ $page -lt 10 ]
# then
# additional_zeros="00"
# elif [ $page -lt 100 ]
# then
# additional_zeros="0"
# fi
# cp $image_file "./image/"$additional_zeros$page"A.png"
# mv $image_file "./image/"$additional_zeros$page"B.png"
# page=$((page+1))
#done
# CROP NORTH
#for image_file in ./image/*.png; do
# convert $image_file -gravity North -chop 0x474 $image_file
#done
# CROP SOUTH
#for image_file in ./image/*.png; do
# convert $image_file -gravity South -chop 0x426 $image_file
#done
# CROP EAST
#for image_file in ./image/*A.png; do
# convert $image_file -gravity East -chop 825x0 $image_file
#done
# CROP WEST
#for image_file in ./image/*B.png; do
# convert $image_file -gravity West -chop 825x0 $image_file
#done
# Stage 2: run tesseract (language "spa") on every PNG in ./image, giving each
# result a zero-padded sequential page number as its output base in ./text.

# Print page number $1 zero-padded to at least $2 digits (default 3).
page_stem() {
  printf '%0*d' "${2:-3}" "$1"
}

# -p: a left-over text/ directory from an earlier run is not an error.
mkdir -p text

# Keep the three-digit names for up to 999 pages, but widen the padding for
# larger documents so every name has the same length and a lexical glob over
# ./text still lists the pages in numeric order.
page_images=(./image/*.png)
pad_width=${#page_images[@]}   # number of page images ...
pad_width=${#pad_width}        # ... reduced to its digit count
if (( pad_width < 3 )); then
  pad_width=3
fi

page=1
for image_file in "${page_images[@]}"; do
  # An unmatched glob stays as the literal pattern; skip it instead of
  # handing a non-existent file to tesseract.
  [[ -e "$image_file" ]] || continue
  # Keep going after a failed page so one bad image does not lose the rest.
  tesseract "$image_file" "./text/$(page_stem "$page" "$pad_width")" -l spa \
    || printf 'warning: OCR failed for %s\n' "$image_file" >&2
  page=$((page + 1))
done
# Append every regular file matching "$1"/*.txt, in glob (lexical) order, to
# the file named by $2. The target is opened in append mode, so whatever it
# already contains is kept.
append_text_pages() {
  local text_file
  for text_file in "$1"/*.txt; do
    # An unmatched glob stays as the literal pattern; skip it instead of
    # passing a non-existent name to cat.
    [[ -f "$text_file" ]] || continue
    cat -- "$text_file" >>"$2"
  done
}

# Stage 3: merge the per-page text files into output.txt. Because this
# appends, remove output.txt before a re-run to avoid duplicated text.
append_text_pages ./text output.txt
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment