Skip to content

Instantly share code, notes, and snippets.

@mperlet
Created March 15, 2017 09:47
Show Gist options
  • Save mperlet/c875c3456fd69ebed1525795ec261391 to your computer and use it in GitHub Desktop.
Save mperlet/c875c3456fd69ebed1525795ec261391 to your computer and use it in GitHub Desktop.
CAP Speiseplan Tweetbot
#!/bin/bash
# CAP Speiseplan Tweetbot - stage 1: locate and download this week's menu PDF.
#
# The market detail page links to PDFs whose names contain
# "Speiseplan_<dd.mm>", where <dd.mm> is the Monday of the current week.
# The matching PDF is saved as menu.pdf in the current directory.
# Requires GNU date (--date) and GNU grep (-o, -m).
echo "######################################"
echo "######### CRAWL MENU #################"
echo "######################################"

# On a Monday "this monday" is today; on every other day the week's Monday
# is the most recent one, i.e. "last monday".
if [[ $(date '+%u') -eq 1 ]]; then
  DAYFILTER="this"
else
  DAYFILTER="last"
fi
FILTER="Speiseplan_$(date --date="$DAYFILTER monday" +%d.%m)"

MENU_PAGE_URL="http://www.cap-markt.de/nc/cap-maerkte/cap-marktdetails.html?tx_hgcapmarkt_pi1%5Buser%5D=214&tx_hgcapmarkt_pi1%5Baction%5D=show&tx_hgcapmarkt_pi1%5Bcontroller%5D=User&cHash=b220d37c66fb90e68973a3c05a1fb9df"

# Fetch the page separately so a network/HTTP failure is detected instead of
# silently yielding an empty link.
if ! page_html=$(curl --fail "$MENU_PAGE_URL"); then
  echo "Could not fetch menu page: $MENU_PAGE_URL" >&2
  exit 1
fi

# Extract every href ending in "pdf", strip the surrounding '<a href="' and
# closing quote, then keep the first link containing the filter string.
# -F matches the filter literally (the dot in dd.mm must not act as a regex
# wildcard); -- protects against a pattern starting with '-'.
REL_PDF_LINK=$(printf '%s\n' "$page_html" \
  | grep -o "<a href=['\"][^\"']*pdf['\"]" \
  | sed -e "s/^<a href=[\"']//" -e "s/[\"']\$//" \
  | grep -m 1 -F -- "$FILTER")

if [[ -z "$REL_PDF_LINK" ]]; then
  echo "No PDF link matching '$FILTER' found on menu page" >&2
  exit 1
fi

echo "Download http://www.cap-markt.de$REL_PDF_LINK"
if ! curl --fail "http://www.cap-markt.de$REL_PDF_LINK" > menu.pdf; then
  echo "Download of menu PDF failed" >&2
  exit 1
fi
# Stage 2a: crop the downloaded menu down to the column of the current weekday.
printf '%s\n' \
  "######################################" \
  "######### CROP AND EXTRACT ###########" \
  "######################################" \
  "precrop"

# First pass: cut away the page margins around the weekly table.
pdfcrop --margins '-77 -260 -22 -250' menu.pdf precrop.pdf

# Second pass: per-weekday margins (index = ISO weekday from `date +%u`,
# 1 = Monday ... 5 = Friday). Each entry is passed verbatim to
# pdfcrop --margins.
weekday_margins=(
  [1]='0 0 -398 0'
  [2]='-100 0 -300 0'
  [3]='-200 0 -200 0'
  [4]='-300 0 -100 0'
  [5]='-400 0 0 0'
)
today="$(date +%u)"
crop_margins="${weekday_margins[today]}"

# Any weekday without an entry (6, 7) reports FAIL and falls back to the
# Monday margins.
if [ -z "$crop_margins" ]; then
  echo "FAIL"
  crop_margins='0 0 -398 0'
fi

pdfcrop --margins "$crop_margins" precrop.pdf croped.pdf
# Stage 2b: turn the cropped PDF into a list of menu lines, preferring the
# PDF's embedded text and falling back to OCR only when that text is empty.

# Filter stdin into menu lines on stdout: join all input lines into one,
# break it at every space, drop blank lines, then drop lines of 7 characters
# or fewer. Relies on GNU sed (\n in the replacement, \s, -r, {,7}).
# NOTE(review): splitting on every single space yields one token per line -
# confirm that a single space is the intended separator.
split_menu_lines() {
  tr "\n" " " | sed "s# #\n#g" | sed '/^\s*$/d' | sed -r '/^.{,7}$/d'
}

# Replace text file $1 with OCR file $2, but only when $1 is missing or
# empty. A non-empty $1 is left untouched.
# (The previous one-liner `[ -s f ] || echo ... && rm ... && mv ...` parsed as
# `((A || B) && C) && D`, so it overwrote the text result unconditionally.)
fallback_to_ocr_if_empty() {
  local text_file=$1
  local ocr_file=$2
  if [ -s "$text_file" ]; then
    return 0
  fi
  echo "file is empty"
  mv -f -- "$ocr_file" "$text_file"
}

# pdftotext writes croped.txt next to croped.pdf.
pdftotext croped.pdf
# Earlier rasterisation variants, kept for reference:
#convert croped.pdf -resize 1000% -format TIFF -depth 16 croped.tif
#convert -colorspace rgb -density 400 croped.pdf -resize 400% -monochrome croped.tif
convert -density 300 croped.pdf -quality 100 croped.tif
# tesseract writes its result to output.txt (German language data).
tesseract -l deu croped.tif output

split_menu_lines < output.txt > menu_line_by_line_ocr.txt
split_menu_lines < croped.txt > menu_line_by_line.txt
fallback_to_ocr_if_empty menu_line_by_line.txt menu_line_by_line_ocr.txt
# Stage 3: let the operator review/edit the extracted menu and confirm it
# before anything is tweeted.
echo "######################################"
echo "############ OUTPUT ##################"
echo "######################################"
echo
# Fall back to vi when EDITOR is unset; otherwise the file name itself would
# be executed as a command. Left unquoted on purpose so that an EDITOR value
# carrying arguments (e.g. "code -w") still works.
# shellcheck disable=SC2086
${EDITOR:-vi} menu_line_by_line.txt
echo
cat menu_line_by_line.txt
echo
# Keep asking until a clear yes/no is given. Without the loop, `break` is an
# error and any unrecognised answer fell through to tweeting.
while true; do
  # Abort on EOF / closed stdin instead of looping forever or tweeting
  # unconfirmed content.
  if ! read -r -p "Menü Okey? y/n" yn; then
    echo
    exit 1
  fi
  case "$yn" in
    [Yy]*)
      echo "dann mal los..."
      break
      ;;
    [Nn]*)
      exit
      ;;
    *)
      echo "Please answer yes or no."
      ;;
  esac
done
# Stage 4: post each menu line as its own tweet, pausing 2 seconds between
# posts. tweet.sh/tweet.sh is a relative path, so it is resolved against the
# current working directory.
# IFS= and -r keep each line exactly as written (no whitespace trimming, no
# backslash interpretation).
while IFS= read -r line; do
  tweet.sh/tweet.sh tweet "$line"
  sleep 2
done < menu_line_by_line.txt
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment