Forked from IaroslavR/gist:834066ba4c0e25a27078
Last active
January 18, 2019 22:37
-
-
Save thimslugga/3d8f3bacb0b427d6a84b342a73cb7382 to your computer and use it in GitHub Desktop.
Install latest tesseract on Amazon Linux 2018.03
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Build and install Leptonica and Tesseract from source on Amazon Linux
# 2018.03, then download a small set of trained-data files.
#
# Must be run as root: it installs packages with yum, writes under
# /usr/local, runs ldconfig, and appends environment exports to ~/.bashrc.
#
# Sources:
#   https://gist.github.com/IaroslavR/834066ba4c0e25a27078
#   https://ivanzz1001.github.io/records/post/ocr/2017/09/08/tesseract-install
#   https://groups.google.com/forum/#!topic/tesseract-ocr/u-PZaakaKs0
#
# Instructions:
#   wget -c https://gist.githubusercontent.com/thimslugga/3d8f3bacb0b427d6a84b342a73cb7382/raw/e396348a46b65a8909aecf4c4fa3185ed2303c3a/install_tesseract_amazon_linux_201803.sh
#   chmod +x install_tesseract_amazon_linux_201803.sh
#   screen ./install_tesseract_amazon_linux_201803.sh
#

# Abort on the first failing command, unset variable, or failed pipeline
# stage, so a broken download/configure never leads to `make install`
# running in the wrong directory.
set -euo pipefail

readonly LEPTONICA_VERSION='1.77.0'
readonly TESSERACT_VERSION='4.0.0'
readonly SRC_DIR='/usr/local/src'
readonly TESSDATA_DIR='/usr/local/share/tessdata'
readonly TESSDATA_LANGS=(chi_sim eng ori osd)

#######################################
# Print a message to stderr and exit non-zero.
# Arguments: $* - message
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Append a line to ~/.bashrc unless an identical line is already present,
# so re-running the installer does not pile up duplicate entries.
# Arguments: $1 - the exact line to persist
#######################################
persist_line() {
  local line=$1
  local bashrc=~/.bashrc
  if [[ ! -f "$bashrc" ]] || ! grep -qxF -- "$line" "$bashrc"; then
    printf '%s\n' "$line" >> "$bashrc"
  fi
}

#######################################
# Install the compiler toolchain and the image libraries needed by the
# Leptonica and Tesseract builds.
#######################################
install_build_deps() {
  yum -y groupinstall "development tools"
  # `aclocal` is intentionally not listed: it is shipped by the automake
  # package and is not a package name of its own.
  yum install -y wget git gcc-c++ cmake autoconf automake pkgconfig libtool \
    ImageMagick libjpeg-devel libpng-devel libtiff-devel zlib-devel
}

#######################################
# Download, build and install Leptonica into /usr/local.
# Globals: LEPTONICA_VERSION, SRC_DIR (read); PKG_CONFIG_PATH (written)
#######################################
build_leptonica() {
  # Written literally (single quotes) so the expansion happens when
  # ~/.bashrc is sourced, not frozen to the value at install time; the
  # ${VAR:+...} form avoids a leading ':' when the variable is empty.
  persist_line 'export PKG_CONFIG_PATH="${PKG_CONFIG_PATH:+${PKG_CONFIG_PATH}:}/usr/local/lib/pkgconfig"'
  # Export directly instead of sourcing ~/.bashrc from a non-interactive shell.
  export PKG_CONFIG_PATH="${PKG_CONFIG_PATH:+${PKG_CONFIG_PATH}:}/usr/local/lib/pkgconfig"

  cd "$SRC_DIR"
  # NOTE(review): plain-HTTP download with no checksum verification.
  wget -c "http://www.leptonica.com/source/leptonica-${LEPTONICA_VERSION}.tar.gz"
  tar -zxvf "leptonica-${LEPTONICA_VERSION}.tar.gz"
  cd "leptonica-${LEPTONICA_VERSION}"
  ./configure
  # Bound parallelism to the CPU count; a bare `make -j` spawns unlimited jobs.
  make -j"$(nproc)"
  make install
  ldconfig
  #make uninstall
  #pkg-config --list-all | grep lept
}

#######################################
# Download, build and install Tesseract into /usr/local.
# Globals: TESSERACT_VERSION, SRC_DIR (read);
#          LD_LIBRARY_PATH, LIBLEPT_HEADERSDIR (written)
#######################################
build_tesseract() {
  persist_line '# Export the following paths to compile tesseract'
  persist_line 'export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/lib"'
  persist_line 'export LIBLEPT_HEADERSDIR=/usr/local/include/leptonica'
  export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/lib"
  export LIBLEPT_HEADERSDIR=/usr/local/include/leptonica

  cd "$SRC_DIR"
  #git clone https://github.com/tesseract-ocr/langdata.git
  #git clone https://github.com/tesseract-ocr/tessdata.git
  #git clone https://github.com/tesseract-ocr/tessdata_best.git
  #git clone https://github.com/tesseract-ocr/tessdata_fast.git
  wget -c "https://github.com/tesseract-ocr/tesseract/archive/${TESSERACT_VERSION}.tar.gz"
  tar -zxvf "${TESSERACT_VERSION}.tar.gz"
  cd "tesseract-${TESSERACT_VERSION}/"
  ./autogen.sh
  #autoreconf --force --install
  autoreconf -i
  ./configure
  #./configure --disable-graphics
  make -j"$(nproc)"
  make install
  ldconfig
}

#######################################
# Download the Tesseract 4.0.0 trained-data files and point
# TESSDATA_PREFIX at them.
# Globals: TESSDATA_DIR, TESSDATA_LANGS (read); TESSDATA_PREFIX (written)
#######################################
install_tessdata() {
  local lang

  # Make sure the target exists before entering it.
  mkdir -p -- "$TESSDATA_DIR"
  cd "$TESSDATA_DIR"
  for lang in "${TESSDATA_LANGS[@]}"; do
    wget -c "https://github.com/tesseract-ocr/tessdata/raw/master/${lang}.traineddata"
  done

  # Best for lstm
  #mkdir -p /usr/local/share/tessdata_best && cd /usr/local/share/tessdata_best
  #wget -c https://github.com/tesseract-ocr/tessdata_best/raw/master/chi_sim.traineddata
  #wget -c https://github.com/tesseract-ocr/tessdata_best/raw/master/chi_sim_vert.traineddata
  #wget -c https://github.com/tesseract-ocr/tessdata_best/raw/master/eng.traineddata
  #wget -c https://github.com/tesseract-ocr/tessdata_best/raw/master/ori.traineddata
  #wget -c https://github.com/tesseract-ocr/tessdata_best/raw/master/osd.traineddata
  # Fast (use /raw/, not /blob/, otherwise wget saves the HTML page)
  #mkdir -p /usr/local/share/tessdata_fast && cd /usr/local/share/tessdata_fast
  #wget -c https://github.com/tesseract-ocr/tessdata_fast/raw/master/chi_sim.traineddata
  #wget -c https://github.com/tesseract-ocr/tessdata_fast/raw/master/chi_sim_vert.traineddata
  #wget -c https://github.com/tesseract-ocr/tessdata_fast/raw/master/eng.traineddata
  #wget -c https://github.com/tesseract-ocr/tessdata_fast/raw/master/ori.traineddata
  #wget -c https://github.com/tesseract-ocr/tessdata_fast/raw/master/osd.traineddata

  persist_line "export TESSDATA_PREFIX=${TESSDATA_DIR}"
  export TESSDATA_PREFIX="$TESSDATA_DIR"
}

#######################################
# Entry point: check privileges, then run each install stage in order and
# print the installed version and available languages as a sanity check.
#######################################
main() {
  if [[ $EUID -ne 0 ]]; then
    die "This script must be run as root"
  fi

  install_build_deps
  build_leptonica
  build_tesseract
  install_tessdata

  # Verify:
  tesseract -v
  tesseract --list-langs
}

main "$@"

# Example Commands:
# tesseract imagename outputbase [-l lang] [--psm pagesegmode] [configfile...]
# tesseract myscan.png out    # writes out.txt
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment