Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save thimslugga/3d8f3bacb0b427d6a84b342a73cb7382 to your computer and use it in GitHub Desktop.
Save thimslugga/3d8f3bacb0b427d6a84b342a73cb7382 to your computer and use it in GitHub Desktop.
Install latest tesseract on Amazon Linux 2018.03
#!/bin/bash
#
# Sources:
# https://gist.github.com/IaroslavR/834066ba4c0e25a27078
# https://ivanzz1001.github.io/records/post/ocr/2017/09/08/tesseract-install
# https://groups.google.com/forum/#!topic/tesseract-ocr/u-PZaakaKs0
#
# Instructions:
# wget -c https://gist.githubusercontent.com/thimslugga/3d8f3bacb0b427d6a84b342a73cb7382/raw/e396348a46b65a8909aecf4c4fa3185ed2303c3a/install_tesseract_amazon_linux_201803.sh
# chmod +x install_tesseract_amazon_linux_201803.sh
# screen ./install_tesseract_amazon_linux_201803.sh
#
# The rest of this script installs packages with yum and writes under
# /usr/local, so refuse to continue unless running as root.
if (( EUID != 0 )); then
  # Diagnostics belong on stderr so they are not lost when stdout is piped.
  echo "This script must be run as root" >&2
  exit 1
fi
# Install the compiler toolchain plus the autotools and image libraries that
# the Leptonica and Tesseract source builds need.
yum -y groupinstall "development tools"
# Note: `aclocal` is not a package of its own; the tool is shipped as part of
# automake, so it is not listed separately here.
yum install -y wget git gcc-c++ cmake autoconf automake pkgconfig libtool \
  ImageMagick libjpeg-devel libpng-devel libtiff-devel zlib-devel
# Let pkg-config find .pc files installed under /usr/local/lib/pkgconfig.
# The variable is exported directly rather than by sourcing ~/.bashrc, so it
# takes effect in this script even if ~/.bashrc returns early for
# non-interactive shells. ${VAR:+...} only inserts the ':' separator when the
# variable already has a value, avoiding a stray empty path element.
export PKG_CONFIG_PATH="${PKG_CONFIG_PATH:+${PKG_CONFIG_PATH}:}/usr/local/lib/pkgconfig"

# Persist the setting for future shells. The line is written unexpanded
# (single quotes are intentional) so it is evaluated when ~/.bashrc is read,
# and it is appended only once so re-running the script adds no duplicates.
if ! grep -qsF '/usr/local/lib/pkgconfig' ~/.bashrc; then
  # shellcheck disable=SC2016
  printf '%s\n' \
    'export PKG_CONFIG_PATH="${PKG_CONFIG_PATH:+${PKG_CONFIG_PATH}:}/usr/local/lib/pkgconfig"' \
    >> ~/.bashrc
fi
# Download, build and install Leptonica 1.77.0 from source with the default
# configure options. Every step aborts the script on failure so that a broken
# download or a failed configure never reaches `make install`.
cd /usr/local/src || exit 1
wget -c http://www.leptonica.com/source/leptonica-1.77.0.tar.gz || exit 1
tar -zxvf leptonica-1.77.0.tar.gz || exit 1
cd leptonica-1.77.0 || exit 1
./configure || exit 1
# Bound parallelism to the number of CPUs: a bare `make -j` puts no limit on
# the number of simultaneous jobs and can exhaust memory on small instances.
make -j"$(nproc)" || exit 1
make install || exit 1
# Refresh the dynamic linker cache after installing the new library.
ldconfig
#make uninstall
#pkg-config --list-all | grep lept
# Export the following paths to compile tesseract.
# They are exported directly rather than by sourcing ~/.bashrc, so they take
# effect in this script regardless of what ~/.bashrc does in non-interactive
# shells. ${VAR:+...} avoids a leading ':' when LD_LIBRARY_PATH is unset; an
# empty entry would make the dynamic loader search the current directory.
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/lib"
export LIBLEPT_HEADERSDIR=/usr/local/include/leptonica

# Persist the settings for future shells. The lines are written unexpanded
# (single quotes are intentional) and only once, so re-running the script
# does not append duplicates.
if ! grep -qsF 'LIBLEPT_HEADERSDIR=' ~/.bashrc; then
  # shellcheck disable=SC2016
  printf '%s\n' \
    '# Export the following paths to compile tesseract' \
    'export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/lib"' \
    'export LIBLEPT_HEADERSDIR=/usr/local/include/leptonica' \
    >> ~/.bashrc
fi
# Download, build and install Tesseract 4.0.0 from the GitHub release tarball.
# Every step aborts the script on failure so that a broken download or a
# failed configure never reaches `make install`.
cd /usr/local/src || exit 1
#git clone https://github.com/tesseract-ocr/langdata.git
#git clone https://github.com/tesseract-ocr/tessdata.git
#git clone https://github.com/tesseract-ocr/tessdata_best.git
#git clone https://github.com/tesseract-ocr/tessdata_fast.git
wget -c https://github.com/tesseract-ocr/tesseract/archive/4.0.0.tar.gz || exit 1
tar -zxvf 4.0.0.tar.gz || exit 1
cd tesseract-4.0.0/ || exit 1
# Generate the configure script from the autotools sources.
./autogen.sh || exit 1
#autoreconf --force --install
autoreconf -i || exit 1
./configure || exit 1
#./configure --disable-graphics
# Bound parallelism to the number of CPUs: a bare `make -j` puts no limit on
# the number of simultaneous jobs and can exhaust memory on small instances.
make -j"$(nproc)" || exit 1
make install || exit 1
# Refresh the dynamic linker cache after installing the new library.
ldconfig
# Download and install tesseract language files (Tesseract 4.0.0 trained data files)
tessdata_dir=/usr/local/share/tessdata
# Make sure the target directory exists and that we are really inside it;
# otherwise the files would be saved into whatever directory we were left in.
mkdir -p "${tessdata_dir}" || exit 1
cd "${tessdata_dir}" || exit 1
for lang in chi_sim eng ori osd; do
  # A failed download is reported but does not abort the remaining languages.
  wget -c "https://github.com/tesseract-ocr/tessdata/raw/master/${lang}.traineddata" \
    || echo "warning: failed to download ${lang}.traineddata" >&2
done
# Best for lstm
#mkdir -p /usr/local/share/tessdata_best && cd /usr/local/share/tessdata_best
#wget -c https://github.com/tesseract-ocr/tessdata_best/raw/master/chi_sim.traineddata
#wget -c https://github.com/tesseract-ocr/tessdata_best/raw/master/chi_sim_vert.traineddata
#wget -c https://github.com/tesseract-ocr/tessdata_best/raw/master/eng.traineddata
#wget -c https://github.com/tesseract-ocr/tessdata_best/raw/master/ori.traineddata
#wget -c https://github.com/tesseract-ocr/tessdata_best/raw/master/osd.traineddata
# Fast
#mkdir -p /usr/local/share/tessdata_fast && cd /usr/local/share/tessdata_fast
#wget -c https://github.com/tesseract-ocr/tessdata_fast/raw/master/chi_sim.traineddata
#wget -c https://github.com/tesseract-ocr/tessdata_fast/raw/master/chi_sim_vert.traineddata
#wget -c https://github.com/tesseract-ocr/tessdata_fast/raw/master/eng.traineddata
#wget -c https://github.com/tesseract-ocr/tessdata_fast/raw/master/ori.traineddata
#wget -c https://github.com/tesseract-ocr/tessdata_fast/raw/master/osd.traineddata
# Point tesseract at the trained data directory. The variable is exported
# directly so it is set for the verification commands that follow without
# depending on ~/.bashrc being usable from a non-interactive shell.
export TESSDATA_PREFIX=/usr/local/share/tessdata

# Add the same line to the end of ~/.bashrc for future shells, but only once
# so that re-running the script does not append duplicates.
if ! grep -qsF 'TESSDATA_PREFIX=' ~/.bashrc; then
  printf '%s\n' 'export TESSDATA_PREFIX=/usr/local/share/tessdata' >> ~/.bashrc
fi
# Verify: print the installed version, then the languages tesseract can find.
for verify_flag in -v --list-langs; do
  tesseract "${verify_flag}"
done
# Example Commands:
# tesseract imagename outputbase [-l lang] [--psm pagesegmode] [configfile...]
# tesseract myscan.png out    (writes the recognized text to out.txt)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment