Skip to content

Instantly share code, notes, and snippets.

@nurrony
Last active January 11, 2024 07:43
Show Gist options
  • Save nurrony/7a0dd0eec35ad1b16d5034ee002910c4 to your computer and use it in GitHub Desktop.
Save nurrony/7a0dd0eec35ad1b16d5034ee002910c4 to your computer and use it in GitHub Desktop.
Install Tesseract v5.x.x and Training Data 4.1 on Amazon Linux 2023
#!/usr/bin/env bash
#
# Builds and installs Leptonica and Tesseract OCR from source and downloads
# trained language data. The versions and download locations used by the
# rest of the script are pinned below.

# Strict mode: exit on unhandled errors, on use of unset variables, and when
# any stage of a pipeline fails. Note that -e does not fire for a non-final
# command inside an `&&` list, so critical steps still need explicit checks.
set -euo pipefail

# Tesseract source release; GitHub serves the archive as "<version>.tar.gz".
readonly TESSERACT_VERSION=5.3.3
# Trained-data (tessdata) version this script targets.
readonly TESSERACT_TRDATA_VERSION=4.1.0
readonly TESSERACT_FILE="${TESSERACT_VERSION}.tar.gz"
readonly TESSERACT_URL="https://github.com/tesseract-ocr/tesseract/archive/${TESSERACT_FILE}"

# Leptonica source release (image library that Tesseract is built against).
readonly LEPTONICA_VERSION=1.84.1
readonly LEPTONICA_FILE="leptonica-${LEPTONICA_VERSION}.tar.gz"
readonly LEPTONICA_URL="https://github.com/DanBloomberg/leptonica/releases/download/${LEPTONICA_VERSION}/${LEPTONICA_FILE}"
# Upgrade the system to the pinned --releasever, then install the compiler
# toolchain plus the image/text libraries and headers used by the source
# builds that follow. Each package is listed once (the list previously
# repeated several entries).
packages=(
  # Build tooling (./autogen.sh and ./configure are run later on).
  autoconf automake libtool
  # wget is invoked by the download steps of this script; listed explicitly
  # in case the base image does not ship it.
  wget
  # Image format libraries and headers.
  libjpeg libjpeg-devel libjpeg-turbo-devel
  libpng libpng-devel
  libtiff libtiff-devel
  libwebp libwebp-devel
  giflib
  zlib zlib-devel
  openjpeg2-devel openjpeg2-tools
  ImageMagick ImageMagick-devel ImageMagick-perl
  ghostscript
  # Text rendering / Unicode libraries and headers.
  cairo cairo-devel cairomm-devel
  pango pango-devel pangomm pangomm-devel
  libicu libicu-devel
  libcurl-devel
)

{
  # -y on every step so the script never blocks on a confirmation prompt.
  sudo dnf upgrade -y --releasever=2023.3.20240108 &&
  sudo dnf groupinstall -y "Development Tools" &&
  sudo dnf install -y "${packages[@]}" &&
  sudo dnf clean all
} || {
  # Without this guard the script would carry on and try to build without
  # the required toolchain.
  echo "Package installation failed; aborting." >&2
  exit 1
}
# Build and install Leptonica from source, then leave the shell in
# ~/libs/tesseract, where the Tesseract sources are downloaded next.
# Every step is chained, and a failure anywhere aborts the script instead of
# letting later steps run from an unexpected directory.
{
  # Mirror the distro's image libraries into /usr/local/lib.
  # NOTE(review): the reason for these copies is not visible in this script
  # (possibly so /usr/local can be packaged on its own) — confirm.
  sudo cp /usr/lib64/libjpeg.so.62 \
    /usr/lib64/libwebp.so.7 \
    /usr/lib64/libtiff.so.5 \
    /usr/lib64/libpng16.so.16 \
    /usr/local/lib/ &&
  echo "Creating folders" &&
  # -p keeps the script re-runnable when the directories already exist.
  mkdir -p "$HOME/libs/leptonica" &&
  cd "$HOME/libs/leptonica" &&
  echo "installing leptonica..." &&
  # -O pins the output name so a re-run overwrites instead of creating
  # "<file>.1" and then extracting a stale archive.
  wget -O "$LEPTONICA_FILE" "$LEPTONICA_URL" &&
  tar -zxvf "$LEPTONICA_FILE" &&
  rm -f -- "$LEPTONICA_FILE" &&
  cd "leptonica-$LEPTONICA_VERSION" &&
  # Takes ~5 min. on a t2.micro instance (Free Tier).
  ./configure &&
  make &&
  sudo make install &&
  echo 'Installing Tesseract...' &&
  mkdir -p "$HOME/libs/tesseract" &&
  cd "$HOME/libs/tesseract"
} || {
  echo "Leptonica build failed; aborting." >&2
  exit 1
}
# Build and install Tesseract from source, download the trained language
# data, and persist TESSDATA_PREFIX for future shells.
tessdata_dir=/usr/local/share/tessdata
# Fetch trained data from the pinned tessdata version rather than the moving
# "main" branch, so repeated installs get the same files.
tessdata_url="https://github.com/tesseract-ocr/tessdata/raw/${TESSERACT_TRDATA_VERSION}"

{
  # Do not rely on the working directory left behind by earlier steps.
  mkdir -p "$HOME/libs/tesseract" &&
  cd "$HOME/libs/tesseract" &&
  # -O pins the output name so a re-run overwrites the previous download.
  wget -O "$TESSERACT_FILE" "$TESSERACT_URL" &&
  tar -zxvf "$TESSERACT_FILE" &&
  # The archive was downloaded as the current user; no sudo needed.
  rm -f -- "$TESSERACT_FILE" &&
  cd "tesseract-$TESSERACT_VERSION" &&
  # Leptonica is configured earlier with the default prefix, so its
  # pkg-config file is expected under /usr/local/lib/pkgconfig; expose it in
  # case pkg-config does not search that directory by default.
  export PKG_CONFIG_PATH="/usr/local/lib/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}" &&
  # Takes ~10 min. on a t2.micro instance (Free Tier).
  ./autogen.sh &&
  ./configure &&
  make &&
  sudo make install &&
  # Refresh the dynamic linker cache after installing new shared libraries.
  sudo ldconfig
} || {
  echo "Tesseract build failed; aborting." >&2
  exit 1
}

echo 'Installing Training Data...'
for lang in fra eng; do
  # Explicit -O target: re-runs replace the file instead of leaving
  # "<lang>.traineddata.1" copies that Tesseract would never read.
  sudo wget -O "${tessdata_dir}/${lang}.traineddata" \
    "${tessdata_url}/${lang}.traineddata" || {
    echo "Failed to download ${lang}.traineddata; aborting." >&2
    exit 1
  }
done

# Append the export only once so repeated runs do not bloat ~/.bashrc.
profile_line='export TESSDATA_PREFIX=/usr/local/share/tessdata'
if ! grep -qxF -- "$profile_line" "$HOME/.bashrc" 2>/dev/null; then
  printf '%s\n' "$profile_line" >> "$HOME/.bashrc"
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment