Last active
January 11, 2024 07:43
-
-
Save nurrony/7a0dd0eec35ad1b16d5034ee002910c4 to your computer and use it in GitHub Desktop.
Install Tesseract v5.x.x and Training Data 4.1 on Amazon Linux 2023
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Build and install Leptonica and Tesseract from source on Amazon Linux 2023,
# then download the English and French trained-data files.
#
# Requires: sudo privileges and network access.
# Every *_VERSION value and RELEASEVER may be overridden from the environment.
#
# The script aborts on the first failing step so that a later step never runs
# in the wrong directory or against a half-finished build.

set -euo pipefail

readonly TESSERACT_VERSION="${TESSERACT_VERSION:-5.3.3}"
readonly TESSERACT_TRDATA_VERSION="${TESSERACT_TRDATA_VERSION:-4.1.0}"
readonly TESSERACT_FILE="${TESSERACT_VERSION}.tar.gz"
readonly TESSERACT_URL="https://github.com/tesseract-ocr/tesseract/archive/${TESSERACT_FILE}"
readonly LEPTONICA_VERSION="${LEPTONICA_VERSION:-1.84.1}"
readonly LEPTONICA_FILE="leptonica-${LEPTONICA_VERSION}.tar.gz"
readonly LEPTONICA_URL="https://github.com/DanBloomberg/leptonica/releases/download/${LEPTONICA_VERSION}/${LEPTONICA_FILE}"
readonly TESSDATA_URL="https://github.com/tesseract-ocr/tessdata/raw/${TESSERACT_TRDATA_VERSION}"

# Amazon Linux 2023 release the system is upgraded to before building.
readonly RELEASEVER="${RELEASEVER:-2023.3.20240108}"

readonly BUILD_ROOT="${HOME}/libs"
readonly TESSDATA_DIR=/usr/local/share/tessdata
readonly TESSDATA_EXPORT="export TESSDATA_PREFIX=${TESSDATA_DIR}"

# Languages whose trained data is downloaded.
readonly -a TRAINEDDATA_LANGS=(fra eng)

# Shared libraries copied next to the locally built binaries.
readonly -a SYSTEM_LIBS=(
  /usr/lib64/libjpeg.so.62
  /usr/lib64/libwebp.so.7
  /usr/lib64/libtiff.so.5
  /usr/lib64/libpng16.so.16
)

# Build and runtime dependencies (duplicates from the original list removed;
# wget added because the download steps below depend on it).
readonly -a PACKAGES=(
  autoconf automake libtool wget
  zlib zlib-devel
  giflib
  libjpeg libjpeg-devel libjpeg-turbo-devel
  libpng libpng-devel
  libtiff libtiff-devel
  libwebp libwebp-devel
  openjpeg2-devel openjpeg2-tools
  ImageMagick ImageMagick-devel ImageMagick-perl
  ghostscript
  cairo cairo-devel cairomm-devel
  pango pango-devel pangomm pangomm-devel
  libicu libicu-devel
  libcurl-devel
)

# Upgrade the system and install the toolchain and libraries.
install_packages() {
  # -y everywhere: the script is meant to run unattended.
  sudo dnf upgrade -y --releasever="${RELEASEVER}"
  sudo dnf groupinstall -y "Development Tools"
  sudo dnf install -y "${PACKAGES[@]}"
  sudo dnf clean all
}

# Copy the distro image libraries into /usr/local/lib.
copy_system_libs() {
  local lib
  for lib in "${SYSTEM_LIBS[@]}"; do
    sudo cp -- "${lib}" /usr/local/lib/
  done
}

# Download, build and install Leptonica under /usr/local.
# Takes ~5 min. on a T2.micro instance (Free Tier).
build_leptonica() {
  echo "installing leptonica..."
  mkdir -p "${BUILD_ROOT}/leptonica"
  cd "${BUILD_ROOT}/leptonica"
  wget -O "${LEPTONICA_FILE}" "${LEPTONICA_URL}"
  tar -zxvf "${LEPTONICA_FILE}"
  rm -f -- "${LEPTONICA_FILE}"
  cd "leptonica-${LEPTONICA_VERSION}"
  ./configure
  make
  sudo make install
}

# Download, build and install Tesseract under /usr/local.
# Takes ~10 min. on a T2.micro instance (Free Tier).
build_tesseract() {
  echo 'Installing Tesseract...'
  mkdir -p "${BUILD_ROOT}/tesseract"
  cd "${BUILD_ROOT}/tesseract"
  wget -O "${TESSERACT_FILE}" "${TESSERACT_URL}"
  tar -zxvf "${TESSERACT_FILE}"
  rm -f -- "${TESSERACT_FILE}"
  cd "tesseract-${TESSERACT_VERSION}"
  # Let configure locate the Leptonica just installed under /usr/local;
  # that prefix is not necessarily on pkg-config's default search path.
  export PKG_CONFIG_PATH="/usr/local/lib/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}"
  ./autogen.sh
  ./configure
  make
  sudo make install
  sudo ldconfig
}

# Fetch trained data for each configured language and persist TESSDATA_PREFIX.
install_traineddata() {
  echo 'Installing Training Data...'
  sudo mkdir -p "${TESSDATA_DIR}"
  local lang
  for lang in "${TRAINEDDATA_LANGS[@]}"; do
    # -O overwrites in place, so a re-run does not leave *.traineddata.1 copies.
    sudo wget -O "${TESSDATA_DIR}/${lang}.traineddata" \
      "${TESSDATA_URL}/${lang}.traineddata"
  done
  # Append the export only once, however many times the script is run.
  if ! grep -qxF -- "${TESSDATA_EXPORT}" "${HOME}/.bashrc" 2>/dev/null; then
    printf '%s\n' "${TESSDATA_EXPORT}" >> "${HOME}/.bashrc"
  fi
}

main() {
  install_packages
  copy_system_libs
  echo "Creating folders"
  mkdir -p "${BUILD_ROOT}"
  build_leptonica
  build_tesseract
  install_traineddata
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment