ericwastaken · November 17, 2024 06:08
diff --git a/ocrmypdf.sh b/ocrmypdf.sh
 #!/bin/bash

 # This script performs OCR on a PDF file using OCRmyPDF, 
 # optimizes images and document layout.
 #
 # Uses OCRmyPDF - https://github.com/ocrmypdf/OCRmyPDF
 # 
 # Dependencies:
 # - Tesseract language packs: `brew install tesseract-lang` installs all
 #   language packs (needed for Spanish and other non-English languages).
 # - OCRmyPDF: Install via Homebrew with the command 
 #   `brew install ocrmypdf`.
 # - Ensure Homebrew is installed on your Mac (https://brew.sh).

 # Validate the command line: exactly one argument is expected, the path of
 # the PDF to process. Usage and error messages are written to stderr so they
 # are not mixed into regular output; every failure exits with status 1.
 if [[ $# -ne 1 ]]; then
   echo "Usage: $0 <input-pdf-file-path>" >&2
   exit 1
 fi

 # Input PDF file path
 input_pdf="$1"

 # Fail early with a clear message when the path does not name a readable
 # regular file, instead of launching the OCR tool on a bad path.
 if [[ ! -f "$input_pdf" || ! -r "$input_pdf" ]]; then
   echo "Error: '$input_pdf' is not a readable file." >&2
   exit 1
 fi

 # Perform OCR with the options below. They are kept in an array so that
 # every flag sits right next to its explanation and the documentation
 # cannot drift away from the command that is actually executed.
 ocr_options=(
   -l eng+spa          # Support for both English and Spanish languages
   --output-type pdfa  # Enforce PDF/A for long-term archiving
   --oversample 300    # Oversample to 300 DPI to improve OCR results
   --force-ocr         # Force OCR on pages that already contain text
   -O 1                # Use safe, lossless optimizations
   --deskew            # For pages that are not straight
   --rotate-pages      # Rotates pages that need it
 )

 # The same path is passed as both input and output. The command is tested
 # directly (rather than inspecting $? afterwards) so that no statement can
 # slip in between and clobber the exit status. The failure message goes to
 # stderr and the script exits with status 1.
 if ocrmypdf "${ocr_options[@]}" "$input_pdf" "$input_pdf"; then
   echo "OCR completed successfully."
 else
   echo "OCR encountered an error." >&2
   exit 1
 fi
	#!/bin/bash

	# This script performs OCR on a PDF file using OCRmyPDF,
	# optimizes images and document layout.
	#
	# Uses OCRmyPDF - https://github.com/ocrmypdf/OCRmyPDF
	#
	# Dependencies:
	# - Tesseract language packs: `brew install tesseract-lang` installs all
	#   language packs (needed for Spanish and other non-English languages).
	# - OCRmyPDF: Install via Homebrew with the command
	# `brew install ocrmypdf`.
	# - Ensure Homebrew is installed on your Mac (https://brew.sh).

	# Validate the command line: exactly one argument is expected, the path of
	# the PDF to process. Usage and error messages are written to stderr so they
	# are not mixed into regular output; every failure exits with status 1.
	if [[ $# -ne 1 ]]; then
	  echo "Usage: $0 <input-pdf-file-path>" >&2
	  exit 1
	fi

	# Input PDF file path
	input_pdf="$1"

	# Fail early with a clear message when the path does not name a readable
	# regular file, instead of launching the OCR tool on a bad path.
	if [[ ! -f "$input_pdf" || ! -r "$input_pdf" ]]; then
	  echo "Error: '$input_pdf' is not a readable file." >&2
	  exit 1
	fi

	# Perform OCR with the options below. They are kept in an array so that
	# every flag sits right next to its explanation and the documentation
	# cannot drift away from the command that is actually executed.
	ocr_options=(
	  -l eng+spa          # Support for both English and Spanish languages
	  --output-type pdfa  # Enforce PDF/A for long-term archiving
	  --oversample 300    # Oversample to 300 DPI to improve OCR results
	  --force-ocr         # Force OCR on pages that already contain text
	  -O 1                # Use safe, lossless optimizations
	  --deskew            # For pages that are not straight
	  --rotate-pages      # Rotates pages that need it
	)

	# The same path is passed as both input and output. The command is tested
	# directly (rather than inspecting $? afterwards) so that no statement can
	# slip in between and clobber the exit status. The failure message goes to
	# stderr and the script exits with status 1.
	if ocrmypdf "${ocr_options[@]}" "$input_pdf" "$input_pdf"; then
	  echo "OCR completed successfully."
	else
	  echo "OCR encountered an error." >&2
	  exit 1
	fi
No results found