Skip to content

Instantly share code, notes, and snippets.

@gary23w
Last active February 5, 2025 17:19
Show Gist options
  • Save gary23w/d9687d096e75c28b29e8cab9dbb61572 to your computer and use it in GitHub Desktop.
Save gary23w/d9687d096e75c28b29e8cab9dbb61572 to your computer and use it in GitHub Desktop.
image-to-string
#!/bin/bash
#
# Scaffolds and builds a Docker image that runs Microsoft's TrOCR
# (printed-text) model on a single image file.
#
# Steps:
#   1. Creates ./trocr_app and writes ocr.py, requirements.txt and a
#      Dockerfile into it.
#   2. Builds the image and tags it "trocr_app".
#   3. Prints an example `docker run` command.
#
# Requires: docker on PATH and network access during the build (pip packages
# and the model weights are downloaded at build time).

# Abort on the first failing command, on unset variables, and on failures
# inside pipelines, so a failed `cd` or `docker build` is never ignored.
set -euo pipefail

mkdir -p trocr_app
cd trocr_app

# --- ocr.py: the OCR entry point executed inside the container -------------
# Quoted heredoc delimiter: the Python source is written out verbatim, with
# no shell expansion.
cat << 'EOF' > ocr.py
"""Extract printed text from one image using TrOCR.

Usage: python ocr.py <image_path>
"""
import os
import sys

from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

HUB_MODEL_ID = 'microsoft/trocr-base-printed'

# Prefer the model copy baked into the image (MODEL_DIR is set by the
# Dockerfile); fall back to the Hub id so the script also works outside
# the container.
_model_dir = os.environ.get('MODEL_DIR')
MODEL_SOURCE = _model_dir if _model_dir and os.path.isdir(_model_dir) else HUB_MODEL_ID

if len(sys.argv) != 2:
    print("Usage: python ocr.py <image_path>", file=sys.stderr)
    sys.exit(1)

image_path = sys.argv[1]

try:
    processor = TrOCRProcessor.from_pretrained(MODEL_SOURCE)
    model = VisionEncoderDecoderModel.from_pretrained(MODEL_SOURCE)
    image = Image.open(image_path).convert('RGB')
    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    print("Extracted Text:")
    print(generated_text)
except Exception as e:
    # Report on stderr and exit non-zero so callers can detect the failure.
    print(f"An error occurred: {e}", file=sys.stderr)
    sys.exit(1)
EOF

# --- requirements.txt: Python dependencies installed into the image --------
cat << 'EOF' > requirements.txt
transformers
torch
Pillow
EOF

# --- Dockerfile -------------------------------------------------------------
cat << 'EOF' > Dockerfile
FROM python:3.9-slim

# libgl1 replaces the transitional libgl1-mesa-glx package, which is no
# longer available in current Debian releases.
RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    libgl1 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY ocr.py .

# Download the processor and model once at build time and store them in
# /models, then drop the Hugging Face download cache so the weights are not
# kept in the image twice. ocr.py loads from MODEL_DIR at run time.
RUN mkdir /models && \
    python -c "\
from transformers import TrOCRProcessor, VisionEncoderDecoderModel; \
TrOCRProcessor.from_pretrained('microsoft/trocr-base-printed').save_pretrained('/models'); \
VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-printed').save_pretrained('/models')" && \
    rm -rf /root/.cache/huggingface
ENV MODEL_DIR=/models

ENTRYPOINT ["python", "ocr.py"]
EOF

docker build -t trocr_app .

# Usage hint, printed literally (single quotes prevent expansion here).
# "$(pwd)" is quoted in the hint so it also works from directories whose
# path contains spaces.
printf '%s\n' 'docker run --rm -v "$(pwd)":/gary trocr_app /gary/your_image_file.png'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment