Skip to content

Instantly share code, notes, and snippets.

@me-suzy
Created April 24, 2025 20:05
Show Gist options
  • Save me-suzy/0881e999e120c7168ac1d9b6a7b53ee2 to your computer and use it in GitHub Desktop.
457546754.py
# OCR pipeline: run two OCR passes (Tesseract, then TrOCR) on a
# perspective-corrected document image.
# NOTE(review): `warped` is expected to be a BGR OpenCV image produced by an
# earlier step (not shown in this snippet) — confirm against the caller.
import cv2  # was used below but never imported
import pytesseract
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

# Convert the previously corrected image to RGB (required for pytesseract).
warped_rgb = cv2.cvtColor(warped, cv2.COLOR_BGR2RGB)

# Pass 1: classic OCR with Tesseract using the Romanian language pack.
extracted_text = pytesseract.image_to_string(warped_rgb, lang='ron')
# First 2000 characters as a preview. The original bare expression
# `extracted_text[:2000]` only displays output in a notebook; print explicitly.
print(extracted_text[:2000])

# Pass 2: transformer-based OCR with TrOCR.
# Downscale 2x for better OCR performance before handing the image to the model.
ocr_image = warped_rgb
ocr_pil_image = Image.fromarray(ocr_image).resize(
    (ocr_image.shape[1] // 2, ocr_image.shape[0] // 2)
)

# Load TrOCR processor and model (handwritten checkpoint).
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")

# Preprocess the image and generate the text prediction.
pixel_values = processor(images=ocr_pil_image, return_tensors="pt").pixel_values
generated_ids = model.generate(pixel_values)
ocr_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(ocr_text)  # bare `ocr_text` only displays in a notebook
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment