Created
November 17, 2025 15:37
-
-
Save sairamdevarashetty/2e4a989f85a8cf5d0ddede8adda1bf03 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import easyocr | |
| from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer | |
| from PIL import Image | |
# -----------------------------
# CONFIG
# -----------------------------
IMAGE_FOLDER = "images"  # folder containing the screenshots to process
OCR_LANGS = ['ja']       # language codes handed to easyocr.Reader
SOURCE_LANG = "ja"       # language code assigned to tokenizer.src_lang
TARGET_LANG = "en"       # language code the translation is generated in
# -----------------------------
# OCR SETUP
# -----------------------------
# The reader is built once at module level and reused by ocr_image() for
# every image, so it is not re-created per file.
print("Loading EasyOCR...")
reader = easyocr.Reader(OCR_LANGS)
# -----------------------------
# TRANSLATION MODEL SETUP
# -----------------------------
# Tokenizer and model are loaded once at module level and shared by every
# translate_text() call.
print("Loading M2M100 model...")
model_name = "facebook/m2m100_418M"
tokenizer = M2M100Tokenizer.from_pretrained(model_name)
model = M2M100ForConditionalGeneration.from_pretrained(model_name)

# The source language is set on the tokenizer itself; the target language is
# selected later, per generate() call, inside translate_text().
tokenizer.src_lang = SOURCE_LANG
def ocr_image(image_path) -> str:
    """Extract Japanese text from an image.

    Args:
        image_path: Image location, passed unchanged to ``reader.readtext``.

    Returns:
        The recognised text fragments joined with newlines, with leading and
        trailing whitespace removed. An empty string when nothing was
        recognised.
    """
    # NOTE(review): the leading "π" looks like a mis-decoded emoji; it is kept
    # byte-for-byte because the original character cannot be recovered here.
    print(f"\nπ OCR on: {image_path}")

    # detail=0 is requested so the result can be joined directly as text.
    fragments = reader.readtext(image_path, detail=0)
    return "\n".join(fragments).strip()
def translate_text(text) -> str:
    """Translate text using the M2M100 model.

    Args:
        text: Source-language text. Blank or whitespace-only input is not
            sent to the model.

    Returns:
        The decoded translation of ``text`` into ``TARGET_LANG``, or an empty
        string when ``text`` is blank.
    """
    # Guard clause: nothing to translate, so skip tokenizing and generation.
    if not text.strip():
        return ""

    # NOTE(review): the leading "π" looks like a mis-decoded emoji; kept as-is.
    print("π Translating...")

    # truncation=True lets the tokenizer cut over-long input instead of
    # failing. NOTE(review): long OCR output may therefore be translated only
    # partially; confirm that is acceptable.
    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True)

    # Forcing the first generated token to the target-language id is how the
    # output language is selected.
    generated_tokens = model.generate(
        **encoded,
        forced_bos_token_id=tokenizer.get_lang_id(TARGET_LANG),
    )

    # A single string was encoded, so only the first decoded sequence is used.
    decoded = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
    return decoded[0]
def process_images() -> None:
    """OCR and translate every image in ``IMAGE_FOLDER``, printing the results.

    Files are selected by extension (``.png``, ``.jpg``, ``.jpeg``,
    case-insensitive) and handled in sorted filename order so repeated runs
    produce output in the same sequence.

    Raises:
        OSError: Propagated from ``os.listdir`` (for example when
            ``IMAGE_FOLDER`` does not exist).
    """
    image_extensions = (".png", ".jpg", ".jpeg")

    # os.listdir returns entries in arbitrary order; sort for determinism.
    for filename in sorted(os.listdir(IMAGE_FOLDER)):
        if not filename.lower().endswith(image_extensions):
            continue

        path = os.path.join(IMAGE_FOLDER, filename)

        # 1. OCR
        japanese = ocr_image(path)
        # NOTE(review): the "π" characters below look like mis-decoded emoji;
        # kept as-is.
        print("π Extracted Japanese Text:")
        print(japanese if japanese else "[No text detected]")

        # 2. Translation (translate_text returns "" for blank input)
        english = translate_text(japanese)
        print("\nπ¬π§ Translated English Text:")
        print(english if english else "[Could not translate]")
        print("-" * 60)
# Run the batch only when executed as a script, not when imported.
if __name__ == "__main__":
    process_images()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment