Created
February 15, 2026 22:10
-
-
Save me-suzy/a6c2dfa52ee3d05e368d1741e1a3adc5 to your computer and use it in GitHub Desktop.
OCR convert to text API ClaudeAI
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # -*- coding: utf-8 -*- | |
import base64
import os
import sys
import time
from pathlib import Path

import anthropic
import cv2
import numpy as np
# On Windows, re-wrap the standard streams as UTF-8 so that non-ASCII text
# (e.g. transcribed text previews) can be printed; errors='replace' makes
# unencodable characters degrade to a replacement instead of raising.
try:
    if sys.platform == 'win32' and hasattr(sys.stdout, 'buffer'):
        import io
        sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
        sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace')
except Exception:
    # Best-effort only: if the streams cannot be re-wrapped, keep the defaults.
    # `except Exception` (rather than a bare `except:`) lets KeyboardInterrupt
    # and SystemExit propagate normally.
    pass
# Claude API configuration.
# The key is taken from the ANTHROPIC_API_KEY environment variable when it is
# set and non-empty, so the secret does not have to live in the source file.
# The literal placeholder is kept as a fallback for users who paste their key
# directly here.
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY") or "YOUR API CODE HERE"  # Put your API key here
def detect_colored_lines(img):
    """Detect colored (red, blue, green, yellow) near-horizontal strokes.

    Args:
        img: Image array in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2HSV``).

    Returns:
        A single-channel uint8 mask that is non-zero where a colored stroke
        was detected, slightly dilated so the stroke edges are covered too.
    """
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    # Hue intervals: red (both ends of the hue scale), blue, green, yellow.
    # Every interval uses the same saturation/value floor of 50.
    hue_ranges = [(0, 10), (160, 180), (100, 130), (35, 85), (20, 35)]
    color_mask = np.zeros(hsv.shape[:2], dtype=np.uint8)
    for hue_lo, hue_hi in hue_ranges:
        band = cv2.inRange(
            hsv,
            np.array([hue_lo, 50, 50]),
            np.array([hue_hi, 255, 255]),
        )
        color_mask = cv2.bitwise_or(color_mask, band)

    # Additional saturation gate applied on top of the color bands.
    saturated = np.where(hsv[:, :, 1] > 40, 255, 0).astype(np.uint8)
    color_mask = cv2.bitwise_and(color_mask, saturated)

    # Morphological opening with a 30x1 kernel keeps only horizontal runs.
    horizontal = cv2.getStructuringElement(cv2.MORPH_RECT, (30, 1))
    detected = cv2.morphologyEx(color_mask, cv2.MORPH_OPEN, horizontal)

    # Repeat the opening with slanted line-shaped kernels so slightly tilted
    # strokes (up to +/-10 degrees) are kept as well.
    stroke_len = 30
    for angle_deg in (-10, -7, -5, -3, 3, 5, 7, 10):
        theta = np.deg2rad(angle_deg)
        dx = int(stroke_len * np.cos(theta))
        dy = int(stroke_len * np.sin(theta))
        center = max(abs(dx), abs(dy)) + 1
        side = 2 * center + 1
        slanted = np.zeros((side, side), dtype=np.uint8)
        cv2.line(slanted, (center - dx, center - dy),
                 (center + dx, center + dy), 1, 2)
        opened = cv2.morphologyEx(color_mask, cv2.MORPH_OPEN, slanted)
        detected = cv2.bitwise_or(detected, opened)

    # Grow the mask a little (3 wide, 5 tall).
    grow = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 5))
    return cv2.dilate(detected, grow, iterations=1)
def remove_lines_preserve_text(img, lines_mask):
    """Erase colored lines from the image while keeping dark text pixels.

    Args:
        img: BGR image array.
        lines_mask: Single-channel mask, non-zero where a colored line was
            detected (same height/width as ``img``).

    Returns:
        A copy of ``img`` in which line pixels are painted with the estimated
        background color, except where the line overlaps dark text; those
        pixels are replaced by their grayscale value so the text survives
        without the color.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    on_line = lines_mask > 0
    is_text = gray < 100  # dark pixels are treated as text

    # Estimate the page background from bright pixels that are not on a line;
    # fall back to a light gray when no such pixel exists.
    background = (gray > 200) & (lines_mask == 0)
    if background.any():
        fill = img[background].mean(axis=0).astype(np.uint8)
    else:
        fill = np.array([240, 240, 240], dtype=np.uint8)

    cleaned = img.copy()
    cleaned[on_line & ~is_text] = fill

    # Where a line crosses text, keep the luminance but drop the color.
    covered_text = on_line & is_text
    if covered_text.any():
        cleaned[covered_text] = gray[covered_text][:, np.newaxis]
    return cleaned
def apply_photoscape_settings(img):
    """Apply a fixed tone/contrast recipe intended to make text clearer.

    Args:
        img: 3-channel uint8 image array (channels 0 and 2 are scaled
            individually, then the image is treated as BGR for the HSV step).

    Returns:
        A uint8 image of the same shape after the adjustments.
    """
    work = img.astype(np.float32) / 255.0

    def darken_where(mask, factor):
        # Scale all three channels of the selected pixels by `factor`.
        work[mask] *= factor

    # Per-channel balance: attenuate channel 0, boost channel 2 (clipped to 1).
    work[:, :, 0] *= 0.6
    work[:, :, 2] = np.clip(work[:, :, 2] * 1.2, 0, 1)

    # Pull down near-white pixels, then highlights (masks are recomputed
    # after each step, so the order matters).
    darken_where(work.mean(axis=2) > 0.85, 0.9)
    darken_where(work.mean(axis=2) > 0.7, 0.85)

    # Contrast stretch around mid-gray (0.5) by a factor of 1.5.
    work = np.clip((work - 0.5) * 1.5 + 0.5, 0, 1)

    # "Burn": darken everything below 0.6 mean brightness.
    darken_where(work.mean(axis=2) < 0.6, 0.7)

    # Boost saturation by 30% via an 8-bit HSV round trip.
    hsv = cv2.cvtColor((work * 255).astype(np.uint8), cv2.COLOR_BGR2HSV)
    hsv = hsv.astype(np.float32)
    hsv[:, :, 1] = np.clip(hsv[:, :, 1] * 1.3, 0, 255)
    bgr = cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2BGR)
    work = bgr.astype(np.float32) / 255.0

    return (np.clip(work, 0, 1) * 255).astype(np.uint8)
def make_text_bold_and_sharp(img):
    """Sharpen the image and thicken/darken dark (text) pixels.

    Args:
        img: BGR uint8 image array.

    Returns:
        A BGR uint8 image in which text strokes are slightly wider and darker.
    """
    # Unsharp mask: 1.5 * original - 0.5 * blurred.
    soft = cv2.GaussianBlur(img, (0, 0), 2)
    crisp = cv2.addWeighted(img, 1.5, soft, -0.5, 0)

    lab = cv2.cvtColor(crisp, cv2.COLOR_BGR2LAB)
    lightness = lab[:, :, 0]
    is_text = lightness < 80  # low lightness is treated as text

    # Dilate the text mask to find the ring of pixels that will become text.
    brush = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2, 2))
    grown = cv2.dilate(is_text.astype(np.uint8), brush, iterations=1)
    halo = (grown > 0) & ~is_text

    adjusted = lightness.copy()
    if halo.any():
        # Newly added stroke pixels get the average lightness of existing text.
        adjusted[halo] = np.mean(lightness[is_text]) if is_text.any() else 30
    # Existing text pixels are darkened by 20%.
    adjusted[is_text] = np.clip(lightness[is_text] * 0.8, 0, 255).astype(np.uint8)
    lab[:, :, 0] = adjusted

    bgr = cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)

    # Final pass: darken every pixel whose gray level is below 120 by 15%.
    dark = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY) < 120
    as_float = bgr.astype(np.float32)
    as_float[dark] *= 0.85
    return np.clip(as_float, 0, 255).astype(np.uint8)
def image_to_base64(image_path):
    """Read the file at ``image_path`` and return its bytes as base64 text."""
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
def get_image_media_type(image_path):
    """Return the image media type implied by the file extension.

    The extension is compared case-insensitively. ``.png``, ``.gif`` and
    ``.webp`` map to their own types; ``.jpg``/``.jpeg`` and every other
    extension yield ``'image/jpeg'``.
    """
    suffix = Path(image_path).suffix.lower()
    if suffix == '.png':
        return 'image/png'
    if suffix == '.gif':
        return 'image/gif'
    if suffix == '.webp':
        return 'image/webp'
    # '.jpg', '.jpeg' and anything unrecognized fall through to JPEG.
    return 'image/jpeg'
def ocr_with_claude(image_path, client):
    """Send an image to Claude and return the transcribed text.

    Args:
        image_path: Path of the image file to transcribe.
        client: An ``anthropic.Anthropic`` client (its ``messages.create``
            method is called).

    Returns:
        The text of the first content block of the model's reply.

    Raises:
        ValueError: If an image in a format that must be converted (e.g. BMP
            or TIFF) cannot be read or re-encoded.
        Exception: Whatever the API client raises on request failure.
    """
    # The API accepts only JPEG, PNG, GIF and WebP. Other formats (the batch
    # runner also collects .bmp/.tif/.tiff) were previously sent mislabelled
    # as image/jpeg; re-encode them to PNG in memory instead.
    api_suffixes = {'.jpg', '.jpeg', '.png', '.gif', '.webp'}
    if Path(image_path).suffix.lower() in api_suffixes:
        image_data = image_to_base64(image_path)
        media_type = get_image_media_type(image_path)
    else:
        decoded = cv2.imread(str(image_path))
        if decoded is None:
            raise ValueError(f"Nu s-a putut incarca: {image_path}")
        encoded_ok, png_buffer = cv2.imencode('.png', decoded)
        if not encoded_ok:
            raise ValueError(f"Nu s-a putut converti in PNG: {image_path}")
        image_data = base64.standard_b64encode(png_buffer.tobytes()).decode("utf-8")
        media_type = 'image/png'

    message = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=4096,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": media_type,
                            "data": image_data,
                        },
                    },
                    {
                        "type": "text",
                        "text": """Transcrie EXACT textul din aceasta imagine de carte medicala romaneasca.
Reguli:
1. Transcrie tot textul vizibil, inclusiv cel de sub liniile rosii/colorate
2. Pastreaza formatarea originala (paragrafe, alineate)
3. Pastreaza diacriticele romanesti (ă, â, î, ș, ț)
4. Pastreaza ortografia originala (ex: "sînt" nu "sunt" daca asa apare)
5. Marcheaza cu [?] cuvintele pe care nu le poti citi clar
6. Nu adauga explicatii, doar textul transcris
Incepe transcrierea:"""
                    }
                ],
            }
        ],
    )
    return message.content[0].text
def process_image(image_path, output_folder, client):
    """Clean one image, save it, and transcribe the original with Claude.

    Args:
        image_path: Path of the source image.
        output_folder: Directory (``str`` or ``Path``) that receives the
            cleaned image and the ``<stem>.txt`` transcription.
        client: Claude API client passed through to ``ocr_with_claude``.

    Returns:
        A ``(result, text)`` tuple: the processed image array and the
        transcribed text, or ``None`` for the text when OCR failed.

    Raises:
        ValueError: If the image cannot be loaded.
    """
    # Accept plain strings too; the `/` joins below need a Path.
    output_folder = Path(output_folder)

    img = cv2.imread(str(image_path))
    if img is None:
        raise ValueError(f"Nu s-a putut incarca: {image_path}")
    filename = Path(image_path).stem
    print(f"\n Procesare: {Path(image_path).name}")

    # Step 1: image processing (colored-line removal, tone, sharpening).
    lines_mask = detect_colored_lines(img)
    if np.any(lines_mask > 0):
        img_no_lines = remove_lines_preserve_text(img, lines_mask)
        print(" -> Linii colorate eliminate")
    else:
        img_no_lines = img
    img_photoscape = apply_photoscape_settings(img_no_lines)
    result = make_text_bold_and_sharp(img_photoscape)

    # Save the processed image. cv2.imwrite signals failure by returning
    # False rather than raising, so check it before claiming success; a
    # failed save does not prevent the OCR step below.
    output_image_path = output_folder / Path(image_path).name
    if cv2.imwrite(str(output_image_path), result):
        print(f" -> Imagine salvata: {output_image_path.name}")
    else:
        print(f" -> EROARE: imaginea nu a putut fi salvata: {output_image_path.name}")

    # Step 2: OCR with Claude (the ORIGINAL image is sent, not the cleaned one).
    print(" -> OCR cu Claude API...")
    try:
        text = ocr_with_claude(image_path, client)
        # Save the transcription next to the processed image.
        output_text_path = output_folder / f"{filename}.txt"
        with open(output_text_path, 'w', encoding='utf-8') as f:
            f.write(text)
        print(f" -> Text salvat: {output_text_path.name}")
        # Show a short preview of the text.
        preview = text[:200] + "..." if len(text) > 200 else text
        print(f" -> Preview: {preview}")
    except Exception as e:
        # OCR is best-effort: report the error and continue with no text.
        print(f" -> EROARE OCR: {e}")
        text = None

    # Pause between requests for rate limiting.
    time.sleep(1)
    return result, text
def process_folder(input_folder, output_folder):
    """Process every image in ``input_folder`` and write results to ``output_folder``.

    For each image the cleaned picture and its transcription are written by
    ``process_image``; all transcriptions are additionally concatenated into
    ``_TEXT_COMPLET.txt`` in the output folder. A summary is printed at the
    end.

    Args:
        input_folder: Directory containing the source images.
        output_folder: Directory for the results (created if missing).
    """
    input_path = Path(input_folder)
    output_path = Path(output_folder)
    output_path.mkdir(parents=True, exist_ok=True)

    # Initialize the Claude client.
    client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)

    # Select regular files by case-insensitive suffix. Globbing for '*.jpg'
    # and '*.JPG' only misses mixed-case names such as 'scan.Jpg' on
    # case-sensitive filesystems and can also match directories.
    extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif'}
    if input_path.is_dir():
        images = sorted(
            entry for entry in input_path.iterdir()
            if entry.is_file() and entry.suffix.lower() in extensions
        )
    else:
        images = []
    if not images:
        print(f"Nu s-au gasit imagini in: {input_folder}")
        return
    print(f"Gasite {len(images)} imagini de procesat\n")

    success = 0
    failed = 0
    all_text = []
    for i, img_path in enumerate(images, 1):
        print(f"\n[{i}/{len(images)}] ", end="")
        try:
            _, text = process_image(img_path, output_path, client)
            if text:
                all_text.append(f"\n{'='*60}\n{img_path.name}\n{'='*60}\n\n{text}")
            # NOTE: counted as processed even when OCR returned no text;
            # process_image reports OCR errors itself.
            success += 1
        except Exception as e:
            # Keep going with the remaining images.
            print(f" [EROARE] {img_path.name}: {e}")
            failed += 1

    # Save all transcriptions into a single file.
    combined_text_path = output_path / "_TEXT_COMPLET.txt"
    with open(combined_text_path, 'w', encoding='utf-8') as f:
        f.write("\n".join(all_text))
    print(f"\n\nText complet salvat in: {combined_text_path}")
    print(f"\n{'='*60}")
    print(f"REZULTAT: {success} procesate, {failed} erori")
    print(f"{'='*60}")
if __name__ == "__main__":
    # Default folders (the original hard-coded locations). They can be
    # overridden from the command line:
    #     python script.py [INPUT_FOLDER [OUTPUT_FOLDER]]
    default_input_folder = r"e:\Carte\BB\17 - Site Leadership\alte\Ionel Balauta\Aryeht\Task 1 - Traduce tot site-ul\Doar Google Web\Andreea\Meditatii\2023\Edit Text Images (Remove shadows + Remove red LInes)\TTT"
    default_output_folder = r"e:\Carte\BB\17 - Site Leadership\alte\Ionel Balauta\Aryeht\Task 1 - Traduce tot site-ul\Doar Google Web\Andreea\Meditatii\2023\Edit Text Images (Remove shadows + Remove red LInes)\Output"
    input_folder = sys.argv[1] if len(sys.argv) > 1 else default_input_folder
    output_folder = sys.argv[2] if len(sys.argv) > 2 else default_output_folder

    print("=" * 60)
    print("PROCESARE - Imagine + OCR cu Claude API")
    print("=" * 60)
    print(f"Input: {input_folder}")
    print(f"Output: {output_folder}")
    print("=" * 60)
    process_folder(input_folder, output_folder)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment