Skip to content

Instantly share code, notes, and snippets.

@me-suzy
Created February 15, 2026 22:10
Show Gist options
  • Select an option

  • Save me-suzy/a6c2dfa52ee3d05e368d1741e1a3adc5 to your computer and use it in GitHub Desktop.

Select an option

Save me-suzy/a6c2dfa52ee3d05e368d1741e1a3adc5 to your computer and use it in GitHub Desktop.
OCR convert to text API ClaudeAI
# -*- coding: utf-8 -*-
import base64
import os
import sys
import time
from pathlib import Path

import anthropic
import cv2
import numpy as np
# On Windows, re-wrap stdout/stderr as UTF-8 text streams; characters that
# cannot be encoded are replaced (errors='replace') instead of raising.
try:
    if sys.platform == 'win32' and hasattr(sys.stdout, 'buffer'):
        import io
        sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
        sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace')
except Exception:
    # Best effort only: keep the default streams if they cannot be re-wrapped.
    # `Exception` (not a bare except) so Ctrl+C / SystemExit still propagate.
    pass
# Claude API configuration.
# The key is taken from the ANTHROPIC_API_KEY environment variable when it is
# set, so the secret does not have to live in the source file; otherwise the
# placeholder below is used and must be replaced with a real key.
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY", "YOUR API CODE HERE")  # Put your API key here
def detect_colored_lines(img):
    """Build a mask of colored, roughly horizontal strokes in a BGR image.

    Pixels whose HSV value falls in one of the red, blue, green or yellow
    bands are collected, then only the parts that survive a morphological
    opening with a horizontal or slightly tilted (up to +/-10 degrees) line
    kernel are kept. The surviving strokes are dilated slightly before the
    mask is returned.

    Args:
        img: Image array that is converted with ``cv2.COLOR_BGR2HSV``.

    Returns:
        The dilated line mask; non-zero where a colored line was detected.
    """
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    # (lower, upper) HSV bounds per color band. Red needs two bands because
    # it sits at both ends of the hue range used here (0-10 and 160-180).
    hsv_bands = (
        ((0, 50, 50), (10, 255, 255)),
        ((160, 50, 50), (180, 255, 255)),
        ((100, 50, 50), (130, 255, 255)),  # blue
        ((35, 50, 50), (85, 255, 255)),    # green
        ((20, 50, 50), (35, 255, 255)),    # yellow
    )
    color_mask = np.zeros(hsv.shape[:2], dtype=np.uint8)
    for lower, upper in hsv_bands:
        band = cv2.inRange(hsv, np.array(lower), np.array(upper))
        color_mask = cv2.bitwise_or(color_mask, band)

    # Additional saturation gate applied on top of the color bands.
    saturated = np.where(hsv[:, :, 1] > 40, 255, 0).astype(np.uint8)
    color_mask = cv2.bitwise_and(color_mask, saturated)

    stroke_len = 30

    # Opening with a 30x1 kernel keeps only long horizontal runs.
    horizontal = cv2.getStructuringElement(cv2.MORPH_RECT, (stroke_len, 1))
    lines_mask = cv2.morphologyEx(color_mask, cv2.MORPH_OPEN, horizontal)

    # Repeat the opening with tilted line kernels to catch slanted strokes.
    for degrees in (-10, -7, -5, -3, 3, 5, 7, 10):
        theta = np.deg2rad(degrees)
        dx = int(stroke_len * np.cos(theta))
        dy = int(stroke_len * np.sin(theta))
        half = max(abs(dx), abs(dy)) + 1
        side = half * 2 + 1
        tilted = np.zeros((side, side), dtype=np.uint8)
        cv2.line(tilted, (half - dx, half - dy), (half + dx, half + dy), 1, 2)
        opened = cv2.morphologyEx(color_mask, cv2.MORPH_OPEN, tilted)
        lines_mask = cv2.bitwise_or(lines_mask, opened)

    # Grow the mask a little so the edges of each stroke are covered too.
    grow = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 5))
    return cv2.dilate(lines_mask, grow, iterations=1)
def remove_lines_preserve_text(img, lines_mask):
    """Erase the masked colored lines while keeping dark text pixels.

    Line pixels that are not dark (gray >= 100) are painted with an estimated
    background color. Line pixels that are dark (gray < 100) are treated as
    text crossed by a line and are replaced by their gray value in the first
    three channels, which removes the color but keeps the stroke.

    Args:
        img: Image array converted with ``cv2.COLOR_BGR2GRAY``.
        lines_mask: Mask with non-zero values on line pixels.

    Returns:
        A modified copy of ``img``; the input is not changed.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    on_line = lines_mask > 0
    is_text = gray < 100

    # Background color: mean of bright pixels that are not on a line,
    # with a light-gray fallback when no such pixel exists.
    paper = (gray > 200) & ~on_line
    if paper.any():
        bg_color = img[paper].mean(axis=0).astype(np.uint8)
    else:
        bg_color = np.array([240, 240, 240], dtype=np.uint8)

    cleaned = img.copy()
    cleaned[on_line & ~is_text] = bg_color

    crossed_text = on_line & is_text
    if crossed_text.any():
        # Same gray value in each of the first three channels.
        cleaned[crossed_text, :3] = gray[crossed_text][:, np.newaxis]
    return cleaned
def apply_photoscape_settings(img):
    """Apply a fixed tone/contrast recipe intended to make text clearer.

    Steps, in order: scale channel 0 by 0.6 and channel 2 by 1.2, darken
    near-white pixels, darken highlights, raise contrast around 0.5, darken
    the darker pixels further, then boost HSV saturation by 1.3.

    Args:
        img: 8-bit image array; treated as BGR by the HSV conversion below.

    Returns:
        The adjusted image as ``uint8``.
    """
    pixels = img.astype(np.float32) / 255.0

    # Channel balance: channel 0 down, channel 2 up (clipped to 1).
    pixels[:, :, 0] *= 0.6
    pixels[:, :, 2] = np.clip(pixels[:, :, 2] * 1.2, 0, 1)

    # Pull down near-white pixels, then highlights. The second mask is
    # computed after the first darkening on purpose (order matters).
    near_white = pixels.mean(axis=2) > 0.85
    pixels[near_white, :3] *= 0.9
    highlights = pixels.mean(axis=2) > 0.7
    pixels[highlights, :3] *= 0.85

    # Contrast stretch around mid-gray.
    pivot = 0.5
    gain = 1.5
    pixels = np.clip((pixels - pivot) * gain + pivot, 0, 1)

    # Darken everything whose mean brightness is below 0.6.
    shadows = pixels.mean(axis=2) < 0.6
    pixels[shadows, :3] *= 0.7

    # Saturation boost via an 8-bit HSV round trip.
    hsv = cv2.cvtColor((pixels * 255).astype(np.uint8), cv2.COLOR_BGR2HSV).astype(np.float32)
    hsv[:, :, 1] = np.clip(hsv[:, :, 1] * 1.3, 0, 255)
    boosted = cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2BGR).astype(np.float32) / 255.0

    return (np.clip(boosted, 0, 1) * 255).astype(np.uint8)
def make_text_bold_and_sharp(img):
    """Sharpen the image and make dark text strokes thicker and darker.

    Args:
        img: 8-bit image array converted with ``cv2.COLOR_BGR2LAB``.

    Returns:
        The processed image as ``uint8``.
    """
    # Unsharp mask: 1.5 * image - 0.5 * blurred image.
    softened = cv2.GaussianBlur(img, (0, 0), 2)
    crisp = cv2.addWeighted(img, 1.5, softened, -0.5, 0)

    lab = cv2.cvtColor(crisp, cv2.COLOR_BGR2LAB)
    lightness = lab[:, :, 0]

    # Pixels with L below 80 are considered text.
    is_text = lightness < 80
    grown = cv2.dilate(
        is_text.astype(np.uint8),
        cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2, 2)),
        iterations=1,
    )
    # Pixels added by the dilation, i.e. the new outline around the text.
    fringe = (grown > 0) & ~is_text

    thickened = lightness.copy()
    if fringe.any():
        # Fill the outline with the average text lightness.
        thickened[fringe] = np.mean(lightness[is_text]) if is_text.any() else 30
    # Darken the original text pixels by 20%.
    thickened[is_text] = np.clip(lightness[is_text] * 0.8, 0, 255).astype(np.uint8)
    lab[:, :, 0] = thickened

    bolded = cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)

    # Final pass: darken every pixel whose gray value is below 120.
    dark = cv2.cvtColor(bolded, cv2.COLOR_BGR2GRAY) < 120
    scaled = bolded.astype(np.float32)
    scaled[dark] *= 0.85
    return np.clip(scaled, 0, 255).astype(np.uint8)
def image_to_base64(image_path):
    """Read a file and return its contents as a base64 text string.

    Args:
        image_path: Path of the file to read in binary mode.

    Returns:
        The standard-alphabet base64 encoding of the file bytes, as ``str``.
    """
    with open(image_path, "rb") as stream:
        raw = stream.read()
    encoded = base64.b64encode(raw)
    # Base64 output is pure ASCII, so this decode cannot fail.
    return encoded.decode("ascii")
def get_image_media_type(image_path):
    """Map a file name's extension to an image media type.

    The extension is compared case-insensitively. Anything other than
    .jpg/.jpeg/.png/.gif/.webp falls back to 'image/jpeg'.

    Args:
        image_path: File name or path; only its suffix is inspected.

    Returns:
        One of 'image/jpeg', 'image/png', 'image/gif', 'image/webp'.
    """
    suffix = Path(image_path).suffix.lower()
    if suffix in ('.png', '.gif', '.webp'):
        # For these three the media subtype equals the extension.
        return 'image/' + suffix[1:]
    # .jpg, .jpeg and every unrecognized extension.
    return 'image/jpeg'
def ocr_with_claude(image_path, client):
    """Send an image to Claude and return the transcribed text.

    Files with a .jpg/.jpeg/.png/.gif/.webp extension are sent as-is. Any
    other format (for example .bmp or .tiff) is decoded with OpenCV and
    re-encoded as PNG first, so the declared media type always matches the
    bytes that are actually sent.

    Args:
        image_path: Path of the image to transcribe.
        client: Object exposing ``messages.create(...)`` (an Anthropic client).

    Returns:
        The concatenated text of all text blocks in the response; an empty
        string when the response contains no text block.

    Raises:
        ValueError: If a file that needs conversion cannot be read or
            re-encoded as PNG.
    """
    if Path(image_path).suffix.lower() in ('.jpg', '.jpeg', '.png', '.gif', '.webp'):
        image_data = image_to_base64(image_path)
        media_type = get_image_media_type(image_path)
    else:
        # Not a format the request can declare: convert to PNG in memory.
        decoded = cv2.imread(str(image_path))
        if decoded is None:
            raise ValueError(f"Nu s-a putut incarca: {image_path}")
        encoded_ok, png_buffer = cv2.imencode('.png', decoded)
        if not encoded_ok:
            raise ValueError(f"Nu s-a putut converti in PNG: {image_path}")
        image_data = base64.standard_b64encode(png_buffer.tobytes()).decode("utf-8")
        media_type = 'image/png'

    message = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=4096,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": media_type,
                            "data": image_data,
                        },
                    },
                    {
                        "type": "text",
                        "text": """Transcrie EXACT textul din aceasta imagine de carte medicala romaneasca.
Reguli:
1. Transcrie tot textul vizibil, inclusiv cel de sub liniile rosii/colorate
2. Pastreaza formatarea originala (paragrafe, alineate)
3. Pastreaza diacriticele romanesti (ă, â, î, ș, ț)
4. Pastreaza ortografia originala (ex: "sînt" nu "sunt" daca asa apare)
5. Marcheaza cu [?] cuvintele pe care nu le poti citi clar
6. Nu adauga explicatii, doar textul transcris
Incepe transcrierea:"""
                    }
                ],
            }
        ],
    )
    # Join every text block instead of indexing content[0], which would
    # raise IndexError on an empty response and drop any later blocks.
    return "".join(
        block.text for block in message.content
        if getattr(block, "type", None) == "text"
    )
def process_image(image_path, output_folder, client):
    """Clean one image, save the result, then OCR the original with Claude.

    The cleaned image is written to ``output_folder`` under the input's file
    name, and the transcription to ``<stem>.txt`` in the same folder. OCR
    errors are printed and reported as ``text = None`` rather than raised.

    Args:
        image_path: Path of the image to process.
        output_folder: Destination directory (``str`` or ``Path``).
        client: Client object passed through to ``ocr_with_claude``.

    Returns:
        Tuple ``(result, text)``: the processed image array and the
        transcribed text, or ``None`` for the text when OCR failed.

    Raises:
        ValueError: If the image cannot be loaded.
    """
    img = cv2.imread(str(image_path))
    if img is None:
        raise ValueError(f"Nu s-a putut incarca: {image_path}")

    output_folder = Path(output_folder)
    filename = Path(image_path).stem
    print(f"\n Procesare: {Path(image_path).name}")

    # Step 1: image cleanup (colored-line removal, tone, bold/sharpen).
    lines_mask = detect_colored_lines(img)
    if np.any(lines_mask > 0):
        img_no_lines = remove_lines_preserve_text(img, lines_mask)
        print(" -> Linii colorate eliminate")
    else:
        img_no_lines = img
    img_photoscape = apply_photoscape_settings(img_no_lines)
    result = make_text_bold_and_sharp(img_photoscape)

    # Save the processed image. cv2.imwrite signals failure through its
    # return value, so only report success when it actually succeeded.
    output_image_path = output_folder / Path(image_path).name
    if cv2.imwrite(str(output_image_path), result):
        print(f" -> Imagine salvata: {output_image_path.name}")
    else:
        print(f" -> EROARE salvare imagine: {output_image_path.name}")

    # Step 2: OCR with Claude, using the ORIGINAL image for better results.
    print(" -> OCR cu Claude API...")
    text = None
    try:
        text = ocr_with_claude(image_path, client)

        # Save the transcription.
        output_text_path = output_folder / f"{filename}.txt"
        with open(output_text_path, 'w', encoding='utf-8') as f:
            f.write(text)
        print(f" -> Text salvat: {output_text_path.name}")

        # Show a short preview of the text.
        preview = text[:200] + "..." if len(text) > 200 else text
        print(f" -> Preview: {preview}")
    except Exception as e:
        # Best effort: an OCR failure must not discard the processed image.
        print(f" -> EROARE OCR: {e}")
        text = None

    # Pause between requests for rate limiting.
    time.sleep(1)
    return result, text
def process_folder(input_folder, output_folder):
    """Process every image in a folder and write a combined transcript.

    Images are the regular files directly inside ``input_folder`` whose
    extension, compared case-insensitively, is one of .jpg, .jpeg, .png,
    .bmp, .tiff or .tif. Each one is passed to ``process_image``; all
    non-empty transcriptions are also concatenated into
    ``_TEXT_COMPLET.txt`` in ``output_folder``.

    Args:
        input_folder: Directory containing the source images.
        output_folder: Directory for the results; created if missing.

    Returns:
        None. Progress and a final summary are printed.
    """
    input_path = Path(input_folder)
    output_path = Path(output_folder)
    output_path.mkdir(parents=True, exist_ok=True)

    # Match on the lower-cased suffix so mixed-case names such as "a.Jpg"
    # are found on case-sensitive filesystems as well.
    extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif'}
    images = []
    if input_path.is_dir():
        images = sorted(
            entry for entry in input_path.iterdir()
            if entry.is_file() and entry.suffix.lower() in extensions
        )
    if not images:
        print(f"Nu s-au gasit imagini in: {input_folder}")
        return
    print(f"Gasite {len(images)} imagini de procesat\n")

    # Create the Claude client only once there is work to do.
    client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)

    success = 0
    failed = 0
    all_text = []
    for i, img_path in enumerate(images, 1):
        print(f"\n[{i}/{len(images)}] ", end="")
        try:
            _, text = process_image(img_path, output_path, client)
            if text:
                all_text.append(f"\n{'='*60}\n{img_path.name}\n{'='*60}\n\n{text}")
            success += 1
        except Exception as e:
            # Keep going with the remaining images.
            print(f" [EROARE] {img_path.name}: {e}")
            failed += 1

    # Save the complete text into a single file.
    combined_text_path = output_path / "_TEXT_COMPLET.txt"
    with open(combined_text_path, 'w', encoding='utf-8') as f:
        f.write("\n".join(all_text))
    print(f"\n\nText complet salvat in: {combined_text_path}")
    print(f"\n{'='*60}")
    print(f"REZULTAT: {success} procesate, {failed} erori")
    print(f"{'='*60}")
if __name__ == "__main__":
    # Usage: script.py [input_folder [output_folder]]
    # Without arguments the original hard-coded folders below are used.
    input_folder = (
        sys.argv[1] if len(sys.argv) > 1
        else r"e:\Carte\BB\17 - Site Leadership\alte\Ionel Balauta\Aryeht\Task 1 - Traduce tot site-ul\Doar Google Web\Andreea\Meditatii\2023\Edit Text Images (Remove shadows + Remove red LInes)\TTT"
    )
    output_folder = (
        sys.argv[2] if len(sys.argv) > 2
        else r"e:\Carte\BB\17 - Site Leadership\alte\Ionel Balauta\Aryeht\Task 1 - Traduce tot site-ul\Doar Google Web\Andreea\Meditatii\2023\Edit Text Images (Remove shadows + Remove red LInes)\Output"
    )
    print("=" * 60)
    print("PROCESARE - Imagine + OCR cu Claude API")
    print("=" * 60)
    print(f"Input: {input_folder}")
    print(f"Output: {output_folder}")
    print("=" * 60)
    process_folder(input_folder, output_folder)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment