Skip to content

Instantly share code, notes, and snippets.

@me-suzy
Created February 16, 2026 06:44
Show Gist options
  • Select an option

  • Save me-suzy/343ea201710335a1ee0a3c0be548942e to your computer and use it in GitHub Desktop.

Select an option

Save me-suzy/343ea201710335a1ee0a3c0be548942e to your computer and use it in GitHub Desktop.
OCR convert to text API ClaudeAI 3.py
# -*- coding: utf-8 -*-
import cv2
import numpy as np
from pathlib import Path
import sys
import base64
import anthropic
import time
from PIL import Image, ImageDraw, ImageFont
import textwrap
import re
import os

# Best-effort console setup: on Windows, re-wrap stdout/stderr as UTF-8 with
# errors='replace' so printing non-ASCII text cannot abort the script.
# Any failure here is deliberately ignored, but only ordinary exceptions are
# caught so Ctrl+C / SystemExit still propagate.
try:
    if sys.platform == 'win32' and hasattr(sys.stdout, 'buffer'):
        import io
        sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
        sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace')
except Exception:
    pass

# API key: taken from the ANTHROPIC_API_KEY environment variable when it is
# set and non-empty, otherwise the in-file placeholder is used (replace it
# with a real key if you do not use the environment variable).
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY") or "YOUR_API_KEY_HERE"
def image_to_base64(image_path):
    """Read the file at *image_path* and return its bytes as base64 text.

    Args:
        image_path: Path of the file to read (opened in binary mode).

    Returns:
        str: Standard-alphabet base64 encoding of the whole file.
    """
    with open(image_path, "rb") as stream:
        raw_bytes = stream.read()
    encoded_bytes = base64.standard_b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")
def get_image_media_type(image_path):
    """Return the MIME type matching the extension of *image_path*.

    The extension is compared case-insensitively. Extensions that are not
    listed below (including a missing extension) yield 'image/jpeg'.

    Args:
        image_path: File name or path whose suffix is inspected.

    Returns:
        str: One of 'image/jpeg', 'image/png', 'image/gif', 'image/webp'.
    """
    known_types = (
        ('.jpg', 'image/jpeg'),
        ('.jpeg', 'image/jpeg'),
        ('.png', 'image/png'),
        ('.gif', 'image/gif'),
        ('.webp', 'image/webp'),
    )
    extension = Path(image_path).suffix.lower()
    for suffix, mime in known_types:
        if extension == suffix:
            return mime
    return 'image/jpeg'
def ocr_with_claude(image_path, client):
    """Send one page image to the Claude Messages API and return the transcription.

    Files whose extension is JPEG/PNG/GIF/WebP are uploaded unchanged. Any
    other file (e.g. the .bmp/.tif/.tiff images that process_folder also
    collects) is decoded with OpenCV and re-encoded to PNG in memory, because
    the original code labelled such files 'image/jpeg' while sending their
    raw bytes, which does not match a media type the API accepts.

    Args:
        image_path: Path of the image to transcribe.
        client: Object exposing ``messages.create(...)`` (an
            ``anthropic.Anthropic`` instance in this script).

    Returns:
        str: The ``text`` of the first content block of the response.

    Raises:
        ValueError: If an image in an unsupported format cannot be decoded
            or re-encoded to PNG.
    """
    directly_supported = ('.jpg', '.jpeg', '.png', '.gif', '.webp')
    if Path(image_path).suffix.lower() in directly_supported:
        image_data = image_to_base64(image_path)
        media_type = get_image_media_type(image_path)
    else:
        # Unsupported container (BMP, TIFF, ...): transcode to PNG so the
        # declared media type matches the bytes actually sent.
        decoded = cv2.imread(str(image_path))
        if decoded is None:
            raise ValueError(f"Nu s-a putut incarca: {image_path}")
        encoded_ok, png_buffer = cv2.imencode('.png', decoded)
        if not encoded_ok:
            raise ValueError(f"Nu s-a putut converti in PNG: {image_path}")
        image_data = base64.standard_b64encode(png_buffer.tobytes()).decode("utf-8")
        media_type = 'image/png'

    # Prompt sent verbatim to the model (Romanian); the markers it asks for
    # ([P], [I], [B], [TABEL], ...) are the ones parsed later in this file.
    prompt_text = """Transcrie EXACT textul din aceasta pagina de carte medicala romaneasca.
REGULI DE FORMATARE:
1. Marcheaza paragrafele noi cu [P]
2. Marcheaza textul ITALIC cu [I]...[/I] (subtitluri, termeni medicali, nume autori)
3. Marcheaza textul BOLD cu [B]...[/B] (titluri tabele, cuvinte-cheie, doze)
4. Pentru TABELE, foloseste formatul:
[TABEL]
Coloana1 | Coloana2 | Coloana3
Valoare1 | Valoare2 | Valoare3
[/TABEL]
5. Pastreaza diacriticele (ă, â, î, ș, ț)
6. Pastreaza ortografia originala (sînt, cînd, vîrstă)
7. Marcheaza [FIGURA] unde apar imagini/diagrame
8. [?] doar pentru cuvinte complet ilizibile
9. Numarul paginii la final
Textul:"""

    message = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=8000,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": media_type,
                            "data": image_data,
                        },
                    },
                    {
                        "type": "text",
                        "text": prompt_text,
                    },
                ],
            }
        ],
    )
    return message.content[0].text
def get_font(font_type, size):
    """Load a serif font of the requested style and size.

    Candidate font files are tried in order (Windows Times New Roman, Windows
    Georgia, then DejaVu Serif on Linux); the first one that exists and loads
    is returned. An unrecognised *font_type* is treated as 'regular'.

    Args:
        font_type: 'regular', 'bold', 'italic' or 'bolditalic'.
        size: Font size passed to ``ImageFont.truetype``.

    Returns:
        A Pillow font object. If no candidate can be loaded, Pillow's default
        font is returned (at *size* when the installed Pillow supports a
        sized default, otherwise the fixed-size bitmap default).
    """
    candidates_by_style = {
        'regular': [
            "C:/Windows/Fonts/times.ttf",
            "C:/Windows/Fonts/georgia.ttf",
            "/usr/share/fonts/truetype/dejavu/DejaVuSerif.ttf",
        ],
        'bold': [
            "C:/Windows/Fonts/timesbd.ttf",
            "C:/Windows/Fonts/georgiab.ttf",
            "/usr/share/fonts/truetype/dejavu/DejaVuSerif-Bold.ttf",
        ],
        'italic': [
            "C:/Windows/Fonts/timesi.ttf",
            "C:/Windows/Fonts/georgiai.ttf",
            "/usr/share/fonts/truetype/dejavu/DejaVuSerif-Italic.ttf",
        ],
        'bolditalic': [
            "C:/Windows/Fonts/timesbi.ttf",
            "C:/Windows/Fonts/georgiaz.ttf",
            "/usr/share/fonts/truetype/dejavu/DejaVuSerif-BoldItalic.ttf",
        ],
    }
    # Unknown styles fall back to the regular list, whose first entry is the
    # single path the original fallback tried.
    font_paths = candidates_by_style.get(font_type, candidates_by_style['regular'])

    for fp in font_paths:
        if not Path(fp).exists():
            continue
        try:
            return ImageFont.truetype(fp, size)
        except Exception:
            # Unreadable or invalid font file: keep trying the next candidate.
            continue

    # Last resort. Newer Pillow releases accept a size for the default font;
    # older ones raise TypeError, in which case the bitmap default is used.
    try:
        return ImageFont.load_default(size)
    except (TypeError, ValueError, OSError, ImportError):
        return ImageFont.load_default()
def get_background_color(img):
    """Estimate the page background colour from the bright pixels of *img*.

    Pixels whose grayscale value is above 200 are treated as background and
    their mean colour is returned in RGB order.

    Args:
        img: Image array in BGR channel order (as converted below with
            cv2.COLOR_BGR2GRAY / cv2.COLOR_BGR2RGB).

    Returns:
        tuple: (R, G, B) integer components; (250, 245, 230) - a light
        cream - when no pixel is brighter than the threshold.
    """
    light_cream = (250, 245, 230)

    brightness = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    is_background = brightness > 200
    if not np.any(is_background):
        return light_cream

    rgb_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    mean_rgb = np.mean(rgb_image[is_background], axis=0)
    return tuple(mean_rgb.astype(int))
def draw_justified_text(draw, text, font, x, y, max_width, fill):
    """Draw one line of text justified between x and x + max_width.

    The line is split on whitespace and the free horizontal space is shared
    equally between the words. When the words alone are at least as wide as
    *max_width*, the width of a regular space is used as the gap instead.
    A line with zero or one word is drawn as-is at (x, y).

    Args:
        draw: Object providing ``textbbox`` and ``text`` (a Pillow ImageDraw).
        text: The line to render.
        font: Font passed through to *draw*.
        x, y: Top-left drawing position.
        max_width: Target width of the justified line.
        fill: Text colour passed through to *draw*.
    """
    tokens = text.split()
    if len(tokens) < 2:
        draw.text((x, y), text, fill=fill, font=font)
        return

    def measure(token):
        box = draw.textbbox((0, 0), token, font=font)
        return box[2] - box[0]

    token_widths = [measure(token) for token in tokens]
    slack = max_width - sum(token_widths)

    # Distribute the leftover width over the gaps, or fall back to a normal
    # space when there is nothing left to distribute.
    if slack > 0:
        gap = slack / (len(tokens) - 1)
    else:
        gap = draw.textbbox((0, 0), " ", font=font)[2]

    cursor_x = x
    for token, token_width in zip(tokens, token_widths):
        draw.text((cursor_x, y), token, fill=fill, font=font)
        cursor_x += token_width + gap
def parse_table(table_text):
    """Split table text into a list of rows, each a list of cell strings.

    Blank lines are dropped. Every remaining line is split on '|' and each
    cell is stripped; a line without '|' becomes a single-cell row.

    Args:
        table_text: Raw text found between the table markers.

    Returns:
        list[list[str]]: One inner list per non-empty line.
    """
    stripped_lines = (raw.strip() for raw in table_text.strip().split('\n'))
    # Splitting a line that has no '|' yields the line itself, so the same
    # expression covers both the multi-column and the single-cell case.
    return [
        [cell.strip() for cell in row_text.split('|')]
        for row_text in stripped_lines
        if row_text
    ]
def draw_table(draw, rows, fonts, x, y, max_width, line_height, text_color):
    """Render table rows as equally wide text columns and return the next y.

    The available width is divided evenly by the largest number of cells in
    any row. The first row is drawn with fonts['bold'], the others with
    fonts['regular']. A cell wider than its column (minus 10 px) is shortened
    one character at a time, but never below three characters.

    Args:
        draw: Object providing ``textbbox`` and ``text`` (a Pillow ImageDraw).
        rows: List of rows, each a list of cell strings.
        fonts: Mapping with at least the 'bold' and 'regular' fonts.
        x, y: Top-left corner of the table.
        max_width: Total width available for the table.
        line_height: Vertical advance per row.
        text_color: Fill colour for the text.

    Returns:
        The y coordinate just below the last drawn row (``y`` unchanged when
        *rows* is empty).
    """
    if not rows:
        return y

    column_count = max(len(cells) for cells in rows)
    column_width = max_width // column_count if column_count > 0 else max_width
    width_limit = column_width - 10

    def rendered_width(candidate, face):
        box = draw.textbbox((0, 0), candidate, font=face)
        return box[2] - box[0]

    row_y = y
    for row_number, cells in enumerate(rows):
        # Header row (index 0) is bold, body rows are regular.
        face = fonts['regular'] if row_number else fonts['bold']
        for column_number, full_text in enumerate(cells):
            shown = full_text
            # Trim from the right until the cell fits its column.
            while rendered_width(shown, face) > width_limit and len(shown) > 3:
                shown = shown[:-1]
            cell_left = x + column_number * column_width
            draw.text((cell_left, row_y), shown, fill=text_color, font=face)
        row_y += line_height
    return row_y
def _split_content_blocks(ocr_text):
    """Split OCR output into an ordered list of table / paragraph blocks."""
    blocks = []
    # re.split with one capture group alternates: text, table, text, table...
    parts = re.split(r'\[TABEL\](.*?)\[/TABEL\]', ocr_text, flags=re.DOTALL)
    for index, part in enumerate(parts):
        if index % 2 == 1:
            blocks.append({'type': 'table', 'content': part})
            continue
        for para in re.split(r'\[P\]', part):
            para = para.strip()
            if para:
                blocks.append({'type': 'paragraph', 'content': para})
    return blocks


def _pick_paragraph_font(para, fonts):
    """Choose one font for a whole paragraph from the markers it contains."""
    has_italic = '[I]' in para
    has_bold = '[B]' in para
    if has_italic and has_bold:
        return fonts['bolditalic']
    if has_italic:
        return fonts['italic']
    if has_bold:
        return fonts['bold']
    return fonts['regular']


def _blank_page(width, height, bg_color):
    """Create an empty page image and the drawing context bound to it."""
    page = Image.new('RGB', (width, height), color=bg_color)
    return page, ImageDraw.Draw(page)


def create_clean_pages(original_img, ocr_text, output_path_base):
    """Render the OCR text onto clean page images sized like the original.

    The text is split into tables ([TABEL]...[/TABEL]) and paragraphs ([P]).
    Paragraphs are word-wrapped using an estimated character width, drawn
    justified (except their last line) with a first-line indent, and use a
    single font chosen from the [B]/[I] markers found in the paragraph.
    A new page is started whenever the next line or table would pass the
    bottom margin - but only if the current page already has content, so an
    element that is too tall for any page no longer leaves a blank page behind.

    Args:
        original_img: Source image array; only its shape and bright-pixel
            colour are used.
        ocr_text: Marked-up transcription to render.
        output_path_base: Path prefix for the output files.

    Returns:
        list[Path]: Saved files - ``<base>_clean.jpg`` for a single page,
        otherwise ``<base>_clean_p<N>.jpg`` for each page.
    """
    height, width = original_img.shape[:2]
    bg_color = get_background_color(original_img)

    # Font size proportional to the page height, never below 48.
    base_font_size = max(48, int(height / 50))
    fonts = {
        'regular': get_font('regular', base_font_size),
        'bold': get_font('bold', base_font_size),
        'italic': get_font('italic', base_font_size),
        'bolditalic': get_font('bolditalic', base_font_size),
    }
    text_color = (20, 20, 20)

    # Small margins so the text area stays wide.
    margin_left = int(width * 0.05)
    margin_right = int(width * 0.05)
    margin_top = int(height * 0.04)
    margin_bottom = int(height * 0.04)
    paragraph_indent = int(base_font_size * 1.5)
    line_height = int(base_font_size * 1.35)
    text_width = width - margin_left - margin_right
    avg_char_width = base_font_size * 0.42
    chars_per_line = int(text_width / avg_char_width)
    # textwrap rejects widths <= 0 (possible for very small images).
    wrap_width = max(1, chars_per_line - 3)
    page_bottom = height - margin_bottom

    pages = []
    current_page, draw = _blank_page(width, height, bg_color)
    y_position = margin_top

    for block in _split_content_blocks(ocr_text):
        if block['type'] == 'table':
            rows = parse_table(block['content'])
            needed_height = len(rows) * line_height + line_height
            # Break only when the page already holds something.
            if y_position > margin_top and y_position + needed_height > page_bottom:
                pages.append(current_page)
                current_page, draw = _blank_page(width, height, bg_color)
                y_position = margin_top
            y_position = draw_table(draw, rows, fonts, margin_left, y_position,
                                    text_width, line_height, text_color)
            y_position += line_height // 2
        elif block['type'] == 'paragraph':
            para = block['content']
            # Formatting markers are removed before wrapping and drawing.
            clean_para = re.sub(r'\[/?[BI]\]', '', para)
            wrapped = textwrap.wrap(clean_para, width=wrap_width)
            # The font depends only on the paragraph, not on the line.
            current_font = _pick_paragraph_font(para, fonts)
            last_index = len(wrapped) - 1
            for i, line in enumerate(wrapped):
                if y_position > margin_top and y_position + line_height > page_bottom:
                    pages.append(current_page)
                    current_page, draw = _blank_page(width, height, bg_color)
                    y_position = margin_top
                # The first line of a paragraph is indented.
                if i == 0:
                    x_pos = margin_left + paragraph_indent
                    line_max_width = text_width - paragraph_indent
                else:
                    x_pos = margin_left
                    line_max_width = text_width
                # The last line is left-aligned, all others are justified.
                if i == last_index:
                    draw.text((x_pos, y_position), line, fill=text_color, font=current_font)
                else:
                    draw_justified_text(draw, line, current_font, x_pos, y_position,
                                        line_max_width, text_color)
                y_position += line_height
            # Extra spacing between paragraphs.
            y_position += line_height // 4

    # The page being filled is always emitted, even when it is the only one.
    pages.append(current_page)

    saved_paths = []
    if len(pages) == 1:
        output_path = Path(str(output_path_base) + "_clean.jpg")
        pages[0].save(output_path, quality=95, dpi=(300, 300))
        saved_paths.append(output_path)
    else:
        for i, page in enumerate(pages, 1):
            output_path = Path(str(output_path_base) + f"_clean_p{i}.jpg")
            page.save(output_path, quality=95, dpi=(300, 300))
            saved_paths.append(output_path)
    return saved_paths
def process_image(image_path, output_folder, client):
    """OCR a single image, save its text and its re-rendered clean pages.

    Steps: load the image with OpenCV, transcribe it via ocr_with_claude,
    write ``<stem>.txt`` into *output_folder*, render the clean page image(s)
    with create_clean_pages, then pause for one second before returning.

    Args:
        image_path: Path of the image to process.
        output_folder: Directory (Path) receiving the outputs.
        client: API client forwarded to ocr_with_claude.

    Returns:
        str: The OCR text of the image.

    Raises:
        ValueError: If OpenCV cannot load the image.
    """
    source = Path(image_path)
    img = cv2.imread(str(image_path))
    if img is None:
        raise ValueError(f"Nu s-a putut incarca: {image_path}")
    stem = source.stem
    print(f"\n Procesare: {source.name}")

    # OCR step
    print(" -> OCR cu Claude API...")
    ocr_text = ocr_with_claude(image_path, client)

    # Save the transcription next to the rendered pages
    text_file = output_folder / f"{stem}.txt"
    text_file.write_text(ocr_text, encoding='utf-8')
    print(f" -> Text: {text_file.name}")

    # Render the clean pages
    rendered_pages = create_clean_pages(img, ocr_text, output_folder / stem)
    for page_file in rendered_pages:
        print(f" -> Imagine: {page_file.name}")

    # Fixed one-second pause after each image (presumably to space out API
    # calls - confirm before removing).
    time.sleep(1)
    return ocr_text
def process_folder(input_folder, output_folder):
    """Process every image in *input_folder* and write results to *output_folder*.

    Regular files directly inside *input_folder* whose extension is one of
    .jpg/.jpeg/.png/.bmp/.tiff/.tif (compared case-insensitively, so mixed
    spellings such as ``.Jpg`` are found too) are processed in sorted order.
    Per-image failures are reported and counted without stopping the run.
    The text of all successfully processed images is also written to
    ``_TEXT_COMPLET.txt``.

    Args:
        input_folder: Directory containing the page images.
        output_folder: Directory for the outputs; created if missing.

    Returns:
        None. Returns early (after creating *output_folder*) when no image
        is found or *input_folder* is not a directory.
    """
    input_path = Path(input_folder)
    output_path = Path(output_folder)
    output_path.mkdir(parents=True, exist_ok=True)

    wanted_suffixes = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif'}
    images = []
    if input_path.is_dir():
        images = sorted(
            entry for entry in input_path.iterdir()
            if entry.is_file() and entry.suffix.lower() in wanted_suffixes
        )
    if not images:
        print(f"Nu s-au gasit imagini in: {input_folder}")
        return
    print(f"Gasite {len(images)} imagini de procesat")

    # The API client is only needed once there is something to process.
    client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)

    success = 0
    failed = 0
    all_text = []
    for i, img_path in enumerate(images, 1):
        print(f"\n[{i}/{len(images)}]", end="")
        try:
            text = process_image(img_path, output_path, client)
            all_text.append(f"\n{'='*60}\n{img_path.name}\n{'='*60}\n\n{text}")
            success += 1
        except Exception as e:
            # Top-level boundary: report the failure and continue with the
            # remaining images.
            print(f" [EROARE] {img_path.name}: {e}")
            failed += 1

    combined_path = output_path / "_TEXT_COMPLET.txt"
    with open(combined_path, 'w', encoding='utf-8') as f:
        f.write("\n".join(all_text))
    print(f"\n\nText complet: {combined_path}")
    print(f"\n{'='*60}")
    print(f"REZULTAT: {success} procesate, {failed} erori")
    print(f"{'='*60}")
if __name__ == "__main__":
    # Script entry point.
    # Usage: python <script> [input_folder [output_folder]]
    # Without arguments the built-in folders below are used, exactly as before.
    default_input_folder = r"e:\Carte\BB\17 - Site Leadership\alte\Ionel Balauta\Aryeht\Task 1 - Traduce tot site-ul\Doar Google Web\Andreea\Meditatii\2023\Edit Text Images (Remove shadows + Remove red LInes)\TTT"
    default_output_folder = r"e:\Carte\BB\17 - Site Leadership\alte\Ionel Balauta\Aryeht\Task 1 - Traduce tot site-ul\Doar Google Web\Andreea\Meditatii\2023\Edit Text Images (Remove shadows + Remove red LInes)\Output"
    cli_args = sys.argv[1:]
    input_folder = cli_args[0] if len(cli_args) >= 1 else default_input_folder
    output_folder = cli_args[1] if len(cli_args) >= 2 else default_output_folder
    print("=" * 60)
    print("OCR + Pagini Curate - FORMAT CARTE MEDICALA")
    print("=" * 60)
    process_folder(input_folder, output_folder)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment