Skip to content

Instantly share code, notes, and snippets.

@havardgulldahl
Created February 15, 2025 20:11
Show Gist options
  • Save havardgulldahl/2a7ef3c440d2f0d934c4139259a21cfa to your computer and use it in GitHub Desktop.
Save havardgulldahl/2a7ef3c440d2f0d934c4139259a21cfa to your computer and use it in GitHub Desktop.
A Python script that translates a PDF inline: the translated strings are overlaid on top of the original text, the way translate.google.com does it.
import fitz # PyMuPDF
import asyncio
from googletrans import Translator
from tqdm.asyncio import tqdm # For async progress reporting
__author__ = "[email protected], 2025"
def normalize_color(color_int):
    """
    Split a packed 0xRRGGBB integer into an (r, g, b) tuple of floats in 0.0 - 1.0.
    """
    # Bit offsets of the red, green and blue bytes inside the packed integer.
    channel_shifts = (16, 8, 0)
    # Isolate each 8-bit channel, then scale it from 0-255 down to 0.0-1.0.
    return tuple(
        ((color_int >> shift) & 0xFF) / 255.0 for shift in channel_shifts
    )
def _iter_text_spans(text_dict):
    """Yield every span of every text block; lines of a block are visited bottom-up."""
    for block in text_dict["blocks"]:
        # Only text blocks (type == 0) carry "lines"; other block types are skipped.
        if block["type"] != 0:
            continue
        for line in reversed(block["lines"]):
            yield from line["spans"]


async def _translate_text(translator, text, target_language, cache):
    """Translate *text*, returning it unchanged if the translation call fails.

    Successful results are stored in *cache* (a dict keyed by source string) so
    a string that occurs several times is only sent to the translator once.
    """
    if text in cache:
        return cache[text]
    try:
        result = await translator.translate(text, dest=target_language)
    except Exception as e:  # best effort: one failed span must not abort the run
        # tqdm.write keeps the message from garbling the active progress bars.
        tqdm.write(f"Translation error on text: {text}\nError: {e}")
        return text  # Fallback; not cached, so a later occurrence is retried
    cache[text] = result.text
    return result.text


def _overlay_span(page, span, text, font_kwargs):
    """Cover *span* on *page* with a light gray box and write *text* over it."""
    light_gray = (0.85, 0.85, 0.85)
    x0, y0, x1, y1 = span["bbox"]
    # Extend the span's bounding box by one unit above and below.
    rect = fitz.Rect(x0, y0 - 1, x1, y1 + 1)
    page.draw_rect(rect, color=light_gray, fill=light_gray, overlay=True)
    page.insert_text(
        fitz.Point(rect.x0, rect.y1),  # Bottom-left corner of the extended box
        text,
        # Reduce font size slightly, since the replacement font usually
        # differs from the one the span was originally set in.
        fontsize=span["size"] * 0.9,
        color=normalize_color(span.get("color", 0)),
        **font_kwargs,
    )


async def translate_pdf_async(
    input_pdf_path, output_pdf_path, target_language="es", font_path=None
):
    """
    Asynchronously translate the text of a PDF and overlay it on the original pages.

    Each page of the input is copied into a new document; every non-blank text
    span is translated, covered with a light gray rectangle, and the translated
    string is written over it at 90% of the span's font size in the span's color.
    Text is not reflowed. Progress bars are shown for pages and spans.

    Args:
        input_pdf_path (str): Path to the input PDF.
        output_pdf_path (str): Path to save the translated PDF.
        target_language (str): Target language code for translation (e.g., "es" for Spanish).
        font_path (str): Path to a font file (TTF/OTF) to use for text insertion.
            If None, the default font of ``insert_text`` is used.

    Returns:
        None
    """
    translator = Translator()
    translation_cache = {}
    # A font file has to be registered under a name that is not one of the
    # built-in font names; "F0" is an arbitrary reference name for it.
    font_kwargs = {"fontname": "F0", "fontfile": font_path} if font_path else {}

    print("Translating PDF...")
    # Context managers guarantee both documents and both progress bars are
    # closed even if a page fails part-way through.
    with fitz.open(input_pdf_path) as doc, fitz.open() as translated_doc:
        with tqdm(total=len(doc), desc="Pages", unit="page") as page_progress:
            for page_number, page in enumerate(doc):
                # Copy the original page into the translated document
                new_page = translated_doc.new_page(
                    -1, width=page.rect.width, height=page.rect.height
                )
                new_page.show_pdf_page(new_page.rect, doc, page_number)

                spans = list(_iter_text_spans(page.get_text("dict")))
                with tqdm(
                    total=len(spans),
                    desc=f"Page {page_number + 1}",
                    unit="span",
                    leave=False,
                ) as span_progress:
                    for span in spans:
                        orig_text = span["text"]
                        if orig_text.strip():  # If there's actual text
                            # Show the string currently being translated
                            span_progress.set_description(
                                f"Translating: {orig_text[:30]}..."
                            )
                            translated_text = await _translate_text(
                                translator,
                                orig_text,
                                target_language,
                                translation_cache,
                            )
                            _overlay_span(
                                new_page, span, translated_text, font_kwargs
                            )
                        # Blank spans count toward the total, so tick for them too.
                        span_progress.update(1)
                page_progress.update(1)

        # Save while the source document is still open.
        translated_doc.save(output_pdf_path)
    print(f"Translated PDF saved to {output_pdf_path}")
# Command-line entry point: parse the options and run the translation.
if __name__ == "__main__":
    import argparse

    cli = argparse.ArgumentParser(
        description="Translate a PDF file while preserving its layout."
    )
    # (flag, default value, help text) for each string-valued option.
    option_table = (
        ("--input", "input.pdf", "Path to the input PDF file."),
        ("--output", "output_translated.pdf", "Path to save the translated PDF."),
        ("--lang", "en", "Target language for translation (e.g., 'es')."),
    )
    for flag, fallback, help_text in option_table:
        cli.add_argument(flag, type=str, default=fallback, help=help_text)
    options = cli.parse_args()

    # Drive the coroutine to completion on a fresh event loop.
    asyncio.run(translate_pdf_async(options.input, options.output, options.lang))
@havardgulldahl
Copy link
Author

The code makes no attempt to reflow text or to respect other elements of the PDF page.

The output is not super pretty, but this gets the job done.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment