i80and · September 14, 2025 20:16
diff --git a/srt_translation.py b/srt_translation.py
 import sys
 from pathlib import Path
 from pydantic import BaseModel
 import ollama

 # Model tag handed to ollama.chat() for every translation request.
 MODEL = "qwen3:30b-a3b-thinking-2507-q4_K_M"
 # System message placed ahead of each per-subtitle user prompt.
 SYSTEM_PROMPT = (
    "You are a translation engine which will translate subtitles "
    "accurately while maintaining context and natural flow."
 )

 # Define the schema for the response
 # translate() sends TranslationResult.model_json_schema() to ollama.chat() as
 # the `format` argument and validates the reply against this model.
 # NOTE: deliberately documented with comments only -- Pydantic copies a model
 # docstring into the generated JSON schema, which would change the request.
 class TranslationResult(BaseModel):
    # The translated subtitle text; the only field translate() returns.
    translation: str
    # Boolean required by the schema; nothing in this file reads it.
    confident: bool

 def create_request_template(text: str, from_lang: str, to_lang: str, previous_corpus: str, next_corpus: str | None) -> str:
    parts = [
        f"I have the following text in {from_lang} which I would like to translate to {to_lang}:\n\n",
        f'"{text}"\n\n'
    ]

    if previous_corpus:
        parts.extend([
            "For context, here are the previous 3 subtitles (already translated):\n",
            previous_corpus,
            "\n\n"
        ])

    if next_corpus:
        parts.extend([
            "For additional context, here is the next subtitle (untranslated):\n",
            f'"{next_corpus}"\n\n'
        ])

    parts.append("Please provide an accurate translation that maintains natural flow and context.")

    return ''.join(parts)

 def translate(text: str, from_lang: str, to_lang: str, previous_corpus: str = "", next_corpus: str | None = None) -> str:
    """
    Translate one subtitle by sending a single chat request to MODEL.

    The user prompt comes from create_request_template(). The reply is
    requested in the JSON shape of TranslationResult and validated against it.

    Returns only the `translation` field of the validated reply.
    """
    conversation = [
        {'role': 'system', 'content': SYSTEM_PROMPT},
        {
            'role': 'user',
            'content': create_request_template(text, from_lang, to_lang, previous_corpus, next_corpus),
        },
    ]

    reply = ollama.chat(
        model=MODEL,
        messages=conversation,
        # Constrain the output to the schema generated by Pydantic.
        format=TranslationResult.model_json_schema(),
        # Make responses more deterministic
        options={'temperature': 0},
    )

    return TranslationResult.model_validate_json(reply.message.content).translation

 def parse_subtitles(text: str) -> list[tuple[int, str, str]]:
    """
    Parse SRT subtitle file format.

    A leading byte-order mark, CRLF or bare-CR line endings, and runs of
    several blank lines between blocks are tolerated.

    Returns a list of tuples: (subtitle_number, timestamp, subtitle_text)
    Raises ValueError on empty or malformed input.
    """
    # str.strip() does not remove U+FEFF, so a BOM would otherwise sit in
    # front of the first subtitle number and make int() reject it.
    normalized = text.lstrip('\ufeff')
    # Unify line endings; with CRLF input the '\n\n' separator never matches.
    normalized = normalized.replace('\r\n', '\n').replace('\r', '\n').strip()

    if not normalized:
        raise ValueError("Empty input text")

    subtitles = []

    for raw_block in normalized.split('\n\n'):
        block = raw_block.strip()

        # Extra blank lines between blocks leave empty fragments after the
        # split; they are separators, not malformed subtitles.
        if not block:
            continue

        lines = block.split('\n')

        # Each subtitle block must have at least 3 lines: number, timestamp, text
        if len(lines) < 3:
            raise ValueError(f"Invalid subtitle block: {block}")

        # Parse subtitle number
        try:
            subtitle_num = int(lines[0].strip())
        except ValueError as err:
            raise ValueError(f"Invalid subtitle number: {lines[0]}") from err

        # Extract timestamp (second line)
        timestamp = lines[1].strip()

        # Validate timestamp format (basic check for arrow)
        if ' --> ' not in timestamp:
            raise ValueError(f"Invalid timestamp format: {timestamp}")

        # The block is stripped and has at least three lines, so the joined
        # text always ends in a non-blank character and cannot be empty.
        subtitle_text = '\n'.join(lines[2:]).strip()

        subtitles.append((subtitle_num, timestamp, subtitle_text))

    return subtitles

 def main() -> None:
    """
    Translate the SRT file named by the first command-line argument.

    Subtitles are translated one at a time from Dutch to English. Each
    request carries up to three previously translated subtitles and the next
    untranslated subtitle as context. Every translated subtitle is printed
    to stdout in SRT layout as soon as it is available.

    Exits with a usage message when no input path is given.
    """
    if len(sys.argv) < 2:
        # sys.exit() with a string prints it to stderr and exits with status 1.
        sys.exit("Usage: srt_translation.py <input.srt>")

    source_path = Path(sys.argv[1])
    try:
        # Decode explicitly instead of by locale; utf-8-sig also drops a BOM.
        input_srt = source_path.read_text(encoding="utf-8-sig")
    except UnicodeDecodeError:
        # Not UTF-8: fall back to the platform default encoding.
        input_srt = source_path.read_text()

    subtitles = parse_subtitles(input_srt)
    translated_subtitles = []

    for i, (sequence_number, timestamp, subtitle_text) in enumerate(subtitles):
        # Build context from previous 3 translated subtitles; joining an
        # empty slice yields "" for the very first subtitle.
        previous_corpus = "\n\n".join(
            f"{prev_num}\n{prev_time}\n{prev_text}"
            for prev_num, prev_time, prev_text in translated_subtitles[-3:]
        )

        # Get next subtitle for context (if available)
        next_corpus = subtitles[i + 1][2] if i + 1 < len(subtitles) else None

        # Translate with context
        translation = translate(
            subtitle_text,
            "Dutch",
            "English",
            previous_corpus,
            next_corpus,
        )

        # Store translated subtitle
        translated_subtitles.append((sequence_number, timestamp, translation))

        # Output the translated subtitle in SRT format
        print(sequence_number)
        print(timestamp)
        print(translation)
        print()

 # Run the translation only when executed as a script, not when imported.
 if __name__ == "__main__":
    main()
	import sys
	from pathlib import Path
	from pydantic import BaseModel
	import ollama

	# Model tag handed to ollama.chat() for every translation request.
	MODEL = "qwen3:30b-a3b-thinking-2507-q4_K_M"
	# System message placed ahead of each per-subtitle user prompt.
	SYSTEM_PROMPT = (
	    "You are a translation engine which will translate subtitles "
	    "accurately while maintaining context and natural flow."
	)

	# Define the schema for the response
	# translate() sends TranslationResult.model_json_schema() to ollama.chat() as
	# the `format` argument and validates the reply against this model.
	# NOTE: deliberately documented with comments only -- Pydantic copies a model
	# docstring into the generated JSON schema, which would change the request.
	class TranslationResult(BaseModel):
	    # The translated subtitle text; the only field translate() returns.
	    translation: str
	    # Boolean required by the schema; nothing in this file reads it.
	    confident: bool

	def create_request_template(text: str, from_lang: str, to_lang: str, previous_corpus: str, next_corpus: str \| None) -> str:
	parts = [
	f"I have the following text in {from_lang} which I would like to translate to {to_lang}:\n\n",
	f'"{text}"\n\n'
	]

	if previous_corpus:
	parts.extend([
	"For context, here are the previous 3 subtitles (already translated):\n",
	previous_corpus,
	"\n\n"
	])

	if next_corpus:
	parts.extend([
	"For additional context, here is the next subtitle (untranslated):\n",
	f'"{next_corpus}"\n\n'
	])

	parts.append("Please provide an accurate translation that maintains natural flow and context.")

	return ''.join(parts)

	def translate(text: str, from_lang: str, to_lang: str, previous_corpus: str = "", next_corpus: str | None = None) -> str:
	    """
	    Translate one subtitle by sending a single chat request to MODEL.

	    The user prompt comes from create_request_template(). The reply is
	    requested in the JSON shape of TranslationResult and validated against it.

	    Returns only the `translation` field of the validated reply.
	    """
	    prompt = create_request_template(text, from_lang, to_lang, previous_corpus, next_corpus)

	    response = ollama.chat(
	        model=MODEL,
	        messages=[
	            {'role': 'system', 'content': SYSTEM_PROMPT},
	            {'role': 'user', 'content': prompt},
	        ],
	        format=TranslationResult.model_json_schema(),  # Use Pydantic to generate the schema or format=schema
	        options={'temperature': 0},  # Make responses more deterministic
	    )

	    translated_response = TranslationResult.model_validate_json(response.message.content)
	    return translated_response.translation

	def parse_subtitles(text: str) -> list[tuple[int, str, str]]:
	    """
	    Parse SRT subtitle file format.

	    A leading byte-order mark, CRLF or bare-CR line endings, and runs of
	    several blank lines between blocks are tolerated.

	    Returns a list of tuples: (subtitle_number, timestamp, subtitle_text)
	    Raises ValueError on empty or malformed input.
	    """
	    # str.strip() does not remove U+FEFF, so a BOM would otherwise sit in
	    # front of the first subtitle number and make int() reject it.
	    normalized = text.lstrip('\ufeff')
	    # Unify line endings; with CRLF input the '\n\n' separator never matches.
	    normalized = normalized.replace('\r\n', '\n').replace('\r', '\n').strip()

	    if not normalized:
	        raise ValueError("Empty input text")

	    subtitles = []

	    for raw_block in normalized.split('\n\n'):
	        block = raw_block.strip()

	        # Extra blank lines between blocks leave empty fragments after the
	        # split; they are separators, not malformed subtitles.
	        if not block:
	            continue

	        lines = block.split('\n')

	        # Each subtitle block must have at least 3 lines: number, timestamp, text
	        if len(lines) < 3:
	            raise ValueError(f"Invalid subtitle block: {block}")

	        # Parse subtitle number
	        try:
	            subtitle_num = int(lines[0].strip())
	        except ValueError as err:
	            raise ValueError(f"Invalid subtitle number: {lines[0]}") from err

	        # Extract timestamp (second line)
	        timestamp = lines[1].strip()

	        # Validate timestamp format (basic check for arrow)
	        if ' --> ' not in timestamp:
	            raise ValueError(f"Invalid timestamp format: {timestamp}")

	        # The block is stripped and has at least three lines, so the joined
	        # text always ends in a non-blank character and cannot be empty.
	        subtitle_text = '\n'.join(lines[2:]).strip()

	        subtitles.append((subtitle_num, timestamp, subtitle_text))

	    return subtitles

	def main() -> None:
	    """
	    Translate the SRT file named by the first command-line argument.

	    Subtitles are translated one at a time from Dutch to English. Each
	    request carries up to three previously translated subtitles and the next
	    untranslated subtitle as context. Every translated subtitle is printed
	    to stdout in SRT layout as soon as it is available.

	    Exits with a usage message when no input path is given.
	    """
	    if len(sys.argv) < 2:
	        # sys.exit() with a string prints it to stderr and exits with status 1.
	        sys.exit("Usage: srt_translation.py <input.srt>")

	    source_path = Path(sys.argv[1])
	    try:
	        # Decode explicitly instead of by locale; utf-8-sig also drops a BOM.
	        input_srt = source_path.read_text(encoding="utf-8-sig")
	    except UnicodeDecodeError:
	        # Not UTF-8: fall back to the platform default encoding.
	        input_srt = source_path.read_text()

	    subtitles = parse_subtitles(input_srt)
	    translated_subtitles = []

	    for i, (sequence_number, timestamp, subtitle_text) in enumerate(subtitles):
	        # Build context from previous 3 translated subtitles; joining an
	        # empty slice yields "" for the very first subtitle.
	        previous_corpus = "\n\n".join(
	            f"{prev_num}\n{prev_time}\n{prev_text}"
	            for prev_num, prev_time, prev_text in translated_subtitles[-3:]
	        )

	        # Get next subtitle for context (if available)
	        next_corpus = subtitles[i + 1][2] if i + 1 < len(subtitles) else None

	        # Translate with context
	        translation = translate(
	            subtitle_text,
	            "Dutch",
	            "English",
	            previous_corpus,
	            next_corpus,
	        )

	        # Store translated subtitle
	        translated_subtitles.append((sequence_number, timestamp, translation))

	        # Output the translated subtitle in SRT format
	        print(sequence_number)
	        print(timestamp)
	        print(translation)
	        print()

	# Run the translation only when executed as a script, not when imported.
	if __name__ == '__main__':
	    main()
No results found