Skip to content

Instantly share code, notes, and snippets.

@hannsen
Created August 4, 2024 19:11
Show Gist options
  • Save hannsen/a4dbe4815fa3e46482cc5e2269d5bec2 to your computer and use it in GitHub Desktop.
Save hannsen/a4dbe4815fa3e46482cc5e2269d5bec2 to your computer and use it in GitHub Desktop.
Translate an SRT file with the DeepL web UI using Selenium
# Make imports
import time
from random import randint
from tkinter import Tk
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as e_c
from selenium.common.exceptions import ElementClickInterceptedException
import re
import argparse
def get_driver():
    """Launch Chrome, open the DeepL web translator and dismiss start-up overlays.

    The translator is opened with the language pair fixed by the URL fragment
    (``#es/en-us/``, i.e. Spanish to US English).

    Returns:
        The ``webdriver.Chrome`` instance, positioned on the translator page.

    Raises:
        AssertionError: if the page title is not the expected DeepL title.
        selenium.common.exceptions.TimeoutException: if no cookie-banner
            button becomes visible within 15 seconds.
        ElementClickInterceptedException: if another element swallows a click.

    If anything fails after the browser has been started, the browser is
    closed before the exception propagates, so that a caller retrying this
    function does not leave an orphaned Chrome window behind.
    """
    options = webdriver.ChromeOptions()
    options.add_experimental_option('prefs', {'intl.accept_languages': 'en,en_US'})
    options.add_argument("--lang=en-US")
    options.add_argument("--lang=en-GB")
    options.add_argument("--start-maximized")
    driver = webdriver.Chrome(options=options)
    try:
        driver.get('https://www.deepl.com/en/translator#es/en-us/')
        assert "DeepL Translate: The world's most accurate translator" in driver.title

        # Either variant of the cookie banner may be shown; accept/close it.
        cookie_btn = WebDriverWait(driver, 15).until(
            e_c.visibility_of_element_located((
                By.CSS_SELECTOR,
                '[data-testid="cookie-banner-strict-accept-all"], [data-testid="cookie-banner-lax-close-button"]',
            ))
        )
        time.sleep(1)

        # Nudge the mouse pointer before clicking the banner button.
        action = webdriver.ActionChains(driver)
        action.move_by_offset(10, 20)
        action.move_by_offset(10, 20)
        action.perform()
        cookie_btn.click()
        time.sleep(1)

        # Close an alert dialog if one is present. A single lookup is used so
        # the element cannot vanish between "is it there?" and "fetch it".
        dialog_close_buttons = driver.find_elements(
            By.CSS_SELECTOR, '[role="alertdialog"] [data-action="close"]'
        )
        if dialog_close_buttons:
            dialog_close_buttons[0].click()
            time.sleep(2)
    except BaseException:
        # Do not leak the browser process when setup fails part-way through.
        driver.quit()
        raise
    return driver
# Start the shared browser session as soon as the module is loaded.
# A click swallowed by an overlay is retried exactly once.
try:
    driver = get_driver()
except ElementClickInterceptedException:
    driver = get_driver()

# Source text box of the translator; every subtitle is typed into it.
text_in = driver.find_element(By.CSS_SELECTOR, '[data-testid="translator-source-input"]')
text_in.click()
time.sleep(0.5)

# Most recent translation handed out by get_clipboard_change().
last_clipboard = ''
def clip_content():
    """Return the text currently shown in the translation target box.

    Despite the name, the system clipboard is not involved: the text is read
    from the page via JavaScript (``textContent`` of the target textbox).
    The former Tk clipboard code that followed the ``return`` statement could
    never execute and has been removed.

    Returns:
        The ``textContent`` of the target textbox element.
    """
    return driver.execute_script('return document.querySelector(\'div[aria-labelledby="translation-target-heading"][role="textbox"]\').textContent;')
def get_clipboard_change(orig_text):
    """Poll the translation output until it settles on a new value.

    Each pass reads the output twice (via ``clip_content``), roughly 0.9 s
    apart. The result is accepted once both reads agree and differ from the
    previously returned translation (``last_clipboard``). After the failed
    passes are used up (``tries`` 0 through 6), the most recent read is
    returned regardless.

    Args:
        orig_text: The source text that was typed in; only its length is
            used, for the one-off "safety" re-check below.

    Returns:
        The translation text; it is also stored in the module-level
        ``last_clipboard``.
    """
    global last_clipboard
    trans = ''
    tries = 0
    safety_try = False
    while tries <= 6:
        time.sleep(1)
        # A disabled earlier variant waited for and clicked the toolbar copy
        # button ('[data-testid="translator-target-toolbar-copy"]') before
        # each read; the reads below go through clip_content() instead.
        time.sleep(0.1)
        trans = clip_content()
        time.sleep(0.8)
        time.sleep(0.1)
        trans2 = clip_content()
        if trans == trans2 and trans != last_clipboard:
            # NOTE(review): this length condition holds for almost any
            # translation, so the extra pass runs on nearly every call. It may
            # have been meant to catch a suspiciously *short* result; confirm.
            if len(trans) < (len(orig_text) * 2) and not safety_try:
                print('WHOOPS safety try')
                safety_try = True  # translation might not be finished yet
                # The safety pass does not count against `tries`.
                continue
            break
        print('WHOOPS next try')
        tries += 1
    last_clipboard = trans
    return last_clipboard
def clear_input():
    """Click the source box's clear button, then pause for 0.2 to 1.0 seconds."""
    time.sleep(0.1)
    driver.find_element(
        By.CSS_SELECTOR, '[data-testid="translator-source-clear-button"]'
    ).click()
    # randint(2, 10) / 10 gives a random delay in 0.1 s steps.
    pause_seconds = randint(2, 10) / 10
    time.sleep(pause_seconds)
def removeShitStains():
    """Close the app-ad callout and the recommendation banner if they are shown.

    For each known overlay the close button is looked up once; when present,
    it is clicked and the function waits 2 seconds. A single lookup per
    overlay avoids the failure that occurred when the element disappeared
    between the existence check and the second query.
    """
    close_button_selectors = (
        '[aria-labelledby="callout-heading-app-ad-callout"] [aria-label="ariaCloseLabel"]',
        '[aria-labelledby^="recommendation-banner-title-"] [aria-label="Close"]',
    )
    for selector in close_button_selectors:
        close_buttons = driver.find_elements(By.CSS_SELECTOR, selector)
        if close_buttons:
            close_buttons[0].click()
            time.sleep(2)
def parse_srt(srt_content):
    """Parse SRT text into a list of subtitle dictionaries.

    Args:
        srt_content: Full text of an SRT file. A leading byte-order mark,
            Windows (CRLF) or old Mac (CR) line endings, and runs of several
            blank lines between cues are all tolerated.

    Returns:
        A list of dicts with the keys ``'index'``, ``'start'``, ``'end'`` and
        ``'text'`` (all strings; multi-line text is joined with ``'\\n'``).
        Empty or whitespace-only input yields an empty list.

    Raises:
        ValueError: if a cue lacks a timing line or the timing line does not
            start with ``HH:MM:SS,mmm --> HH:MM:SS,mmm``.
    """
    timing_pattern = re.compile(r'(\d{2}:\d{2}:\d{2},\d{3}) --> (\d{2}:\d{2}:\d{2},\d{3})')

    # Normalise before splitting: otherwise a BOM ends up in the first index
    # and CRLF files never contain the '\n\n' cue separator.
    normalized = (
        srt_content.lstrip('\ufeff')
        .replace('\r\n', '\n')
        .replace('\r', '\n')
        .strip()
    )
    if not normalized:
        return []

    subtitles = []
    # Two or more consecutive newlines separate cues.
    for block in re.split(r'\n{2,}', normalized):
        lines = block.split('\n')
        if len(lines) < 2:
            raise ValueError(f"Malformed SRT block (missing timing line): {block!r}")
        timing_match = timing_pattern.match(lines[1])
        if timing_match is None:
            raise ValueError(f"Malformed SRT timing line: {lines[1]!r}")
        start, end = timing_match.groups()
        subtitles.append({
            'index': lines[0],
            'start': start,
            'end': end,
            'text': "\n".join(lines[2:]),
        })
    return subtitles
def translate_text(text, source_lang="ES", target_lang="EN"):
    """Type ``text`` into the translator page and return the translation.

    The text is sent to the source box, the result is collected with
    ``get_clipboard_change``, and afterwards the source box is cleared and
    any overlays are dismissed so the page is ready for the next call.

    Args:
        text: Source text to translate.
        source_lang: Accepted but not used by this function.
        target_lang: Accepted but not used by this function.

    Returns:
        The translated text.
    """
    print('Translatin ' + text)
    text_in.send_keys(text)
    translated = get_clipboard_change(text)
    print('Gottttttin ' + translated)

    # Leave the page in a clean state for the next subtitle.
    clear_input()
    removeShitStains()
    print('')
    return translated
def _reset_translator_input():
    """Best-effort clean-up before a retry: close overlays, then empty the source box."""
    for cleanup_step in (removeShitStains, clear_input):
        try:
            cleanup_step()
        except Exception as cleanup_error:
            # Deliberately tolerated: a step may fail simply because there is
            # nothing to clean up (presumably the clear button is missing
            # when the box is already empty; verify against the page).
            print('Cleanup step failed: ' + type(cleanup_error).__name__)


def translate_srt_to_english(srt_file_path, output_srt_file_path):
    """Translate every subtitle of an SRT file and write the result as a new SRT file.

    Subtitles are translated one at a time through ``translate_text`` (the
    browser-driven DeepL web page, not the DeepL API). A failed translation
    is retried once; before the retry the page is reset so the text is not
    typed on top of leftovers from the failed attempt. A second failure
    propagates to the caller and no output file is written.

    Args:
        srt_file_path: Path of the UTF-8 encoded input SRT file.
        output_srt_file_path: Path the translated SRT file is written to.
    """
    with open(srt_file_path, 'r', encoding='utf-8') as srt_file:
        subtitles = parse_srt(srt_file.read())

    for subtitle in subtitles:
        try:
            translated_text = translate_text(subtitle['text'])
        except Exception as e:
            print('Borked, trying again: ' + type(e).__name__)
            print(e)
            _reset_translator_input()
            translated_text = translate_text(subtitle['text'])
        subtitle['text'] = translated_text

    with open(output_srt_file_path, 'w', encoding='utf-8') as output_srt_file:
        output_srt_file.writelines(
            f"{subtitle['index']}\n"
            f"{subtitle['start']} --> {subtitle['end']}\n"
            f"{subtitle['text']}\n\n"
            for subtitle in subtitles
        )
    print(f"Translated SRT file saved as {output_srt_file_path}")
if __name__ == "__main__":
    # Script entry point: translate the SRT file given on the command line.
    import os

    parser = argparse.ArgumentParser(description="Translate SRT file from Spanish to English using the DeepL web UI")
    parser.add_argument("srt_file_path", help="Path to the input SRT file")
    args = parser.parse_args()

    # "<name> orig.srt" -> "<name>.srt". Only a trailing match is rewritten,
    # so directory names containing " orig.srt" are left alone.
    orig_suffix = ' orig.srt'
    if args.srt_file_path.endswith(orig_suffix):
        output_srt_file_path = args.srt_file_path[:-len(orig_suffix)] + '.srt'
    else:
        # Without the " orig.srt" suffix the output path used to equal the
        # input path, overwriting the source file. Write next to it instead.
        base, ext = os.path.splitext(args.srt_file_path)
        output_srt_file_path = base + '.en' + (ext or '.srt')

    try:
        translate_srt_to_english(args.srt_file_path, output_srt_file_path)
    except Exception as e:
        print(e)
        # Keep the browser open for a while so the failure can be inspected.
        print("Sleeping for debug")
        time.sleep(600)
    finally:
        # Always release the browser and its driver process.
        driver.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment