Created
August 4, 2024 19:11
-
-
Save hannsen/a4dbe4815fa3e46482cc5e2269d5bec2 to your computer and use it in GitHub Desktop.
Translate an SRT file with the DeepL web UI using Selenium
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Make imports | |
import time | |
from random import randint | |
from tkinter import Tk | |
from selenium import webdriver | |
from selenium.webdriver.common.by import By | |
from selenium.webdriver.support.wait import WebDriverWait | |
from selenium.webdriver.support import expected_conditions as e_c | |
from selenium.common.exceptions import ElementClickInterceptedException | |
import re | |
import argparse | |
def get_driver():
    """Start Chrome, open the DeepL translator and dismiss its start-up overlays.

    The page is opened on the Spanish -> US-English translator URL, the
    cookie banner is accepted/closed, and an alert dialog is closed when one
    is present.

    Returns:
        The Chrome WebDriver with the translator page loaded.

    Raises:
        AssertionError: if the loaded page does not carry the expected title.
        ElementClickInterceptedException: if a click is intercepted by another
            element; the module-level caller retries once on this.
        Any other Selenium error raised while waiting for or clicking the
        page elements (e.g. the 15 s cookie-banner wait expiring).

    If anything fails after the browser has been started, the browser is
    closed before the exception propagates, so a retry does not leave an
    orphaned Chrome window behind.
    """
    options = webdriver.ChromeOptions()
    options.add_experimental_option('prefs', {'intl.accept_languages': 'en,en_US'})
    # NOTE(review): two conflicting --lang switches are passed; which one
    # Chrome honours is not visible from here. Kept as in the original.
    options.add_argument("--lang=en-US")
    options.add_argument("--lang=en-GB")
    options.add_argument("--start-maximized")
    driver = webdriver.Chrome(options=options)
    try:
        driver.get('https://www.deepl.com/en/translator#es/en-us/')
        # Raised explicitly (instead of an `assert` statement) so the check
        # is not stripped when Python runs with -O.
        if "DeepL Translate: The world's most accurate translator" not in driver.title:
            raise AssertionError('Unexpected page title: ' + repr(driver.title))

        cookie_btn = WebDriverWait(driver, 15).until(
            e_c.visibility_of_element_located((
                By.CSS_SELECTOR,
                '[data-testid="cookie-banner-strict-accept-all"], [data-testid="cookie-banner-lax-close-button"]',
            ))
        )
        time.sleep(1)
        # Nudge the mouse pointer a little before clicking the banner button.
        action = webdriver.ActionChains(driver)
        action.move_by_offset(10, 20)
        action.move_by_offset(10, 20)
        action.perform()
        cookie_btn.click()
        time.sleep(1)

        # Close the alert dialog if one is present; a single lookup avoids
        # the element disappearing between "exists?" and "fetch it".
        alert_close_buttons = driver.find_elements(
            By.CSS_SELECTOR, '[role="alertdialog"] [data-action="close"]'
        )
        if alert_close_buttons:
            alert_close_buttons[0].click()
            time.sleep(2)
    except Exception:
        driver.quit()
        raise
    return driver
# Browser session shared by all helpers in this script. Start-up is attempted
# a second time when a click was intercepted during the first attempt.
try:
    driver = get_driver()
except ElementClickInterceptedException:
    driver = get_driver()

# Source-text input of the translator; focused once so typing lands in it.
text_in = driver.find_element(By.CSS_SELECTOR, '[data-testid="translator-source-input"]')
text_in.click()
time.sleep(0.5)

# Most recent translation returned by get_clipboard_change(); that function
# compares against it to tell a fresh result from the previous one.
last_clipboard = ''
def clip_content():
    """Return the text currently shown in the translation output box.

    Despite the name, the system clipboard is not involved: the text is read
    straight from the page via JavaScript (`textContent` of the target
    textbox). The former Tk clipboard code sat after the `return` and could
    never run, so it has been removed.

    NOTE(review): if the textbox is not in the page, the script dereferences
    null in the browser; presumably Selenium surfaces that as an exception —
    confirm if callers need to distinguish it.
    """
    target_selector = 'div[aria-labelledby="translation-target-heading"][role="textbox"]'
    return driver.execute_script(
        f"return document.querySelector('{target_selector}').textContent;"
    )
def get_clipboard_change(orig_text):
    """Poll the translation box until it holds a new, stable translation.

    Each round reads the output twice, about 0.9 s apart. A reading counts as
    settled when both reads agree and differ from the previously returned
    translation (`last_clipboard`). Unsettled rounds are retried up to seven
    times; after that the latest reading is returned regardless.

    Args:
        orig_text: the source text that was typed in; only its length is
            used, for the one-off "safety" re-check below.

    Returns:
        The translation text, which is also stored in `last_clipboard`.
    """
    global last_clipboard
    candidate = ''
    failed_rounds = 0
    rechecked = False
    while failed_rounds <= 6:
        time.sleep(1.1)   # give the page time to produce output
        candidate = clip_content()
        time.sleep(0.9)   # gap between the two reads of one round
        confirmation = clip_content()

        settled = candidate == confirmation and candidate != last_clipboard
        if not settled:
            print('WHOOPS next try')
            failed_rounds += 1
            continue

        # Settled: accept, unless the result is short relative to the input
        # and the single extra verification round has not been spent yet.
        # NOTE(review): "shorter than twice the input" holds for almost every
        # translation, so this re-check runs nearly always — confirm the
        # intended threshold.
        if rechecked or len(candidate) >= (len(orig_text) * 2):
            break
        print('WHOOPS safety try')
        rechecked = True  # might be not done

    last_clipboard = candidate
    return last_clipboard
def clear_input():
    """Click the translator's "clear source text" button, then pause briefly.

    The pause after the click is random, between 0.2 and 1.0 seconds.
    """
    time.sleep(0.1)
    driver.find_element(
        By.CSS_SELECTOR, '[data-testid="translator-source-clear-button"]'
    ).click()
    pause_seconds = randint(2, 10) / 10
    time.sleep(pause_seconds)
def _dismiss_if_present(close_selector):
    """Click the first element matching `close_selector`, if any, then wait 2 s."""
    # One lookup only: checking with find_elements and then fetching again
    # with find_element could fail if the element vanished in between.
    close_buttons = driver.find_elements(By.CSS_SELECTOR, close_selector)
    if not close_buttons:
        return
    close_buttons[0].click()
    time.sleep(2)


def removeShitStains():
    """Close promotional overlays on the translator page when they are present.

    Two overlays are handled, in this order: the app-ad callout and the
    recommendation banner. Each one that is found is closed via its close
    button, followed by a 2-second pause; absent overlays are skipped.
    """
    # The "ariaCloseLabel" value is the literal attribute text the selector
    # matches on; it is kept exactly as in the original.
    _dismiss_if_present(
        '[aria-labelledby="callout-heading-app-ad-callout"] [aria-label="ariaCloseLabel"]'
    )
    _dismiss_if_present(
        '[aria-labelledby^="recommendation-banner-title-"] [aria-label="Close"]'
    )
def parse_srt(srt_content):
    """Parse SRT text into a list of subtitle dictionaries.

    Each dictionary has the keys 'index', 'start', 'end' and 'text'; all
    values are strings, and multi-line subtitle text is joined with '\\n'.

    Tolerated input variations: a leading byte-order mark, Windows/old-Mac
    line endings, and separator lines that are blank or whitespace-only
    (including several in a row). Empty or whitespace-only content yields an
    empty list.

    Args:
        srt_content: the full text of an SRT file.

    Returns:
        The list of subtitle dictionaries, in file order.

    Raises:
        ValueError: if a block lacks a timing line or the timing line does
            not start with 'HH:MM:SS,mmm --> HH:MM:SS,mmm'.
    """
    timing_pattern = re.compile(r'(\d{2}:\d{2}:\d{2},\d{3}) --> (\d{2}:\d{2}:\d{2},\d{3})')

    # Normalise before splitting so every block boundary is a blank line
    # made of '\n' characters only.
    normalized = srt_content.lstrip('\ufeff').replace('\r\n', '\n').replace('\r', '\n').strip()
    if not normalized:
        return []

    subtitles = []
    for block in re.split(r'\n\s*\n', normalized):
        lines = block.split('\n')
        if len(lines) < 2:
            raise ValueError(f"Malformed SRT block (missing timing line): {block!r}")
        timing_match = timing_pattern.match(lines[1].strip())
        if timing_match is None:
            raise ValueError(f"Malformed SRT timing line: {lines[1]!r}")
        start, end = timing_match.groups()
        subtitles.append({
            'index': lines[0].strip(),
            'start': start,
            'end': end,
            'text': "\n".join(lines[2:]),
        })
    return subtitles
def translate_text(text, source_lang="ES", target_lang="EN"):
    """Translate one piece of text through the open translator page.

    The text is typed into the source box, the result is collected once it
    has settled, and the page is reset (input cleared, overlays dismissed)
    for the next call. Progress is printed along the way.

    Args:
        text: the source text to translate.
        source_lang: accepted but not used by this function.
        target_lang: accepted but not used by this function.

    Returns:
        The translated text.
    """
    print('Translatin ' + text)
    text_in.send_keys(text)
    translated = get_clipboard_change(text)
    print('Gottttttin ' + translated)

    # Reset the page so the next subtitle starts from an empty input.
    clear_input()
    removeShitStains()
    print('')
    return translated
def translate_srt_to_english(srt_file_path, output_srt_file_path):
    """Translate every subtitle of an SRT file and write the result as SRT.

    Translation goes through translate_text(), i.e. the DeepL web page driven
    by the browser session, not an HTTP API. Indexes and timings are copied
    unchanged; only the subtitle text is replaced.

    Args:
        srt_file_path: path of the UTF-8 SRT file to read.
        output_srt_file_path: path of the UTF-8 SRT file to write.

    A subtitle whose translation raises is retried once; a failure of the
    retry propagates, and the output file is then not written.
    """
    with open(srt_file_path, 'r', encoding='utf-8') as source_file:
        entries = parse_srt(source_file.read())

    for entry in entries:
        source_text = entry['text']
        try:
            entry['text'] = translate_text(source_text)
        except Exception as err:
            print('Borked, trying again: ' + type(err).__name__)
            print(err)
            entry['text'] = translate_text(source_text)

    with open(output_srt_file_path, 'w', encoding='utf-8') as target_file:
        for entry in entries:
            target_file.write(
                f"{entry['index']}\n"
                f"{entry['start']} --> {entry['end']}\n"
                f"{entry['text']}\n\n"
            )
    print(f"Translated SRT file saved as {output_srt_file_path}")
if __name__ == "__main__":
    # Local import: only this entry point needs it.
    import os

    parser = argparse.ArgumentParser(description="Translate SRT file from Spanish to English using DeepL API")
    parser.add_argument("srt_file_path", help="Path to the input SRT file")
    args = parser.parse_args()

    # "<name> orig.srt" is translated into "<name>.srt".
    output_srt_file_path = args.srt_file_path.replace(' orig.srt', '.srt')
    if output_srt_file_path == args.srt_file_path:
        # The input name has no " orig.srt" marker, so the replacement above
        # changed nothing and the translation would overwrite the source
        # file. Write to "<name>.en.srt" next to it instead.
        stem, extension = os.path.splitext(args.srt_file_path)
        output_srt_file_path = f"{stem}.en{extension or '.srt'}"

    try:
        translate_srt_to_english(args.srt_file_path, output_srt_file_path)
    except Exception as e:
        print(e)
        # Keep the process (and with it the browser window) alive for ten
        # minutes so the failed page state can be inspected.
        print("Sleeping for debug")
        time.sleep(600)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment