Last active
October 27, 2023 01:22
-
-
Save code-yeongyu/ba161bdbe551bb5aa4523ec89bf68972 to your computer and use it in GitHub Desktop.
Merges consecutive identical subtitles and reindexes them.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import re | |
from pathlib import Path | |
from typing import Optional, cast | |
import pysrt | |
import typer | |
from rich import print | |
def remove_situational(subs: pysrt.SubRipFile) -> pysrt.SubRipFile:
    """Remove subtitles that consist solely of [caption] or (caption) cues.

    A subtitle is dropped only when its entire text is made up of one or
    more bracketed or parenthesised groups, optionally separated or
    surrounded by whitespace (including line breaks). Subtitles that mix
    such cues with other text are kept untouched.

    Args:
        subs: Subtitles to filter. The input collection is not modified.

    Returns:
        A new ``SubRipFile`` containing the surviving items, in their
        original order.
    """
    # Negated character classes are used instead of ``.*?`` on purpose:
    # under ``fullmatch`` a lazy ``.*?`` is still stretched to the end of the
    # text, so "[sigh] Hello [laughs]" would be treated as one cue and the
    # dialogue would be lost. Negated classes also match newlines, so a cue
    # wrapped over two lines is recognised as well.
    pattern = re.compile(r"\s*(?:(?:\[[^\]]*\]|\([^)]*\))\s*)+")

    kept = pysrt.SubRipFile()
    for sub in subs:
        if pattern.fullmatch(sub.text):
            continue  # purely situational cue: drop it
        kept.append(sub)
    return kept
def merge_consecutive_subs(subs: pysrt.SubRipFile) -> pysrt.SubRipFile:
    """Collapse runs of adjacent subtitles that share the same text.

    Each run of neighbouring items with identical text becomes a single
    item that starts where the first item of the run starts and ends where
    the last item of the run ends.

    Args:
        subs: Subtitles to merge, in display order.

    Returns:
        A new ``SubRipFile`` with one item per run. Every item is created
        with the placeholder index 1; the caller is expected to renumber.
    """
    result = pysrt.SubRipFile()
    total = len(subs)
    run_start = 0

    for pos in range(total):
        # A run continues while the next item repeats the current text.
        if pos + 1 < total and subs[pos].text == subs[pos + 1].text:
            continue

        first, last = subs[run_start], subs[pos]
        result.append(
            pysrt.SubRipItem(
                index=1,  # placeholder; real indexes are assigned later
                start=first.start,
                end=last.end,
                text=first.text,
            )
        )
        run_start = pos + 1

    return result
app = typer.Typer()


@app.command()
def clean_srt(
    input_path: Path,
    output_path: Optional[Path] = typer.Argument(None),
    is_remove_situational: bool = typer.Option(
        False,
        "--remove-situational",
        "-r",
        help="Remove situational subtitles such as [자막] or (자막)",
    ),
):
    """Merge consecutive identical subtitles in an SRT file and renumber them.

    If OUTPUT_PATH is omitted, the input file is overwritten in place.
    """
    # Imported locally so this block does not depend on edits to the
    # file-level import list; ``rich`` is already a dependency of this file.
    from rich.markup import escape

    input_path = input_path.expanduser().resolve()
    if not input_path.exists():
        raise typer.BadParameter("Input file does not exist.")
    if not input_path.is_file():
        # A directory passes ``exists()`` but would crash inside pysrt.open().
        raise typer.BadParameter("Input path is not a file.")

    # Normalise the output path the same way as the input path so that
    # "~" is honoured for both arguments.
    output_path = output_path.expanduser().resolve() if output_path else input_path

    # Paths are interpolated into Rich markup below. Subtitle file names
    # often contain square brackets (e.g. "[Group] Movie.srt"), which Rich
    # would otherwise parse as style tags, so they must be escaped.
    shown_input = escape(str(input_path))
    shown_output = escape(str(output_path))

    subs = pysrt.open(str(input_path), encoding="utf-8")

    if is_remove_situational:
        print("[green]Removing situational subtitles...[/green]")
        subs = remove_situational(subs)

    print(
        f"[green]Successfully read the SRT file from {shown_input}. Merging consecutive lines...[/green]"
    )
    merged_subs = merge_consecutive_subs(subs)
    # Merged items are created with a placeholder index; renumber them.
    merged_subs.clean_indexes()

    print(f"[green]Successfully merged. Saving to {shown_output}...[/green]")
    merged_subs.save(str(output_path), encoding="utf-8")
    print("[blue]Done! Merged SRT file saved.[/blue]")
# Launch the Typer CLI only when this file is executed as a script,
# so importing the module has no side effects beyond defining the app.
if __name__ == "__main__":
    app()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment