Skip to content

Instantly share code, notes, and snippets.

@code-yeongyu
Last active October 27, 2023 01:22
Show Gist options
  • Save code-yeongyu/ba161bdbe551bb5aa4523ec89bf68972 to your computer and use it in GitHub Desktop.
Save code-yeongyu/ba161bdbe551bb5aa4523ec89bf68972 to your computer and use it in GitHub Desktop.
Merges consecutive duplicate subtitles and reindexes them.
#!/usr/bin/env python
import re
from itertools import groupby
from pathlib import Path
from typing import Optional, cast

import pysrt
import typer
from rich import print
from rich.markup import escape
def remove_situational(subs: pysrt.SubRipFile) -> pysrt.SubRipFile:
    """Drop subtitles that consist solely of situational cues.

    A situational cue is a bracketed annotation such as ``[music]`` or
    ``(laughs)``. A subtitle is removed only when its whole text is one or
    more such cues separated by optional whitespace; a subtitle that mixes
    cues with dialogue (``[sighs] Fine. [door slams]``) is kept untouched.

    Cues may span several lines. Brackets nested inside a cue of the same
    kind (``[a [b] c]``) are not recognised as a single cue.

    Args:
        subs: Subtitles to filter. The input collection is not modified.

    Returns:
        A new ``SubRipFile`` holding the surviving subtitle items, in their
        original order.
    """
    # The character classes exclude the cue's own bracket characters so a
    # match can never stretch from the first opening bracket to the last
    # closing one across spoken text. They also match newlines, unlike ".".
    cue_only = re.compile(r"\s*(?:(?:\[[^\[\]]*\]|\([^()]*\))\s*)+")

    kept = pysrt.SubRipFile()
    for sub in subs:
        if not cue_only.fullmatch(sub.text):
            kept.append(sub)
    return kept
def merge_consecutive_subs(subs: pysrt.SubRipFile) -> pysrt.SubRipFile:
    """Collapse runs of adjacent subtitles that share identical text.

    Each run of neighbouring items with the same ``text`` becomes a single
    item that starts where the first item of the run starts and ends where
    the last one ends. The text and display position are taken from the
    first item of the run.

    Args:
        subs: Subtitles in display order. The input is not modified.

    Returns:
        A new ``SubRipFile`` with one item per run. Every item carries the
        placeholder index ``1``; the caller is expected to reindex.
    """
    merged_subs = pysrt.SubRipFile()
    for _, run in groupby(subs, key=lambda sub: sub.text):
        # Walk the run once, remembering only its first and last members.
        first = last = next(run)
        for last in run:
            pass
        merged_subs.append(
            pysrt.SubRipItem(
                index=1,  # Placeholder; real indexes are assigned later.
                start=first.start,
                end=last.end,
                text=first.text,
                # Carry the positioning data over so merged subtitles keep
                # their on-screen placement.
                position=first.position,
            )
        )
    return merged_subs
app = typer.Typer()


@app.command()
def clean_srt(
    input_path: Path,
    output_path: Optional["Path"] = typer.Argument(None),
    is_remove_situational: bool = typer.Option(
        False,
        "--remove-situational",
        "-r",
        help="Remove situational subtitles such as [자막] or (자막)",
    ),
):
    """Merge consecutive duplicate subtitles in an SRT file and reindex it.

    When no output path is given, the input file is overwritten in place.
    """
    input_path = input_path.expanduser().resolve()
    if not input_path.exists():
        raise typer.BadParameter("Input file does not exist.")
    # A directory passes the existence check but cannot be opened as an SRT
    # file, so reject it here with a clear message.
    if not input_path.is_file():
        raise typer.BadParameter("Input path is not a file.")
    if output_path is None:
        output_path = input_path

    # File names frequently contain square brackets (e.g. release-group
    # tags), which rich would otherwise interpret as markup tags.
    shown_input = escape(str(input_path))
    shown_output = escape(str(output_path))

    subs = pysrt.open(str(input_path), encoding="utf-8")
    if is_remove_situational:
        print("[green]Removing situational subtitles...[/green]")
        subs = remove_situational(subs)
    print(
        f"[green]Successfully read the SRT file from {shown_input}. Merging consecutive lines...[/green]"
    )
    merged_subs = merge_consecutive_subs(subs)
    # Merged items all carry a placeholder index; renumber them sequentially.
    merged_subs.clean_indexes()
    print(f"[green]Successfully merged. Saving to {shown_output}...[/green]")
    merged_subs.save(str(output_path), encoding="utf-8")
    print("[blue]Done! Merged SRT file saved.[/blue]")
# Run the Typer CLI only when this file is executed as a script.
if __name__ == "__main__":
    app()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment