Last active
April 6, 2023 13:46
-
-
Save willard1218/2a1f062e904b5d5abe3152ee5b86197e to your computer and use it in GitHub Desktop.
merge srt segment with given time interval
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
# -*- coding: utf-8 -*- | |
""" | |
This is a Python program for merging subtitle files, which uses the third-party library "srt"(https://github.com/cdown/srt) to parse and compose srt files. | |
The basic idea of the program is to parse the original srt file into multiple "srt.Subtitle" objects, and then iterate and process these objects. | |
Adjacent subtitles are merged into one new subtitle until the span from the start of the first subtitle in the group to the end of the current one reaches "MIN_TIME_INTERVALS" seconds (defined in the program as the minimum time interval).
Specifically, the content of the original subtitles is extracted and connected with "separator_operator" (defined in the program as the separator), | |
and a new "srt.Subtitle" object is created with the new start and end times. | |
Finally, all the new "srt.Subtitle" objects are merged into a new srt file and output to standard output. | |
The path of the srt file to process is given as a command-line argument. If no argument is given, the program prints usage instructions.
""" | |
import srt | |
import sys | |
# Minimum span, in seconds, that a merged segment must cover before it is emitted.
MIN_TIME_INTERVALS = 20
# String inserted between the contents of the subtitles merged into one segment.
separator_operator = '\n'
def run(filepath, min_time_intervals):
    """Merge consecutive subtitles of an srt file and print the result.

    Subtitles are accumulated in file order. A group is flushed into a single
    merged subtitle as soon as the span from the group's first start time to
    the current subtitle's end time reaches ``min_time_intervals`` seconds,
    or when the last subtitle of the file is reached. The merged subtitles
    are composed back into srt text and written to standard output.

    Args:
        filepath: Path of the UTF-8 encoded srt file to read.
        min_time_intervals: Minimum span, in seconds, a merged segment must
            cover before it is emitted (the final segment may be shorter).
    """
    with open(filepath, 'r', encoding="utf-8") as f:
        text = f.read()

    subs = list(srt.parse(text))
    last_pos = len(subs) - 1
    merged = []
    group = []

    # Track list positions with enumerate() rather than trusting sub.index:
    # the index stored in the file is normally 1-based (and may be arbitrary),
    # so it must not be used to address entries of ``subs``.
    for pos, sub in enumerate(subs):
        group.append(sub)

        # total_seconds() keeps the days component and the sign, unlike .seconds.
        span = (sub.end - group[0].start).total_seconds()
        if span < min_time_intervals and pos != last_pos:
            continue

        # Flush the current group. Every subtitle, including the one that
        # opens a group, goes through this check, so a trailing subtitle is
        # never silently dropped.
        content = separator_operator.join(s.content.strip() for s in group)
        merged.append(
            srt.Subtitle(
                index=len(merged),
                start=group[0].start,
                end=sub.end,
                content=content,
            )
        )
        group = []

    print(srt.compose(merged))
# Script entry point: merge the srt file named by the first command-line
# argument, or show a usage hint when the script is started without arguments.
if __name__ == '__main__':
    if len(sys.argv) != 1:
        run(filepath=sys.argv[1], min_time_intervals=MIN_TIME_INTERVALS)
    else:
        print("usage: SrtSegmentMerger ${file_path}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment