Skip to content

Instantly share code, notes, and snippets.

@gmag11
Created November 1, 2024 23:02
Show Gist options
  • Save gmag11/fcc3a7c3fe12837442c8c14aa60ea43a to your computer and use it in GitHub Desktop.
Save gmag11/fcc3a7c3fe12837442c8c14aa60ea43a to your computer and use it in GitHub Desktop.
Merges multiple SRT subtitle files, adds speaker tags, and generates a merged output in both SRT and plain text formats
import re
import sys
from datetime import datetime
# Convert an SRT timestamp string to a datetime object
def srt_time_to_datetime(srt_time):
    """Parse an SRT timestamp (``HH:MM:SS,mmm``) into a ``datetime``.

    Leading and trailing whitespace is ignored, so a timestamp cut out of a
    timing line with stray spaces or a carriage return still parses. A ``.``
    millisecond separator is accepted in addition to the standard ``,``.

    Args:
        srt_time: Timestamp text such as ``"00:01:02,500"``.

    Returns:
        A naive ``datetime`` holding the parsed time of day; the date part is
        whatever ``datetime.strptime`` fills in when no date is parsed.

    Raises:
        ValueError: If the text does not match the timestamp format.
    """
    # Normalise only the first '.', which can only be the fraction separator
    # in an otherwise well-formed timestamp.
    normalized = srt_time.strip().replace('.', ',', 1)
    return datetime.strptime(normalized, '%H:%M:%S,%f')
# Convert a datetime object to an SRT timestamp string
def datetime_to_srt_time(dt):
    """Format ``dt`` as an SRT timestamp string (``HH:MM:SS,mmm``).

    Args:
        dt: Object whose ``strftime`` renders hours, minutes, seconds and
            microseconds (the rest of the script passes ``datetime`` values).

    Returns:
        The formatted timestamp with the fraction cut down to milliseconds.
    """
    full_stamp = dt.strftime('%H:%M:%S,%f')
    # %f renders six microsecond digits; dropping the last three leaves the
    # millisecond part (truncated, not rounded).
    return full_stamp[:-3]
# Read an SRT file and return a list of subtitles
def read_srt(file_path, speaker_tag):
    """Read an SRT file and return its cues, each prefixed with a speaker tag.

    Cues are separated by one or more blank lines; lines holding only
    whitespace count as blank. An empty file yields an empty list. The file
    is decoded as UTF-8 and a leading byte-order mark, if present, is dropped.

    Args:
        file_path: Path of the SRT file to read.
        speaker_tag: Label inserted as ``[speaker_tag] `` before each cue text.

    Returns:
        A list of dicts with the keys ``'index'`` (first line of the cue, as
        text), ``'start_time'`` and ``'end_time'`` (values returned by
        ``srt_time_to_datetime``) and ``'text'`` (the tagged cue text, with
        multi-line text joined by newlines).

    Raises:
        ValueError: If a cue has no ``-->`` timing line as its second line,
            or a timestamp cannot be parsed.
    """
    # 'utf-8-sig' reads plain UTF-8 too, but strips a BOM so it does not end
    # up glued to the first cue's index.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        content = file.read()

    subtitles = []
    # Split on runs of blank lines rather than on exactly one empty line, so
    # extra or whitespace-only separator lines do not shift a cue's fields.
    for entry in re.split(r'\n\s*\n', content.strip()):
        if not entry.strip():
            # An empty file produces a single empty entry; nothing to parse.
            continue
        lines = entry.split('\n')
        if len(lines) < 2 or '-->' not in lines[1]:
            raise ValueError(
                f'{file_path}: malformed subtitle block: {entry!r}'
            )
        # Partition on the bare arrow and strip, tolerating irregular spacing.
        start_time, _, end_time = lines[1].partition('-->')
        text = '\n'.join(lines[2:])
        subtitles.append({
            'index': lines[0],
            'start_time': srt_time_to_datetime(start_time.strip()),
            'end_time': srt_time_to_datetime(end_time.strip()),
            'text': f'[{speaker_tag}] {text}'
        })
    return subtitles
# Write the list of subtitles to an SRT file
def write_srt(file_path, subtitles):
    """Write ``subtitles`` to ``file_path`` in SRT format.

    Cues are numbered from 1 in the order given; any ``'index'`` stored on a
    cue is not used. Each cue is written as its number, a timing line and its
    text, followed by a blank line.

    Args:
        file_path: Destination path; the file is created or overwritten.
        subtitles: Iterable of dicts with ``'start_time'``, ``'end_time'``
            and ``'text'`` entries.
    """
    with open(file_path, 'w', encoding='utf-8') as out:
        for number, cue in enumerate(subtitles, start=1):
            begin = datetime_to_srt_time(cue['start_time'])
            finish = datetime_to_srt_time(cue['end_time'])
            body = cue['text']
            out.write(f"{number}\n{begin} --> {finish}\n{body}\n\n")
# Write the plain-text file without timestamps
def write_text(file_path, subtitles):
    """Write only the cue texts to ``file_path``, without numbers or timings.

    Each text is followed by a blank line.

    Args:
        file_path: Destination path; the file is created or overwritten.
        subtitles: Iterable of dicts with a ``'text'`` entry.
    """
    with open(file_path, 'w', encoding='utf-8') as out:
        out.writelines(f"{cue['text']}\n\n" for cue in subtitles)
# Merge any number of subtitle lists into a single list sorted by start time
def merge_subtitles(subtitles_list):
    """Flatten several cue lists into one list ordered by start time.

    Args:
        subtitles_list: Iterable of cue lists; every cue is a dict with a
            ``'start_time'`` entry.

    Returns:
        A new list holding every cue, sorted by ``'start_time'``. The sort is
        stable, so cues with equal start times keep their input order.
    """
    combined = []
    for track in subtitles_list:
        combined.extend(track)
    combined.sort(key=lambda cue: cue['start_time'])
    return combined
# Main routine: read every SRT file and merge the results
def process_srt_files(file_paths):
    """Read every SRT file and merge all their cues into one list.

    The file at position ``k`` (counting from 1) is tagged ``speaker_k``.

    Args:
        file_paths: Iterable of SRT file paths, in speaker order.

    Returns:
        The list produced by ``merge_subtitles`` from the per-file cue lists.
    """
    tagged_tracks = [
        read_srt(path, f'speaker_{position}')
        for position, path in enumerate(file_paths, start=1)
    ]
    return merge_subtitles(tagged_tracks)
# Take the SRT file paths from the command line; at least one is required.
srt_files = sys.argv[1:]
if not srt_files:
    print("Uso: python merge_srt.py <archivo1.srt> <archivo2.srt> ...")
    sys.exit(1)

# Read the SRT files and merge their cues.
merged_subtitles = process_srt_files(srt_files)

# Write the merged cues to a new SRT file.
write_srt('merged_subtitles.srt', merged_subtitles)

# Write the plain-text version without timestamps.
write_text('merged_subtitles.txt', merged_subtitles)

print("Los subtítulos han sido fusionados exitosamente y el archivo de texto ha sido generado.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment