Last active
March 4, 2025 17:25
-
-
Save matthew-brett/cd523fea8826406612f5e3fbab23438f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" Calculate and apply optimal scale and shift to subtitle times | |
Takes CSV of form: | |
text,video_time,srt_time | |
tranquilo,0:01:25,00:01:21.5 | |
ya_empezo_el_abuso,00:34:59.5,00:36:23 | |
ignacio,00:59:27.5,01:01:54.2 | |
tengo_que_llevar,1:20:30.5,01:23:51.2 | |
manana_tienes,1:31:59,01:35:48.6 | |
si_ustedes_lo_deciden,1:37:20.9,01:41:24.6 | |
Notice that seconds (e.g. 24.6 above) can be a floating point value. The ":"
separates hours, minutes, and seconds. The required columns "video_time" and | |
"srt_time", give the corresponding time in the video, and in the SRT file, of | |
particular events - here, onset of a particular subtitle. | |
The CSV need only have columns "video_time", and "srt_time"; "text" above is | |
for your own information, to keep track of which subtitles you are syncing to. | |
The program uses "srt_time" and "video_time" to calculate optimal time scaling | |
and offset (with least-squares), and applies this scaling to times in the input | |
SRT file, to generate a new output SRT file, where the "srt_time" times have | |
been scaled and shifted to correspond as exactly as possible (with a linear | |
fit) to the times in "video_time". | |
""" | |
from argparse import ArgumentParser, RawDescriptionHelpFormatter | |
import numpy as np | |
import pandas as pd | |
import pysubs2 | |
def convtime(val):
    """ Convert text time to time in milliseconds

    Parameters
    ----------
    val : str
        Time as colon-separated fields, one of ``H:M:S``, ``M:S`` or ``S``.
        Each field is parsed as a float, so the seconds may carry a
        fractional part, e.g. ``00:01:24.3``.

    Returns
    -------
    ms : float
        The time in milliseconds.

    Raises
    ------
    ValueError
        If `val` has more than three fields, or a field is not a number.
    """
    fields = val.split(':')
    if len(fields) > 3:
        raise ValueError(
            'Expected time of form H:M:S, M:S or S, got {!r}'.format(val))
    # Fold from the most significant field down, so that omitted leading
    # fields (hours, or hours and minutes) count as zero.
    seconds = 0.0
    for field in fields:
        seconds = seconds * 60 + float(field)
    return seconds * 1000
def fit_times(csv_fname):
    """ Fit least-squares line mapping SRT times onto video times

    Parameters
    ----------
    csv_fname : file-like
        Anything `pd.read_csv` accepts, referring to a CSV of matching times.
        It must have a column `video_time`, with the times at which events
        should occur in the video, and a column `srt_time`, with the times at
        which the same events are currently logged in the SRT file.  Entries
        in both columns are text in the form read by `convtime`, e.g.
        ``00:01:24.3``, where `:` separates hours, minutes and seconds, and
        the seconds are a floating point value.

    Returns
    -------
    slope : float
        Factor by which to multiply "srt_time" times to match "video_time".
    intercept : float
        Offset, in milliseconds, to add to the rescaled "srt_time" times to
        match "video_time".
    """
    table = pd.read_csv(csv_fname)
    # Convert both columns from text to milliseconds.
    video_ms, srt_ms = (table[column].apply(convtime)
                        for column in ('video_time', 'srt_time'))
    # Design matrix for a straight line fit: the predictor (SRT time) and a
    # constant column for the intercept.
    constant = np.ones(len(table))
    design = np.column_stack((srt_ms, constant))
    # Least-squares solution via the pseudo-inverse; the two fitted values
    # are ordered to match the design columns - slope, then intercept.
    params = np.linalg.pinv(design) @ video_ms
    return params
def get_parser():
    """ Build the command line argument parser

    The module docstring is used as the program description, and is shown
    without re-wrapping so the example CSV keeps its layout.

    Returns
    -------
    parser : ArgumentParser
        Parser with four positional arguments, in order: `in_srt_fname`,
        `in_srt_fps`, `time_csv` and `output_srt_fname`.
    """
    parser = ArgumentParser(description=__doc__,  # Usage from docstring
                            formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument('in_srt_fname', help='Input SRT filename')
    # The frame rate is a number; convert here so callers receive a float
    # rather than the raw command line string, and bad values are rejected
    # with a usage error.
    parser.add_argument('in_srt_fps', type=float,
                        help='SRT frame-rate (FPS)')
    parser.add_argument('time_csv',
                        help='CSV with corresponding times (see help)')
    parser.add_argument('output_srt_fname', help='Output SRT filename')
    return parser
def main():
    """ Command line entry point: refit subtitle times, write new SRT file
    """
    cli_args = get_parser().parse_args()
    subtitles = pysubs2.load(cli_args.in_srt_fname, fps=cli_args.in_srt_fps)
    scale, offset = fit_times(cli_args.time_csv)
    # Apply the fitted line in two steps: rescale, then shift.
    # NOTE(review): this relies on a framerate change from `scale` to 1
    # multiplying every subtitle time by `scale` - confirm against pysubs2.
    subtitles.transform_framerate(scale, 1)
    subtitles.shift(ms=offset)
    subtitles.save(cli_args.output_srt_fname, format_='srt')
# Run the command line interface only when this file is executed as a script.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment