-
-
Save nimatrueway/4589700f49c691e5413c5b2df4d02f4f to your computer and use it in GitHub Desktop.
package main | |
import ( | |
"time" | |
"regexp" | |
"bufio" | |
"strconv" | |
"fmt" | |
"os" | |
"errors" | |
"io" | |
"strings" | |
) | |
// Subtitle is a single entry of an SRT file: a sequence number, the
// interval during which it is shown, and its (possibly multi-line) text.
type Subtitle struct {
	// idx is the sequence number as read from the input file.
	idx int
	// fromTime and toTime are the start and end of the display interval,
	// measured from the beginning of the media.
	fromTime, toTime time.Duration
	// text holds the subtitle lines joined with "\n".
	text string
}
// timeFramePattern matches an SRT timing line such as
// "00:01:02,345 --> 00:01:04,000". Submatches 1-4 are the hour, minute,
// second and millisecond of the start time; submatches 5-8 are the same
// fields of the end time. MustCompile is used so that a malformed pattern
// fails loudly at start-up instead of leaving a nil *regexp.Regexp behind.
var timeFramePattern = regexp.MustCompile(`(\d+):(\d+):(\d+),(\d+) --> (\d+):(\d+):(\d+),(\d+)`)
// getDuration converts the four textual fields of an SRT timestamp --
// parts[0] hours, parts[1] minutes, parts[2] seconds, parts[3]
// milliseconds -- into a single time.Duration.
//
// Conversion errors are deliberately ignored: the only caller passes
// digit-only captures of timeFramePattern. A field that fails to parse
// contributes whatever value strconv.Atoi returned alongside the error.
func getDuration(parts []string) time.Duration {
	units := [4]time.Duration{time.Hour, time.Minute, time.Second, time.Millisecond}

	var total time.Duration
	for i, unit := range units {
		n, _ := strconv.Atoi(parts[i])
		total += unit * time.Duration(n)
	}
	return total
}
func printDuration(duration time.Duration) string { | |
hour := duration / time.Hour | |
duration -= hour * time.Hour | |
minute := duration / time.Minute | |
duration -= minute * time.Minute | |
second := duration / time.Second | |
duration -= second * time.Second | |
millisecond := duration / time.Millisecond | |
return fmt.Sprintf(`%02d:%02d:%02d,%03d`, hour, minute, second, millisecond) | |
} | |
func readOneSubtitle(scanner *bufio.Scanner) (*Subtitle, error) { | |
// read idx | |
if !scanner.Scan() { | |
return nil, nil | |
} | |
idxRaw := scanner.Text() | |
idx, err := strconv.Atoi(idxRaw) | |
if err != nil { | |
return nil, errors.New("invalid subtitle index") | |
} | |
// read timing | |
if !scanner.Scan() { | |
return nil, errors.New("could not find subtitle timing") | |
} | |
timing := timeFramePattern.FindStringSubmatch(scanner.Text()) | |
if timing == nil { | |
return nil, errors.New("invalid subtitle timing") | |
} | |
fromTime := getDuration(timing[1:5]) | |
toTime := getDuration(timing[5:9]) | |
// read content | |
if !scanner.Scan() { | |
return nil, errors.New("could not find subtitle text") | |
} | |
content := scanner.Text() | |
for scanner.Scan() && scanner.Text() != "" { | |
content += "\n" | |
content += scanner.Text() | |
} | |
subtitle := &Subtitle{idx, fromTime, toTime, content} | |
return subtitle, nil | |
} | |
func writeOneSubtitle(file io.Writer, subtitle *Subtitle, idx *int) error { | |
_, err := fmt.Fprint(file, | |
*idx, "\n", | |
printDuration(subtitle.fromTime), " --> ", printDuration(subtitle.toTime), "\n", | |
subtitle.text, "\n\n") | |
*idx++ | |
return err | |
} | |
func main() { | |
if len(os.Args) < 2 { | |
println("Provide a subtitle file to fix.\ne.g. subtitle-fixer mysubtitle.srt") | |
return | |
} | |
filePath := os.Args[1] | |
newFilePath := filePath + ".fixed" | |
file, _ := os.Open(filePath) | |
newFile, _ := os.Create(newFilePath) | |
defer file.Close() | |
defer newFile.Close() | |
scanner := bufio.NewScanner(file) | |
var newIdx = 1 | |
var lastSubtitle *Subtitle = nil | |
for { | |
subtitle, err := readOneSubtitle(scanner) | |
if lastSubtitle != nil { | |
if subtitle != nil { | |
subtitle.text = strings.Trim(subtitle.text, "\n ") | |
if len(subtitle.text) == 0 { // skip over empty subtitles | |
continue | |
} | |
// skip over super-short subtitles that basically contain what their previous subtitle contains, and just prolong previous subtitle | |
if subtitle.toTime - subtitle.fromTime < time.Millisecond * 150 && | |
strings.Contains(lastSubtitle.text, subtitle.text) { | |
lastSubtitle.toTime = subtitle.toTime | |
continue | |
} | |
// if first-line of current subtitle is repeating last-line of previous-subtitle remove it | |
currentLines := strings.Split(subtitle.text, "\n") | |
lastLines := strings.Split(lastSubtitle.text, "\n") | |
if currentLines[0] == lastLines[len(lastLines)-1] { | |
subtitle.text = strings.Join(currentLines[1:], "\n") | |
} | |
// if first-line of current subtitle is repeating last-line of previous-subtitle remove it | |
if subtitle.fromTime < lastSubtitle.toTime { | |
lastSubtitle.toTime = subtitle.fromTime - time.Millisecond | |
} | |
} | |
writeOneSubtitle(newFile, lastSubtitle, &newIdx) | |
} | |
if subtitle == nil { | |
break | |
} | |
if err != nil { | |
panic(err) | |
} | |
lastSubtitle = subtitle | |
} | |
os.Rename(filePath, filePath + ".bak") | |
os.Rename(newFilePath, filePath) | |
} |
I modified this slightly to handle combining subtitles with overlapping timestamps
I have tried both of your versions of the code to resync YouTube subs, but the output is a .fixed file with a size of 0 KB.
I have tried both of your versions of the code to resync YouTube subs, but the output is a .fixed file with a size of 0 KB.
Send me your subtitle file or video link, I'll take a look on weekend.
How can I run this.. noob here
@jfeelio
Download and compile subtitle-overlap-fixer
wget https://gist.github.com/nimatrueway/4589700f49c691e5413c5b2df4d02f4f/raw/a3cbf48edd6ad0377b158e1455a702895e17f2dd/subtitle-overlap-fixer.go
go build subtitle-overlap-fixer.go
Download an auto-generated subtitle from youtube:
youtube-dl --write-auto-sub --skip-download 'https://www.youtube.com/watch?v={VIDEO-ID}'
ffmpeg -i '{DOWNLOADED-VVT-FILE}' '{SRT-FILE}.srt'
Fix overlapping parts of subtitle in your converted '{SRT-FILE}.srt'
./subtitle-overlap-fixer '{SRT-FILE}.srt'
Actually I am having the same problem the output file ends up coming to be 0 kb. please help
Actually I am having the same problem the output file ends up coming to be 0 kb. please help
Me too. I ended up just giving up and using https://github.com/SubtitleEdit/subtitleedit
@jfeelio
Download and compile subtitle-overlap-fixer
wget https://gist.github.com/nimatrueway/4589700f49c691e5413c5b2df4d02f4f/raw/a3cbf48edd6ad0377b158e1455a702895e17f2dd/subtitle-overlap-fixer.go
go build subtitle-overlap-fixer.go
Download an auto-generated subtitle from youtube:
youtube-dl --write-auto-sub --skip-download 'https://www.youtube.com/watch?v={VIDEO-ID}'
ffmpeg -i '{DOWNLOADED-VVT-FILE}' '{SRT-FILE}.srt'
Fix overlapping parts of subtitle in your converted
'{SRT-FILE}.srt'
./subtitle-overlap-fixer '{SRT-FILE}.srt'
Hello,
When I use "ffmpeg -fix_sub_duration -i download.srt new.srt", it fixes the overlapping but removes the last line of the subtitles. How can I avoid this?
Thanks.
Actually I am having the same problem the output file ends up coming to be 0 kb. please help
I ran into the same problem; it happens when the file has a BOM (byte-order mark).
I removed the BOM, but now the result is only a file with the .fixed extension: no .bak file is created, and the content is a single entry with a start and end time followed by all the text of the file.
Greetings.
Thanks for sharing! Already had an .srt from youtube, so I just ran
sudo apt install golang-go
go build subtitle-overlap-fixer.go
./subtitle-overlap-fixer subtitles.srt
Output file worked great.
Hi Nima, this solved a problem for me so well. Thank you so much.
I use this tool as part of a little YouTube + Mac terminal routine to create and burn in captions for accessibility purposes, using youtube-dlc and ffmpeg as well. It's pretty neat.
I'm thinking about creating a bash script to do it all semi-automatically and maybe write a blog post about it so that others can use this routine to produce captioned videos quickly. If I do end up writing that script + blog post, may I include a link to your gist with credit (and praise!) in my blog post?
Thanks again!
@niceindividual That would be my pleasure. 🌹
For anyone else who would find this useful:
I had an issue with my SRT file being rejected by a picky program for having occasional blank entries like
42
00:03:14,000 --> 00:03:14,159
I moved the section at https://gist.github.com/nimatrueway/4589700f49c691e5413c5b2df4d02f4f#file-subtitle-overlap-fixer-go-L111-L113 down to the end of the block to perform this check last. My thinking is that I was running into issues at line https://gist.github.com/nimatrueway/4589700f49c691e5413c5b2df4d02f4f#file-subtitle-overlap-fixer-go-L124 that subverted the previous empty line check. In any case, pushing this section down seemed to resolve my issues!
Cheers to @nimatrueway for this awesome script. It saved me a ton of time!
Great job! It solved the problem I had with the overlapping. Thanks a lot.
AWESOME! Did exactly what I needed. There were some additional fixes I needed to do (sentence capitalization and changing lowercase "i" to uppercase "I" where needed.). I wrote a little OS X bash script for these issues if anyone want to try it. https://github.com/bruno-sardine/mac#Further-correct-YouTube-captions-captfixsh
For anyone else who would find this useful:
I had an issue with my SRT file being rejected by a picky program for having occasional blank entries like
42
00:03:14,000 --> 00:03:14,159
I moved the section at https://gist.github.com/nimatrueway/4589700f49c691e5413c5b2df4d02f4f#file-subtitle-overlap-fixer-go-L111-L113 down to the end of the block to perform this check last. My thinking is that I was running into issues at line https://gist.github.com/nimatrueway/4589700f49c691e5413c5b2df4d02f4f#file-subtitle-overlap-fixer-go-L124 that subverted the previous empty line check. In any case, pushing this section down seemed to resolve my issues!
Cheers to @nimatrueway for this awesome script. It saved me a ton of time!
Thanks fork it
Thank you for sharing this, @nimatrueway.
It basically sets the to-time of the previous subtitle to the from-time of the next subtitle minus 1 millisecond if they overlap time-wise.