Skip to content

Instantly share code, notes, and snippets.

@jasdev
Last active November 15, 2022 20:50
Show Gist options
  • Save jasdev/f71e0079e8b2ec26baa447f37b0ed0d8 to your computer and use it in GitHub Desktop.
Save jasdev/f71e0079e8b2ec26baa447f37b0ed0d8 to your computer and use it in GitHub Desktop.
Sketch of an SRT file parser.
import Parsing
import Foundation
// [SubRip file format spec.](https://en.wikipedia.org/wiki/SubRip#File_format)
// Sample input for the parser. SubRip groups are separated by a blank line;
// the group parser reads caption text up to the next "\n\n" and the file
// parser expects two newlines between groups, so the blank lines are required.
let sampleSRTString =
"""
1
00:00:00,540 --> 00:00:00,960
Yo-yo

2
00:00:00,960 --> 00:00:01,490
yo

3
00:00:01,520 --> 00:00:01,830
this

4
00:00:01,830 --> 00:00:02,010
is

5
00:00:02,010 --> 00:00:02,460
an

6
00:00:02,460 --> 00:00:02,760
audio

7
00:00:02,760 --> 00:00:03,240
recording
"""
// Fixed-width numeric fields of an `HH:MM:SS,mmm` timecode. Each one takes a
// fixed-length prefix of the input and pipes it into an unsigned `Int` parser.
// Hours, minutes and seconds share the same two-character definition;
// milliseconds use three characters.
let twoDigitTimecodeField = Prefix(2).pipe(Int.parser(isSigned: false)).utf8
let timecodeHours = twoDigitTimecodeField
let timecodeMinutes = twoDigitTimecodeField
let timecodeSeconds = twoDigitTimecodeField
let timecodeMilliseconds = Prefix(3).pipe(Int.parser(isSigned: false)).utf8
// Parses a single `HH:MM:SS,mmm` timecode and converts it into a
// `TimeInterval` expressed in seconds.
let timecodeParser = timecodeHours
  .skip(StartsWith(":".utf8))
  .take(timecodeMinutes)
  .skip(StartsWith(":".utf8))
  .take(timecodeSeconds)
  .skip(StartsWith(",".utf8))
  .take(timecodeMilliseconds)
  .map { hours, minutes, seconds, milliseconds -> TimeInterval in
    let secondsPerMinute = 60.0
    // Accumulate the whole-second part first (hours, then minutes, then
    // seconds), and add the fractional millisecond part last.
    let wholeSeconds =
      Double(hours) * secondsPerMinute * secondsPerMinute +
      Double(minutes) * secondsPerMinute +
      Double(seconds)
    return wholeSeconds + Double(milliseconds) / 1_000
  }
// Parses a `start --> end` timecode line into a `(start, end)` pair.
// The pair is rejected unless the start is strictly earlier than the end.
let timecodeLineParser = timecodeParser
  .skip(StartsWith(" --> ".utf8))
  .take(timecodeParser)
  .filter { start, end in start < end }
/// One numbered entry of a SubRip file: a sequence number, the time range
/// during which the caption is shown, and the caption text.
///
/// Conforms to `Equatable` so parsed groups can be compared directly
/// (for example, against expected values in tests).
struct SubtitleGroup: Equatable {
  /// The group's numeric counter as written in the file.
  var sequenceNumber: Int
  /// When the caption appears, in seconds.
  var startTimecode: TimeInterval
  /// When the caption disappears, in seconds.
  var endTimecode: TimeInterval
  /// The caption text of the group.
  var substring: String
}
// Parses one subtitle group: an unsigned sequence number on its own line,
// a timecode line, and then the caption text.
let srtGroupParser = Int.parser(isSigned: false)
  .skip(Newline())
  .take(timecodeLineParser)
  .skip(Newline())
  .take(
    // Caption text runs up to the next blank line (LF or CRLF flavoured);
    // when there is none, it is whatever input remains.
    PrefixUpTo("\n\n")
      .orElse(PrefixUpTo("\r\n\r\n"))
      .orElse(Rest())
      .utf8
  )
  .map { number, timeRange, text -> SubtitleGroup in
    let (start, end) = timeRange
    return SubtitleGroup(
      sequenceNumber: number,
      startTimecode: start,
      endTimecode: end,
      substring: String(text)
    )
  }
// Parses a whole SRT document: repeated `srtGroupParser` matches, separated
// by two consecutive `Newline()` matches, followed by `End()`.
let srtParser = Many(srtGroupParser, separator: Newline().skip(Newline()))
.skip(End())
// Cross-group validation: the parsed groups are kept only when the ordering
// check passes; otherwise the result is replaced by `Fail()`.
.flatMap { groups in
groups.sequenceNumbersAndTimecodesAreInIncreasingOrder() ?
Conditional.first(Always(groups)) :
.second(Fail())
}
private extension Collection where Element == SubtitleGroup {
  /// Returns `true` when the first group is numbered 1, every following
  /// group's sequence number is exactly one greater than its predecessor's,
  /// and no group starts before the previous group ends.
  ///
  /// An empty collection yields `false`, because it has no first group.
  func sequenceNumbersAndTimecodesAreInIncreasingOrder() -> Bool {
    guard let head = first, head.sequenceNumber == 1 else { return false }

    for (earlier, later) in zip(self, dropFirst()) {
      // Sequence numbers must be consecutive.
      guard earlier.sequenceNumber + 1 == later.sequenceNumber else { return false }
      // Adjacent groups must have _non-decreasing_ timecodes.
      guard earlier.endTimecode <= later.startTimecode else { return false }
    }
    return true
  }
}
// Run the parser over the sample subtitles and print the result for inspection.
let sampleParseResult = srtParser.parse(sampleSRTString.utf8)
dump(sampleParseResult)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment