Created
May 13, 2025 18:50
-
-
Save kadamwhite/ced5cb41d129a4192aefe774389e9e6c to your computer and use it in GitHub Desktop.
Convert a transcript from Loom into an SRT file which can be used with the video file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env node | |
/**
 * This script is designed to receive piped content from a text file containing
 * a loom.com video transcript, which is usually copyable on the free plan as
 * a series of lines in the format
 *     0:02 What I said then, etc etc etc...
 *
 * The output of the script is SRT-format subtitle text. It can be redirected
 * into a .srt file for use in VLC or other subtitle-aware video players, or
 * later combined with a video source using ffmpeg.
 */
const readline = require( 'readline' ); | |
// This is a nodejs file. | |
// Constants | |
const CHARS_PER_SECOND = 15; // Adjust as needed for reading speed | |
const MAX_CHARS_PER_LINE_SRT = 35; | |
const MAX_LINES_PER_SRT = 2; | |
const MAX_CHARS_PER_SRT_BLOCK = MAX_CHARS_PER_LINE_SRT * MAX_LINES_PER_SRT; | |
const MIN_IDEAL_DURATION_SRT = 0.8; // Minimum ideal duration for a subtitle segment | |
const MAX_IDEAL_DURATION_SRT = 5.0; // Maximum ideal duration for a subtitle segment | |
// Helper function to parse "M:SS" or "MM:SS" into seconds | |
function parseTime( timeStr ) { | |
const parts = timeStr.split( ':' ); | |
const minutes = parseInt( parts[ 0 ], 10 ); | |
const seconds = parseInt( parts[ 1 ], 10 ); | |
return minutes * 60 + seconds; | |
} | |
// Helper function to format seconds into HH:MM:SS,mmm | |
function formatSrtTime( totalSeconds ) { | |
// Work with total milliseconds to avoid floating point inaccuracies and ripple effects | |
let totalMilliseconds = Math.round( totalSeconds * 1000 ); | |
const hours = Math.floor( totalMilliseconds / 3600000 ); | |
totalMilliseconds %= 3600000; | |
const minutes = Math.floor( totalMilliseconds / 60000 ); | |
totalMilliseconds %= 60000; | |
const seconds = Math.floor( totalMilliseconds / 1000 ); | |
const milliseconds = totalMilliseconds % 1000; | |
return `${ String( hours ).padStart( 2, '0' ) }:${ String( minutes ).padStart( | |
2, | |
'0' | |
) }:${ String( seconds ).padStart( 2, '0' ) },${ String( milliseconds ).padStart( | |
3, | |
'0' | |
) }`; | |
} | |
// Helper function to split text into chunks that fit MAX_CHARS_PER_SRT_BLOCK | |
function splitTextIntoChunks( text, maxLength ) { | |
const chunks = []; | |
const words = text.split( /\s+/ ).filter( ( w ) => w.length > 0 ); | |
if ( words.length === 0 ) return []; | |
let currentChunk = ''; | |
for ( const word of words ) { | |
if ( currentChunk.length === 0 ) { | |
currentChunk = word; | |
} else if ( currentChunk.length + 1 + word.length <= maxLength ) { | |
currentChunk += ' ' + word; | |
} else { | |
chunks.push( currentChunk ); | |
currentChunk = word; | |
} | |
} | |
if ( currentChunk.length > 0 ) { | |
chunks.push( currentChunk ); | |
} | |
return chunks; | |
} | |
// Helper function to format a single chunk of text into at most MAX_LINES_PER_SRT lines | |
function formatTextForSrt( textChunk, maxCharsPerLine ) { | |
const words = textChunk.split( /\s+/ ).filter( ( w ) => w.length > 0 ); | |
if ( words.length === 0 ) return ''; | |
const lines = []; | |
let currentLine = ''; | |
for ( const word of words ) { | |
if ( MAX_LINES_PER_SRT > 0 && lines.length === MAX_LINES_PER_SRT ) { | |
// Already have max lines, append remaining words to the last line | |
lines[ MAX_LINES_PER_SRT - 1 ] += ' ' + word; | |
continue; | |
} | |
if ( currentLine.length === 0 ) { | |
currentLine = word; | |
} else if ( currentLine.length + 1 + word.length <= maxCharsPerLine ) { | |
currentLine += ' ' + word; | |
} else { | |
lines.push( currentLine ); | |
currentLine = word; // Start new line | |
} | |
} | |
// Add the last accumulated line, if any, and if there's space for it | |
if ( currentLine.length > 0 ) { | |
if ( lines.length < MAX_LINES_PER_SRT ) { | |
lines.push( currentLine ); | |
} else if ( MAX_LINES_PER_SRT > 0 ) { | |
// Append to last line if max lines already formed | |
lines[ MAX_LINES_PER_SRT - 1 ] += ' ' + currentLine; | |
} | |
// If MAX_LINES_PER_SRT is 0, currentLine is dropped (edge case) | |
} | |
return lines.join( '\n' ); | |
} | |
const rl = readline.createInterface( { | |
input: process.stdin, | |
terminal: false, // Ensure it works with piped input | |
} ); | |
const rawEntries = []; | |
rl.on( 'line', ( line ) => { | |
const match = line.match( /^(\d{1,2}:\d{2})\s+(.*)$/ ); | |
if ( match ) { | |
const timeStr = match[ 1 ]; | |
const text = match[ 2 ].trim(); | |
if ( text.length > 0 ) { | |
// Only process lines with actual text | |
rawEntries.push( { timeStr, text } ); | |
} | |
} | |
} ); | |
rl.on( 'close', () => { | |
const segments = rawEntries | |
.map( ( entry ) => ( { | |
timeSeconds: parseTime( entry.timeStr ), | |
text: entry.text, | |
} ) ) | |
.sort( ( a, b ) => a.timeSeconds - b.timeSeconds ); // Sort by time | |
const srtOutput = []; | |
let subtitleIndex = 1; | |
for ( let i = 0; i < segments.length; i++ ) { | |
const currentSegment = segments[ i ]; | |
const startTime = currentSegment.timeSeconds; | |
const text = currentSegment.text; | |
let endTimeOverallSegment; | |
if ( i + 1 < segments.length ) { | |
endTimeOverallSegment = segments[ i + 1 ].timeSeconds; | |
} else { | |
// For the last segment, estimate duration based on text length | |
const estimatedDuration = Math.max( | |
MIN_IDEAL_DURATION_SRT, | |
text.length / CHARS_PER_SECOND | |
); | |
endTimeOverallSegment = startTime + estimatedDuration; | |
} | |
// Handle cases where timestamps might be out of order or too close | |
if ( endTimeOverallSegment <= startTime ) { | |
const fallbackDuration = Math.max( | |
MIN_IDEAL_DURATION_SRT, | |
text.length / CHARS_PER_SECOND | |
); | |
endTimeOverallSegment = startTime + fallbackDuration; | |
} | |
const chunks = splitTextIntoChunks( text, MAX_CHARS_PER_SRT_BLOCK ); | |
if ( chunks.length === 0 ) continue; | |
const idealDurations = chunks.map( ( ch ) => | |
Math.max( | |
MIN_IDEAL_DURATION_SRT, | |
Math.min( MAX_IDEAL_DURATION_SRT, ch.length / CHARS_PER_SECOND ) | |
) | |
); | |
const sumIdealDurations = idealDurations.reduce( ( sum, d ) => sum + d, 0 ); | |
const availableDurationForSegment = endTimeOverallSegment - startTime; | |
let currentChunkTime = startTime; | |
for ( let j = 0; j < chunks.length; j++ ) { | |
const chunkText = chunks[ j ]; | |
let durationForThisChunk; | |
if ( sumIdealDurations === 0 ) { | |
// Fallback if all ideal durations are zero | |
durationForThisChunk = availableDurationForSegment / chunks.length; | |
} else { | |
durationForThisChunk = | |
( idealDurations[ j ] / sumIdealDurations ) * availableDurationForSegment; | |
} | |
durationForThisChunk = Math.max( 0, durationForThisChunk ); // Ensure non-negative | |
let chunkStartTime = currentChunkTime; | |
let chunkEndTime = chunkStartTime + durationForThisChunk; | |
// Ensure the last chunk of a segment aligns with the segment's overall end time | |
if ( j === chunks.length - 1 ) { | |
chunkEndTime = endTimeOverallSegment; | |
} | |
// Ensure start < end for a valid subtitle. If not, try to give a minimal duration or skip. | |
if ( chunkEndTime <= chunkStartTime ) { | |
if ( chunkStartTime < endTimeOverallSegment ) { | |
// If there's any room at all | |
chunkEndTime = Math.min( chunkStartTime + 0.001, endTimeOverallSegment ); // Minimal 1ms duration | |
} else { | |
// No room for this chunk, skip it. | |
currentChunkTime = chunkEndTime; // Advance time cursor | |
continue; | |
} | |
} | |
const formattedText = formatTextForSrt( chunkText, MAX_CHARS_PER_LINE_SRT ); | |
if ( formattedText.length === 0 ) { | |
// Skip if formatting results in no text | |
currentChunkTime = chunkEndTime; // Advance time cursor | |
continue; | |
} | |
srtOutput.push( String( subtitleIndex++ ) ); | |
srtOutput.push( | |
`${ formatSrtTime( chunkStartTime ) } --> ${ formatSrtTime( chunkEndTime ) }` | |
); | |
srtOutput.push( formattedText ); | |
srtOutput.push( '' ); // Blank line separator | |
currentChunkTime = chunkEndTime; // Next chunk starts where this one ended | |
} | |
} | |
if ( srtOutput.length > 0 ) { | |
process.stdout.write( srtOutput.join( '\n' ) ); | |
} | |
} ); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment