kadamwhite · May 13, 2025 18:50
diff --git a/loom-transcript-to-srt.js b/loom-transcript-to-srt.js
 #!/usr/bin/env node

 /**
 * This script is designed to receive piped content from a text file containing
 * a loom.com video transcript, which is usually copyable on the free plan as
 * a series of lines in the format
 *     0:02 What I said then, etc etc etc...
 *
 * The output of the script is SRT-format subtitle text. It can be output into
 * a .srt file for use in VLC or other subtitle-aware video players, or later
 * combination with a video source using ffmpeg.
 */

 const readline = require( 'readline' );

 // Tuning constants for this Node.js script. All durations are in seconds.
 // CHARS_PER_SECOND converts text length into an estimated on-screen duration.
 // MAX_CHARS_PER_LINE_SRT is the preferred line width handed to formatTextForSrt;
 // MAX_LINES_PER_SRT caps the lines per subtitle; their product is the chunk size
 // handed to splitTextIntoChunks. MIN/MAX_IDEAL_DURATION_SRT clamp the per-chunk
 // duration estimate, and MIN is also the floor for estimated segment lengths.
 const CHARS_PER_SECOND = 15; // Adjust as needed for reading speed
 const MAX_CHARS_PER_LINE_SRT = 35;
 const MAX_LINES_PER_SRT = 2;
 const MAX_CHARS_PER_SRT_BLOCK = MAX_CHARS_PER_LINE_SRT * MAX_LINES_PER_SRT;
 const MIN_IDEAL_DURATION_SRT = 0.8; // Lower clamp for a chunk's estimated duration
 const MAX_IDEAL_DURATION_SRT = 5.0; // Upper clamp for a chunk's estimated duration

 /**
  * Convert a colon-separated transcript timestamp into a number of seconds.
  *
  * Handles "M:SS" and "MM:SS" as before, and additionally "H:MM:SS", which the
  * previous implementation silently misread by ignoring the third field.
  *
  * @param {string} timeStr Timestamp such as "0:02", "12:34" or "1:02:03".
  * @return {number} Total seconds, or NaN if any field is not numeric.
  */
 function parseTime( timeStr ) {
 	// Fold the fields left to right in base 60, so each extra leading field
 	// is worth sixty times the one after it.
 	return timeStr
 		.split( ':' )
 		.reduce( ( total, part ) => total * 60 + parseInt( part, 10 ), 0 );
 }

 /**
  * Render a time offset as an SRT timestamp of the form HH:MM:SS,mmm.
  *
  * @param {number} totalSeconds Offset in seconds (may be fractional).
  * @return {string} Zero-padded timestamp string.
  */
 function formatSrtTime( totalSeconds ) {
 	// Round once to whole milliseconds; every field is then derived from that
 	// integer so rounding cannot ripple between fields.
 	const wholeMs = Math.round( totalSeconds * 1000 );
 	const pad = ( value, width ) => String( value ).padStart( width, '0' );

 	const hh = Math.floor( wholeMs / 3600000 );
 	const mm = Math.floor( ( wholeMs % 3600000 ) / 60000 );
 	const ss = Math.floor( ( wholeMs % 60000 ) / 1000 );
 	const ms = wholeMs % 1000;

 	return `${ pad( hh, 2 ) }:${ pad( mm, 2 ) }:${ pad( ss, 2 ) },${ pad( ms, 3 ) }`;
 }

 /**
  * Greedily pack the words of `text` into chunks of at most `maxLength`
  * characters, joining words with single spaces.
  *
  * A single word longer than `maxLength` is kept whole as its own chunk.
  *
  * @param {string} text      Text to split; any run of whitespace separates words.
  * @param {number} maxLength Maximum characters per chunk.
  * @return {string[]} Chunks in original word order; empty when there are no words.
  */
 function splitTextIntoChunks( text, maxLength ) {
 	const tokens = text.match( /\S+/g ) || [];
 	const result = [];
 	let pending = null;

 	tokens.forEach( ( token ) => {
 		if ( pending === null ) {
 			pending = token;
 			return;
 		}
 		const candidate = `${ pending } ${ token }`;
 		if ( candidate.length <= maxLength ) {
 			pending = candidate;
 		} else {
 			result.push( pending );
 			pending = token;
 		}
 	} );

 	if ( pending !== null ) {
 		result.push( pending );
 	}
 	return result;
 }

 /**
  * Wrap one subtitle chunk onto at most `maxLines` lines.
  *
  * Words are packed greedily up to `maxCharsPerLine` per line. Once the last
  * permitted line has been started, every remaining word is appended to it
  * (so that line may exceed `maxCharsPerLine`). This keeps all words in their
  * original order; the previous implementation appended the first overflowing
  * word after the words that followed it.
  *
  * @param {string} textChunk       Text for a single subtitle.
  * @param {number} maxCharsPerLine Preferred maximum characters per line.
  * @param {number} [maxLines]      Maximum number of lines; defaults to MAX_LINES_PER_SRT.
  * @return {string} Lines joined with "\n"; '' when there are no words or no lines are allowed.
  */
 function formatTextForSrt( textChunk, maxCharsPerLine, maxLines = MAX_LINES_PER_SRT ) {
 	const words = textChunk.split( /\s+/ ).filter( ( w ) => w.length > 0 );
 	if ( words.length === 0 || maxLines <= 0 ) {
 		return '';
 	}

 	const lines = [];
 	let currentLine = '';

 	for ( const word of words ) {
 		// On the final permitted line we never break again, so overflow stays in order.
 		const onLastLine = lines.length === maxLines - 1;

 		if ( currentLine.length === 0 ) {
 			currentLine = word;
 		} else if ( onLastLine || currentLine.length + 1 + word.length <= maxCharsPerLine ) {
 			currentLine += ' ' + word;
 		} else {
 			lines.push( currentLine );
 			currentLine = word;
 		}
 	}

 	// words is non-empty, so currentLine always holds the unfinished last line here.
 	lines.push( currentLine );
 	return lines.join( '\n' );
 }

 // Transcript entries gathered from stdin, in input order: { timeStr, text }.
 const rawEntries = [];

 // Read stdin line by line; terminal mode is off so piped input works.
 const rl = readline.createInterface( {
 	input: process.stdin,
 	terminal: false,
 } );

 // Keep only lines shaped like "<M:SS or MM:SS> <text>" whose text is non-blank.
 rl.on( 'line', ( line ) => {
 	const match = line.match( /^(\d{1,2}:\d{2})\s+(.*)$/ );
 	if ( ! match ) {
 		return;
 	}

 	const [ , timeStr, rest ] = match;
 	const text = rest.trim();
 	if ( text.length === 0 ) {
 		return;
 	}

 	rawEntries.push( { timeStr, text } );
 } );

 // When stdin closes, convert every collected transcript entry into SRT
 // subtitles and write the finished subtitle text to stdout.
 rl.on( 'close', () => {
 	// Duration in seconds guessed from text length, never below the minimum.
 	const estimateDuration = ( text ) =>
 		Math.max( MIN_IDEAL_DURATION_SRT, text.length / CHARS_PER_SECOND );

 	const segments = rawEntries.map( ( { timeStr, text } ) => ( {
 		timeSeconds: parseTime( timeStr ),
 		text,
 	} ) );
 	segments.sort( ( a, b ) => a.timeSeconds - b.timeSeconds );

 	const srtOutput = [];
 	let subtitleIndex = 1;

 	segments.forEach( ( { timeSeconds: startTime, text }, i ) => {
 		// A segment runs until the next one starts; the last one is estimated.
 		let segmentEnd =
 			i + 1 < segments.length
 				? segments[ i + 1 ].timeSeconds
 				: startTime + estimateDuration( text );
 		// If the next timestamp is not later than this one, use the estimate instead.
 		if ( segmentEnd <= startTime ) {
 			segmentEnd = startTime + estimateDuration( text );
 		}

 		const chunks = splitTextIntoChunks( text, MAX_CHARS_PER_SRT_BLOCK );
 		if ( chunks.length === 0 ) {
 			return;
 		}

 		// Clamped per-chunk reading times, used as weights for sharing the segment.
 		const idealDurations = chunks.map( ( ch ) =>
 			Math.max(
 				MIN_IDEAL_DURATION_SRT,
 				Math.min( MAX_IDEAL_DURATION_SRT, ch.length / CHARS_PER_SECOND )
 			)
 		);
 		let sumIdealDurations = 0;
 		for ( const d of idealDurations ) {
 			sumIdealDurations += d;
 		}

 		const segmentSpan = segmentEnd - startTime;
 		const lastIndex = chunks.length - 1;
 		let cursor = startTime;

 		chunks.forEach( ( chunkText, j ) => {
 			// Proportional share of the segment; equal split if all weights are zero.
 			const share =
 				sumIdealDurations === 0
 					? segmentSpan / chunks.length
 					: ( idealDurations[ j ] / sumIdealDurations ) * segmentSpan;

 			const chunkStart = cursor;
 			// The final chunk is pinned to the segment end.
 			let chunkEnd =
 				j === lastIndex ? segmentEnd : chunkStart + Math.max( 0, share );

 			// An empty window gets 1ms if any room remains, otherwise the chunk is skipped.
 			let hasRoom = true;
 			if ( chunkEnd <= chunkStart ) {
 				if ( chunkStart < segmentEnd ) {
 					chunkEnd = Math.min( chunkStart + 0.001, segmentEnd );
 				} else {
 					hasRoom = false;
 				}
 			}

 			if ( hasRoom ) {
 				const formattedText = formatTextForSrt( chunkText, MAX_CHARS_PER_LINE_SRT );
 				if ( formattedText.length !== 0 ) {
 					srtOutput.push(
 						String( subtitleIndex++ ),
 						`${ formatSrtTime( chunkStart ) } --> ${ formatSrtTime( chunkEnd ) }`,
 						formattedText,
 						'' // Blank line separator
 					);
 				}
 			}

 			// Emitted or skipped, the next chunk starts where this one ended.
 			cursor = chunkEnd;
 		} );
 	} );

 	if ( srtOutput.length > 0 ) {
 		process.stdout.write( srtOutput.join( '\n' ) );
 	}
 } );
	#!/usr/bin/env node

	/**
	* This script is designed to receive piped content from a text file containing
	* a loom.com video transcript, which is usually copyable on the free plan as
	* a series of lines in the format
	* 0:02 What I said then, etc etc etc...
	*
	* The output of the script is SRT-format subtitle text. It can be output into
	* a .srt file for use in VLC or other subtitle-aware video players, or later
	* combination with a video source using ffmpeg.
	*/

	const readline = require( 'readline' );

	// Tuning constants for this Node.js script. All durations are in seconds.
	// CHARS_PER_SECOND converts text length into an estimated on-screen duration.
	// MAX_CHARS_PER_LINE_SRT is the preferred line width handed to formatTextForSrt;
	// MAX_LINES_PER_SRT caps the lines per subtitle; their product is the chunk size
	// handed to splitTextIntoChunks. MIN/MAX_IDEAL_DURATION_SRT clamp the per-chunk
	// duration estimate, and MIN is also the floor for estimated segment lengths.
	const CHARS_PER_SECOND = 15; // Adjust as needed for reading speed
	const MAX_CHARS_PER_LINE_SRT = 35;
	const MAX_LINES_PER_SRT = 2;
	const MAX_CHARS_PER_SRT_BLOCK = MAX_CHARS_PER_LINE_SRT * MAX_LINES_PER_SRT;
	const MIN_IDEAL_DURATION_SRT = 0.8; // Lower clamp for a chunk's estimated duration
	const MAX_IDEAL_DURATION_SRT = 5.0; // Upper clamp for a chunk's estimated duration

	/**
	 * Convert a colon-separated transcript timestamp into a number of seconds.
	 *
	 * Handles "M:SS" and "MM:SS" as before, and additionally "H:MM:SS", which the
	 * previous implementation silently misread by ignoring the third field.
	 *
	 * @param {string} timeStr Timestamp such as "0:02", "12:34" or "1:02:03".
	 * @return {number} Total seconds, or NaN if any field is not numeric.
	 */
	function parseTime( timeStr ) {
		// Fold the fields left to right in base 60, so each extra leading field
		// is worth sixty times the one after it.
		return timeStr
			.split( ':' )
			.reduce( ( total, part ) => total * 60 + parseInt( part, 10 ), 0 );
	}

	/**
	 * Render a time offset as an SRT timestamp of the form HH:MM:SS,mmm.
	 *
	 * @param {number} totalSeconds Offset in seconds (may be fractional).
	 * @return {string} Zero-padded timestamp string.
	 */
	function formatSrtTime( totalSeconds ) {
		// Round once to whole milliseconds; every field is then derived from that
		// integer so rounding cannot ripple between fields.
		const wholeMs = Math.round( totalSeconds * 1000 );
		const pad = ( value, width ) => String( value ).padStart( width, '0' );

		const hh = Math.floor( wholeMs / 3600000 );
		const mm = Math.floor( ( wholeMs % 3600000 ) / 60000 );
		const ss = Math.floor( ( wholeMs % 60000 ) / 1000 );
		const ms = wholeMs % 1000;

		return `${ pad( hh, 2 ) }:${ pad( mm, 2 ) }:${ pad( ss, 2 ) },${ pad( ms, 3 ) }`;
	}

	/**
	 * Greedily pack the words of `text` into chunks of at most `maxLength`
	 * characters, joining words with single spaces.
	 *
	 * A single word longer than `maxLength` is kept whole as its own chunk.
	 *
	 * @param {string} text      Text to split; any run of whitespace separates words.
	 * @param {number} maxLength Maximum characters per chunk.
	 * @return {string[]} Chunks in original word order; empty when there are no words.
	 */
	function splitTextIntoChunks( text, maxLength ) {
		const tokens = text.match( /\S+/g ) || [];
		const result = [];
		let pending = null;

		tokens.forEach( ( token ) => {
			if ( pending === null ) {
				pending = token;
				return;
			}
			const candidate = `${ pending } ${ token }`;
			if ( candidate.length <= maxLength ) {
				pending = candidate;
			} else {
				result.push( pending );
				pending = token;
			}
		} );

		if ( pending !== null ) {
			result.push( pending );
		}
		return result;
	}

	/**
	 * Wrap one subtitle chunk onto at most `maxLines` lines.
	 *
	 * Words are packed greedily up to `maxCharsPerLine` per line. Once the last
	 * permitted line has been started, every remaining word is appended to it
	 * (so that line may exceed `maxCharsPerLine`). This keeps all words in their
	 * original order; the previous implementation appended the first overflowing
	 * word after the words that followed it.
	 *
	 * @param {string} textChunk       Text for a single subtitle.
	 * @param {number} maxCharsPerLine Preferred maximum characters per line.
	 * @param {number} [maxLines]      Maximum number of lines; defaults to MAX_LINES_PER_SRT.
	 * @return {string} Lines joined with "\n"; '' when there are no words or no lines are allowed.
	 */
	function formatTextForSrt( textChunk, maxCharsPerLine, maxLines = MAX_LINES_PER_SRT ) {
		const words = textChunk.split( /\s+/ ).filter( ( w ) => w.length > 0 );
		if ( words.length === 0 || maxLines <= 0 ) {
			return '';
		}

		const lines = [];
		let currentLine = '';

		for ( const word of words ) {
			// On the final permitted line we never break again, so overflow stays in order.
			const onLastLine = lines.length === maxLines - 1;

			if ( currentLine.length === 0 ) {
				currentLine = word;
			} else if ( onLastLine || currentLine.length + 1 + word.length <= maxCharsPerLine ) {
				currentLine += ' ' + word;
			} else {
				lines.push( currentLine );
				currentLine = word;
			}
		}

		// words is non-empty, so currentLine always holds the unfinished last line here.
		lines.push( currentLine );
		return lines.join( '\n' );
	}

	// Transcript entries gathered from stdin, in input order: { timeStr, text }.
	const rawEntries = [];

	// Read stdin line by line; terminal mode is off so piped input works.
	const rl = readline.createInterface( {
		input: process.stdin,
		terminal: false,
	} );

	// Keep only lines shaped like "<M:SS or MM:SS> <text>" whose text is non-blank.
	rl.on( 'line', ( line ) => {
		const match = line.match( /^(\d{1,2}:\d{2})\s+(.*)$/ );
		if ( ! match ) {
			return;
		}

		const [ , timeStr, rest ] = match;
		const text = rest.trim();
		if ( text.length === 0 ) {
			return;
		}

		rawEntries.push( { timeStr, text } );
	} );

	// After all input has been read, build the SRT subtitles from the collected
	// entries and print them to stdout.
	rl.on( 'close', () => {
		const timeline = rawEntries
			.map( ( raw ) => ( { timeSeconds: parseTime( raw.timeStr ), text: raw.text } ) )
			.sort( ( left, right ) => left.timeSeconds - right.timeSeconds );

		const cueLines = [];
		let cueCount = 0;

		for ( let i = 0; i < timeline.length; i += 1 ) {
			const { timeSeconds: begin, text } = timeline[ i ];

			// Length-based guess in seconds, used when no later timestamp bounds this entry.
			const lengthBasedDuration = Math.max(
				MIN_IDEAL_DURATION_SRT,
				text.length / CHARS_PER_SECOND
			);

			// An entry ends where the following one begins; the last one is estimated.
			const following = timeline[ i + 1 ];
			let finish =
				following === undefined ? begin + lengthBasedDuration : following.timeSeconds;
			// If the following timestamp is not later than this one, use the estimate.
			if ( finish <= begin ) {
				finish = begin + lengthBasedDuration;
			}

			const pieces = splitTextIntoChunks( text, MAX_CHARS_PER_SRT_BLOCK );
			if ( pieces.length === 0 ) {
				continue;
			}

			// Clamped reading time per piece, used as a weight for sharing the window.
			const weights = pieces.map( ( piece ) => {
				const readingTime = Math.min(
					MAX_IDEAL_DURATION_SRT,
					piece.length / CHARS_PER_SECOND
				);
				return Math.max( MIN_IDEAL_DURATION_SRT, readingTime );
			} );
			const totalWeight = weights.reduce( ( acc, w ) => acc + w, 0 );

			const windowLength = finish - begin;
			let cursor = begin;

			for ( let k = 0; k < pieces.length; k += 1 ) {
				let pieceLength;
				if ( totalWeight !== 0 ) {
					pieceLength = ( weights[ k ] / totalWeight ) * windowLength;
				} else {
					// Equal split when every weight is zero.
					pieceLength = windowLength / pieces.length;
				}
				pieceLength = Math.max( 0, pieceLength );

				const cueStart = cursor;
				// The final piece is pinned to the end of the entry's window.
				let cueEnd = k === pieces.length - 1 ? finish : cueStart + pieceLength;

				if ( cueEnd <= cueStart ) {
					if ( ! ( cueStart < finish ) ) {
						// No room left for this piece: skip it but still advance the cursor.
						cursor = cueEnd;
						continue;
					}
					// Some room remains: give the piece a minimal 1ms duration.
					cueEnd = Math.min( cueStart + 0.001, finish );
				}

				const body = formatTextForSrt( pieces[ k ], MAX_CHARS_PER_LINE_SRT );
				if ( body.length > 0 ) {
					cueCount += 1;
					cueLines.push( String( cueCount ) );
					cueLines.push( `${ formatSrtTime( cueStart ) } --> ${ formatSrtTime( cueEnd ) }` );
					cueLines.push( body );
					cueLines.push( '' ); // Blank line separator
				}

				// The next piece starts where this one ended.
				cursor = cueEnd;
			}
		}

		if ( cueLines.length > 0 ) {
			process.stdout.write( cueLines.join( '\n' ) );
		}
	} );