Last active
August 10, 2025 13:05
-
-
Save hobodrifterdavid/fa5ce46745eda3eb432c2098d8e8b029 to your computer and use it in GitHub Desktop.
batch-test-extract-mediabunny.ts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env node | |
import fs from 'fs/promises';
import path from 'path';
import { exec, execFile } from 'child_process';
import { promisify } from 'util';
import {
  Input,
  StreamSource,
  ALL_FORMATS,
  EncodedPacketSink,
  Conversion,
  Output,
  BufferTarget,
  Mp4OutputFormat,
  WebMOutputFormat,
  MkvOutputFormat,
  OggOutputFormat
} from './mediabunny-fork/dist/bundles/mediabunny.mjs';
// Promise-returning form of child_process.exec.
const execAsync = promisify(exec);

// ANSI SGR escape sequences for console styling, keyed by name.
// Each numeric code N below expands to the sequence ESC[Nm.
const colors = Object.fromEntries(
  Object.entries({
    reset: 0,
    bright: 1,
    dim: 2,
    red: 31,
    green: 32,
    yellow: 33,
    blue: 34,
    magenta: 35,
    cyan: 36,
    white: 37,
    bgRed: 41,
    bgGreen: 42,
    bgYellow: 43
  }).map(([name, code]) => [name, `\x1b[${code}m`])
);

// Glyphs used as status markers in console output and in the written reports.
const icons = {
  // Overall status
  success: '✅',
  error: '❌',
  warning: '⚠️',
  info: 'ℹ️',
  processing: '⏳',
  complete: '✨',
  // Content kinds
  audio: '🔊',
  subtitle: '💬',
  file: '📄',
  folder: '📁',
  // Inline markers
  check: '✓',
  cross: '✗',
  arrow: '→',
  bullet: '•'
};
// ---------------------------------------------------------------------------
// Configuration
// ---------------------------------------------------------------------------

// Files smaller than this many bytes (100 MiB) are ignored by the directory scan.
const MIN_FILE_SIZE = 100 * 1024 ** 2;
// Root folder under which one sub-folder per processed video is created.
const OUTPUT_BASE_DIR = './output';
// Lower-case file extensions the directory scan accepts.
const SUPPORTED_EXTENSIONS = ['.mkv', '.mp4', '.webm'];

// Audio codec -> output container. Each row is [codec, OutputFormat class, extension];
// a null format means the encoded packets are written out raw, without a container.
// The 'default' row is the fallback for codecs that are not listed.
const CODEC_CONTAINER_MAP = Object.fromEntries(
  [
    ['aac', Mp4OutputFormat, 'mp4'],
    ['mp3', null, 'mp3'],
    ['opus', WebMOutputFormat, 'webm'],
    ['vorbis', OggOutputFormat, 'ogg'],
    ['ac3', null, 'ac3'],
    ['eac3', null, 'eac3'],
    ['dts', null, 'dts'],
    ['truehd', null, 'thd'],
    ['flac', null, 'flac'],
    ['default', MkvOutputFormat, 'mkv']
  ].map(([codec, format, ext]) => [codec, { format, ext }])
);

// Subtitle codec identifier -> output file extension.
// Insertion order matters: lookups walk the entries and take the first match.
const SUBTITLE_FORMAT_MAP = {
  // Short codec names
  webvtt: 'vtt',
  subrip: 'srt',
  ass: 'ass',
  ssa: 'ssa',
  dvdsub: 'sub',
  dvbsub: 'sub',
  pgssub: 'sup',
  hdmv_pgs_subtitle: 'sup',
  // Matroska-style codec IDs
  'S_TEXT/WEBVTT': 'vtt',
  'S_TEXT/UTF8': 'srt',
  'S_TEXT/ASS': 'ass',
  'S_TEXT/SSA': 'ssa',
  'S_HDMV/PGS': 'sup',
  S_DVBSUB: 'sub'
};
| // Utility functions | |
/**
 * Format a byte count as a short human-readable string using 1024-based units.
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} e.g. "1.5 KB"; "0 B" for zero, negative or non-finite input.
 */
function formatBytes(bytes) {
  if (!Number.isFinite(bytes) || bytes <= 0) return '0 B';
  const k = 1024;
  const sizes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB'];
  const rawIndex = Math.floor(Math.log(bytes) / Math.log(k));
  // Clamp so fractional inputs (< 1) and values beyond the largest unit
  // still map onto a real entry instead of printing "undefined".
  const unitIndex = Math.min(Math.max(rawIndex, 0), sizes.length - 1);
  return `${parseFloat((bytes / Math.pow(k, unitIndex)).toFixed(2))} ${sizes[unitIndex]}`;
}
/**
 * Turn an arbitrary name into one that is safe to use as a directory/file name.
 *
 * The substitutions run in order: reserved and control characters, square
 * brackets and whitespace runs become underscores, runs of dots collapse to a
 * single dot, runs of underscores collapse to one, and the result is cut to
 * at most 200 characters.
 *
 * @param {string} filename - Name to clean.
 * @returns {string} The cleaned name.
 */
function sanitizeFilename(filename) {
  const substitutions = [
    [/[<>:"/\\|?*\x00-\x1F]/g, '_'],
    [/\[/g, '_'],
    [/\]/g, '_'],
    [/\s+/g, '_'],
    [/\.+/g, '.'],
    [/_+/g, '_']
  ];
  let cleaned = filename;
  for (const [pattern, replacement] of substitutions) {
    cleaned = cleaned.replace(pattern, replacement);
  }
  return cleaned.slice(0, 200);
}
/**
 * Format a time offset as an SRT timestamp (HH:MM:SS,mmm).
 *
 * The value is converted to whole milliseconds once, up front, and every field
 * is derived from that integer. Flooring `(seconds % 1) * 1000` instead loses a
 * millisecond to floating-point error (1.001 came out as ",000").
 *
 * @param {number} seconds - Offset in seconds; negative or non-finite values are treated as 0.
 * @returns {string} Timestamp such as "01:01:01,500".
 */
function formatSRTTime(seconds) {
  const totalMs = Number.isFinite(seconds) ? Math.max(0, Math.round(seconds * 1000)) : 0;
  const hours = Math.floor(totalMs / 3600000);
  const minutes = Math.floor((totalMs % 3600000) / 60000);
  const secs = Math.floor((totalMs % 60000) / 1000);
  const milliseconds = totalMs % 1000;
  const pad = (value, width) => String(value).padStart(width, '0');
  return `${pad(hours, 2)}:${pad(minutes, 2)}:${pad(secs, 2)},${pad(milliseconds, 3)}`;
}
/**
 * Render subtitle entries as SRT text.
 *
 * Each entry becomes a cue made of a 1-based sequence number, a
 * "start --> end" timing line and the cue text, followed by a blank line.
 * Surrounding whitespace of the whole document is trimmed.
 *
 * @param {Array<{startTime: number, endTime: number, text: string}>} entries - Cues in output order.
 * @returns {string} The SRT document.
 */
function convertToSRT(entries) {
  const cues = entries.map((cue, position) => {
    const from = formatSRTTime(cue.startTime);
    const to = formatSRTTime(cue.endTime);
    return `${position + 1}\n${from} --> ${to}\n${cue.text}\n\n`;
  });
  return cues.join('').trim();
}
/**
 * Walk a directory tree and collect the video files worth processing.
 *
 * A file is kept when its lower-cased extension is in SUPPORTED_EXTENSIONS and
 * its size is at least MIN_FILE_SIZE. Sub-directories are descended into in the
 * order readdir returns them.
 *
 * @param {string} dir - Root directory to scan.
 * @returns {Promise<Array<{path: string, name: string, size: number}>>} Matching files.
 */
async function getFilesRecursively(dir) {
  const collected = [];

  const walk = async (folder) => {
    const dirents = await fs.readdir(folder, { withFileTypes: true });
    for (const dirent of dirents) {
      const fullPath = path.join(folder, dirent.name);

      if (dirent.isDirectory()) {
        await walk(fullPath);
        continue;
      }
      if (!dirent.isFile()) {
        continue;
      }

      const extension = path.extname(dirent.name).toLowerCase();
      if (!SUPPORTED_EXTENSIONS.includes(extension)) {
        continue;
      }

      // Only stat files that already passed the extension filter.
      const { size } = await fs.stat(fullPath);
      if (size >= MIN_FILE_SIZE) {
        collected.push({ path: fullPath, name: dirent.name, size });
      }
    }
  };

  await walk(dir);
  return collected;
}
/**
 * Re-check the files a previous run left in an output directory.
 *
 * Audio files (named `audio_track_<n>_<codec>.<ext>`) are probed with
 * validateWithFfprobe and, when valid, decoded briefly with testPlayback.
 * Subtitle files (named `subtitle_track_<n>_<lang>.<ext>`) are read and must
 * contain more than one line.
 *
 * The set of audio extensions is derived from CODEC_CONTAINER_MAP so that every
 * extension the extractor can write (including `.thd` and the `.mkv` fallback)
 * is actually re-validated.
 *
 * @param {string} outputDir - Directory holding the extracted files.
 * @returns {Promise<{allValid: boolean, failureReason: string, audioTracks: object[],
 *   subtitleTracks: object[], audioResults: object[], subtitleResults: object[]}>}
 *   Never rejects; directory access problems are reported through
 *   `allValid: false` and `failureReason`.
 */
async function revalidateExtractedFiles(outputDir) {
  const result = {
    allValid: true,
    failureReason: '',
    audioTracks: [],
    subtitleTracks: [],
    audioResults: [],
    subtitleResults: []
  };

  const audioExtensions = new Set(
    Object.values(CODEC_CONTAINER_MAP).map(({ ext }) => `.${ext}`)
  );
  const subtitleExtensions = new Set(['.srt', '.vtt', '.ass', '.ssa']);
  // Track number embedded in the file name; falls back to 0 when absent.
  const trackIndexOf = (name, pattern) => Number.parseInt(name.match(pattern)?.[1] || '0', 10);

  try {
    // Get all files in the output directory
    const files = await fs.readdir(outputDir);

    // Find and validate audio files
    const audioFiles = files.filter(
      (f) => f.startsWith('audio_track_') && audioExtensions.has(path.extname(f))
    );
    console.log(`\n${colors.dim} Re-validating ${audioFiles.length} audio file(s)...${colors.reset}`);

    for (const audioFile of audioFiles) {
      const filePath = path.join(outputDir, audioFile);
      const trackIndex = trackIndexOf(audioFile, /audio_track_(\d+)/);
      // The codec is the last underscore-separated token before the extension
      const codecMatch = audioFile.match(/_([^_]+)\.\w+$/);
      const codec = codecMatch ? codecMatch[1] : 'unknown';
      console.log(` ${colors.dim}Checking Track ${trackIndex} (${codec}): ${audioFile}${colors.reset}`);

      // Validate with ffprobe
      const validation = await validateWithFfprobe(filePath);
      if (validation.valid) {
        console.log(` ${icons.check} ${colors.green}Valid: ${validation.duration}s, ${validation.bitRate} bps${colors.reset}`);

        // Test playback
        const playback = await testPlayback(filePath, 3);
        if (playback.playable) {
          const metric = playback.frames > 10000 ? `${(playback.frames/1024).toFixed(1)}MB` : `${playback.frames}KB`;
          console.log(` ${icons.check} ${colors.green}Playable: ${metric} decoded${colors.reset}`);
        } else {
          console.log(` ${icons.cross} ${colors.red}Playback failed: ${playback.error}${colors.reset}`);
          result.allValid = false;
          result.failureReason = `Audio track ${trackIndex} playback failed: ${playback.error}`;
        }
        result.audioResults.push({
          trackIndex,
          codec,
          outputFile: filePath,
          success: true,
          validation,
          playback
        });
      } else {
        console.log(` ${icons.cross} ${colors.red}Invalid: ${validation.error}${colors.reset}`);
        result.allValid = false;
        result.failureReason = `Audio track ${trackIndex} validation failed: ${validation.error}`;
        result.audioResults.push({
          trackIndex,
          codec,
          outputFile: filePath,
          success: false,
          validation,
          error: validation.error
        });
      }

      // Add to tracks list
      result.audioTracks.push({
        index: trackIndex,
        codec,
        sampleRate: validation.sampleRate || 0,
        channels: validation.channels || 0
      });
    }

    // Find and check subtitle files
    const subtitleFiles = files.filter(
      (f) => f.startsWith('subtitle_track_') && subtitleExtensions.has(path.extname(f))
    );
    if (subtitleFiles.length > 0) {
      console.log(`\n${colors.dim} Re-validating ${subtitleFiles.length} subtitle file(s)...${colors.reset}`);
      for (const subtitleFile of subtitleFiles) {
        const filePath = path.join(outputDir, subtitleFile);
        const trackIndex = trackIndexOf(subtitleFile, /subtitle_track_(\d+)/);
        console.log(` ${colors.dim}Checking Track ${trackIndex}: ${subtitleFile}${colors.reset}`);
        try {
          const content = await fs.readFile(filePath, 'utf-8');
          const lines = content.split('\n').length;
          if (lines > 1) {
            console.log(` ${icons.check} ${colors.green}Valid: ${lines} lines${colors.reset}`);
            result.subtitleResults.push({
              trackIndex,
              outputFile: filePath,
              success: true,
              lines
            });
          } else {
            console.log(` ${icons.cross} ${colors.red}Invalid: Empty file${colors.reset}`);
            result.allValid = false;
            result.failureReason = `Subtitle track ${trackIndex} is empty`;
            result.subtitleResults.push({
              trackIndex,
              outputFile: filePath,
              success: false,
              error: 'Empty file'
            });
          }

          // Add to tracks list; the language is the three-letter token before the extension
          const langMatch = subtitleFile.match(/_([a-z]{3})\.\w+$/);
          result.subtitleTracks.push({
            index: trackIndex,
            language: langMatch ? langMatch[1] : 'unknown'
          });
        } catch (error) {
          console.log(` ${icons.cross} ${colors.red}Error reading file: ${error.message}${colors.reset}`);
          result.allValid = false;
          result.failureReason = `Subtitle track ${trackIndex} read error: ${error.message}`;
          result.subtitleResults.push({
            trackIndex,
            outputFile: filePath,
            success: false,
            error: error.message
          });
        }
      }
    }

    if (audioFiles.length === 0 && subtitleFiles.length === 0) {
      result.allValid = false;
      result.failureReason = 'No extracted files found in output directory';
    }
  } catch (error) {
    result.allValid = false;
    result.failureReason = `Error accessing output directory: ${error.message}`;
  }

  return result;
}
/**
 * Inspect an output directory for a `report.txt` from an earlier run and
 * decide whether that run extracted every track successfully.
 *
 * The report is parsed as plain text: "Audio Tracks:" / "Subtitle Tracks:"
 * sections give the expected counts, and the "... Extraction Results:"
 * sections are scanned for "Track" lines carrying a check mark. A report that
 * contains either results section counts as having results, so files with
 * only subtitle tracks are no longer treated as incomplete on every run.
 *
 * @param {string} outputDir - Directory a previous run may have written to.
 * @returns {Promise<{exists: boolean, success: boolean, reason: string,
 *   totalAudioTracks?: number, successfulAudioTracks?: number,
 *   totalSubtitleTracks?: number, successfulSubtitleTracks?: number}>}
 *   Never rejects; any failure is reported as `exists: false`.
 */
async function checkPreviousExtraction(outputDir) {
  try {
    // Check if the output directory exists
    await fs.access(outputDir);

    // Check if there's a report file
    const reportPath = path.join(outputDir, 'report.txt');
    const reportContent = await fs.readFile(reportPath, 'utf-8').catch(() => null);
    if (!reportContent) {
      return { exists: true, success: false, reason: 'No report file found' };
    }

    const lines = reportContent.split('\n');
    const sectionHeaders = [
      'Audio Extraction Results:',
      'Subtitle Extraction Results:',
      'Errors:',
      'Timing:'
    ];

    // Count "Track N:" lines starting at `from`, stopping at the first line
    // that does not begin with "Track".
    const countTrackLines = (from) => {
      let count = 0;
      for (let i = from; i < lines.length; i++) {
        if (/^\s*Track \d+:/.test(lines[i])) {
          count++;
        } else if (!lines[i].trim().startsWith('Track')) {
          break;
        }
      }
      return count;
    };

    // Count check-marked "Track" lines starting at `from`, stopping at the
    // next section header or blank line.
    const countSuccessLines = (from) => {
      let count = 0;
      for (let i = from; i < lines.length; i++) {
        const current = lines[i];
        if (!current.trim() || sectionHeaders.some((header) => current.includes(header))) {
          break;
        }
        if (current.includes('✓') && current.includes('Track')) {
          count++;
        }
      }
      return count;
    };

    let totalAudioTracks = 0;
    let successfulAudioTracks = 0;
    let totalSubtitleTracks = 0;
    let successfulSubtitleTracks = 0;
    let hasExtractionResults = false;

    // Iterate by index so each section is scanned from its own position
    // (lines.indexOf(line) would resolve to the first identical line).
    for (let index = 0; index < lines.length; index++) {
      const line = lines[index];
      if (line.includes('Audio Tracks:')) {
        totalAudioTracks += countTrackLines(index + 1);
      }
      if (line.includes('Subtitle Tracks:')) {
        totalSubtitleTracks += countTrackLines(index + 1);
      }
      if (line.includes('Audio Extraction Results:')) {
        hasExtractionResults = true;
        successfulAudioTracks += countSuccessLines(index + 1);
      }
      if (line.includes('Subtitle Extraction Results:')) {
        hasExtractionResults = true;
        successfulSubtitleTracks += countSuccessLines(index + 1);
      }
    }

    if (!hasExtractionResults) {
      return { exists: true, success: false, reason: 'No extraction results in report' };
    }

    const allSuccess = (totalAudioTracks === 0 || successfulAudioTracks === totalAudioTracks) &&
      (totalSubtitleTracks === 0 || successfulSubtitleTracks === totalSubtitleTracks);

    return {
      exists: true,
      success: allSuccess,
      totalAudioTracks,
      successfulAudioTracks,
      totalSubtitleTracks,
      successfulSubtitleTracks,
      reason: allSuccess ? 'All tracks extracted successfully' :
        `Audio: ${successfulAudioTracks}/${totalAudioTracks}, Subtitles: ${successfulSubtitleTracks}/${totalSubtitleTracks}`
    };
  } catch (error) {
    // Directory doesn't exist
    return { exists: false, success: false, reason: 'Output directory does not exist' };
  }
}
/**
 * Probe a media file with ffprobe and summarise what it reports.
 *
 * ffprobe is started with execFile and an argument array, so the path is
 * passed verbatim and never interpreted by a shell (a name containing `$(...)`
 * or backticks would otherwise be expanded inside the quoted command string).
 *
 * @param {string} filePath - File to probe.
 * @returns {Promise<{valid: true, duration: string, bitRate: string, streams: number,
 *   codec: string, sampleRate: number, channels: number} | {valid: false, error: string}>}
 *   `sampleRate` and `channels` come from the first stream and are 0 when
 *   ffprobe does not report them. Never rejects.
 */
async function validateWithFfprobe(filePath) {
  try {
    const execFileAsync = promisify(execFile);
    const { stdout } = await execFileAsync('ffprobe', [
      '-v', 'error',
      '-show_format',
      '-show_streams',
      '-of', 'json',
      filePath
    ]);
    const info = JSON.parse(stdout);
    const firstStream = info.streams?.[0];
    return {
      valid: true,
      duration: info.format?.duration || 'unknown',
      bitRate: info.format?.bit_rate || 'unknown',
      streams: info.streams?.length || 0,
      codec: firstStream?.codec_name || 'unknown',
      sampleRate: Number(firstStream?.sample_rate) || 0,
      channels: Number(firstStream?.channels) || 0
    };
  } catch (error) {
    return {
      valid: false,
      error: error.message
    };
  }
}
/**
 * Check that a media file can actually be decoded by running ffmpeg over the
 * first few seconds and discarding the output (`-f null -`).
 *
 * ffmpeg is started with execFile and an argument array so the path is never
 * interpreted by a shell. The progress text ffmpeg writes to stderr is then
 * inspected, in order, for: the words "Error"/"Invalid", the decoded audio
 * size, the reached timestamp, and finally a video frame count.
 *
 * @param {string} filePath - File to decode.
 * @param {number} [durationSeconds=5] - How much of the file to decode.
 * @returns {Promise<{playable: boolean, frames?: number, error: string|null}>}
 *   `frames` holds KB of decoded audio, elapsed milliseconds, or a frame
 *   count, depending on which pattern matched. Never rejects.
 */
async function testPlayback(filePath, durationSeconds = 5) {
  try {
    const execFileAsync = promisify(execFile);
    const { stderr } = await execFileAsync(
      'ffmpeg',
      ['-i', filePath, '-t', String(durationSeconds), '-f', 'null', '-'],
      { encoding: 'utf8' }
    );

    // Check for any errors in stderr
    if (stderr.includes('Error') || stderr.includes('Invalid')) {
      return {
        playable: false,
        error: 'Decoding errors found'
      };
    }

    // Check if audio was decoded (most reliable for audio files).
    // Accept both unit spellings ("KiB" and the older "kB") in the summary line.
    const audioMatch = stderr.match(/audio:\s*(\d+)\s*(?:KiB|kB)/);
    if (audioMatch) {
      const audioKB = Number.parseInt(audioMatch[1], 10);
      return {
        playable: audioKB > 0,
        frames: audioKB, // Report KB of audio decoded instead of frames
        error: audioKB === 0 ? 'No audio decoded' : null
      };
    }

    // Check if time progressed (works for both audio and video)
    const timeMatch = stderr.match(/time=(\d{2}):(\d{2}):(\d{2}\.\d+)/);
    if (timeMatch) {
      const hours = Number.parseInt(timeMatch[1], 10);
      const minutes = Number.parseInt(timeMatch[2], 10);
      const seconds = Number.parseFloat(timeMatch[3]);
      const totalSeconds = hours * 3600 + minutes * 60 + seconds;
      return {
        playable: totalSeconds > 0,
        frames: Math.floor(totalSeconds * 1000), // Report milliseconds instead of frames
        error: totalSeconds === 0 ? 'No time progressed' : null
      };
    }

    // Fallback: Look for video frames (for video files)
    const frameMatch = stderr.match(/frame=\s*(\d+)/);
    const frames = frameMatch ? Number.parseInt(frameMatch[1], 10) : 0;
    return {
      playable: frames > 0,
      frames,
      error: frames === 0 ? 'No decoding detected' : null
    };
  } catch (error) {
    return {
      playable: false,
      error: error.message
    };
  }
}
/**
 * Extract one audio track from an opened input into its own file, then
 * validate and test-decode the result.
 *
 * Codecs whose CODEC_CONTAINER_MAP entry has `format: null` are written as the
 * raw concatenation of their encoded packets; all others are remuxed through
 * the Conversion API into the mapped container. The extraction time is
 * recorded for both paths so the realtime rate is available either way.
 *
 * @param {object} input - The opened Input the track belongs to.
 * @param {object} track - Audio track object; `track.codec` selects the container.
 * @param {number} trackIndex - Index of the track among the input's audio tracks.
 * @param {string} outputDir - Directory to write `audio_track_<index>_<codec>.<ext>` into.
 * @returns {Promise<object>} Result record (`success`, `outputFile`, `error`,
 *   `validation`, `playback`, `extractionTime`, `realtimeRate`). Never rejects;
 *   failures are stored in `error`.
 */
async function extractAudioTrack(input, track, trackIndex, outputDir) {
  const result = {
    trackIndex,
    codec: track.codec,
    success: false,
    outputFile: null,
    error: null,
    validation: null,
    playback: null,
    extractionTime: 0,
    realtimeRate: null
  };
  const startTime = Date.now();

  try {
    const containerInfo = CODEC_CONTAINER_MAP[track.codec] || CODEC_CONTAINER_MAP.default;
    const outputFile = path.join(outputDir, `audio_track_${trackIndex}_${track.codec}.${containerInfo.ext}`);

    if (containerInfo.format === null) {
      // Raw extraction for codecs without container
      console.log(` ${colors.dim}Extracting raw ${track.codec}...${colors.reset}`);
      const sink = new EncodedPacketSink(track);
      const packets = [];
      let totalSize = 0;
      for await (const packet of sink.packets()) {
        packets.push(packet.data);
        totalSize += packet.data.length;
      }

      // Concatenate all packet payloads into one contiguous buffer
      const combined = new Uint8Array(totalSize);
      let offset = 0;
      for (const packet of packets) {
        combined.set(packet, offset);
        offset += packet.length;
      }

      await fs.writeFile(outputFile, combined);
      result.outputFile = outputFile;
      result.success = true;
      console.log(` ${icons.check} ${colors.green}Extracted ${formatBytes(totalSize)} of raw ${track.codec}${colors.reset}`);
    } else {
      // Container extraction using the FIXED Conversion API with track selection
      console.log(` ${colors.dim}Extracting ${track.codec} to ${containerInfo.ext} container (track ${trackIndex} only)...${colors.reset}`);
      const target = new BufferTarget();
      const output = new Output({
        format: new containerInfo.format(),
        target: target
      });

      // Use the fixed track selection API
      const conversion = await Conversion.init({
        input: input,
        output: output,
        video: { discard: true },
        tracks: {
          indices: [trackIndex], // Select only this specific audio track
          audioOnly: true // Filter to audio tracks first (now works correctly!)
        }
      });
      await conversion.execute();

      await fs.writeFile(outputFile, new Uint8Array(target.buffer));
      result.outputFile = outputFile;
      result.success = true;
      console.log(` ${icons.check} ${colors.green}Extracted ${formatBytes(target.buffer.byteLength)} to ${containerInfo.ext}${colors.reset}`);
    }

    // Calculate timing for both extraction paths, before validation starts
    result.extractionTime = (Date.now() - startTime) / 1000; // in seconds

    // Validate output
    console.log(` ${colors.dim}Validating output...${colors.reset}`);
    result.validation = await validateWithFfprobe(outputFile);
    if (result.validation.valid) {
      console.log(` ${icons.check} ${colors.green}Valid ${result.validation.codec} file (${result.validation.duration}s, ${result.validation.bitRate} bps)${colors.reset}`);

      // Test playback
      console.log(` ${colors.dim}Testing playback...${colors.reset}`);
      result.playback = await testPlayback(outputFile, 3);
      if (result.playback.playable) {
        const metric = result.playback.frames > 10000 ? `${(result.playback.frames/1024).toFixed(1)}MB` : `${result.playback.frames}KB`;
        console.log(` ${icons.check} ${colors.green}Playback successful (${metric} decoded)${colors.reset}`);
      } else {
        console.log(` ${icons.cross} ${colors.red}Playback failed: ${result.playback.error}${colors.reset}`);
      }
    } else {
      console.log(` ${icons.cross} ${colors.red}Validation failed: ${result.validation.error}${colors.reset}`);
    }

    // Calculate realtime rate (media seconds extracted per wall-clock second)
    if (result.validation?.duration && result.extractionTime > 0) {
      const mediaDuration = Number.parseFloat(result.validation.duration);
      if (!Number.isNaN(mediaDuration)) {
        result.realtimeRate = mediaDuration / result.extractionTime;
      }
    }
  } catch (error) {
    result.error = error.message;
    console.log(` ${icons.error} ${colors.red}Audio extraction failed: ${error.message}${colors.reset}`);
  }

  return result;
}
/**
 * Extract one subtitle track into a text file.
 *
 * The output extension is chosen by matching the track's `codec`/`codecId`
 * against SUBTITLE_FORMAT_MAP (first match wins, default `srt`). Packets are
 * decoded as UTF-8 text; `srt` and `vtt` outputs get cue numbering and timing
 * lines, any other extension receives the cue texts separated by blank lines.
 *
 * For WebVTT only the timestamp separator is switched from ',' to '.', so
 * commas inside the subtitle text are left untouched.
 *
 * @param {object} subtitleTrack - Subtitle track object from the input.
 * @param {number} trackIndex - Index of the track among the input's subtitle tracks.
 * @param {string} outputDir - Directory to write `subtitle_track_<index>_<lang>.<ext>` into.
 * @returns {Promise<object>} Result record (`success`, `outputFile`, `error`,
 *   `extractionTime`, ...). Never rejects; failures are stored in `error`.
 */
async function extractSubtitleTrack(subtitleTrack, trackIndex, outputDir) {
  const result = {
    trackIndex,
    codec: subtitleTrack.codec || 'unknown',
    format: subtitleTrack.format || 'unknown',
    language: subtitleTrack.languageCode || 'unknown',
    success: false,
    outputFile: null,
    error: null,
    extractionTime: 0
  };
  const startTime = Date.now();

  try {
    console.log(` ${colors.dim}Extracting subtitle track ${trackIndex} (${result.codec})...${colors.reset}`);

    // Determine output format based on codec
    let extension = 'srt'; // Default to SRT
    const codecLower = (subtitleTrack.codec || '').toLowerCase();
    const codecIdLower = (subtitleTrack.codecId || '').toLowerCase();

    // Check various codec identifiers (case-insensitive substring match)
    for (const [key, ext] of Object.entries(SUBTITLE_FORMAT_MAP)) {
      const keyLower = key.toLowerCase();
      if (codecLower.includes(keyLower) || codecIdLower.includes(keyLower)) {
        extension = ext;
        break;
      }
    }

    const outputFile = path.join(outputDir, `subtitle_track_${trackIndex}_${result.language}.${extension}`);

    // Use EncodedPacketSink to extract subtitle packets
    const sink = new EncodedPacketSink(subtitleTrack);
    const subtitleEntries = [];
    const decoder = new TextDecoder('utf-8');
    for await (const packet of sink.packets()) {
      const text = decoder.decode(packet.data).trim();
      if (text) {
        subtitleEntries.push({
          startTime: packet.timestamp,
          endTime: packet.timestamp + packet.duration,
          text: text
        });
      }
    }

    if (subtitleEntries.length === 0) {
      throw new Error('No subtitle entries found');
    }

    // Convert to appropriate format
    let content;
    if (extension === 'srt') {
      content = convertToSRT(subtitleEntries);
    } else if (extension === 'vtt') {
      // Simple WebVTT conversion: same cue layout as SRT, but with '.' as the
      // millisecond separator in the timing line only.
      const toVttTime = (seconds) => formatSRTTime(seconds).replace(',', '.');
      const cues = subtitleEntries.map((entry, index) =>
        `${index + 1}\n${toVttTime(entry.startTime)} --> ${toVttTime(entry.endTime)}\n${entry.text}`
      );
      content = 'WEBVTT\n\n' + cues.join('\n\n');
    } else {
      // For other formats, save raw text
      content = subtitleEntries.map(e => e.text).join('\n\n');
    }

    await fs.writeFile(outputFile, content, 'utf-8');
    result.outputFile = outputFile;
    result.success = true;
    console.log(` ${icons.check} ${colors.green}Extracted ${subtitleEntries.length} subtitle entries to ${extension}${colors.reset}`);
  } catch (error) {
    result.error = error.message;
    console.log(` ${icons.error} ${colors.red}Subtitle extraction failed: ${error.message}${colors.reset}`);
  } finally {
    // Record the elapsed time for failed attempts as well
    result.extractionTime = (Date.now() - startTime) / 1000;
  }

  return result;
}
/**
 * Process a single video file: list its audio and subtitle tracks, extract
 * each one into `<OUTPUT_BASE_DIR>/<sanitized name>/`, and write `report.txt`.
 *
 * Unless `forceReprocess` is set, a previous successful extraction is
 * re-validated first; when every file still validates, the file is skipped
 * and a `skipped: true` report is returned without rewriting report.txt.
 *
 * Failures during processing are collected in `report.errors` rather than
 * thrown. That includes a failure to write the report itself (for example
 * when the output directory could not be created), so the caller still
 * receives the report.
 *
 * @param {{path: string, name: string, size: number}} file - File descriptor from the scan.
 * @param {boolean} [forceReprocess=false] - Skip the previous-extraction check.
 * @returns {Promise<object>} The report for this file.
 */
async function processVideoFile(file, forceReprocess = false) {
  const baseName = sanitizeFilename(path.basename(file.name, path.extname(file.name)));
  const outputDir = path.join(OUTPUT_BASE_DIR, baseName);

  console.log(`\n${colors.cyan}${'═'.repeat(60)}${colors.reset}`);
  console.log(`${icons.processing} ${colors.bright}${colors.blue}Processing: ${file.name}${colors.reset}`);
  console.log(`${colors.dim} Size: ${formatBytes(file.size)}${colors.reset}`);
  console.log(`${colors.dim} Output dir: ${outputDir}${colors.reset}`);

  // Check if already processed
  if (!forceReprocess) {
    const previousResult = await checkPreviousExtraction(outputDir);
    if (previousResult.exists && previousResult.success) {
      // Files were extracted previously - let's re-validate them
      console.log(`${icons.info} ${colors.cyan}Found previous extraction - re-validating files...${colors.reset}`);
      console.log(` ${colors.dim}Audio: ${previousResult.successfulAudioTracks}/${previousResult.totalAudioTracks} tracks${colors.reset}`);
      console.log(` ${colors.dim}Subtitles: ${previousResult.successfulSubtitleTracks}/${previousResult.totalSubtitleTracks} tracks${colors.reset}`);

      // Re-validate and test all extracted files
      const validationReport = await revalidateExtractedFiles(outputDir);
      if (validationReport.allValid) {
        console.log(`${icons.success} ${colors.green}All extracted files validated successfully!${colors.reset}`);
        return {
          file: file.name,
          size: file.size,
          skipped: true,
          revalidated: true,
          reason: 'All extracted files re-validated successfully',
          audioTracks: validationReport.audioTracks,
          subtitleTracks: validationReport.subtitleTracks,
          audioResults: validationReport.audioResults,
          subtitleResults: validationReport.subtitleResults,
          errors: []
        };
      }
      console.log(`${icons.warning} ${colors.yellow}Re-validation failed - re-extracting...${colors.reset}`);
      console.log(` ${colors.dim}${validationReport.failureReason}${colors.reset}`);
      // Continue with re-extraction
    } else if (previousResult.exists) {
      console.log(`${icons.warning} ${colors.yellow}Previous extraction incomplete: ${previousResult.reason}${colors.reset}`);
      console.log(` ${colors.dim}Re-processing file...${colors.reset}`);
    }
  }

  const report = {
    file: file.name,
    size: file.size,
    skipped: false,
    audioTracks: [],
    subtitleTracks: [],
    audioResults: [],
    subtitleResults: [],
    errors: []
  };

  let fileHandle;
  try {
    // Create output directory
    await fs.mkdir(outputDir, { recursive: true });

    // Open file
    fileHandle = await fs.open(file.path, 'r');

    // Create StreamSource backed by positional reads on the open handle
    const streamSource = new StreamSource({
      read: async (start, end) => {
        const size = end - start;
        const buffer = Buffer.allocUnsafe(size);
        const { bytesRead } = await fileHandle.read(buffer, 0, size, start);
        // Copy only the bytes actually read; a short read near EOF would
        // otherwise expose uninitialised memory from allocUnsafe.
        return new Uint8Array(buffer.subarray(0, bytesRead));
      },
      getSize: async () => file.size
    });

    // Create Input
    const input = new Input({
      source: streamSource,
      formats: ALL_FORMATS
    });

    // Get audio tracks
    const audioTracks = await input.getAudioTracks();
    console.log(`${icons.audio} ${colors.cyan}Found ${audioTracks.length} audio track(s):${colors.reset}`);
    audioTracks.forEach((track, i) => {
      const info = ` ${colors.dim}${icons.bullet}${colors.reset} Audio Track ${i}: ${colors.yellow}${track.codec}${colors.reset}, ${track.sampleRate}Hz, ${track.numberOfChannels}ch`;
      console.log(info);
      report.audioTracks.push({
        index: i,
        codec: track.codec,
        sampleRate: track.sampleRate,
        channels: track.numberOfChannels,
        language: track.languageCode || 'unknown'
      });
    });

    // Get subtitle tracks (best effort: a failure here does not abort audio extraction)
    let subtitleTracks = [];
    try {
      if (input.getSubtitleTracks) {
        subtitleTracks = await input.getSubtitleTracks();
        console.log(`${icons.subtitle} ${colors.cyan}Found ${subtitleTracks.length} subtitle track(s):${colors.reset}`);
        subtitleTracks.forEach((track, i) => {
          const codecInfo = track.codec || track.codecId || 'unknown';
          const langInfo = track.languageCode || track.language || 'unknown';
          const info = ` ${colors.dim}${icons.bullet}${colors.reset} Subtitle Track ${i}: ${colors.yellow}${codecInfo}${colors.reset} (${langInfo})`;
          console.log(info);
          report.subtitleTracks.push({
            index: i,
            codec: codecInfo,
            format: track.format || 'unknown',
            language: langInfo,
            name: track.name || null
          });
        });
      } else {
        console.log('Subtitle extraction not supported in this MediaBunny version');
      }
    } catch (error) {
      console.log(`Could not get subtitle tracks: ${error.message}`);
    }

    // Extract audio tracks individually using the fixed track selection
    if (audioTracks.length > 0) {
      console.log(`\n${colors.bright}${colors.magenta}Extracting audio tracks:${colors.reset}`);
      for (let i = 0; i < audioTracks.length; i++) {
        console.log(` ${colors.cyan}Track ${i} (${audioTracks[i].codec}):${colors.reset}`);
        const result = await extractAudioTrack(input, audioTracks[i], i, outputDir);
        report.audioResults.push(result);
      }
    }

    // Extract subtitle tracks
    if (subtitleTracks.length > 0) {
      console.log(`\n${colors.bright}${colors.magenta}Extracting subtitle tracks:${colors.reset}`);
      for (let i = 0; i < subtitleTracks.length; i++) {
        console.log(` ${colors.cyan}Track ${i}:${colors.reset}`);
        const result = await extractSubtitleTrack(subtitleTracks[i], i, outputDir);
        report.subtitleResults.push(result);
      }
    }
  } catch (error) {
    console.log(`\n${icons.error} ${colors.red}Error processing file: ${error.message}${colors.reset}`);
    report.errors.push(error.message);
  } finally {
    if (fileHandle) {
      await fileHandle.close();
    }
  }

  // Write report. A write failure is recorded instead of thrown so that the
  // errors already captured above still reach the caller.
  try {
    const reportPath = path.join(outputDir, 'report.txt');
    const reportContent = generateReport(report);
    await fs.writeFile(reportPath, reportContent);
  } catch (error) {
    console.log(`${icons.error} ${colors.red}Could not write report: ${error.message}${colors.reset}`);
    report.errors.push(`Could not write report: ${error.message}`);
  }

  return report;
}
/**
 * Render a per-file report object as the plain-text content of report.txt.
 *
 * The layout is: a fixed header (file, size, date), then either a
 * SKIPPED status with its reason, or the track listings, the per-track
 * extraction results and any collected errors. Sections with no entries
 * are omitted.
 *
 * @param {object} report - Report with `file`, `size`, `skipped`, `audioTracks`,
 *   `subtitleTracks`, `audioResults`, `subtitleResults` and `errors`.
 * @returns {string} The report text, lines joined with "\n".
 */
function generateReport(report) {
  const out = [
    'MediaBunny Test Report',
    '=====================',
    `File: ${report.file}`,
    `Size: ${formatBytes(report.size)}`,
    `Date: ${new Date().toISOString()}`
  ];

  if (report.skipped) {
    out.push(`Status: SKIPPED`);
    out.push(`Reason: ${report.reason || 'Already processed successfully'}`);
    return out.join('\n');
  }

  // Track listings
  if (report.audioTracks.length > 0) {
    out.push('\nAudio Tracks:');
    for (const track of report.audioTracks) {
      out.push(` Track ${track.index}: ${track.codec} ${track.sampleRate}Hz ${track.channels}ch (${track.language})`);
    }
  }
  if (report.subtitleTracks.length > 0) {
    out.push('\nSubtitle Tracks:');
    for (const track of report.subtitleTracks) {
      const label = track.name ? ` "${track.name}"` : '';
      out.push(` Track ${track.index}: ${track.codec} (${track.language})${label}`);
    }
  }

  // Audio extraction outcomes
  if (report.audioResults.length > 0) {
    out.push('\nAudio Extraction Results:');
    for (const res of report.audioResults) {
      const mark = res.success ? '✓' : '✗';
      const validity = res.validation?.valid ? 'valid' : 'invalid';
      const playability = res.playback?.playable ? 'playable' : 'not-playable';

      let timing = '';
      if (res.extractionTime) {
        timing = `${res.extractionTime.toFixed(2)}s`;
        if (res.realtimeRate) {
          timing += `, ${res.realtimeRate.toFixed(1)}x`;
        }
      }

      const destination = res.success ? path.basename(res.outputFile) : 'failed';
      out.push(` Track ${res.trackIndex}: ${mark} ${res.codec} -> ${destination} (${validity}, ${playability}) [${timing}]`);
      if (!res.success && res.error) {
        out.push(` Error: ${res.error}`);
      }
    }
  }

  // Subtitle extraction outcomes
  if (report.subtitleResults.length > 0) {
    out.push('\nSubtitle Extraction Results:');
    for (const res of report.subtitleResults) {
      const mark = res.success ? '✓' : '✗';
      const timing = res.extractionTime ? ` [${res.extractionTime.toFixed(2)}s]` : '';
      const destination = res.success ? path.basename(res.outputFile) : 'failed';
      out.push(` Track ${res.trackIndex}: ${mark} ${res.codec} -> ${destination}${timing}`);
      if (!res.success && res.error) {
        out.push(` Error: ${res.error}`);
      }
    }
  }

  // File-level errors
  if (report.errors.length > 0) {
    out.push('\nErrors:');
    for (const message of report.errors) {
      out.push(` - ${message}`);
    }
  }

  return out.join('\n');
}
/**
 * Builds the plain-text summary for a whole batch run.
 *
 * @param {Array<object>} allReports - One report per input file. Each report is read for
 *   `file`, `skipped`, `audioTracks`, `subtitleTracks`, `audioResults` and `subtitleResults`
 *   (the four track/result properties must be arrays).
 * @param {string} directory - Input path echoed in the summary header.
 * @param {number} startTime - Epoch milliseconds (as from `Date.now()`) at which the batch started.
 * @returns {string} The summary, with lines joined by '\n'.
 */
function generateSummary(allReports, directory, startTime) {
  const totalTime = (Date.now() - startTime) / 1000;

  // Adds up one number per report across the whole batch.
  const sumOver = (pick) => allReports.reduce((sum, report) => sum + pick(report), 0);
  const succeeded = (result) => result.success;
  const failed = (result) => !result.success;

  // Codec names come from media metadata, so tally them in a Map: on a plain
  // object a codec called e.g. "constructor" or "__proto__" would collide with
  // Object.prototype members and yield a garbage (or missing) count.
  const tallyCodecs = (trackKey) => {
    const counts = new Map();
    for (const report of allReports) {
      for (const track of report[trackKey]) {
        counts.set(track.codec, (counts.get(track.codec) ?? 0) + 1);
      }
    }
    return counts;
  };

  const lines = [
    'MediaBunny Batch Test Summary',
    '=============================',
    `Date: ${new Date().toISOString()}`,
    `Directory: ${directory}`,
    '',
  ];

  // Emits one distribution section, most frequent codec first; omitted when empty.
  const pushDistribution = (heading, counts) => {
    if (counts.size === 0) {
      return;
    }
    lines.push(heading);
    const ranked = [...counts.entries()].sort((a, b) => b[1] - a[1]);
    for (const [codec, count] of ranked) {
      lines.push(` ${codec}: ${count}`);
    }
    lines.push('');
  };

  // Statistics
  const totalFiles = allReports.length;
  const skippedFiles = allReports.filter((report) => report.skipped).length;
  const processedFiles = totalFiles - skippedFiles;
  const totalAudioTracks = sumOver((report) => report.audioTracks.length);
  const successfulAudioExtractions = sumOver(
    (report) => report.audioResults.filter(succeeded).length
  );
  const validAudioOutputs = sumOver(
    (report) => report.audioResults.filter((result) => result.validation?.valid).length
  );
  const totalSubtitleTracks = sumOver((report) => report.subtitleTracks.length);
  const successfulSubtitleExtractions = sumOver(
    (report) => report.subtitleResults.filter(succeeded).length
  );

  lines.push(
    'Statistics:',
    ` Files found: ${totalFiles}`,
    ` Files processed: ${processedFiles}`,
    ` Files skipped: ${skippedFiles}`,
    '',
    ' Audio tracks:',
    ` Total: ${totalAudioTracks}`,
    ` Successfully extracted: ${successfulAudioExtractions}/${totalAudioTracks}`,
    // Validity is reported relative to the extractions that succeeded.
    ` Valid outputs: ${validAudioOutputs}/${successfulAudioExtractions}`,
    '',
    ' Subtitle tracks:',
    ` Total: ${totalSubtitleTracks}`,
    ` Successfully extracted: ${successfulSubtitleExtractions}/${totalSubtitleTracks}`,
    ''
  );

  // Codec distribution
  pushDistribution('Audio Codec Distribution:', tallyCodecs('audioTracks'));
  pushDistribution('Subtitle Codec Distribution:', tallyCodecs('subtitleTracks'));

  // File results
  lines.push('File Results:');
  for (const report of allReports) {
    const audioSuccess = report.audioResults.filter(succeeded).length;
    const audioTotal = report.audioTracks.length;
    const subtitleSuccess = report.subtitleResults.filter(succeeded).length;
    const subtitleTotal = report.subtitleTracks.length;
    const skipIndicator = report.skipped ? ' [SKIPPED]' : '';
    lines.push(` ${report.file}:`);
    lines.push(` Audio: ${audioSuccess}/${audioTotal} tracks${skipIndicator}`);
    // The subtitle line is only shown for files that actually have subtitle tracks.
    if (subtitleTotal > 0) {
      lines.push(` Subtitles: ${subtitleSuccess}/${subtitleTotal} tracks${skipIndicator}`);
    }
  }

  // Failed files (skipped files are never listed as failures)
  const failedReports = allReports.filter(
    (report) =>
      !report.skipped &&
      (report.audioResults.some(failed) || report.subtitleResults.some(failed))
  );
  if (failedReports.length > 0) {
    lines.push('');
    lines.push('Files with failed extractions:');
    for (const report of failedReports) {
      const failedAudio = report.audioResults.filter(failed).length;
      const failedSubtitles = report.subtitleResults.filter(failed).length;
      const failures = [];
      if (failedAudio > 0) {
        failures.push(`${failedAudio} audio`);
      }
      if (failedSubtitles > 0) {
        failures.push(`${failedSubtitles} subtitle`);
      }
      lines.push(` - ${report.file}: ${failures.join(', ')} track(s) failed`);
    }
  }

  lines.push('');
  lines.push(`Total processing time: ${totalTime.toFixed(2)} seconds`);
  return lines.join('\n');
}
| // Main function | |
/**
 * CLI entry point.
 *
 * Reads `[--force] <input-path>` from the command line, collects the video
 * file(s) to handle, runs `processVideoFile` on each one in sequence, writes
 * the batch summary to `<OUTPUT_BASE_DIR>/summary.txt` and prints quick stats.
 *
 * Exits with status 1 when no input path is given, when the input is neither
 * a supported file nor a directory, or when any error is thrown while
 * processing; exits with status 0 when no video files are found.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = process.argv.slice(2);

  // Check for force flag
  const forceReprocess = args.includes('--force');
  const filteredArgs = args.filter((arg) => arg !== '--force');

  if (filteredArgs.length === 0) {
    console.log('Usage: node batch-test-mediabunny.mjs [--force] <input-path>');
    console.log(' <input-path> can be a video file or directory');
    console.log(' --force Re-process all files even if already extracted');
    console.log('');
    console.log('Features:');
    console.log(' - Extracts audio tracks individually using fixed track selection');
    console.log(' - Extracts subtitle tracks when available');
    console.log(' - Validates output files with ffprobe');
    console.log(' - Tests playback of audio files');
    console.log(' - Incremental processing (skip already extracted files)');
    process.exit(1);
  }

  const inputPath = path.resolve(filteredArgs[0]);
  const startTime = Date.now();

  try {
    const stat = await fs.stat(inputPath);
    let files = [];

    if (stat.isDirectory()) {
      console.log(`Scanning directory: ${inputPath}`);
      files = await getFilesRecursively(inputPath);
      console.log(`Found ${files.length} video files (>100MB)`);
    } else if (stat.isFile()) {
      const ext = path.extname(inputPath).toLowerCase();
      if (SUPPORTED_EXTENSIONS.includes(ext)) {
        // A single explicitly named file is accepted on extension alone.
        files = [{
          path: inputPath,
          name: path.basename(inputPath),
          size: stat.size
        }];
      } else {
        console.error(`Unsupported file extension: ${ext}`);
        process.exit(1);
      }
    } else {
      console.error('Input must be a file or directory');
      process.exit(1);
    }

    if (files.length === 0) {
      console.log('No video files found');
      process.exit(0);
    }

    if (forceReprocess) {
      console.log(`\n${icons.warning} ${colors.yellow}Force mode: Re-processing all files${colors.reset}`);
    } else {
      console.log(`\n${icons.info} ${colors.cyan}Incremental mode: Skipping successfully extracted files${colors.reset}`);
    }
    console.log(`\n${colors.bright}${colors.magenta}Using FIXED MediaBunny with proper track selection support${colors.reset}`);
    console.log(`${colors.dim}Audio tracks will be extracted individually to separate files${colors.reset}\n`);

    // Process each file, one at a time
    const reports = [];
    for (const file of files) {
      const report = await processVideoFile(file, forceReprocess);
      reports.push(report);
    }

    // Generate summary
    const summaryPath = path.join(OUTPUT_BASE_DIR, 'summary.txt');
    const summary = generateSummary(reports, inputPath, startTime);
    // Nothing in this function creates OUTPUT_BASE_DIR, so make sure it exists;
    // otherwise writeFile rejects with ENOENT and the finished run is reported
    // as a failure. With `recursive: true` an existing directory is not an error.
    await fs.mkdir(OUTPUT_BASE_DIR, { recursive: true });
    await fs.writeFile(summaryPath, summary);

    console.log(`\n${colors.cyan}${'═'.repeat(60)}${colors.reset}`);
    console.log(`${icons.complete} ${colors.bright}${colors.green}BATCH PROCESSING COMPLETE${colors.reset}`);
    console.log(`${colors.dim}Summary written to: ${summaryPath}${colors.reset}`);

    // Print quick stats
    const skippedFiles = reports.filter((r) => r.skipped).length;
    const processedFiles = files.length - skippedFiles;
    const totalAudioTracks = reports.reduce((sum, r) => sum + r.audioTracks.length, 0);
    const successfulAudioExtractions = reports.reduce(
      (sum, r) => sum + r.audioResults.filter((res) => res.success).length, 0);
    const totalSubtitleTracks = reports.reduce((sum, r) => sum + r.subtitleTracks.length, 0);
    const successfulSubtitleExtractions = reports.reduce(
      (sum, r) => sum + r.subtitleResults.filter((res) => res.success).length, 0);

    console.log(`\n${colors.bright}Processed ${processedFiles} files${colors.reset} ${colors.dim}(${skippedFiles} skipped)${colors.reset}`);

    // Green only when every track was extracted, yellow otherwise.
    const audioColor = successfulAudioExtractions === totalAudioTracks ? colors.green : colors.yellow;
    console.log(`${icons.audio} Audio: ${audioColor}${successfulAudioExtractions}/${totalAudioTracks}${colors.reset} tracks extracted`);

    if (totalSubtitleTracks > 0) {
      const subtitleColor = successfulSubtitleExtractions === totalSubtitleTracks ? colors.green : colors.yellow;
      console.log(`${icons.subtitle} Subtitles: ${subtitleColor}${successfulSubtitleExtractions}/${totalSubtitleTracks}${colors.reset} tracks extracted`);
    }
  } catch (error) {
    console.error(`Error: ${error.message}`);
    process.exit(1);
  }
}
// Run main. main() reports its own expected failures and exits with status 1;
// anything that still rejects here is unexpected, so log it and make the
// process exit status reflect the failure as well.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment