Created
March 9, 2021 16:57
-
-
Save kwindla/2c33e45dc90b0bea6a5b4f34dde67cb2 to your computer and use it in GitHub Desktop.
Command line utility to composite multiple audio and video files with correct a/v sync
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #! /usr/bin/env node | |
| /** | |
| * | |
| * Construct an ffmpeg command that looks like ... | |
| * | |
| ffmpeg \ | |
| -i 1614730701736.aac -i 1614730702763.aac -i 1614730703401.m2v -i 1614730705785.m2v \ | |
| -t 100.55900001525879 \ | |
| -filter_complex " | |
| color=s=1280x720:c=blue [layer1]; | |
| [0:a] adelay=0 [aud0]; | |
| [1:a] adelay=33400 [aud1]; | |
| [aud0][aud1] amix=inputs=2 [audio]; | |
| [2:v] setpts=PTS+0.3680000305175781/TB,scale=640x360 [vid2]; | |
| [layer1][vid2] overlay=x=0:y=180 [layer2]; | |
| [3:v] setpts=PTS+33.55900001525879/TB,scale=640x360 [vid3]; | |
| [layer2][vid3] overlay=x=640:y=180 [layer3] | |
| " \ | |
| -map [audio] -map [layer3] \ | |
| -c:a aac -ac 1 -b:a 64k \ | |
| -r 30 \ | |
| -c:v libx264 -crf 22 -movflags faststart \ | |
| output.mp4 | |
| * | |
| * Usage: | |
| * | |
| * ff-composite.js [ list of track files ] | |
| * | |
| * Example: | |
| * | |
| * ff-composite.js $(ls -rth *.webm | tail -n 2) | |
| * | |
| */ | |
// Node's process-spawning API; used below to run ffmpeg and ffprobe.
// Declared with `const` so it is a real module-scoped binding rather than an
// implicit global (which would also throw under strict mode / ES modules).
const child_process = require('child_process');

// Executable names, resolved through PATH.
const FFMPEG = 'ffmpeg';
const FFPROBE = 'ffprobe';

// Output file name and the title written into its metadata.
const outfname = 'output.mp4';
const videoTitle = 'daily test composite';

// Canvas the video tracks are composited onto.
const backgroundColor = 'blue';
const baseSize = { x: 1280, y: 720 };

// Encoding parameters for the final output.
const audioBitrate = 64 * 1000;
const videoCrf = 22;

// Track files given on the command line (everything after the script name).
const fargs = process.argv.slice(2);

// Mutable module state: filled in by main() and read by the helpers below.
let audioTracks = [];
let videoTracks = [];
let smallestStartTs = Number.MAX_VALUE;
let largestEndTs = 0;
let duration = 0;

console.log(fargs);
| // ---- | |
/**
 * Transcode one source track into a temp file by running ffmpeg synchronously.
 *
 * Sets `track.tmpfname` to `<Date.now()><extension>`, echoes ffmpeg's stderr,
 * and terminates the process if ffmpeg cannot be started or does not exit
 * with status 0.
 *
 * @param {object} track - track record; `fname` is read, `tmpfname` is written
 * @param {string} extension - temp file extension, including the leading dot
 * @param {string[]} codecArgs - ffmpeg arguments placed between input and output
 */
function transcodeToTemp(track, extension, codecArgs) {
  track.tmpfname = Date.now().toString() + extension;
  console.log(track.fname, ' -> ', track.tmpfname);
  const ret = child_process.spawnSync(FFMPEG, [
    '-i',
    track.fname,
    ...codecArgs,
    track.tmpfname,
  ]);
  if (ret.error) {
    // The child could not be spawned at all (e.g. ffmpeg not on PATH); in that
    // case there is no stderr buffer to print, so report the spawn error.
    console.log('ERROR', ret.error.message);
    process.exit(1);
  }
  console.log(ret.stderr.toString());
  if (ret.status !== 0) {
    console.log('ERROR');
    // status is null when the child was killed by a signal
    process.exit(ret.status === null ? 1 : ret.status);
  }
}

/**
 * Lay out video tracks side by side: the base canvas is split into equal-width
 * columns and each track is scaled to fit its column (aspect ratio preserved)
 * and centered in it. Writes `layoutSize` and `layoutPos` on every track.
 *
 * @param {object[]} tracks - video track records with a `resolution` {x, y}
 */
function layoutSideBySide(tracks) {
  const cellW = baseSize.x / tracks.length;
  const cellH = baseSize.y;
  const cellAspect = cellW / cellH;
  let left = 0;
  for (const track of tracks) {
    const aspect = track.resolution.x / track.resolution.y;
    let w;
    let h;
    if (aspect > cellAspect) {
      // relatively wider than the cell: fill the width
      w = cellW;
      h = w / aspect;
    } else {
      // relatively taller than (or same as) the cell: fill the height
      h = cellH;
      w = h * aspect;
    }
    track.layoutSize = { x: w, y: h };
    // center inside the cell
    track.layoutPos = { x: left + (cellW - w) / 2, y: (cellH - h) / 2 };
    left += cellW;
  }
}

/**
 * Entry point: probe the input files, transcode each track to a clean
 * intermediate file, compute layout and timing offsets, then launch the final
 * ffmpeg composite and stream its output to the console.
 */
async function main() {
  audioTracks = fargs.filter(hasAudioTrack).map(trackInfo);
  videoTracks = fargs.filter(hasVideoTrack).map(trackInfo);

  // convert each track and save as a temp file, in order to create clean files
  // with timestamps that ffmpeg can properly align, mix, and composite
  // for intermediate formats, use double-bitrate aac for audio, and
  // intraframe-only mpeg2 for video
  for (const track of audioTracks) {
    transcodeToTemp(track, '.aac', [
      '-af',
      'aresample=48000:async=1',
      '-c:a',
      'aac',
      '-ac',
      '1',
      '-b:a',
      String(audioBitrate * 2),
    ]);
  }
  for (const track of videoTracks) {
    transcodeToTemp(track, '.m2v', [
      '-c:v',
      'mpeg2video',
      '-q:v',
      '1',
      '-qmin',
      '1',
      '-intra',
      '-r',
      '30',
    ]);
  }

  // lay out video tracks side by side
  layoutSideBySide(videoTracks);

  // calculate lowest startTs, highest endTs, and overall clip duration
  calcAggregateTimestamps();
  console.log(
    'beginning, end, duration',
    smallestStartTs,
    largestEndTs,
    duration
  );

  // calculate ts offset (delay) for each track
  for (const t of [...audioTracks, ...videoTracks]) {
    t.delay = t.startTs - smallestStartTs;
  }
  console.log('audio tracks', audioTracks);
  console.log('video tracks', videoTracks);

  // construct ffmpeg command arguments
  const ffmpegArgs = constructCommandArgs();
  ffmpegArgs.push(outfname);
  console.log(ffmpegArgs);

  const proc = child_process.spawn(FFMPEG, ffmpegArgs);
  proc.stdout.on('data', (d) => console.log(d.toString()));
  proc.stderr.on('data', (d) => console.log(d.toString()));
  // Without an 'error' listener a failed spawn surfaces as an uncaught exception.
  proc.on('error', (err) => {
    console.log('ERROR', err.message);
    process.exitCode = 1;
  });
  // Propagate a failing ffmpeg exit status to our own exit status.
  proc.on('close', (code) => {
    if (code) {
      process.exitCode = code;
    }
  });
}

main().catch((err) => {
  console.error(err);
  process.exit(1);
});
| // ---- | |
/**
 * Build the track record for one input file from ffprobe output.
 *
 * `startTs`/`endTs` are the float-parsed results of getTrackStart() and
 * getTrackEnd(); `resolution` is parsed from getResolution()'s `<a>x<b>`
 * string (both components are NaN when that string has no numeric parts).
 * Layout fields default to the full base canvas at the origin.
 *
 * @param {string} fname - path of the media file to describe
 * @returns {{fname: string, startTs: number, endTs: number, duration: number,
 *   resolution: {x: number, y: number}, layoutSize: {x: number, y: number},
 *   layoutPos: {x: number, y: number}}}
 */
function trackInfo(fname) {
  const startTs = Number.parseFloat(getTrackStart(fname));
  const endTs = Number.parseFloat(getTrackEnd(fname));
  const [resX, resY] = getResolution(fname).split('x');
  // Always pass the radix so the dimensions are read as decimal.
  const resolution = {
    x: Number.parseInt(resX, 10),
    y: Number.parseInt(resY, 10),
  };
  return {
    fname,
    startTs,
    endTs,
    duration: endTs - startTs,
    resolution,
    layoutSize: { x: baseSize.x, y: baseSize.y },
    layoutPos: { x: 0, y: 0 },
  };
}
/**
 * Fold every audio and video track into the module-level aggregates:
 * `smallestStartTs` becomes the lowest `startTs` seen, `largestEndTs` the
 * highest `endTs` seen, and `duration` the difference between the two.
 * The aggregates are updated from their current values, not reset first.
 */
function calcAggregateTimestamps() {
  audioTracks.concat(videoTracks).forEach((track) => {
    smallestStartTs =
      track.startTs < smallestStartTs ? track.startTs : smallestStartTs;
    largestEndTs = track.endTs > largestEndTs ? track.endTs : largestEndTs;
  });
  duration = largestEndTs - smallestStartTs;
}
/**
 * Report whether ffprobe prints anything when asked to show only the audio
 * streams (`-select_streams a`) of the file.
 *
 * @param {string} fname - path of the media file to probe
 * @returns {boolean} true when the probe output is non-empty
 */
function hasAudioTrack(fname) {
  const probeFlags = '-show_streams -select_streams a -loglevel error';
  return ffprobe(probeFlags, fname).length > 0;
}
/**
 * Report whether ffprobe prints anything when asked to show only the video
 * streams (`-select_streams v`) of the file.
 *
 * @param {string} fname - path of the media file to probe
 * @returns {boolean} true when the probe output is non-empty
 */
function hasVideoTrack(fname) {
  const streamDump = ffprobe(
    '-show_streams -select_streams v -loglevel error',
    fname
  );
  return streamDump.length > 0;
}
/**
 * Ask ffprobe for the `start_time` stream entry of the file, printed as bare
 * values (no wrappers, no keys).
 *
 * @param {string} fname - path of the media file to probe
 * @returns {string} the raw text returned by the ffprobe() helper
 */
function getTrackStart(fname) {
  const probeFlags =
    '-v error -show_entries stream=start_time -of default=noprint_wrappers=1:nokey=1';
  return ffprobe(probeFlags, fname);
}
/**
 * Ask ffprobe for the container-level `duration` entry of the file, printed as
 * a bare value (no wrappers, no keys). Callers use this value as the track's
 * end timestamp.
 *
 * @param {string} fname - path of the media file to probe
 * @returns {string} the raw text returned by the ffprobe() helper
 */
function getTrackEnd(fname) {
  const probeFlags =
    '-v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 ';
  return ffprobe(probeFlags, fname);
}
/**
 * Ask ffprobe for the width/height entries of the first video stream
 * (`v:0`), printed as CSV with `x` as the separator.
 *
 * @param {string} fname - path of the media file to probe
 * @returns {string} the raw text returned by the ffprobe() helper
 */
function getResolution(fname) {
  const probeFlags =
    '-v error -select_streams v:0 -show_entries stream=height,width -of csv=s=x:p=0 ';
  return ffprobe(probeFlags, fname);
}
/**
 * Run ffprobe on a file and return its trimmed standard output.
 *
 * The file name comes straight from the command line, so ffprobe is started
 * without a shell and the name is passed as a single argv entry: paths with
 * spaces work, and shell metacharacters in a name are never interpreted.
 *
 * @param {string} commandArgs - whitespace-separated ffprobe options
 * @param {string} fname - path of the media file to probe
 * @returns {string} ffprobe's stdout with surrounding whitespace removed
 * @throws {Error} when ffprobe cannot be started or exits with a non-zero status
 */
function ffprobe(commandArgs, fname) {
  // The option strings used in this file contain no quoted values, so a plain
  // whitespace split reproduces what the shell would have produced.
  const extraArgs = commandArgs.split(/\s+/).filter((arg) => arg !== '');
  return child_process
    .execFileSync(FFPROBE, ['-i', fname, ...extraArgs])
    .toString()
    .trim();
}
/**
 * Build the argument list for the final ffmpeg composite, minus the output
 * file name (the caller appends it).
 *
 * Inputs are every audio track's temp file followed by every video track's
 * temp file, so audio track i is ffmpeg input i and video track j is input
 * `audioTracks.length + j`. The filter graph:
 *   - creates a solid-color canvas labelled `layer<audioTracks.length - 1>`,
 *   - delays each audio input by its `delay` (seconds, converted to ms) and
 *     mixes them into `[audio]` (or uses `anullsrc` when there is no audio),
 *   - shifts each video input's PTS by its `delay`, scales it to its
 *     `layoutSize`, and overlays it at `layoutPos` onto the previous layer.
 *
 * Every element is a string, matching the `string[]` argument type that
 * `child_process.spawn` documents.
 *
 * @returns {string[]} ffmpeg arguments up to, but not including, the output file
 */
function constructCommandArgs() {
  // igndts by itself doesn't seem to help
  // let ffcmd = ['-vsync', 'cfr', '-fflags', '+discardcorrupt+igndts+ignidx'];
  // let ffcmd = ['-copyts']; // maybe skips setpts stuff, somehow? no video plays and conversion is very fast
  const ffcmd = [];
  for (const t of [...audioTracks, ...videoTracks]) {
    ffcmd.push('-i', t.tmpfname);
  }
  ffcmd.push('-t', String(duration), '-filter_complex');

  // complex filter :-)
  // Each entry is one filter chain; chains are joined with "; " at the end.
  const chains = [
    `color=s=${baseSize.x}x${baseSize.y}:c=${backgroundColor} [layer${
      audioTracks.length - 1
    }]`,
  ];

  // per-track audio delay lines (initial ts delay offset, in milliseconds)
  audioTracks.forEach((t, i) => {
    chains.push(`[${i}:a] adelay=${t.delay * 1000} [aud${i}]`);
  });

  // audio mix line or audio nullsrc
  if (audioTracks.length > 0) {
    const mixInputs = audioTracks.map((_, i) => `[aud${i}]`).join('');
    chains.push(`${mixInputs} amix=inputs=${audioTracks.length} [audio]`);
  } else {
    chains.push('anullsrc [audio]');
  }

  // scale (with initial ts delay offset, this time done with pts munging)
  // and composite lines
  let idx = audioTracks.length;
  for (const t of videoTracks) {
    chains.push(
      `[${idx}:v] setpts=PTS+${t.delay}/TB,scale=${t.layoutSize.x}x${t.layoutSize.y} [vid${idx}]`
    );
    chains.push(
      `[layer${idx - 1}][vid${idx}] overlay=x=${t.layoutPos.x}:y=${t.layoutPos.y} [layer${idx}]`
    );
    idx++;
  }
  const finalLayer = idx - 1;
  ffcmd.push(chains.join('; '));

  // finish up mappings
  ffcmd.push('-map', '[audio]', '-map', `[layer${finalLayer}]`);

  // and encoding parameters (make these into parameters, eventually)
  ffcmd.push(
    '-c:a',
    'aac',
    '-ac',
    '1',
    '-b:a',
    String(audioBitrate),
    '-r',
    '30',
    '-c:v',
    'libx264',
    '-crf',
    String(videoCrf),
    '-movflags',
    'faststart',
    '-metadata',
    `title=${videoTitle}`
  );
  return ffcmd;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment