Skip to content

Instantly share code, notes, and snippets.

@kwindla
Created March 9, 2021 16:57
Show Gist options
  • Select an option

  • Save kwindla/2c33e45dc90b0bea6a5b4f34dde67cb2 to your computer and use it in GitHub Desktop.

Select an option

Save kwindla/2c33e45dc90b0bea6a5b4f34dde67cb2 to your computer and use it in GitHub Desktop.
Command line utility to composite multiple audio and video files with correct a/v sync
#! /usr/bin/env node
/**
*
* Construct an ffmpeg command that looks like ...
*
ffmpeg \
-i 1614730701736.aac -i 1614730702763.aac -i 1614730703401.m2v -i 1614730705785.m2v \
-t 100.55900001525879 \
-filter_complex "
color=s=1280x720:c=blue [layer1];
[0:a] adelay=0 [aud0];
[1:a] adelay=33400 [aud1];
[aud0][aud1] amix=inputs=2 [audio];
[2:v] setpts=PTS+0.3680000305175781/TB,scale=640x360 [vid2];
[layer1][vid2] overlay=x=0:y=180 [layer2];
[3:v] setpts=PTS+33.55900001525879/TB,scale=640x360 [vid3];
[layer2][vid3] overlay=x=640:y=180 [layer3]
" \
-map [audio] -map [layer3] \
-c:a aac -ac 1 -b:a 64k \
-r 30 \
-c:v libx264 -crf 22 -movflags faststart \
output.mp4
*
* Usage:
*
* ff-composite.js [ list of track files ]
*
* Example:
*
* ff-composite.js $(ls -rth *.webm | tail -n 2)
*
*/
// Module setup: external tool names, output/encoding constants, CLI args,
// and the mutable aggregate state filled in by main().
// FIX: declare child_process with `const` — the original assignment created
// an implicit global, which throws in strict mode / ES modules.
const child_process = require('child_process');
const FFMPEG = 'ffmpeg';
const FFPROBE = 'ffprobe';
const outfname = 'output.mp4';
const videoTitle = 'daily test composite';
const backgroundColor = 'blue';
// Output canvas size in pixels.
const baseSize = { x: 1280, y: 720 };
// Final-mix audio bitrate (intermediates are encoded at double this).
const audioBitrate = 64 * 1000;
// x264 constant-rate-factor for the final encode (lower = higher quality).
const videoCrf = 22;
// Input track filenames from the command line.
const fargs = process.argv.slice(2);
let audioTracks = [];
let videoTracks = [];
// Aggregate timeline bounds across all tracks; computed by
// calcAggregateTimestamps() once the tracks have been probed.
let smallestStartTs = Number.MAX_VALUE;
let largestEndTs = 0;
let duration = 0;
console.log(fargs);
// ----
/**
 * Main pipeline:
 *   1. probe each input file for audio and/or video streams,
 *   2. re-encode each track to a clean intermediate file (AAC audio,
 *      intraframe-only MPEG-2 video) so ffmpeg can align, mix, and
 *      composite with correct timestamps,
 *   3. lay the video tracks out side by side on the output canvas,
 *   4. compute each track's delay relative to the earliest start,
 *   5. run ffmpeg with the constructed filter graph.
 */
async function main() {
  audioTracks = fargs.filter(hasAudioTrack).map(trackInfo);
  videoTracks = fargs.filter(hasVideoTrack).map(trackInfo);

  // Convert each track and save as a temp file, in order to create clean
  // files with timestamps that ffmpeg can properly align, mix, and
  // composite. For intermediate formats, use double-bitrate aac for audio,
  // and intraframe-only mpeg2 for video.
  //
  // FIX: an index suffix keeps temp names unique even when two conversions
  // start within the same millisecond (Date.now() alone could collide).
  let tmpIdx = 0;
  for (const track of audioTracks) {
    track.tmpfname = `${Date.now()}-${tmpIdx++}.aac`;
    console.log(track.fname, ' -> ', track.tmpfname);
    const ret = child_process.spawnSync(FFMPEG, [
      '-i',
      track.fname,
      // resample to fix timestamp gaps/drift before mixing
      '-af',
      'aresample=48000:async=1',
      '-c:a',
      'aac',
      '-ac',
      '1',
      '-b:a',
      // FIX: spawn arguments must be strings on modern Node
      String(audioBitrate * 2),
      track.tmpfname,
    ]);
    // FIX: when ffmpeg itself cannot be started, spawnSync sets .error and
    // leaves .stderr null — report that instead of crashing on toString().
    if (ret.error) {
      console.log('ERROR', ret.error.message);
      process.exit(1);
    }
    console.log(ret.stderr.toString());
    if (ret.status) {
      console.log('ERROR');
      process.exit(ret.status);
    }
  }
  for (const track of videoTracks) {
    track.tmpfname = `${Date.now()}-${tmpIdx++}.m2v`;
    console.log(track.fname, ' -> ', track.tmpfname);
    const ret = child_process.spawnSync(FFMPEG, [
      '-i',
      track.fname,
      '-c:v',
      'mpeg2video',
      // best quality, intraframe-only, constant 30 fps intermediate
      '-q:v',
      '1',
      '-qmin',
      '1',
      '-intra',
      '-r',
      '30',
      track.tmpfname,
    ]);
    if (ret.error) {
      console.log('ERROR', ret.error.message);
      process.exit(1);
    }
    console.log(ret.stderr.toString());
    if (ret.status) {
      console.log('ERROR');
      process.exit(ret.status);
    }
  }

  // Lay out video tracks side by side: each track gets an equal-width
  // column, fitted to preserve its aspect ratio and centered in the column.
  const baseW = baseSize.x / videoTracks.length;
  const baseH = baseSize.y;
  const baseAspect = baseW / baseH;
  let left = 0;
  // FIX: `const t` — the original `for (t of ...)` leaked an implicit global.
  for (const t of videoTracks) {
    let w, h;
    const aspect = t.resolution.x / t.resolution.y;
    if (aspect > baseAspect) {
      // wider than the column: fit width, letterbox vertically
      w = baseW;
      h = w / aspect;
    } else {
      // taller than the column: fit height, pillarbox horizontally
      h = baseH;
      w = h * aspect;
    }
    t.layoutSize = { x: w, y: h };
    // position: center within this column
    const offX = (baseW - w) / 2;
    const offY = (baseH - h) / 2;
    t.layoutPos = { x: left + offX, y: offY };
    left += baseW;
  }

  // calculate lowest startTs, highest endTs, and overall clip duration
  calcAggregateTimestamps();
  console.log(
    'beginning, end, duration',
    smallestStartTs,
    largestEndTs,
    duration
  );

  // calculate ts offset (delay) for each track
  for (const t of [...audioTracks, ...videoTracks]) {
    t.delay = t.startTs - smallestStartTs;
  }
  console.log('audio tracks', audioTracks);
  console.log('video tracks', videoTracks);

  // construct ffmpeg command arguments and run the final composite
  // FIX: `const` — ffmpegArgs was an implicit global.
  const ffmpegArgs = constructCommandArgs();
  ffmpegArgs.push(outfname);
  console.log(ffmpegArgs);
  const proc = child_process.spawn(FFMPEG, ffmpegArgs);
  proc.stdout.on('data', (d) => console.log(d.toString()));
  proc.stderr.on('data', (d) => console.log(d.toString()));
}
main();
// ----
/**
 * Probe a media file and build its track descriptor.
 *
 * @param {string} fname - media file path
 * @returns {{fname: string, startTs: number, endTs: number, duration: number,
 *            resolution: {x: number, y: number},
 *            layoutSize: {x: number, y: number},
 *            layoutPos: {x: number, y: number}}}
 */
function trackInfo(fname) {
  const startTs = parseFloat(getTrackStart(fname));
  // NOTE(review): getTrackEnd() returns the container *duration*, not an
  // absolute end timestamp, so endTs/duration here are only meaningful when
  // startTs shares the same origin — confirm against the input files.
  const endTs = parseFloat(getTrackEnd(fname));
  const res = getResolution(fname).split('x');
  // FIX: always pass an explicit radix to parseInt.
  const resolution = { x: parseInt(res[0], 10), y: parseInt(res[1], 10) };
  return {
    fname,
    startTs,
    endTs,
    duration: endTs - startTs,
    resolution,
    // Defaults: full canvas at the origin; the layout pass in main()
    // overwrites these for video tracks.
    layoutSize: { x: baseSize.x, y: baseSize.y },
    layoutPos: { x: 0, y: 0 },
  };
}
/**
 * Scan every track and record the earliest start timestamp, latest end
 * timestamp, and overall clip duration into the module-level
 * smallestStartTs / largestEndTs / duration variables.
 */
function calcAggregateTimestamps() {
  const allTracks = [...audioTracks, ...videoTracks];
  for (const track of allTracks) {
    if (track.startTs < smallestStartTs) smallestStartTs = track.startTs;
    if (track.endTs > largestEndTs) largestEndTs = track.endTs;
  }
  duration = largestEndTs - smallestStartTs;
}
/**
 * True when ffprobe reports at least one audio stream in the file.
 *
 * @param {string} fname - media file path
 * @returns {boolean}
 */
function hasAudioTrack(fname) {
  return (
    ffprobe('-show_streams -select_streams a -loglevel error', fname).length > 0
  );
}
/**
 * True when ffprobe reports at least one video stream in the file.
 *
 * @param {string} fname - media file path
 * @returns {boolean}
 */
function hasVideoTrack(fname) {
  const probeOutput = ffprobe(
    '-show_streams -select_streams v -loglevel error',
    fname
  );
  return probeOutput.length > 0;
}
/**
 * Start timestamp (stream start_time, as a string in seconds) reported
 * by ffprobe for the file's streams.
 *
 * @param {string} fname - media file path
 * @returns {string} raw ffprobe output, trimmed
 */
function getTrackStart(fname) {
  const probeArgs =
    '-v error -show_entries stream=start_time -of default=noprint_wrappers=1:nokey=1';
  return ffprobe(probeArgs, fname);
}
/**
 * Container duration of the file in seconds (as a string).
 *
 * NOTE(review): despite the name, this is the format *duration*, not an
 * absolute end timestamp — callers combine it with getTrackStart() to
 * derive an end. Confirm that is intended for the input files used.
 *
 * @param {string} fname - media file path
 * @returns {string} raw ffprobe output, trimmed
 */
function getTrackEnd(fname) {
  const probeArgs =
    '-v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 ';
  return ffprobe(probeArgs, fname);
}
/**
 * Resolution of the first video stream as "WIDTHxHEIGHT", e.g. "1280x720".
 *
 * @param {string} fname - media file path
 * @returns {string} raw ffprobe output, trimmed
 */
function getResolution(fname) {
  const probeArgs =
    '-v error -select_streams v:0 -show_entries stream=height,width -of csv=s=x:p=0 ';
  return ffprobe(probeArgs, fname);
}
/**
 * Run ffprobe against a file and return its trimmed stdout.
 *
 * FIX: the original interpolated fname into a shell command string
 * (execSync), which breaks on filenames containing spaces and is a
 * shell-injection vector. execFileSync with an argument array passes the
 * filename through safely and bypasses the shell entirely. The option
 * strings used by callers contain no quoted values, so splitting them on
 * whitespace is safe.
 *
 * @param {string} commandArgs - space-separated ffprobe options
 * @param {string} fname - media file path
 * @returns {string} trimmed stdout
 */
function ffprobe(commandArgs, fname) {
  const args = ['-i', fname, ...commandArgs.trim().split(/\s+/)];
  return child_process.execFileSync(FFPROBE, args).toString().trim();
}
/**
 * Build the ffmpeg argument list that mixes all audio tracks and
 * composites all video tracks side by side onto a colored background.
 *
 * Reads module-level state: audioTracks, videoTracks, duration, baseSize,
 * backgroundColor, audioBitrate, videoCrf, videoTitle. Each track must
 * already have tmpfname, delay, and (for video) layoutSize/layoutPos set.
 *
 * @returns {string[]} ffmpeg CLI arguments (without the output filename)
 */
function constructCommandArgs() {
  // igndts by itself doesn't seem to help
  // let ffcmd = ['-vsync', 'cfr', '-fflags', '+discardcorrupt+igndts+ignidx'];
  // let ffcmd = ['-copyts']; // maybe skips setpts stuff, somehow? no video plays and conversion is very fast
  const ffcmd = [];
  // Audio inputs first, then video inputs — the filter labels below rely
  // on this input ordering.
  for (const t of [...audioTracks, ...videoTracks]) {
    ffcmd.push('-i', t.tmpfname);
  }
  // FIX: spawn arguments must be strings on modern Node.
  ffcmd.push('-t', String(duration), '-filter_complex');

  // Background canvas. It is labeled layer<audioCount - 1> so the video
  // overlay chain (whose input indices start at audioCount) can uniformly
  // reference layer<idx - 1>.
  let f = `color=s=${baseSize.x}x${baseSize.y}:c=${backgroundColor} [layer${
    audioTracks.length - 1
  }]; `;
  let idx = 0;

  // Per-audio-track delay lines (initial ts offset).
  // FIX: adelay expects integer milliseconds; t.delay is a float-second
  // difference, so round rather than emitting a fractional value.
  for (const t of audioTracks) {
    f += `[${idx}:a] adelay=${Math.round(t.delay * 1000)} [aud${idx}]; `;
    idx++;
  }

  // Audio mix line, or silence when there is no audio at all (the
  // '-map [audio]' below always needs a stream to map).
  for (let n = 0; n < audioTracks.length; n++) {
    f += `[aud${n}]`;
  }
  if (audioTracks.length > 0) {
    f += ` amix=inputs=${audioTracks.length} [audio]; `;
  } else {
    f += ` anullsrc [audio]; `;
  }

  // Video: apply the initial ts delay offset via PTS munging (instead of a
  // delay filter), scale into the track's layout slot, then overlay onto
  // the accumulating layer chain.
  idx = audioTracks.length;
  for (const t of videoTracks) {
    f += `[${idx}:v] setpts=PTS+${t.delay}/TB,scale=${t.layoutSize.x}x${t.layoutSize.y} [vid${idx}]; `;
    f += `[layer${idx - 1}][vid${idx}] overlay=x=${t.layoutPos.x}:y=${
      t.layoutPos.y
    } [layer${idx}]; `;
    idx++;
  }
  const finalLayer = idx - 1;

  // Trim the trailing '; ' off the filter definition and push it onto our
  // args list.
  ffcmd.push(f.substring(0, f.length - 2));

  // Finish up mappings.
  ffcmd.push('-map', '[audio]', '-map', `[layer${finalLayer}]`);

  // And encoding parameters (make these into parameters, eventually).
  // All values are strings so the array is safe for child_process.spawn.
  ffcmd.push(
    '-c:a',
    'aac',
    '-ac',
    '1',
    '-b:a',
    String(audioBitrate),
    '-r',
    '30',
    '-c:v',
    'libx264',
    '-crf',
    String(videoCrf),
    '-movflags',
    'faststart',
    '-metadata',
    `title=${videoTitle}`
  );
  return ffcmd;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment