transcoder_from_scratch_in_js
@seatedro · Last active October 3, 2024
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>webcodecs</title>
<style>
body {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
margin: 0;
padding: 0;
background-color: #2c2c2c;
color: #e0e0e0;
height: 100vh;
display: flex;
justify-content: center;
align-items: center;
}
.container {
width: 90%;
max-width: 800px;
background-color: #363636;
border-radius: 8px;
box-shadow: 0 0 20px rgba(0, 0, 0, 0.3);
overflow: hidden;
}
.title-bar {
background-color: #1e1e1e;
padding: 10px 20px;
display: flex;
justify-content: space-between;
align-items: center;
border-bottom: 1px solid #4a4a4a;
}
h1 {
margin: 0;
font-size: 18px;
font-weight: normal;
}
.window-controls {
display: flex;
gap: 10px;
}
.window-control {
width: 12px;
height: 12px;
border-radius: 50%;
background-color: #5a5a5a;
}
.main-content {
padding: 20px;
}
.drop-zone {
border: 2px dashed #5a5a5a;
border-radius: 8px;
padding: 40px 20px;
text-align: center;
margin-bottom: 20px;
cursor: pointer;
transition: background-color 0.3s ease;
}
.drop-zone:hover,
.drop-zone.dragover {
background-color: #4a4a4a;
}
label {
display: block;
margin-top: 15px;
font-weight: bold;
font-size: 14px;
}
select,
input {
width: 100%;
padding: 8px;
margin-top: 5px;
background-color: #2c2c2c;
border: 1px solid #5a5a5a;
border-radius: 4px;
color: #e0e0e0;
}
button {
display: block;
width: 100%;
padding: 10px;
margin-top: 20px;
background-color: #007acc;
color: white;
border: none;
border-radius: 4px;
cursor: pointer;
transition: background-color 0.3s ease;
}
button:hover {
background-color: #005999;
}
#downloadLink {
display: block;
margin-top: 20px;
text-align: center;
color: #007acc;
text-decoration: none;
}
#downloadLink:hover {
text-decoration: underline;
}
#progress,
#codecInfo {
margin-top: 20px;
font-size: 14px;
}
#logContainer {
margin-top: 20px;
border: 1px solid #5a5a5a;
padding: 10px;
height: 150px;
overflow-y: auto;
background-color: #2c2c2c;
font-family: 'Consolas', 'Courier New', monospace;
font-size: 12px;
}
</style>
</head>
<body>
<div class="container">
<div class="title-bar">
<h1>transcoder type shit</h1>
<div class="window-controls">
<div class="window-control"></div>
<div class="window-control"></div>
<div class="window-control"></div>
</div>
</div>
<div class="main-content">
<div id="dropZone" class="drop-zone">
Drag and drop a video file here or click to select
<input type="file" id="videoInput" accept="video/*" style="display: none;">
</div>
<label for="outputCodec">Select Output Codec:</label>
<select id="outputCodec"></select>
<button id="convertButton">Convert Video</button>
<div id="progress"></div>
<a id="downloadLink" href="#" download="converted_video.webm" style="display:none;">Download
Converted Video</a>
<div id="codecInfo"></div>
<div id="logContainer"></div>
</div>
</div>
<script type="module">
const videoInput = document.getElementById('videoInput');
const dropZone = document.getElementById('dropZone');
const outputCodecSelect = document.getElementById('outputCodec');
const convertButton = document.getElementById('convertButton');
const downloadLink = document.getElementById('downloadLink');
const progressDiv = document.getElementById('progress');
const codecInfoDiv = document.getElementById('codecInfo');
const logContainer = document.getElementById('logContainer');
function log(message) {
console.log(message);
const logEntry = document.createElement('div');
logEntry.textContent = message;
logContainer.appendChild(logEntry);
logContainer.scrollTop = logContainer.scrollHeight;
}
// Drag and drop functionality
dropZone.addEventListener('dragover', (e) => {
e.preventDefault();
dropZone.classList.add('dragover');
});
dropZone.addEventListener('dragleave', () => {
dropZone.classList.remove('dragover');
});
dropZone.addEventListener('drop', (e) => {
e.preventDefault();
dropZone.classList.remove('dragover');
videoInput.files = e.dataTransfer.files;
log(`File selected: ${videoInput.files[0].name}`);
});
dropZone.addEventListener('click', () => {
videoInput.click();
});
videoInput.addEventListener('change', () => {
if (videoInput.files.length > 0) {
log(`File selected: ${videoInput.files[0].name}`);
}
});
// Check supported codecs and populate the select element
async function checkSupportedCodecs() {
const codecs = [
{name: 'H.264', value: 'avc'},
{name: 'H.265', value: 'hevc'},
{name: 'VP9', value: 'vp9'},
{name: 'AV1', value: 'av1'}
];
for (const codec of codecs) {
const config = await getEncoderConfig(codec.value, 640, 480, 30);
if (config) {
const option = document.createElement('option');
option.value = codec.value;
option.textContent = codec.name;
outputCodecSelect.appendChild(option);
codecInfoDiv.innerHTML += `<p>${codec.name}: Supported (Hardware Acceleration: ${config.hardwareAcceleration})</p>`;
} else {
codecInfoDiv.innerHTML += `<p>${codec.name}: Not supported</p>`;
}
}
log('Supported codecs checked and populated');
}
checkSupportedCodecs();
convertButton.addEventListener('click', async () => {
if (!videoInput.files.length) {
alert('Please select a video file.');
return;
}
const file = videoInput.files[0];
const outputCodec = outputCodecSelect.value;
try {
log(`Starting conversion to ${outputCodec}`);
const videoURL = URL.createObjectURL(file);
const video = document.createElement('video');
video.src = videoURL;
await new Promise((resolve, reject) => {
video.onloadedmetadata = resolve;
video.onerror = reject;
});
const width = video.videoWidth;
const height = video.videoHeight;
const duration = video.duration;
const framerate = 60; // output frame rate is fixed at 60fps; the source's actual rate isn't probed
log(`Video details: ${width}x${height}, ${duration}s, targeting ${framerate}fps`);
const canvas = new OffscreenCanvas(width, height);
const ctx = canvas.getContext('2d');
let videoEncoder = null;
let audioEncoder = null;
let muxer = null;
let startTime = null;
let lastKeyFrame = -Infinity;
let isConverting = true;
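// Tap the element's audio output with a ScriptProcessorNode. Note: ScriptProcessorNode
// is deprecated in the Web Audio API; AudioWorklet is the modern replacement.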
const audioContext = new AudioContext();
const sourceNode = audioContext.createMediaElementSource(video);
const processorNode = audioContext.createScriptProcessor(4096, 2, 2);
sourceNode.connect(processorNode);
processorNode.connect(audioContext.destination);
let audioEncoderConfig;
// Initialize MP4 muxer
muxer = new Muxer({
target: new ArrayBufferTarget(),
video: {
codec: outputCodec,
width: width,
height: height,
frameRate: framerate
},
audio: {
codec: 'aac',
sampleRate: audioContext.sampleRate,
numberOfChannels: 2
},
fastStart: 'in-memory',
firstTimestampBehavior: 'offset'
});
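// fastStart 'in-memory' buffers every chunk and writes the moov box before mdat on
// finalize, so the file can start playing before it has fully downloaded.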
videoEncoder = new VideoEncoder({
output: (chunk, meta) => muxer.addVideoChunk(chunk, meta),
error: e => log(`VideoEncoder error: ${e.message}`)
});
const config = await getEncoderConfig(outputCodec, width, height, framerate);
if (!config) {
throw new Error(`No supported encoder configuration found for ${outputCodec}`);
}
console.log("Encoder config: ", config);
videoEncoder.configure(config);
audioEncoderConfig = {
codec: 'mp4a.40.2',
sampleRate: audioContext.sampleRate,
numberOfChannels: 2,
bitrate: 128_000
};
audioEncoder = new AudioEncoder({
output: (chunk, meta) => {
if (!isConverting) return;
muxer.addAudioChunk(chunk, meta);
},
error: e => log(`AudioEncoder error: ${e.message}`)
});
audioEncoder.configure(audioEncoderConfig);
processorNode.onaudioprocess = (event) => {
if (!isConverting) return; // stop feeding the encoder once conversion has finished
const inputBuffer = event.inputBuffer;
const numOfChannels = inputBuffer.numberOfChannels;
const numOfFrames = inputBuffer.length;
const audioData = new Float32Array(numOfFrames * numOfChannels);
for (let channel = 0; channel < numOfChannels; channel++) {
inputBuffer.copyFromChannel(audioData.subarray(channel * numOfFrames, (channel + 1) * numOfFrames), channel);
}
const audioChunk = new AudioData({
data: audioData.buffer,
format: 'f32-planar', // channels are copied as separate planes above, so the planar format is required
sampleRate: audioContext.sampleRate,
numberOfFrames: numOfFrames,
numberOfChannels: numOfChannels,
timestamp: audioContext.currentTime * 1_000_000 // Convert to microseconds
});
audioEncoder.encode(audioChunk);
audioChunk.close();
};
startTime = performance.now();
// Start processing video frames
let frameCounter = 0;
const totalFrames = Math.ceil(duration * framerate);
video.play();
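// Frames are captured in real time from the playing element via requestAnimationFrame,
// so conversion takes roughly as long as the video's duration.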
const processFrame = async () => {
if (!isConverting) return;
if (video.currentTime >= duration) {
// End of video: stop capture, flush both encoders, then finalize the MP4
isConverting = false;
video.pause();
await videoEncoder.flush();
await audioEncoder.flush();
muxer.finalize();
const buffer = muxer.target.buffer;
const outputBlob = new Blob([buffer], {type: 'video/mp4'});
const outputURL = URL.createObjectURL(outputBlob);
downloadLink.href = outputURL;
downloadLink.style.display = 'block';
downloadLink.download = `converted_video.${getExtension(outputCodec)}`;
progressDiv.textContent = 'Conversion complete! Click the download link.';
log('Conversion complete. Download link ready.');
return;
}
ctx.drawImage(video, 0, 0, width, height);
const timestamp = frameCounter * 1_000_000 / framerate; // In microseconds
const videoFrame = new VideoFrame(canvas, {timestamp});
// Ensure a keyframe every 5 seconds
const elapsedTime = performance.now() - startTime;
const needsKeyFrame = (elapsedTime - lastKeyFrame) >= 5000;
if (needsKeyFrame) lastKeyFrame = elapsedTime;
videoEncoder.encode(videoFrame, {keyFrame: needsKeyFrame});
videoFrame.close();
frameCounter++;
const progress = Math.round((frameCounter / totalFrames) * 100);
progressDiv.textContent = `Converting: ${progress}%`;
if (frameCounter % 30 === 0) {
log(`Conversion progress: ${progress}%`);
}
requestAnimationFrame(processFrame);
};
processFrame();
} catch (err) {
console.error('Conversion error:', err);
log(`Error during conversion: ${err.message}`);
alert('Error during conversion. Check the console and log for details.');
}
});
// Helper function to download the final MP4 file (currently unused; the click handler sets the link directly)
function downloadBlob(blob) {
const url = window.URL.createObjectURL(blob);
downloadLink.href = url;
downloadLink.style.display = 'block';
downloadLink.download = 'converted_video.mp4';
}
async function getEncoderConfig(codec, width, height, framerate) {
const baseConfig = {
width: width,
height: height,
bitrate: 5_000_000, // 5 Mbps
framerate: framerate,
};
const codecConfig = {
avc: {...baseConfig, codec: 'avc1.42E01E'},
hevc: {...baseConfig, codec: 'hev1.1.6.L93.B0'},
av1: {...baseConfig, codec: 'av01.0.08M.08'},
vp9: {...baseConfig, codec: 'vp09.00.10.08'}
};
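// Codec string cheat sheet: avc1.42E01E = H.264 Constrained Baseline, level 3.0;
// hev1.1.6.L93.B0 = HEVC Main, level 3.1; av01.0.08M.08 = AV1 Main, level 4.0,
// Main tier, 8-bit; vp09.00.10.08 = VP9 profile 0, level 1.0, 8-bit.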
let config = codecConfig[codec];
// Try hardware acceleration first
config.hardwareAcceleration = 'prefer-hardware';
let isSupported = await VideoEncoder.isConfigSupported(config);
if (!isSupported.supported) {
// If hardware acceleration is not supported, try software
config.hardwareAcceleration = 'prefer-software';
isSupported = await VideoEncoder.isConfigSupported(config);
}
if (isSupported.supported) {
return config;
}
// For H.264 and HEVC, try different profiles if the initial one is not supported
if (codec === 'avc' || codec === 'hevc') {
const profiles = codec === 'avc'
? ['42E01E', '42001F', '4D001F', '640028']
: ['1.6.L93.B0', '1.2.L120.B0', '2.1.L93.B0'];
for (const profile of profiles) {
config.codec = `${codec === 'avc' ? 'avc1' : 'hev1'}.${profile}`;
isSupported = await VideoEncoder.isConfigSupported(config);
if (isSupported.supported) {
log(`Using ${codec.toUpperCase()} profile: ${profile}`);
return config;
}
}
}
// If we reach here, the codec is not supported
return null;
}
function getMimeType(codec) {
const mimeTypes = {
avc: 'video/mp4; codecs="avc1.42E01E"',
hevc: 'video/mp4; codecs="hev1.1.6.L93.B0"',
vp9: 'video/mp4; codecs="vp09.00.10.08"',
av1: 'video/mp4; codecs="av01.0.08M.08"'
};
return mimeTypes[codec] || 'video/mp4';
}
function getExtension(codec) {
// The muxer below always produces an MP4 container, regardless of codec
return 'mp4';
}
/** MP4 muxing logic.
* src: https://github.com/Vanilagy/mp4-muxer
*/
let bytes = new Uint8Array(8);
let view = new DataView(bytes.buffer);
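// Shared scratch buffer for the serializers below. All multi-byte integers in ISO BMFF
// (MP4) boxes are big-endian, hence the explicit big-endian DataView writes.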
const u8 = (value) => {
return [((value % 0x100) + 0x100) % 0x100];
};
const u16 = (value) => {
view.setUint16(0, value, false);
return [bytes[0], bytes[1]];
};
const i16 = (value) => {
view.setInt16(0, value, false);
return [bytes[0], bytes[1]];
};
const u24 = (value) => {
view.setUint32(0, value, false);
return [bytes[1], bytes[2], bytes[3]];
};
const u32 = (value) => {
view.setUint32(0, value, false);
return [bytes[0], bytes[1], bytes[2], bytes[3]];
};
const i32 = (value) => {
view.setInt32(0, value, false);
return [bytes[0], bytes[1], bytes[2], bytes[3]];
};
const u64 = (value) => {
view.setUint32(0, Math.floor(value / 2 ** 32), false);
view.setUint32(4, value, false);
return [
bytes[0],
bytes[1],
bytes[2],
bytes[3],
bytes[4],
bytes[5],
bytes[6],
bytes[7],
];
};
const fixed_8_8 = (value) => {
view.setInt16(0, 2 ** 8 * value, false);
return [bytes[0], bytes[1]];
};
const fixed_16_16 = (value) => {
view.setInt32(0, 2 ** 16 * value, false);
return [bytes[0], bytes[1], bytes[2], bytes[3]];
};
const fixed_2_30 = (value) => {
view.setInt32(0, 2 ** 30 * value, false);
return [bytes[0], bytes[1], bytes[2], bytes[3]];
};
const ascii = (text, nullTerminated = false) => {
let bytes = Array(text.length)
.fill(null)
.map((_, i) => text.charCodeAt(i));
if (nullTerminated)
bytes.push(0x00);
return bytes;
};
const last = (arr) => {
return arr && arr[arr.length - 1];
};
const lastPresentedSample = (samples) => {
let result = undefined;
for (let sample of samples) {
if (!result ||
sample.presentationTimestamp > result.presentationTimestamp) {
result = sample;
}
}
return result;
};
const intoTimescale = (timeInSeconds, timescale, round = true) => {
let value = timeInSeconds * timescale;
return round ? Math.round(value) : value;
};
const rotationMatrix = (rotationInDegrees) => {
let theta = rotationInDegrees * (Math.PI / 180);
let cosTheta = Math.cos(theta);
let sinTheta = Math.sin(theta);
return [cosTheta, sinTheta, 0, -sinTheta, cosTheta, 0, 0, 0, 1];
};
const IDENTITY_MATRIX = rotationMatrix(0);
const matrixToBytes = (matrix) => {
return [
fixed_16_16(matrix[0]),
fixed_16_16(matrix[1]),
fixed_2_30(matrix[2]),
fixed_16_16(matrix[3]),
fixed_16_16(matrix[4]),
fixed_2_30(matrix[5]),
fixed_16_16(matrix[6]),
fixed_16_16(matrix[7]),
fixed_2_30(matrix[8]),
];
};
const deepClone = (x) => {
if (!x)
return x;
if (typeof x !== "object")
return x;
if (Array.isArray(x))
return x.map(deepClone);
return Object.fromEntries(Object.entries(x).map(([key, value]) => [key, deepClone(value)]));
};
const isU32 = (value) => {
return value >= 0 && value < 2 ** 32;
};
const box = (type, contents, children) => ({
type,
contents: contents && new Uint8Array(contents.flat(10)),
children,
});
const fullBox = (type, version, flags, contents, children) => box(type, [u8(version), u24(flags), contents ?? []], children);
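// A "full box" prefixes its payload with a 1-byte version and a 3-byte flags field.
// ftyp: regular files use the 'isom' major brand; fragmented files use 'iso5'.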
const ftyp = (details) => {
let minorVersion = 0x200;
if (details.fragmented)
return box("ftyp", [
ascii("iso5"),
u32(minorVersion),
ascii("iso5"),
ascii("iso6"),
ascii("mp41"),
]);
return box("ftyp", [
ascii("isom"),
u32(minorVersion),
ascii("isom"),
details.holdsAvc ? ascii("avc1") : [],
ascii("mp41"),
]);
};
const mdat = (reserveLargeSize) => ({
type: "mdat",
largeSize: reserveLargeSize,
});
const free = (size) => ({type: "free", size});
const moov = (tracks, creationTime, fragmented = false) => box("moov", null, [
mvhd(creationTime, tracks),
...tracks.map((x) => trak(x, creationTime)),
fragmented ? mvex(tracks) : null,
]);
const mvhd = (creationTime, tracks) => {
let duration = intoTimescale(Math.max(0, ...tracks
.filter((x) => x.samples.length > 0)
.map((x) => {
const lastSample = lastPresentedSample(x.samples);
return lastSample.presentationTimestamp + lastSample.duration;
})), GLOBAL_TIMESCALE);
let nextTrackId = Math.max(...tracks.map((x) => x.id)) + 1;
let needsU64 = !isU32(creationTime) || !isU32(duration);
let u32OrU64 = needsU64 ? u64 : u32;
return fullBox("mvhd", +needsU64, 0, [
u32OrU64(creationTime),
u32OrU64(creationTime),
u32(GLOBAL_TIMESCALE),
u32OrU64(duration),
fixed_16_16(1),
fixed_8_8(1),
Array(10).fill(0),
matrixToBytes(IDENTITY_MATRIX),
Array(24).fill(0),
u32(nextTrackId),
]);
};
const trak = (track, creationTime) => box("trak", null, [tkhd(track, creationTime), mdia(track, creationTime)]);
const tkhd = (track, creationTime) => {
let lastSample = lastPresentedSample(track.samples);
let durationInGlobalTimescale = intoTimescale(lastSample ? lastSample.presentationTimestamp + lastSample.duration : 0, GLOBAL_TIMESCALE);
let needsU64 = !isU32(creationTime) || !isU32(durationInGlobalTimescale);
let u32OrU64 = needsU64 ? u64 : u32;
let matrix;
if (track.info.type === "video") {
matrix =
typeof track.info.rotation === "number"
? rotationMatrix(track.info.rotation)
: track.info.rotation;
}
else {
matrix = IDENTITY_MATRIX;
}
return fullBox("tkhd", +needsU64, 3, [
u32OrU64(creationTime),
u32OrU64(creationTime),
u32(track.id),
u32(0),
u32OrU64(durationInGlobalTimescale),
Array(8).fill(0),
u16(0),
u16(0),
fixed_8_8(track.info.type === "audio" ? 1 : 0),
u16(0),
matrixToBytes(matrix),
fixed_16_16(track.info.type === "video" ? track.info.width : 0),
fixed_16_16(track.info.type === "video" ? track.info.height : 0),
]);
};
const mdia = (track, creationTime) => box("mdia", null, [
mdhd(track, creationTime),
hdlr(track.info.type === "video" ? "vide" : "soun"),
minf(track),
]);
const mdhd = (track, creationTime) => {
let lastSample = lastPresentedSample(track.samples);
let localDuration = intoTimescale(lastSample ? lastSample.presentationTimestamp + lastSample.duration : 0, track.timescale);
let needsU64 = !isU32(creationTime) || !isU32(localDuration);
let u32OrU64 = needsU64 ? u64 : u32;
return fullBox("mdhd", +needsU64, 0, [
u32OrU64(creationTime),
u32OrU64(creationTime),
u32(track.timescale),
u32OrU64(localDuration),
u16(0b01010101_11000100),
u16(0),
]);
};
const hdlr = (componentSubtype) => fullBox("hdlr", 0, 0, [
ascii("mhlr"),
ascii(componentSubtype),
u32(0),
u32(0),
u32(0),
ascii("mp4-muxer-hdlr", true),
]);
const minf = (track) => box("minf", null, [
track.info.type === "video" ? vmhd() : smhd(),
dinf(),
stbl(track),
]);
const vmhd = () => fullBox("vmhd", 0, 1, [
u16(0),
u16(0),
u16(0),
u16(0),
]);
const smhd = () => fullBox("smhd", 0, 0, [
u16(0),
u16(0),
]);
const dinf = () => box("dinf", null, [dref()]);
const dref = () => fullBox("dref", 0, 0, [
u32(1),
], [url()]);
const url = () => fullBox("url ", 0, 1);
const stbl = (track) => {
const needsCtts = track.compositionTimeOffsetTable.length > 1 ||
track.compositionTimeOffsetTable.some((x) => x.sampleCompositionTimeOffset !== 0);
return box("stbl", null, [
stsd(track),
stts(track),
stss(track),
stsc(track),
stsz(track),
stco(track),
needsCtts ? ctts(track) : null,
]);
};
const stsd = (track) => fullBox("stsd", 0, 0, [
u32(1),
], [
track.info.type === "video"
? videoSampleDescription(VIDEO_CODEC_TO_BOX_NAME[track.info.codec], track)
: soundSampleDescription(AUDIO_CODEC_TO_BOX_NAME[track.info.codec], track),
]);
const videoSampleDescription = (compressionType, track) => box(compressionType, [
Array(6).fill(0),
u16(1),
u16(0),
u16(0),
Array(12).fill(0),
u16(track.info.width),
u16(track.info.height),
u32(0x00480000),
u32(0x00480000),
u32(0),
u16(1),
Array(32).fill(0),
u16(0x0018),
i16(0xffff),
], [VIDEO_CODEC_TO_CONFIGURATION_BOX[track.info.codec](track)]);
const avcC = (track) => track.info.decoderConfig &&
box("avcC", [
...new Uint8Array(track.info.decoderConfig.description),
]);
const hvcC = (track) => track.info.decoderConfig &&
box("hvcC", [
...new Uint8Array(track.info.decoderConfig.description),
]);
const vpcC = (track) => {
if (!track.info.decoderConfig) {
return null;
}
let decoderConfig = track.info.decoderConfig;
if (!decoderConfig.colorSpace) {
throw new Error(`'colorSpace' is required in the decoder config for VP9.`);
}
let parts = decoderConfig.codec.split(".");
let profile = Number(parts[1]);
let level = Number(parts[2]);
let bitDepth = Number(parts[3]);
let chromaSubsampling = 0;
let thirdByte = (bitDepth << 4) +
(chromaSubsampling << 1) +
Number(decoderConfig.colorSpace.fullRange);
let colourPrimaries = 2;
let transferCharacteristics = 2;
let matrixCoefficients = 2;
return fullBox("vpcC", 1, 0, [
u8(profile),
u8(level),
u8(thirdByte),
u8(colourPrimaries),
u8(transferCharacteristics),
u8(matrixCoefficients),
u16(0),
]);
};
const av1C = () => {
let marker = 1;
let version = 1;
let firstByte = (marker << 7) + version;
return box("av1C", [firstByte, 0, 0, 0]);
};
const soundSampleDescription = (compressionType, track) => box(compressionType, [
Array(6).fill(0),
u16(1),
u16(0),
u16(0),
u32(0),
u16(track.info.numberOfChannels),
u16(16),
u16(0),
u16(0),
fixed_16_16(track.info.sampleRate),
], [AUDIO_CODEC_TO_CONFIGURATION_BOX[track.info.codec](track)]);
const esds = (track) => {
let description = new Uint8Array(track.info.decoderConfig.description);
return fullBox("esds", 0, 0, [
u32(0x03808080),
u8(0x20 + description.byteLength),
u16(1),
u8(0x00),
u32(0x04808080),
u8(0x12 + description.byteLength),
u8(0x40),
u8(0x15),
u24(0),
u32(0x0001fc17),
u32(0x0001fc17),
u32(0x05808080),
u8(description.byteLength),
...description,
u32(0x06808080),
u8(0x01),
u8(0x02),
]);
};
const dOps = (track) => {
let preskip = 3840;
let gain = 0;
const description = track.info.decoderConfig?.description;
if (description) {
if (description.byteLength < 18) {
throw new TypeError("Invalid decoder description provided for Opus; must be at least 18 bytes long.");
}
const view = ArrayBuffer.isView(description)
? new DataView(description.buffer, description.byteOffset, description.byteLength)
: new DataView(description);
preskip = view.getUint16(10, true);
gain = view.getInt16(14, true);
}
return box("dOps", [
u8(0),
u8(track.info.numberOfChannels),
u16(preskip),
u32(track.info.sampleRate),
fixed_8_8(gain),
u8(0),
]);
};
const stts = (track) => {
return fullBox("stts", 0, 0, [
u32(track.timeToSampleTable.length),
track.timeToSampleTable.map((x) => [
u32(x.sampleCount),
u32(x.sampleDelta),
]),
]);
};
const stss = (track) => {
if (track.samples.every((x) => x.type === "key"))
return null;
let keySamples = [...track.samples.entries()].filter(([, sample]) => sample.type === "key");
return fullBox("stss", 0, 0, [
u32(keySamples.length),
keySamples.map(([index]) => u32(index + 1)),
]);
};
const stsc = (track) => {
return fullBox("stsc", 0, 0, [
u32(track.compactlyCodedChunkTable.length),
track.compactlyCodedChunkTable.map((x) => [
u32(x.firstChunk),
u32(x.samplesPerChunk),
u32(1),
]),
]);
};
const stsz = (track) => fullBox("stsz", 0, 0, [
u32(0),
u32(track.samples.length),
track.samples.map((x) => u32(x.size)),
]);
const stco = (track) => {
if (track.finalizedChunks.length > 0 &&
last(track.finalizedChunks).offset >= 2 ** 32) {
return fullBox("co64", 0, 0, [
u32(track.finalizedChunks.length),
track.finalizedChunks.map((x) => u64(x.offset)),
]);
}
return fullBox("stco", 0, 0, [
u32(track.finalizedChunks.length),
track.finalizedChunks.map((x) => u32(x.offset)),
]);
};
const ctts = (track) => {
return fullBox("ctts", 0, 0, [
u32(track.compositionTimeOffsetTable.length),
track.compositionTimeOffsetTable.map((x) => [
u32(x.sampleCount),
u32(x.sampleCompositionTimeOffset),
]),
]);
};
const mvex = (tracks) => {
return box("mvex", null, tracks.map(trex));
};
const trex = (track) => {
return fullBox("trex", 0, 0, [
u32(track.id),
u32(1),
u32(0),
u32(0),
u32(0),
]);
};
const moof = (sequenceNumber, tracks) => {
return box("moof", null, [mfhd(sequenceNumber), ...tracks.map(traf)]);
};
const mfhd = (sequenceNumber) => {
return fullBox("mfhd", 0, 0, [
u32(sequenceNumber),
]);
};
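// Encodes ISO BMFF sample flags: sample_depends_on in the first byte (2 = I-frame,
// 1 = depends on other samples) and sample_is_non_sync_sample in the second byte.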
const fragmentSampleFlags = (sample) => {
let byte1 = 0;
let byte2 = 0;
let byte3 = 0;
let byte4 = 0;
let sampleIsDifferenceSample = sample.type === "delta";
byte2 |= +sampleIsDifferenceSample;
if (sampleIsDifferenceSample) {
byte1 |= 1;
}
else {
byte1 |= 2;
}
return (byte1 << 24) | (byte2 << 16) | (byte3 << 8) | byte4;
};
const traf = (track) => {
return box("traf", null, [tfhd(track), tfdt(track), trun(track)]);
};
const tfhd = (track) => {
let tfFlags = 0;
tfFlags |= 0x00008;
tfFlags |= 0x00010;
tfFlags |= 0x00020;
tfFlags |= 0x20000;
let referenceSample = track.currentChunk.samples[1] ?? track.currentChunk.samples[0];
let referenceSampleInfo = {
duration: referenceSample.timescaleUnitsToNextSample,
size: referenceSample.size,
flags: fragmentSampleFlags(referenceSample),
};
return fullBox("tfhd", 0, tfFlags, [
u32(track.id),
u32(referenceSampleInfo.duration),
u32(referenceSampleInfo.size),
u32(referenceSampleInfo.flags),
]);
};
const tfdt = (track) => {
return fullBox("tfdt", 1, 0, [
u64(intoTimescale(track.currentChunk.startTimestamp, track.timescale)),
]);
};
const trun = (track) => {
let allSampleDurations = track.currentChunk.samples.map((x) => x.timescaleUnitsToNextSample);
let allSampleSizes = track.currentChunk.samples.map((x) => x.size);
let allSampleFlags = track.currentChunk.samples.map(fragmentSampleFlags);
let allSampleCompositionTimeOffsets = track.currentChunk.samples.map((x) => intoTimescale(x.presentationTimestamp - x.decodeTimestamp, track.timescale));
let uniqueSampleDurations = new Set(allSampleDurations);
let uniqueSampleSizes = new Set(allSampleSizes);
let uniqueSampleFlags = new Set(allSampleFlags);
let uniqueSampleCompositionTimeOffsets = new Set(allSampleCompositionTimeOffsets);
let firstSampleFlagsPresent = uniqueSampleFlags.size === 2 && allSampleFlags[0] !== allSampleFlags[1];
let sampleDurationPresent = uniqueSampleDurations.size > 1;
let sampleSizePresent = uniqueSampleSizes.size > 1;
let sampleFlagsPresent = !firstSampleFlagsPresent && uniqueSampleFlags.size > 1;
let sampleCompositionTimeOffsetsPresent = uniqueSampleCompositionTimeOffsets.size > 1 ||
[...uniqueSampleCompositionTimeOffsets].some((x) => x !== 0);
let flags = 0;
flags |= 0x0001;
flags |= 0x0004 * +firstSampleFlagsPresent;
flags |= 0x0100 * +sampleDurationPresent;
flags |= 0x0200 * +sampleSizePresent;
flags |= 0x0400 * +sampleFlagsPresent;
flags |= 0x0800 * +sampleCompositionTimeOffsetsPresent;
return fullBox("trun", 1, flags, [
u32(track.currentChunk.samples.length),
u32(track.currentChunk.offset - track.currentChunk.moofOffset || 0),
firstSampleFlagsPresent ? u32(allSampleFlags[0]) : [],
track.currentChunk.samples.map((_, i) => [
sampleDurationPresent ? u32(allSampleDurations[i]) : [],
sampleSizePresent ? u32(allSampleSizes[i]) : [],
sampleFlagsPresent ? u32(allSampleFlags[i]) : [],
sampleCompositionTimeOffsetsPresent
? i32(allSampleCompositionTimeOffsets[i])
: [],
]),
]);
};
const mfra = (tracks) => {
return box("mfra", null, [...tracks.map(tfra), mfro()]);
};
const tfra = (track, trackIndex) => {
let version = 1;
return fullBox("tfra", version, 0, [
u32(track.id),
u32(0b111111),
u32(track.finalizedChunks.length),
track.finalizedChunks.map((chunk) => [
u64(intoTimescale(chunk.startTimestamp, track.timescale)),
u64(chunk.moofOffset),
u32(trackIndex + 1),
u32(1),
u32(1),
]),
]);
};
const mfro = () => {
return fullBox("mfro", 0, 0, [
u32(0),
]);
};
const VIDEO_CODEC_TO_BOX_NAME = {
avc: "avc1",
hevc: "hvc1",
vp9: "vp09",
av1: "av01",
};
const VIDEO_CODEC_TO_CONFIGURATION_BOX = {
avc: avcC,
hevc: hvcC,
vp9: vpcC,
av1: av1C,
};
const AUDIO_CODEC_TO_BOX_NAME = {
aac: "mp4a",
opus: "Opus",
};
const AUDIO_CODEC_TO_CONFIGURATION_BOX = {
aac: esds,
opus: dOps,
};
const isTarget = Symbol("isTarget");
class Target {
[isTarget];
}
class ArrayBufferTarget extends Target {
buffer = null;
}
class StreamTarget extends Target {
options;
constructor(options) {
super();
this.options = options;
if (typeof options !== "object") {
throw new TypeError("StreamTarget requires an options object to be passed to its constructor.");
}
if (options.onData) {
if (typeof options.onData !== "function") {
throw new TypeError("options.onData, when provided, must be a function.");
}
if (options.onData.length < 2) {
throw new TypeError("options.onData, when provided, must be a function that takes in at least two arguments (data and " +
"position). Ignoring the position argument, which specifies the byte offset at which the data is " +
"to be written, can lead to broken outputs.");
}
}
if (options.chunked !== undefined && typeof options.chunked !== "boolean") {
throw new TypeError("options.chunked, when provided, must be a boolean.");
}
if (options.chunkSize !== undefined &&
(!Number.isInteger(options.chunkSize) || options.chunkSize <= 0)) {
throw new TypeError("options.chunkSize, when provided, must be a positive integer.");
}
}
}
class FileSystemWritableFileStreamTarget extends Target {
stream;
options;
constructor(stream, options) {
super();
this.stream = stream;
this.options = options;
if (!(stream instanceof FileSystemWritableFileStream)) {
throw new TypeError("FileSystemWritableFileStreamTarget requires a FileSystemWritableFileStream instance.");
}
if (options !== undefined && typeof options !== "object") {
throw new TypeError("FileSystemWritableFileStreamTarget's options, when provided, must be an object.");
}
if (options) {
if (options.chunkSize !== undefined &&
(!Number.isInteger(options.chunkSize) || options.chunkSize <= 0)) {
throw new TypeError("options.chunkSize, when provided, must be a positive integer");
}
}
}
}
class Writer {
pos = 0;
#helper = new Uint8Array(8);
#helperView = new DataView(this.#helper.buffer);
offsets = new WeakMap();
seek(newPos) {
this.pos = newPos;
}
writeU32(value) {
this.#helperView.setUint32(0, value, false);
this.write(this.#helper.subarray(0, 4));
}
writeU64(value) {
this.#helperView.setUint32(0, Math.floor(value / 2 ** 32), false);
this.#helperView.setUint32(4, value, false);
this.write(this.#helper.subarray(0, 8));
}
writeAscii(text) {
for (let i = 0; i < text.length; i++) {
this.#helperView.setUint8(i % 8, text.charCodeAt(i));
if (i % 8 === 7)
this.write(this.#helper);
}
if (text.length % 8 !== 0) {
this.write(this.#helper.subarray(0, text.length % 8));
}
}
writeBox(box) {
this.offsets.set(box, this.pos);
if (box.contents && !box.children) {
this.writeBoxHeader(box, box.size ?? box.contents.byteLength + 8);
this.write(box.contents);
}
else {
let startPos = this.pos;
this.writeBoxHeader(box, 0);
if (box.contents)
this.write(box.contents);
if (box.children)
for (let child of box.children)
if (child)
this.writeBox(child);
let endPos = this.pos;
let size = box.size ?? endPos - startPos;
this.seek(startPos);
this.writeBoxHeader(box, size);
this.seek(endPos);
}
}
writeBoxHeader(box, size) {
this.writeU32(box.largeSize ? 1 : size);
this.writeAscii(box.type);
if (box.largeSize)
this.writeU64(size);
}
measureBoxHeader(box) {
return 8 + (box.largeSize ? 8 : 0);
}
patchBox(box) {
let endPos = this.pos;
this.seek(this.offsets.get(box));
this.writeBox(box);
this.seek(endPos);
}
measureBox(box) {
if (box.contents && !box.children) {
let headerSize = this.measureBoxHeader(box);
return headerSize + box.contents.byteLength;
}
else {
let result = this.measureBoxHeader(box);
if (box.contents)
result += box.contents.byteLength;
if (box.children)
for (let child of box.children)
if (child)
result += this.measureBox(child);
return result;
}
}
}
class ArrayBufferTargetWriter extends Writer {
#target;
#buffer = new ArrayBuffer(2 ** 16);
#bytes = new Uint8Array(this.#buffer);
#maxPos = 0;
constructor(target) {
super();
this.#target = target;
}
#ensureSize(size) {
let newLength = this.#buffer.byteLength;
while (newLength < size)
newLength *= 2;
if (newLength === this.#buffer.byteLength)
return;
let newBuffer = new ArrayBuffer(newLength);
let newBytes = new Uint8Array(newBuffer);
newBytes.set(this.#bytes, 0);
this.#buffer = newBuffer;
this.#bytes = newBytes;
}
write(data) {
this.#ensureSize(this.pos + data.byteLength);
this.#bytes.set(data, this.pos);
this.pos += data.byteLength;
this.#maxPos = Math.max(this.#maxPos, this.pos);
}
finalize() {
this.#ensureSize(this.pos);
this.#target.buffer = this.#buffer.slice(0, Math.max(this.#maxPos, this.pos));
}
}
class StreamTargetWriter extends Writer {
#target;
#sections = [];
constructor(target) {
super();
this.#target = target;
}
write(data) {
this.#sections.push({
data: data.slice(),
start: this.pos,
});
this.pos += data.byteLength;
}
flush() {
if (this.#sections.length === 0)
return;
let chunks = [];
let sorted = [...this.#sections].sort((a, b) => a.start - b.start);
chunks.push({
start: sorted[0].start,
size: sorted[0].data.byteLength,
});
for (let i = 1; i < sorted.length; i++) {
let lastChunk = chunks[chunks.length - 1];
let section = sorted[i];
if (section.start <= lastChunk.start + lastChunk.size) {
lastChunk.size = Math.max(lastChunk.size, section.start + section.data.byteLength - lastChunk.start);
}
else {
chunks.push({
start: section.start,
size: section.data.byteLength,
});
}
}
for (let chunk of chunks) {
chunk.data = new Uint8Array(chunk.size);
for (let section of this.#sections) {
if (chunk.start <= section.start &&
section.start < chunk.start + chunk.size) {
chunk.data.set(section.data, section.start - chunk.start);
}
}
this.#target.options.onData?.(chunk.data, chunk.start);
}
this.#sections.length = 0;
}
finalize() { }
}
const DEFAULT_CHUNK_SIZE = 2 ** 24;
const MAX_CHUNKS_AT_ONCE = 2;
class ChunkedStreamTargetWriter extends Writer {
#target;
#chunkSize;
#chunks = [];
constructor(target) {
super();
this.#target = target;
this.#chunkSize = target.options?.chunkSize ?? DEFAULT_CHUNK_SIZE;
if (!Number.isInteger(this.#chunkSize) || this.#chunkSize < 2 ** 10) {
throw new Error("Invalid StreamTarget options: chunkSize must be an integer not smaller than 1024.");
}
}
write(data) {
this.#writeDataIntoChunks(data, this.pos);
this.#flushChunks();
this.pos += data.byteLength;
}
#writeDataIntoChunks(data, position) {
let chunkIndex = this.#chunks.findIndex((x) => x.start <= position && position < x.start + this.#chunkSize);
if (chunkIndex === -1)
chunkIndex = this.#createChunk(position);
let chunk = this.#chunks[chunkIndex];
let relativePosition = position - chunk.start;
let toWrite = data.subarray(0, Math.min(this.#chunkSize - relativePosition, data.byteLength));
chunk.data.set(toWrite, relativePosition);
let section = {
start: relativePosition,
end: relativePosition + toWrite.byteLength,
};
this.#insertSectionIntoChunk(chunk, section);
if (chunk.written[0].start === 0 &&
chunk.written[0].end === this.#chunkSize) {
chunk.shouldFlush = true;
}
if (this.#chunks.length > MAX_CHUNKS_AT_ONCE) {
for (let i = 0; i < this.#chunks.length - 1; i++) {
this.#chunks[i].shouldFlush = true;
}
this.#flushChunks();
}
if (toWrite.byteLength < data.byteLength) {
this.#writeDataIntoChunks(data.subarray(toWrite.byteLength), position + toWrite.byteLength);
}
}
#insertSectionIntoChunk(chunk, section) {
let low = 0;
let high = chunk.written.length - 1;
let index = -1;
while (low <= high) {
let mid = Math.floor(low + (high - low + 1) / 2);
if (chunk.written[mid].start <= section.start) {
low = mid + 1;
index = mid;
}
else {
high = mid - 1;
}
}
chunk.written.splice(index + 1, 0, section);
if (index === -1 || chunk.written[index].end < section.start)
index++;
while (index < chunk.written.length - 1 &&
chunk.written[index].end >= chunk.written[index + 1].start) {
chunk.written[index].end = Math.max(chunk.written[index].end, chunk.written[index + 1].end);
chunk.written.splice(index + 1, 1);
}
}
#createChunk(includesPosition) {
let start = Math.floor(includesPosition / this.#chunkSize) * this.#chunkSize;
let chunk = {
start,
data: new Uint8Array(this.#chunkSize),
written: [],
shouldFlush: false,
};
this.#chunks.push(chunk);
this.#chunks.sort((a, b) => a.start - b.start);
return this.#chunks.indexOf(chunk);
}
#flushChunks(force = false) {
for (let i = 0; i < this.#chunks.length; i++) {
let chunk = this.#chunks[i];
if (!chunk.shouldFlush && !force)
continue;
for (let section of chunk.written) {
this.#target.options.onData?.(chunk.data.subarray(section.start, section.end), chunk.start + section.start);
}
this.#chunks.splice(i--, 1);
}
}
finalize() {
this.#flushChunks(true);
}
}
class FileSystemWritableFileStreamTargetWriter extends ChunkedStreamTargetWriter {
constructor(target) {
super(new StreamTarget({
onData: (data, position) => target.stream.write({
type: "write",
data,
position,
}),
chunkSize: target.options?.chunkSize,
}));
}
}
const GLOBAL_TIMESCALE = 1000;
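// Movie-level timescale: 1000 units per second, i.e. millisecond precision in mvhd/tkhd.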
const SUPPORTED_VIDEO_CODECS = ["avc", "hevc", "vp9", "av1"];
const SUPPORTED_AUDIO_CODECS = ["aac", "opus"];
const TIMESTAMP_OFFSET = 2_082_844_800;
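// Seconds between the MP4 epoch (1904-01-01) and the Unix epoch (1970-01-01).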
const FIRST_TIMESTAMP_BEHAVIORS = [
"strict",
"offset",
"cross-track-offset",
];
class Muxer {
target;
#options;
#writer;
#ftypSize;
#mdat;
#videoTrack = null;
#audioTrack = null;
#creationTime = Math.floor(Date.now() / 1000) + TIMESTAMP_OFFSET;
#finalizedChunks = [];
#nextFragmentNumber = 1;
#videoSampleQueue = [];
#audioSampleQueue = [];
#finalized = false;
constructor(options) {
this.#validateOptions(options);
options.video = deepClone(options.video);
options.audio = deepClone(options.audio);
options.fastStart = deepClone(options.fastStart);
this.target = options.target;
this.#options = {
firstTimestampBehavior: "strict",
...options,
};
if (options.target instanceof ArrayBufferTarget) {
this.#writer = new ArrayBufferTargetWriter(options.target);
}
else if (options.target instanceof StreamTarget) {
this.#writer = options.target.options?.chunked
? new ChunkedStreamTargetWriter(options.target)
: new StreamTargetWriter(options.target);
}
else if (options.target instanceof FileSystemWritableFileStreamTarget) {
this.#writer = new FileSystemWritableFileStreamTargetWriter(options.target);
}
else {
throw new Error(`Invalid target: ${options.target}`);
}
this.#prepareTracks();
this.#writeHeader();
}
#validateOptions(options) {
if (typeof options !== "object") {
throw new TypeError("The muxer requires an options object to be passed to its constructor.");
}
if (!(options.target instanceof Target)) {
throw new TypeError("The target must be provided and an instance of Target.");
}
if (options.video) {
if (!SUPPORTED_VIDEO_CODECS.includes(options.video.codec)) {
throw new TypeError(`Unsupported video codec: ${options.video.codec}`);
}
if (!Number.isInteger(options.video.width) || options.video.width <= 0) {
throw new TypeError(`Invalid video width: ${options.video.width}. Must be a positive integer.`);
}
if (!Number.isInteger(options.video.height) ||
options.video.height <= 0) {
throw new TypeError(`Invalid video height: ${options.video.height}. Must be a positive integer.`);
}
const videoRotation = options.video.rotation;
if (typeof videoRotation === "number" &&
![0, 90, 180, 270].includes(videoRotation)) {
throw new TypeError(`Invalid video rotation: ${videoRotation}. Has to be 0, 90, 180 or 270.`);
}
else if (Array.isArray(videoRotation) &&
(videoRotation.length !== 9 ||
videoRotation.some((value) => typeof value !== "number"))) {
throw new TypeError(`Invalid video transformation matrix: ${videoRotation.join()}`);
}
if (options.video.frameRate !== undefined &&
(!Number.isInteger(options.video.frameRate) ||
options.video.frameRate <= 0)) {
throw new TypeError(`Invalid video frame rate: ${options.video.frameRate}. Must be a positive integer.`);
}
}
if (options.audio) {
if (!SUPPORTED_AUDIO_CODECS.includes(options.audio.codec)) {
throw new TypeError(`Unsupported audio codec: ${options.audio.codec}`);
}
if (!Number.isInteger(options.audio.numberOfChannels) ||
options.audio.numberOfChannels <= 0) {
throw new TypeError(`Invalid number of audio channels: ${options.audio.numberOfChannels}. Must be a positive integer.`);
}
if (!Number.isInteger(options.audio.sampleRate) ||
options.audio.sampleRate <= 0) {
throw new TypeError(`Invalid audio sample rate: ${options.audio.sampleRate}. Must be a positive integer.`);
}
}
if (options.firstTimestampBehavior &&
!FIRST_TIMESTAMP_BEHAVIORS.includes(options.firstTimestampBehavior)) {
throw new TypeError(`Invalid first timestamp behavior: ${options.firstTimestampBehavior}`);
}
if (typeof options.fastStart === "object") {
if (options.video) {
if (options.fastStart.expectedVideoChunks === undefined) {
throw new TypeError(`'fastStart' is an object but is missing property 'expectedVideoChunks'.`);
}
else if (!Number.isInteger(options.fastStart.expectedVideoChunks) ||
options.fastStart.expectedVideoChunks < 0) {
throw new TypeError(`'expectedVideoChunks' must be a non-negative integer.`);
}
}
if (options.audio) {
if (options.fastStart.expectedAudioChunks === undefined) {
throw new TypeError(`'fastStart' is an object but is missing property 'expectedAudioChunks'.`);
}
else if (!Number.isInteger(options.fastStart.expectedAudioChunks) ||
options.fastStart.expectedAudioChunks < 0) {
throw new TypeError(`'expectedAudioChunks' must be a non-negative integer.`);
}
}
}
else if (![false, "in-memory", "fragmented"].includes(options.fastStart)) {
throw new TypeError(`'fastStart' option must be false, 'in-memory', 'fragmented' or an object.`);
}
}
#writeHeader() {
this.#writer.writeBox(ftyp({
holdsAvc: this.#options.video?.codec === "avc",
fragmented: this.#options.fastStart === "fragmented",
}));
this.#ftypSize = this.#writer.pos;
if (this.#options.fastStart === "in-memory") {
this.#mdat = mdat(false);
}
else if (this.#options.fastStart === "fragmented") {
}
else {
if (typeof this.#options.fastStart === "object") {
let moovSizeUpperBound = this.#computeMoovSizeUpperBound();
this.#writer.seek(this.#writer.pos + moovSizeUpperBound);
}
this.#mdat = mdat(true);
this.#writer.writeBox(this.#mdat);
}
this.#maybeFlushStreamingTargetWriter();
}
#computeMoovSizeUpperBound() {
if (typeof this.#options.fastStart !== "object")
return;
let upperBound = 0;
let sampleCounts = [
this.#options.fastStart.expectedVideoChunks,
this.#options.fastStart.expectedAudioChunks,
];
for (let n of sampleCounts) {
if (!n)
continue;
upperBound += (4 + 4) * Math.ceil((2 / 3) * n);
upperBound += 4 * n;
upperBound += (4 + 4 + 4) * Math.ceil((2 / 3) * n);
upperBound += 4 * n;
upperBound += 8 * n;
}
upperBound += 4096;
return upperBound;
}
#prepareTracks() {
if (this.#options.video) {
this.#videoTrack = {
id: 1,
info: {
type: "video",
codec: this.#options.video.codec,
width: this.#options.video.width,
height: this.#options.video.height,
rotation: this.#options.video.rotation ?? 0,
decoderConfig: null,
},
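// Default timescale 57600 is evenly divisible by common framerates (24, 25, 30, 48, 50, 60)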
timescale: this.#options.video.frameRate ?? 57600,
samples: [],
finalizedChunks: [],
currentChunk: null,
firstDecodeTimestamp: undefined,
lastDecodeTimestamp: -1,
timeToSampleTable: [],
compositionTimeOffsetTable: [],
lastTimescaleUnits: null,
lastSample: null,
compactlyCodedChunkTable: [],
};
}
if (this.#options.audio) {
this.#audioTrack = {
id: this.#options.video ? 2 : 1,
info: {
type: "audio",
codec: this.#options.audio.codec,
numberOfChannels: this.#options.audio.numberOfChannels,
sampleRate: this.#options.audio.sampleRate,
decoderConfig: null,
},
timescale: this.#options.audio.sampleRate,
samples: [],
finalizedChunks: [],
currentChunk: null,
firstDecodeTimestamp: undefined,
lastDecodeTimestamp: -1,
timeToSampleTable: [],
compositionTimeOffsetTable: [],
lastTimescaleUnits: null,
lastSample: null,
compactlyCodedChunkTable: [],
};
if (this.#options.audio.codec === "aac") {
let guessedCodecPrivate = this.#generateMpeg4AudioSpecificConfig(2, this.#options.audio.sampleRate, this.#options.audio.numberOfChannels);
this.#audioTrack.info.decoderConfig = {
codec: this.#options.audio.codec,
description: guessedCodecPrivate,
numberOfChannels: this.#options.audio.numberOfChannels,
sampleRate: this.#options.audio.sampleRate,
};
}
}
}
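// Builds an MPEG-4 AudioSpecificConfig: 5 bits audio object type (2 = AAC-LC),
// 4 bits sampling-frequency index, 4 bits channel configuration, zero-padded to bytes.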
#generateMpeg4AudioSpecificConfig(objectType, sampleRate, numberOfChannels) {
let frequencyIndices = [
96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 16000, 12000,
11025, 8000, 7350,
];
let frequencyIndex = frequencyIndices.indexOf(sampleRate);
let channelConfig = numberOfChannels;
let configBits = "";
configBits += objectType.toString(2).padStart(5, "0");
configBits += frequencyIndex.toString(2).padStart(4, "0");
if (frequencyIndex === 15)
configBits += sampleRate.toString(2).padStart(24, "0");
configBits += channelConfig.toString(2).padStart(4, "0");
let paddingLength = Math.ceil(configBits.length / 8) * 8;
configBits = configBits.padEnd(paddingLength, "0");
let configBytes = new Uint8Array(configBits.length / 8);
for (let i = 0; i < configBits.length; i += 8) {
configBytes[i / 8] = parseInt(configBits.slice(i, i + 8), 2);
}
return configBytes;
}
addVideoChunk(sample, meta, timestamp, compositionTimeOffset) {
if (!(sample instanceof EncodedVideoChunk)) {
throw new TypeError("addVideoChunk's first argument (sample) must be of type EncodedVideoChunk.");
}
if (meta && typeof meta !== "object") {
throw new TypeError("addVideoChunk's second argument (meta), when provided, must be an object.");
}
if (timestamp !== undefined &&
(!Number.isFinite(timestamp) || timestamp < 0)) {
throw new TypeError("addVideoChunk's third argument (timestamp), when provided, must be a non-negative real number.");
}
if (compositionTimeOffset !== undefined &&
!Number.isFinite(compositionTimeOffset)) {
throw new TypeError("addVideoChunk's fourth argument (compositionTimeOffset), when provided, must be a real number.");
}
let data = new Uint8Array(sample.byteLength);
sample.copyTo(data);
this.addVideoChunkRaw(data, sample.type, timestamp ?? sample.timestamp, sample.duration, meta, compositionTimeOffset);
}
addVideoChunkRaw(data, type, timestamp, duration, meta, compositionTimeOffset) {
if (!(data instanceof Uint8Array)) {
throw new TypeError("addVideoChunkRaw's first argument (data) must be an instance of Uint8Array.");
}
if (type !== "key" && type !== "delta") {
throw new TypeError("addVideoChunkRaw's second argument (type) must be either 'key' or 'delta'.");
}
if (!Number.isFinite(timestamp) || timestamp < 0) {
throw new TypeError("addVideoChunkRaw's third argument (timestamp) must be a non-negative real number.");
}
if (!Number.isFinite(duration) || duration < 0) {
throw new TypeError("addVideoChunkRaw's fourth argument (duration) must be a non-negative real number.");
}
if (meta && typeof meta !== "object") {
throw new TypeError("addVideoChunkRaw's fifth argument (meta), when provided, must be an object.");
}
if (compositionTimeOffset !== undefined &&
!Number.isFinite(compositionTimeOffset)) {
throw new TypeError("addVideoChunkRaw's sixth argument (compositionTimeOffset), when provided, must be a real number.");
}
this.#ensureNotFinalized();
if (!this.#options.video)
throw new Error("No video track declared.");
if (typeof this.#options.fastStart === "object" &&
this.#videoTrack.samples.length ===
this.#options.fastStart.expectedVideoChunks) {
throw new Error(`Cannot add more video chunks than specified in 'fastStart' (${this.#options.fastStart.expectedVideoChunks}).`);
}
let videoSample = this.#createSampleForTrack(this.#videoTrack, data, type, timestamp, duration, meta, compositionTimeOffset);
if (this.#options.fastStart === "fragmented" && this.#audioTrack) {
while (this.#audioSampleQueue.length > 0 &&
this.#audioSampleQueue[0].decodeTimestamp <= videoSample.decodeTimestamp) {
let audioSample = this.#audioSampleQueue.shift();
this.#addSampleToTrack(this.#audioTrack, audioSample);
}
if (videoSample.decodeTimestamp <= this.#audioTrack.lastDecodeTimestamp) {
this.#addSampleToTrack(this.#videoTrack, videoSample);
}
else {
this.#videoSampleQueue.push(videoSample);
}
}
else {
this.#addSampleToTrack(this.#videoTrack, videoSample);
}
}
addAudioChunk(sample, meta, timestamp) {
if (!(sample instanceof EncodedAudioChunk)) {
throw new TypeError("addAudioChunk's first argument (sample) must be of type EncodedAudioChunk.");
}
if (meta && typeof meta !== "object") {
throw new TypeError("addAudioChunk's second argument (meta), when provided, must be an object.");
}
if (timestamp !== undefined &&
(!Number.isFinite(timestamp) || timestamp < 0)) {
throw new TypeError("addAudioChunk's third argument (timestamp), when provided, must be a non-negative real number.");
}
let data = new Uint8Array(sample.byteLength);
sample.copyTo(data);
this.addAudioChunkRaw(data, sample.type, timestamp ?? sample.timestamp, sample.duration, meta);
}
addAudioChunkRaw(data, type, timestamp, duration, meta) {
if (!(data instanceof Uint8Array)) {
throw new TypeError("addAudioChunkRaw's first argument (data) must be an instance of Uint8Array.");
}
if (type !== "key" && type !== "delta") {
throw new TypeError("addAudioChunkRaw's second argument (type) must be either 'key' or 'delta'.");
}
if (!Number.isFinite(timestamp) || timestamp < 0) {
throw new TypeError("addAudioChunkRaw's third argument (timestamp) must be a non-negative real number.");
}
if (!Number.isFinite(duration) || duration < 0) {
throw new TypeError("addAudioChunkRaw's fourth argument (duration) must be a non-negative real number.");
}
if (meta && typeof meta !== "object") {
throw new TypeError("addAudioChunkRaw's fifth argument (meta), when provided, must be an object.");
}
this.#ensureNotFinalized();
if (!this.#options.audio)
throw new Error("No audio track declared.");
if (typeof this.#options.fastStart === "object" &&
this.#audioTrack.samples.length ===
this.#options.fastStart.expectedAudioChunks) {
throw new Error(`Cannot add more audio chunks than specified in 'fastStart' (${this.#options.fastStart.expectedAudioChunks}).`);
}
let audioSample = this.#createSampleForTrack(this.#audioTrack, data, type, timestamp, duration, meta);
if (this.#options.fastStart === "fragmented" && this.#videoTrack) {
while (this.#videoSampleQueue.length > 0 &&
this.#videoSampleQueue[0].decodeTimestamp <= audioSample.decodeTimestamp) {
let videoSample = this.#videoSampleQueue.shift();
this.#addSampleToTrack(this.#videoTrack, videoSample);
}
if (audioSample.decodeTimestamp <= this.#videoTrack.lastDecodeTimestamp) {
this.#addSampleToTrack(this.#audioTrack, audioSample);
}
else {
this.#audioSampleQueue.push(audioSample);
}
}
else {
this.#addSampleToTrack(this.#audioTrack, audioSample);
}
}
#createSampleForTrack(track, data, type, timestamp, duration, meta, compositionTimeOffset) {
let presentationTimestampInSeconds = timestamp / 1e6;
let decodeTimestampInSeconds = (timestamp - (compositionTimeOffset ?? 0)) / 1e6;
let durationInSeconds = duration / 1e6;
let adjusted = this.#validateTimestamp(presentationTimestampInSeconds, decodeTimestampInSeconds, track);
presentationTimestampInSeconds = adjusted.presentationTimestamp;
decodeTimestampInSeconds = adjusted.decodeTimestamp;
if (meta?.decoderConfig) {
if (track.info.decoderConfig === null) {
track.info.decoderConfig = meta.decoderConfig;
}
else {
Object.assign(track.info.decoderConfig, meta.decoderConfig);
}
}
let sample = {
presentationTimestamp: presentationTimestampInSeconds,
decodeTimestamp: decodeTimestampInSeconds,
duration: durationInSeconds,
data: data,
size: data.byteLength,
type: type,
timescaleUnitsToNextSample: intoTimescale(durationInSeconds, track.timescale),
};
return sample;
}
#addSampleToTrack(track, sample) {
if (this.#options.fastStart !== "fragmented") {
track.samples.push(sample);
}
const sampleCompositionTimeOffset = intoTimescale(sample.presentationTimestamp - sample.decodeTimestamp, track.timescale);
if (track.lastTimescaleUnits !== null) {
let timescaleUnits = intoTimescale(sample.decodeTimestamp, track.timescale, false);
let delta = Math.round(timescaleUnits - track.lastTimescaleUnits);
track.lastTimescaleUnits += delta;
track.lastSample.timescaleUnitsToNextSample = delta;
if (this.#options.fastStart !== "fragmented") {
let lastTableEntry = last(track.timeToSampleTable);
if (lastTableEntry.sampleCount === 1) {
lastTableEntry.sampleDelta = delta;
lastTableEntry.sampleCount++;
}
else if (lastTableEntry.sampleDelta === delta) {
lastTableEntry.sampleCount++;
}
else {
lastTableEntry.sampleCount--;
track.timeToSampleTable.push({
sampleCount: 2,
sampleDelta: delta,
});
}
const lastCompositionTimeOffsetTableEntry = last(track.compositionTimeOffsetTable);
if (lastCompositionTimeOffsetTableEntry.sampleCompositionTimeOffset ===
sampleCompositionTimeOffset) {
lastCompositionTimeOffsetTableEntry.sampleCount++;
}
else {
track.compositionTimeOffsetTable.push({
sampleCount: 1,
sampleCompositionTimeOffset: sampleCompositionTimeOffset,
});
}
}
}
else {
track.lastTimescaleUnits = 0;
if (this.#options.fastStart !== "fragmented") {
track.timeToSampleTable.push({
sampleCount: 1,
sampleDelta: intoTimescale(sample.duration, track.timescale),
});
track.compositionTimeOffsetTable.push({
sampleCount: 1,
sampleCompositionTimeOffset: sampleCompositionTimeOffset,
});
}
}
track.lastSample = sample;
let beginNewChunk = false;
if (!track.currentChunk) {
beginNewChunk = true;
}
else {
let currentChunkDuration = sample.presentationTimestamp - track.currentChunk.startTimestamp;
if (this.#options.fastStart === "fragmented") {
let mostImportantTrack = this.#videoTrack ?? this.#audioTrack;
if (track === mostImportantTrack &&
sample.type === "key" &&
currentChunkDuration >= 1.0) {
beginNewChunk = true;
this.#finalizeFragment();
}
}
else {
beginNewChunk = currentChunkDuration >= 0.5;
}
}
if (beginNewChunk) {
if (track.currentChunk) {
this.#finalizeCurrentChunk(track);
}
track.currentChunk = {
startTimestamp: sample.presentationTimestamp,
samples: [],
};
}
track.currentChunk.samples.push(sample);
}
#validateTimestamp(presentationTimestamp, decodeTimestamp, track) {
const strictTimestampBehavior = this.#options.firstTimestampBehavior === "strict";
const noLastDecodeTimestamp = track.lastDecodeTimestamp === -1;
const timestampNonZero = decodeTimestamp !== 0;
if (strictTimestampBehavior && noLastDecodeTimestamp && timestampNonZero) {
throw new Error(`The first chunk for your media track must have a timestamp of 0 (received DTS=${decodeTimestamp}). ` +
`Non-zero first timestamps are often caused by directly piping frames or audio data from a ` +
`MediaStreamTrack into the encoder. Their timestamps are typically relative to the age of the ` +
`document, which is probably not what you want.\n\nIf you want to offset all timestamps of a track such ` +
`that the first one is zero, set firstTimestampBehavior: 'offset' in the options.\n`);
}
else if (this.#options.firstTimestampBehavior === "offset" ||
this.#options.firstTimestampBehavior === "cross-track-offset") {
if (track.firstDecodeTimestamp === undefined) {
track.firstDecodeTimestamp = decodeTimestamp;
}
let baseDecodeTimestamp;
if (this.#options.firstTimestampBehavior === "offset") {
baseDecodeTimestamp = track.firstDecodeTimestamp;
}
else {
baseDecodeTimestamp = Math.min(this.#videoTrack?.firstDecodeTimestamp ?? Infinity, this.#audioTrack?.firstDecodeTimestamp ?? Infinity);
}
decodeTimestamp -= baseDecodeTimestamp;
presentationTimestamp -= baseDecodeTimestamp;
}
if (decodeTimestamp < track.lastDecodeTimestamp) {
throw new Error(`Timestamps must be monotonically increasing ` +
`(DTS went from ${track.lastDecodeTimestamp * 1e6} to ${decodeTimestamp * 1e6}).`);
}
track.lastDecodeTimestamp = decodeTimestamp;
return {presentationTimestamp, decodeTimestamp};
}
#finalizeCurrentChunk(track) {
if (this.#options.fastStart === "fragmented") {
throw new Error("Can't finalize individual chunks if 'fastStart' is set to 'fragmented'.");
}
if (!track.currentChunk)
return;
track.finalizedChunks.push(track.currentChunk);
this.#finalizedChunks.push(track.currentChunk);
if (track.compactlyCodedChunkTable.length === 0 ||
last(track.compactlyCodedChunkTable).samplesPerChunk !==
track.currentChunk.samples.length) {
track.compactlyCodedChunkTable.push({
firstChunk: track.finalizedChunks.length,
samplesPerChunk: track.currentChunk.samples.length,
});
}
if (this.#options.fastStart === "in-memory") {
track.currentChunk.offset = 0;
return;
}
track.currentChunk.offset = this.#writer.pos;
for (let sample of track.currentChunk.samples) {
this.#writer.write(sample.data);
sample.data = null;
}
this.#maybeFlushStreamingTargetWriter();
}
#finalizeFragment(flushStreamingWriter = true) {
if (this.#options.fastStart !== "fragmented") {
throw new Error("Can't finalize a fragment unless 'fastStart' is set to 'fragmented'.");
}
let tracks = [this.#videoTrack, this.#audioTrack].filter((track) => track && track.currentChunk);
if (tracks.length === 0)
return;
let fragmentNumber = this.#nextFragmentNumber++;
if (fragmentNumber === 1) {
let movieBox = moov(tracks, this.#creationTime, true);
this.#writer.writeBox(movieBox);
}
let moofOffset = this.#writer.pos;
let moofBox = moof(fragmentNumber, tracks);
this.#writer.writeBox(moofBox);
{
let mdatBox = mdat(false);
let totalTrackSampleSize = 0;
for (let track of tracks) {
for (let sample of track.currentChunk.samples) {
totalTrackSampleSize += sample.size;
}
}
let mdatSize = this.#writer.measureBox(mdatBox) + totalTrackSampleSize;
if (mdatSize >= 2 ** 32) {
mdatBox.largeSize = true;
mdatSize = this.#writer.measureBox(mdatBox) + totalTrackSampleSize;
}
mdatBox.size = mdatSize;
this.#writer.writeBox(mdatBox);
}
for (let track of tracks) {
track.currentChunk.offset = this.#writer.pos;
track.currentChunk.moofOffset = moofOffset;
for (let sample of track.currentChunk.samples) {
this.#writer.write(sample.data);
sample.data = null;
}
}
let endPos = this.#writer.pos;
this.#writer.seek(this.#writer.offsets.get(moofBox));
let newMoofBox = moof(fragmentNumber, tracks);
this.#writer.writeBox(newMoofBox);
this.#writer.seek(endPos);
for (let track of tracks) {
track.finalizedChunks.push(track.currentChunk);
this.#finalizedChunks.push(track.currentChunk);
track.currentChunk = null;
}
if (flushStreamingWriter) {
this.#maybeFlushStreamingTargetWriter();
}
}
#maybeFlushStreamingTargetWriter() {
if (this.#writer instanceof StreamTargetWriter) {
this.#writer.flush();
}
}
#ensureNotFinalized() {
if (this.#finalized) {
throw new Error("Cannot add new video or audio chunks after the file has been finalized.");
}
}
finalize() {
if (this.#finalized) {
throw new Error("Cannot finalize a muxer more than once.");
}
if (this.#options.fastStart === "fragmented") {
for (let videoSample of this.#videoSampleQueue)
this.#addSampleToTrack(this.#videoTrack, videoSample);
for (let audioSample of this.#audioSampleQueue)
this.#addSampleToTrack(this.#audioTrack, audioSample);
this.#finalizeFragment(false);
}
else {
if (this.#videoTrack)
this.#finalizeCurrentChunk(this.#videoTrack);
if (this.#audioTrack)
this.#finalizeCurrentChunk(this.#audioTrack);
}
let tracks = [this.#videoTrack, this.#audioTrack].filter(Boolean);
if (this.#options.fastStart === "in-memory") {
let mdatSize;
for (let i = 0; i < 2; i++) {
let movieBox = moov(tracks, this.#creationTime);
let movieBoxSize = this.#writer.measureBox(movieBox);
mdatSize = this.#writer.measureBox(this.#mdat);
let currentChunkPos = this.#writer.pos + movieBoxSize + mdatSize;
for (let chunk of this.#finalizedChunks) {
chunk.offset = currentChunkPos;
for (let {data} of chunk.samples) {
currentChunkPos += data.byteLength;
mdatSize += data.byteLength;
}
}
if (currentChunkPos < 2 ** 32)
break;
if (mdatSize >= 2 ** 32)
this.#mdat.largeSize = true;
}
let movieBox = moov(tracks, this.#creationTime);
this.#writer.writeBox(movieBox);
this.#mdat.size = mdatSize;
this.#writer.writeBox(this.#mdat);
for (let chunk of this.#finalizedChunks) {
for (let sample of chunk.samples) {
this.#writer.write(sample.data);
sample.data = null;
}
}
}
else if (this.#options.fastStart === "fragmented") {
let startPos = this.#writer.pos;
let mfraBox = mfra(tracks);
this.#writer.writeBox(mfraBox);
let mfraBoxSize = this.#writer.pos - startPos;
this.#writer.seek(this.#writer.pos - 4);
this.#writer.writeU32(mfraBoxSize);
}
else {
let mdatPos = this.#writer.offsets.get(this.#mdat);
let mdatSize = this.#writer.pos - mdatPos;
this.#mdat.size = mdatSize;
this.#mdat.largeSize = mdatSize >= 2 ** 32;
this.#writer.patchBox(this.#mdat);
let movieBox = moov(tracks, this.#creationTime);
if (typeof this.#options.fastStart === "object") {
this.#writer.seek(this.#ftypSize);
this.#writer.writeBox(movieBox);
let remainingBytes = mdatPos - this.#writer.pos;
this.#writer.writeBox(free(remainingBytes));
}
else {
this.#writer.writeBox(movieBox);
}
}
this.#maybeFlushStreamingTargetWriter();
this.#writer.finalize();
this.#finalized = true;
}
}
</script>
</body>
</html>