Last active
May 31, 2024 23:03
-
-
Save lifthrasiir/1c7f9c5a421ad39c1af19a9c4f060743 to your computer and use it in GitHub Desktop.
Brotli bootstrapping based on WOFF2 font
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Brotli bootstrapping based on WOFF2 font | |
// Kang Seonghoon, 2024-03-01, public domain | |
// Based on my earlier work for JS1024: https://js1024.fun/demos/2022/18 | |
// Only vaguely tested, YMMV | |
import { argv, stderr } from 'node:process'; | |
import fs from 'node:fs'; | |
import zlib from 'node:zlib'; | |
const generateBrotliParams = function* () { | |
for (const mode of [ | |
zlib.constants.BROTLI_MODE_GENERIC, | |
zlib.constants.BROTLI_MODE_TEXT, | |
zlib.constants.BROTLI_MODE_FONT | |
]) { | |
// further options don't seem to help that well | |
yield { | |
[zlib.constants.BROTLI_PARAM_QUALITY]: zlib.constants.BROTLI_MAX_QUALITY, | |
[zlib.constants.BROTLI_PARAM_MODE]: mode, | |
[zlib.constants.BROTLI_PARAM_LGWIN]: 16, // force the shortest WBITS encoding possible | |
}; | |
} | |
}; | |
const compressBrotliWithEmptyMetadata = (input, params) => { | |
const c = zlib.createBrotliCompress({ params }); | |
c.flush(zlib.constants.BROTLI_OPERATION_EMIT_METADATA); | |
c.write(input); | |
c.end(); | |
return new Promise((resolve, reject) => { | |
const chunks = []; | |
c.on('data', chunk => chunks.push(chunk)); | |
c.on('end', () => resolve(Buffer.concat(chunks))); | |
c.on('error', e => reject(e)); | |
}); | |
}; | |
const fillBrotliMetadata = (b, metadata) => { | |
if (metadata.length === 0) return b; | |
const l = metadata.length - 1; | |
if (l >= 256) throw 'metadata too long'; | |
// the first metablock should have been empty, | |
// so it can only depend on the stream header, i.e. the window size. | |
// | |
// [keys] W: WBITS, L: ISLAST, N: MNIBBLES, S: MSKIPBYTES, s: MSKIPLEN-1 | |
if (b[0] == 0b0001100) { | |
// SS NNLW sSS NNLW sssssss | |
// .0001100 -> y0101100 .yyyyyyy | |
return [(l & 1) << 7 | 0b0101100, l >> 1, ...metadata, ...b.slice(1)]; | |
} else if ((b[0] & 0b11110001) == 0b01100001 && b[1] == 0b00) { | |
// NNLWWWW SS NNLWWWW ssssssSS ss | |
// 0110xxx1 ......00 -> 0110xxx1 yyyyyy01 ......yy | |
return [b[0], (l & 63) << 2 | 0b01, l >> 6, ...metadata, ...b.slice(2)]; | |
} else if ((b[0] & 0b10001111) == 0b00000001 && b[1] == 0b00011) { | |
// LWWWWWWW SS NN LWWWWWWW sssSS NN sssss | |
// 0xxx0001 ...00011 -> 0xxx0001 yyy01011 ...yyyyy | |
return [b[0], (l & 7) << 5 | 0b01011, l >> 3, ...metadata, ...b.slice(2)]; | |
} else { | |
throw 'unexpected brotli stream'; | |
} | |
}; | |
const problematicInBase128 = n => { | |
return n % 128 === 62 /*>*/; | |
}; | |
const makeFont = async (input, options = {}) => { | |
const ascii = s => Array.from(s, c => c.charCodeAt(0)); | |
const two = v => [v >>> 8 & 255, v & 255]; | |
const four = v => [v >>> 24 & 255, v >>> 16 & 255, v >>> 8 & 255, v & 255]; | |
const base128 = v => { | |
const b = []; | |
while (v > 127) b.unshift(128 | v & 127), v >>>= 7; | |
b.unshift(128 | v); | |
b[b.length - 1] &= 127; | |
return b; | |
}; | |
const repeat = (n, f) => { | |
const r = []; | |
for (let i = 0; i < n; ++i) r.push(...f(i)); | |
return r; | |
}; | |
// the first glyph has a known advance width and used to scale subsequent glyphs. | |
// note that we can't really use excess glyphs, which are NOT actually mapped. | |
// | |
// normally this is not required, but Firefox seems to do a less accurate calculation | |
// during the metric calculation, so all advance widths are effectively scaled by (1 +/- epsilon). | |
// the scale factor seems to be uniform for almost all cases except for small glyphs, | |
// so we need to use a large enough power-of-two advance glyph for this purpose. | |
// the "small glyphs" depend on devices; 256 seems to be good enough for mobiles. | |
const refAdvance = Number(options['reference-advance'] ?? 256); | |
if (typeof input === 'string') input = ascii(input); | |
input = [...refAdvance ? two(refAdvance) : [], ...input]; | |
if (!input.every(c => c < 0x80)) throw 'ASCII only'; | |
if (input.length % 2 !== 0) input.push(32); | |
input.push(...two(0)); // sentinel for the bootstrap loop | |
// the font file, when reinterpreted as HTML, should look like this: | |
// | |
// +--------------------------------+--- file or table sizes | |
// +-|-+ +--------+ +-|-+ +----------- | |
// +---------|---|---|--------|--+------------|---|-+---------|----------- | |
// | header | | |><body | | table dir. | | | brotli | onload=".. | |
// +---------|---|---|--------|--+------------|---|-+---------|----------- | |
// +---+ +---|----+ +---+ +----|------ | |
// unused header fields brotli metadata | |
// | |
// we want that `<body ` and ` onload="` are contained in the same tag. | |
// in order to do this, we need to ensure that: | |
// | |
// 1. size fields in the header should not prevent `<body` to start a new tag. | |
// | |
// this is largely done by putting an additional `>` before `<body`, | |
// so for example a size of 0x3c41 (`\x00\x00<A`) can start a new tag, | |
// but that tag will be closed before `<body` appears. | |
// | |
// it is possible that the attribute value may have started via `<A B="`, | |
// or a comment started via `<!--`, but both require at least 4 bytes of | |
// size bytes to match the pattern, and some sizes should exceed at least | |
// 500 MB, which should be impossible in our usages. | |
// | |
// 2. size fields in the table directory should not close the prior `<body` tag, | |
// or start a new attribute value in that tag. | |
// | |
// this is only possible via the lowermost bits of table sizes (both | |
// original or transformed). they are encoded in base 128, where all but | |
// last bytes have an MSB set, so only the last byte, i.e. `size % 128`, | |
// can ever become problematic, i.e. when it encodes `>` (0x3e). | |
// | |
// the longest run of problematic bytes in this case is 2, where both | |
// the original length and transformed length contain the problematic | |
// last byte in the same table, and the transformed length is less than 128. | |
// then a sequence like `="` or `='` may occur. but that's a very strong | |
// requirement, so we can easily show that we never generate such tables. | |
// | |
// the following code covers the second requirement by checking for potentially | |
// problematic table sizes, and increase `excessGlyphs` to avoid them. | |
// such sizes are uncommon enough (about 3%) so this won't loop forever. | |
let excessGlyphs = 1; // should be at least 1 | |
let numGlyphs = input.length / 2 + excessGlyphs; | |
while ( | |
problematicInBase128(2 + 2 * numGlyphs) || // loca size | |
problematicInBase128(40 + numGlyphs * 2 + ((numGlyphs + 31) >>> 5) * 4) // glyf size | |
) { | |
++excessGlyphs; | |
++numGlyphs; | |
} | |
const startCodeStr = String(options['start-code'] ?? '3e4'); | |
const startCode = +startCodeStr; | |
const endCode = startCode + input.length / 2 - 1; | |
const maxWidth = 0x7f00; // this is signed and can't be 0xffff | |
// https://docs.microsoft.com/en-us/typography/opentype/spec/head | |
const head = [ | |
...two(1), // majorVersion | |
...two(0), // minorVersion | |
...four(0), // fontRevision | |
...four(0), // checksumAdjustment (WOFF2 recomputes this, so we don't have to be correct) | |
...four(0x5f0f3cf5), // magicNumber | |
...two(1<<0 | 1<<1 | 1<<3), // flags | |
...two(16), // unitsPerEm (the minimum possible) | |
0,0,0,0,0,0,0,0, // created | |
0,0,0,0,0,0,0,0, // modified | |
...two(0), ...two(0), // xMin, yMin | |
...two(maxWidth), ...two(0), // xMax, yMax (excess xMax is okay, as long as it's positive) | |
...two(0), // macStyle | |
...two(0), // lowestRectPPEM (0 is probably okay) | |
...two(0), // fontDirectionHint (deprecated) | |
...two(0), // indexToLocFormat | |
...two(0), // glyphDataFormat | |
]; | |
// https://docs.microsoft.com/en-us/typography/opentype/spec/hhea | |
const hhea = [ | |
...two(1), // majorVersion | |
...two(0), // minorVersion | |
...two(0), // ascender | |
...two(0), // descender | |
...two(0), // lineGap | |
...two(maxWidth), // advanceWidthmax | |
...two(0), // minLeftSideBearing | |
...two(0), // minRightSideBearing | |
...two(maxWidth), // xMaxExtent | |
...two(1), // caretSlopeRise | |
...two(0), // caretSlopeRun | |
...two(0), // caretOffset | |
...two(0), ...two(0), ...two(0), ...two(0), // reserved0..3 | |
...two(0), // metricDataFormat | |
...two(numGlyphs), // numberOfHMetrics | |
]; | |
// https://docs.microsoft.com/en-us/typography/opentype/spec/maxp | |
const maxp = [ | |
...four(0x00010000), // version | |
...two(numGlyphs), // numGlyphs | |
...two(1), // maxPoints (at least 1) | |
...two(1), // maxContours (at least 1) | |
...two(0), // maxCompositePoints | |
...two(0), // maxCompositeContours | |
...two(2), // maxZones | |
...two(0), // maxTwilightPoints | |
...two(0), // maxStorage | |
...two(0), // maxFunctionDefs | |
...two(0), // maxInstructionDefs | |
...two(0), // maxStackElements | |
...two(0), // maxSizeOfInstructions | |
...two(0), // maxComponentElements | |
...two(0), // maxComponentDepth | |
]; | |
// https://docs.microsoft.com/en-us/typography/opentype/spec/os2 | |
const os2 = [ | |
...two(0), // version | |
...two(0), // xAvgCharWidth | |
...two(500), // usWeightClass (anything from 100 to 900 will work) | |
...two(5), // usWidthClass (normal) | |
...two(0), // fsType | |
...two(0), // ySubscriptXSize | |
...two(0), // ySubscriptYSize | |
...two(0), // ySubscriptXOffset | |
...two(0), // ySubscriptYOffset | |
...two(0), // ySuperscriptXSize | |
...two(0), // ySuperscriptYSize | |
...two(0), // ySuperscriptXOffset | |
...two(0), // ySuperscriptYOffset | |
...two(0), // yStrikeoutSize | |
...two(0), // yStrikeoutPosition | |
...two(0), // sFamilyClass | |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // panose (matches any spec) | |
...four(0), ...four(0), ...four(0), ...four(0), // ulUnicodeRange1..4 | |
...four(0x00000000), // achVendorId | |
...two(0x0040), // fsSelection (regular) | |
...two(0), // usFirstCharIndex (OTS does fix this so leave it as 0) | |
...two(0), // usLastCharIndex (ditto) | |
...two(0), // sTypoAscender | |
...two(0), // sTypoDescender | |
...two(0), // sTypoLineGap | |
...two(0), // usWinAscent | |
...two(0), // usWinDescent | |
]; | |
// https://docs.microsoft.com/en-us/typography/opentype/spec/glyf | |
const glyfBboxBitmapSize = ((numGlyphs + 31) >>> 5) * 4; | |
const glyf = [ | |
...two(0), // reserved | |
...two(0), // optionFlags | |
...two(numGlyphs), // numGlyphs | |
...two(0), // indexFormat (= head.indexToLocFormat) | |
...four(2 * numGlyphs), // nContourStreamSize | |
...four(1), // nPointsStreamSize | |
...four(1), // flagStreamSize | |
...four(2), // glyphStreamSize | |
...four(0), // compositeStreamSize | |
...four(glyfBboxBitmapSize), // bboxStreamSize | |
...four(0), // instructionStreamSize | |
// nContourStream = [1, 0...] | |
...repeat(numGlyphs, i => two(i === 0 ? 1 : 0)), | |
// nPointsStream = [1] | |
1, | |
// flagStream = [0] | |
// flag 0: x = 0, y = 0 - (8 additional bits) | |
0, | |
// glyphStream | |
0b00000000, // additional bits for flag 0 | |
0, // instruction length for the glyph | |
// bboxBitmap = [0...] | |
...repeat(glyfBboxBitmapSize, _ => [0]), | |
]; | |
// https://docs.microsoft.com/en-us/typography/opentype/spec/name | |
const name = [ | |
...two(0), // version | |
...two(0), // count | |
...two(6), // storageOffset (can't be zero, OTS will complain) | |
]; | |
// https://docs.microsoft.com/en-us/typography/opentype/spec/post | |
const post = [ | |
...four(0x00030000), // version | |
...four(0), // italicAngle | |
...two(0), // underlinePosition | |
...two(0), // underlineThickness | |
...four(0), // isFixedPitch | |
...four(0), ...four(0), // min/maxMemType42 (suggestive, can be left as 0) | |
...four(0), ...four(0), // min/maxMemType1 (ditto) | |
]; | |
// https://docs.microsoft.com/en-us/typography/opentype/spec/cmap | |
const cmap = [ | |
...two(0), // version | |
...two(1), // numTables | |
...two(3), // platformID (Microsoft) | |
...two(10), // encodingID (Unicode full repertoire) | |
...four(12), // subtableOffset | |
// cmap format 12 subtable: | |
...two(12), // format | |
...two(0), // reserved | |
...four(28), // length | |
...four(0), // language (default) | |
...four(1), // numGroups | |
...four(startCode - excessGlyphs), // startCharCode | |
...four(startCode - excessGlyphs + numGlyphs - 1), // endCharCode | |
...four(0), // startGlyphId | |
]; | |
// https://docs.microsoft.com/en-us/typography/opentype/spec/hmtx | |
const hmtx = [ | |
0x03, // flags (both lsb and leftSideBearing inferred) | |
...repeat(excessGlyphs, i => two(0)), | |
...input, // advanceWidth[numberOfHMetrics] | |
]; | |
let tables = [ | |
{ tag: 0, transform: 0, data: cmap }, | |
{ tag: 1, transform: 0, data: head }, | |
{ tag: 2, transform: 0, data: hhea }, | |
{ tag: 4, transform: 0, data: maxp }, | |
{ tag: 5, transform: 0, data: name }, | |
{ tag: 6, transform: 0, data: os2 }, | |
{ tag: 7, transform: 0, data: post }, | |
// unlike other tables, the glyf table can be reconstructed in multiple ways | |
// and WOFF2 specifically ignores the original size of the glyf table. | |
{ tag: 10, transform: 0, data: glyf, originalSize: 0 }, | |
{ tag: 3, transform: 1, data: hmtx, originalSize: 4 * numGlyphs }, | |
]; | |
// basic tabu search, because full search is too slow and I didn't bother | |
// to improve that beyond any simple algorithm, good enough nevertheless | |
const shuffleSeen = new Set(); | |
let bestShuffleSize = Infinity; | |
let bestShuffle = tables.map((_, i) => i); | |
let shuffle = tables.map((_, i) => i); | |
let moreJumps = Number(options.jumps ?? Infinity); | |
const numPasses = Number(options.passes ?? 60); | |
for (let pass = 0; pass < numPasses; ++pass) { | |
let bestSwap = null; | |
let saved; | |
let moreNeighbor = false; | |
for (let i = 0; i < shuffle.length; shuffle[i++] = saved) { | |
saved = shuffle[i]; | |
next: for (let j = 0; j < i; shuffle[j++] = shuffle[i]) { | |
shuffle[i] = shuffle[j]; | |
shuffle[j] = saved; | |
const shuffleKey = shuffle.toString(); | |
if (shuffleSeen.has(shuffleKey)) continue; | |
shuffleSeen.add(shuffleKey); | |
moreNeighbor = true; | |
let seen_glyf = false, seen_hmtx = false, seen_hhea = false; | |
let uncompressed = []; | |
for (const k of shuffle) { | |
// for some reason, hhea and glyf should precede hmtx | |
// in addition to glyf preceding loca... | |
if (tables[k].tag === 2) { // hhea | |
if (seen_hmtx) continue next; | |
seen_hhea = true; | |
} | |
if (tables[k].tag === 3) { // hmtx | |
if (!seen_glyf || !seen_hhea) continue next; | |
seen_hmtx = true; | |
} | |
if (tables[k].tag === 10) { // glyf | |
if (seen_hmtx) continue next; | |
seen_glyf = true; | |
} | |
uncompressed.push(...tables[k].data); | |
} | |
uncompressed = new Uint8Array(uncompressed); | |
for (const params of generateBrotliParams()) { | |
const compressed = zlib.brotliCompressSync(uncompressed, params); | |
if (bestShuffleSize > compressed.length) { | |
bestShuffleSize = compressed.length; | |
bestSwap = [i, j]; | |
} | |
} | |
} | |
} | |
if (bestSwap) { | |
stderr.write('!'); | |
const [i, j] = bestSwap; | |
const temp = shuffle[i]; | |
shuffle[i] = shuffle[j]; | |
shuffle[j] = temp; | |
bestShuffle = [...shuffle]; | |
} else { | |
if (moreNeighbor) { | |
stderr.write('.'); | |
} else if (--moreJumps > 0) { | |
stderr.write('?'); | |
} else { | |
break; | |
} | |
for (let k = 0; k < 2; ++k) { | |
const i = Math.random() * (shuffle.length - 1) + 1 | 0; | |
const j = Math.random() * i | 0; | |
const temp = shuffle[i]; | |
shuffle[i] = shuffle[j]; | |
shuffle[j] = temp; | |
} | |
} | |
} | |
const shuffled = []; | |
for (const k of bestShuffle) { | |
shuffled.push(tables[k]); | |
if (tables[k].tag === 10) { // glyf follows loca | |
shuffled.push({ tag: 11, transform: 0, originalSize: 2 + 2 * numGlyphs }); | |
} | |
} | |
tables = shuffled; | |
const out = [ | |
...ascii('wOF2'), // signature | |
...four(0x00010000), // flavor (sfnt version) | |
...four(0), // total size (patched later) | |
...two(tables.length), // # tables | |
...two(0), // reserved | |
...four(0), // total uncompressed size (unused but should be "reasonable", patched later) | |
...four(0), // total compressed size (patched later) | |
...two(0), // font major version (unused) | |
...two(0), // font minor version (unused) | |
...four(0), // metadata block offset | |
// encoded lengths may accidentally match `</?[a-zA-Z]`, so we have to close it | |
...ascii('><bo'), // metadata compressed size (current impl doesn't check them if offset=0) | |
...ascii('dy '), // metadata uncompressed size (ditto) | |
...four(0), // private data block offset (may be patched later) | |
...four(0), // private data block size (may be patched later) | |
]; | |
for (const { tag, transform, data, originalSize } of tables) { | |
out.push(tag | transform << 6); | |
if (typeof originalSize === 'number') out.push(...base128(originalSize)); | |
out.push(...base128(data ? data.length : 0)); | |
} | |
const headerSize = out.length; | |
// this goes into the first (empty) metablock in the Brotli stream, | |
// which gets placed much earlier so can be safely placed without much interference. | |
const write = (options.func === 'document.write'); | |
const bootstrapInMetadata = ' onload="' + | |
"C=$.getContext`2d`;" + | |
"new FontFace('x','url(#')" + | |
".load(K=String.fromCharCode)" + | |
".then(F=>{" + | |
(write ? "_=document;_" : "document") + ".fonts.add(F);" + | |
"C.font='1pc x';" + | |
(refAdvance ? | |
"for(" + | |
"A=I='';" + | |
`W=C.measureText(K(${startCodeStr}+I)).width;` + | |
"I++?" + | |
"A+=K(V>>8,V&255):" + | |
`F=W/${refAdvance}` + | |
")" + | |
"V=W/F+.1;" | |
: | |
"for(" + | |
"A=I='';" + | |
`W=C.measureText(K(${startCodeStr}+I++)).width;` + | |
"A+=K(W>>8,W&255)" + | |
")" + | |
";" | |
) + | |
(write ? "_.write" : "eval") + "(A)" + | |
"})" + | |
'"><canvas id=$>'; | |
// this goes into the private data following the entire font data. | |
const bootstrapInPrivate = ``; | |
let uncompressed = []; | |
for (const { data } of tables) { | |
if (data) uncompressed.push(...data); | |
} | |
uncompressed = new Uint8Array(uncompressed); | |
let bestCompressed = null; | |
let bestCompressedSize = Infinity; | |
for (const params of generateBrotliParams()) { | |
let compressed; | |
if (bootstrapInMetadata.length > 0) { | |
compressed = await compressBrotliWithEmptyMetadata(uncompressed, params); | |
} else { | |
compressed = zlib.brotliCompressSync(uncompressed, params); | |
} | |
if (bestCompressedSize > compressed.length) { | |
bestCompressedSize = compressed.length; | |
bestCompressed = compressed; | |
} | |
} | |
if (bootstrapInMetadata.length > 0) { | |
bestCompressed = fillBrotliMetadata(bestCompressed, ascii(bootstrapInMetadata)); | |
} | |
out.push(...bestCompressed); | |
while (out.length % 4 !== 0) out.push(0); | |
const fontSize = out.length; | |
out.push(...ascii(bootstrapInPrivate)); | |
const totalSize = out.length; | |
out.splice(8, 4, ...four(totalSize)); // total size | |
out.splice(16, 4, ...four(totalSize + 1)); // total uncompressed size (faked) | |
out.splice(20, 4, ...four(fontSize - headerSize)); // total compressed size | |
if (bootstrapInPrivate.length > 0) { | |
out.splice(40, 4, ...four(fontSize)); // private data block offset | |
out.splice(44, 4, ...four(bootstrapInPrivate.length)); // private data block size | |
} | |
console.log(out.length); | |
return new Uint8Array(out); | |
}; | |
const args = argv.slice(2); | |
const options = {}; | |
while (args.length > 0 && args[0].startsWith('--')) { | |
const m = args.shift().match(/^--([^=]*)(=(.*))?$/); | |
options[m[1]] = m[2] ? m[3] : true; | |
} | |
if (args.length < 2) { | |
console.log(`\ | |
Usage: ${argv[0]} ${argv[1]} {options} infile outfile | |
--passes=# Relative amount of search done. | |
--jumps=# Relative amount of pertubartion when stuck. | |
--func=document.write Assumes \`infile\` is an HTML file. | |
--func=eval Assumes \`infile\` is a JavaScript file. | |
--start-code=### First code point to be used, as a JS token. | |
--reference-advance=# If set to zero, sacrifice some Firefox compat. | |
`); | |
} else { | |
const path = args[0]; | |
const input = [...fs.readFileSync(path)]; | |
options.func ??= path.match(/\.html?$/i) ? 'document.write' : 'eval'; | |
const out = await makeFont(input, options); | |
fs.writeFileSync(args[1], out); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment