Performance comparison between Node Buffer and ArrayBuffer when parsing tar files
┌─────────┬──────────────────────┬──────────────────────┬─────────────────────┬────────────────────────────┬───────────────────────────┬─────────┐
│ (index) │ Task name            │ Latency average (ns) │ Latency median (ns) │ Throughput average (ops/s) │ Throughput median (ops/s) │ Samples │
├─────────┼──────────────────────┼──────────────────────┼─────────────────────┼────────────────────────────┼───────────────────────────┼─────────┤
│ 0       │ 'buffer'             │ '53789.86 ± 0.14%'   │ '52708.03'          │ '18746 ± 0.06%'            │ '18972'                   │ 37182   │
│ 1       │ 'array buffer'       │ '39104.97 ± 0.74%'   │ '36292.02'          │ '26764 ± 0.08%'            │ '27554'                   │ 51145   │
│ 2       │ 'array buffer extra' │ '37756.63 ± 0.25%'   │ '36250.00'          │ '27002 ± 0.07%'            │ '27586'                   │ 52971   │
└─────────┴──────────────────────┴──────────────────────┴─────────────────────┴────────────────────────────┴───────────────────────────┴─────────┘
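Read together, the two ArrayBuffer-based parsers average roughly 37–39 µs per tarball versus about 54 µs for the Buffer-based one, i.e. around a 1.4× improvement in throughput (≈27k vs ≈19k ops/s).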
import { readFileSync } from 'fs'
import { Bench } from 'tinybench'
import zlib from 'zlib'
import util from 'util'

const gunzip = util.promisify(zlib.gunzip)

const tgzNodeBuffer = readFileSync('./@babel__core-7.26.0.tgz')
const buffer = await gunzip(tgzNodeBuffer)
// The decompressed output is large enough that zlib allocates it outside the
// Buffer pool, so its underlying ArrayBuffer starts at byteOffset 0 and can be
// used directly.
const arrayBuffer = buffer.buffer

const bench = new Bench({
  time: 2000,
  warmupTime: 500,
})
bench
  .add('buffer', async () => {
    // const buffer = await gunzip(tgzNodeBuffer)
    unpackWithBuffer(buffer)
  })
  .add('array buffer', async () => {
    // const arrayBuffer = (await gunzip(tgzNodeBuffer)).buffer
    unpackWithArrayBuffer(arrayBuffer)
  })
  .add('array buffer extra', async () => {
    // const arrayBuffer = (await gunzip(tgzNodeBuffer)).buffer
    unpackWithArrayBufferExtra(arrayBuffer)
  })

await bench.run()

console.table(bench.table())
function unpackWithBuffer(content) {
  /** @type {string[]} */
  const fileNames = []
  let offset = 0
  while (offset < content.length) {
    // Get file type from header (from offset 156, 1 byte)
    const type = content.subarray(offset + 156, offset + 157).toString()
    // Skip empty blocks at end
    if (type === '\0') break
    // Only handle files (0). Packed packages often only contain files and no directories.
    // They may contain PAX headers (x) and global PAX headers (g), but we don't need to handle those.
    if (type === '0') {
      // Get file name from header (from offset 0, 100 bytes)
      const name = content
        .subarray(offset, offset + 100)
        .toString()
        .split('\0', 1)[0]
      fileNames.push(name)
    }
    // Get file size from header (from offset 124, 12 bytes, octal)
    const size = parseInt(
      content.subarray(offset + 124, offset + 136).toString(),
      8
    )
    // Skip header and file content (padded to 512 bytes)
    offset += 512 + Math.ceil(size / 512) * 512
  }
  return fileNames
}
export function unpackWithArrayBuffer(tarball) {
  const decoder = new TextDecoder()
  /** @type {string[]} */
  const fileNames = []
  let offset = 0
  while (offset < tarball.byteLength) {
    // Get file type from header (from offset 156, 1 byte)
    const type = read(tarball, decoder, offset + 156, 1)
    // Skip empty blocks at end
    if (type === '\0') break
    // Get file size from header (from offset 124, 12 bytes, octal)
    const size = parseInt(read(tarball, decoder, offset + 124, 12), 8)
    // Only handle files (0). Packed packages often only contain files and no directories.
    // They may contain PAX headers (x) and global PAX headers (g), but we don't need to handle those.
    if (type === '0') {
      // Get file name from header (from offset 0, 100 bytes)
      const name = read(tarball, decoder, offset, 100).split('\0', 1)[0]
      fileNames.push(name)
    }
    // Skip header and file content (padded to 512 bytes)
    offset += 512 + Math.ceil(size / 512) * 512
  }
  return fileNames
}
export function unpackWithArrayBufferExtra(tarball) {
  const decoder = new TextDecoder()
  /** @type {{ name: string, data: Uint8Array }[]} */
  const files = []
  let offset = 0
  while (offset < tarball.byteLength) {
    // Get file type from header (from offset 156, 1 byte)
    const type = read(tarball, decoder, offset + 156, 1)
    // Skip empty blocks at end
    if (type === '\0') break
    // Get file size from header (from offset 124, 12 bytes, octal)
    const size = parseInt(read(tarball, decoder, offset + 124, 12), 8)
    // Only handle files (0). Packed packages often only contain files and no directories.
    // They may contain PAX headers (x) and global PAX headers (g), but we don't need to handle those.
    if (type === '0') {
      // Get file name from header (from offset 0, 100 bytes)
      const name = read(tarball, decoder, offset, 100).split('\0', 1)[0]
      // Get file content following the header (from offset + 512, `size` bytes) as a view, no copy
      const data = new Uint8Array(tarball, offset + 512, size)
      files.push({ name, data })
    }
    // Skip header and file content (padded to 512 bytes)
    offset += 512 + Math.ceil(size / 512) * 512
  }
  return files
}
/**
 * @param {ArrayBuffer} buffer
 * @param {TextDecoder} decoder
 * @param {number} offset
 * @param {number} length
 */
function read(buffer, decoder, offset, length) {
  // Decode `length` bytes at `offset` as a string, using a view over the buffer (no copy)
  const view = new Uint8Array(buffer, offset, length)
  return decoder.decode(view)
}
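For reference, here is a minimal usage sketch (not part of the benchmark) showing how the exported ArrayBuffer parser might be driven on its own. It assumes the script above is saved as unpack.mjs; that file name, the package/package.json lookup, and the text-decoding step are illustrative assumptions rather than anything from the original code.

import { readFileSync } from 'fs'
import zlib from 'zlib'
import util from 'util'
import { unpackWithArrayBufferExtra } from './unpack.mjs' // hypothetical file name for the script above

const gunzip = util.promisify(zlib.gunzip)

// Decompress the tarball and parse it from its underlying ArrayBuffer.
// Large zlib outputs are not pooled, so `.buffer` starts at byteOffset 0 here.
const tarBuffer = await gunzip(readFileSync('./@babel__core-7.26.0.tgz'))
const files = unpackWithArrayBufferExtra(tarBuffer.buffer)

// List every file name found in the package
console.log(files.map((f) => f.name))

// Illustrative: npm tarballs typically nest entries under `package/`,
// so decode the manifest's bytes back into text if it is present.
const pkgJson = files.find((f) => f.name === 'package/package.json')
if (pkgJson) {
  console.log(new TextDecoder().decode(pkgJson.data))
}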