/**
 * Test out the performance and memory usage of various ways to compose a gzip
 * of streaming JSON log lines.
 *
 * One fun property of gzip is that a stream with multiple gzip chunks is
 * itself valid as a single gzip stream/file. That means there are lots of
 * interesting ways to break down how you build and stream out gzip data.
 */
const fs = require("fs");
const stream = require("stream");
const { pipeline } = require("stream/promises");
const zlib = require("zlib");
const { Buffer } = require("buffer");
const { promisify } = require("util");
const JSONStream = require("JSONStream");
const split2 = require("split2");
const streamChunker = require("stream-chunker");

// A reasonably large input file that matches the type of input we expect.
// ~3 GB of ND-JSON data. Most lines are pretty short (~150 bytes), but some
// are very long (~250 kB). All lines share the same structure, and lines of
// similar length tend to come together in chunks.
const inFilePath = "./data/univaf_raw/availability_log-2021-09-21.ndjson";
/**
 * Simplest implementation: just throw Node's built-in gzip stream in a stream
 * pipeline after we serialize the JSON.
 */
async function compressionSingleStream() {
  await pipeline(
    fs.createReadStream(inFilePath),
    split2(),
    async function* (source) {
      for await (const line of source) {
        if (line) {
          yield JSON.parse(line);
        }
      }
    },
    JSONStream.stringify(false),
    zlib.createGzip({ level: zlib.constants.Z_BEST_COMPRESSION }),
    fs.createWriteStream(`${inFilePath}.basicStream.gz`)
  );
}
/**
 * Possibly over-engineered: create a stream that outputs multiple gzips, one
 * after the other (which together form a valid gzip). There's a lot of messy
 * work here involved in making sure the whole thing continues to stream without
 * taking over a huge chunk of memory for the batching.
 */
async function compressionBatchSubStream() {
  class GzipBatchStream extends stream.Duplex {
    constructor(options) {
      super({
        readableObjectMode: false,
        writableObjectMode: true
      });
      this.createNewZipper();
      this.batchSize = (options && options.batchSize) || 10_000;
    }

    createNewZipper() {
      this.inputCount = 0;
      this.currentZipStream = zlib.createGzip({ level: zlib.constants.Z_BEST_COMPRESSION });
      this.currentZipStream.on("data", (chunk) => {
        if (!this.push(chunk)) {
          this.currentZipStream.pause();
        }
      });
    }

    _write(chunk, _encoding, callback) {
      return this.currentZipStream.write(JSON.stringify(chunk) + "\n", (error) => {
        if (error) {
          return callback(error);
        }
        this.inputCount++;
        if (this.inputCount === this.batchSize) {
          // Don't call back that we're done until the current batch stream has been consumed!
          this.currentZipStream.once("end", () => {
            this.createNewZipper();
            callback();
          });
          this.currentZipStream.end();
        } else {
          callback();
        }
      });
    }

    _final(callback) {
      this.currentZipStream.once("end", () => {
        this.currentZipStream = null;
        callback();
      });
      this.currentZipStream.end();
    }

    _read(_size) {
      if (this.currentZipStream) {
        this.currentZipStream.resume();
      }
    }
  }

  await pipeline(
    fs.createReadStream(inFilePath),
    split2(),
    async function* (source) {
      for await (const line of source) {
        if (line) {
          yield JSON.parse(line);
        }
      }
    },
    new GzipBatchStream({ batchSize: 10_000 }),
    fs.createWriteStream(`${inFilePath}.batchSubStream.gz`)
  );
}
/**
 * Simple batching: buffer up a large chunk of serialized output and then gzip
 * the whole chunk and output that on the stream. Like
 * `compressionBatchSubStream`, this winds up creating an output stream that is
 * a bunch of gzips one after the other (which together also form a valid gzip.)
 * This ought to take a lot more memory, and maybe also be slower. We'll see.
 */
async function compressionBatchChunks() {
  await pipeline(
    fs.createReadStream(inFilePath),
    split2(),
    async function* (source) {
      for await (const line of source) {
        if (line) {
          yield JSON.parse(line);
        }
      }
    },
    async function* (source) {
      const gzipPromise = promisify(zlib.gzip);
      let batch = "";
      let batchSize = 0;
      for await (const row of source) {
        batchSize++;
        batch += JSON.stringify(row) + "\n";
        if (batchSize === 10_000) {
          yield await gzipPromise(batch, { level: zlib.constants.Z_BEST_COMPRESSION });
          batch = "";
          batchSize = 0;
        }
      }
      // Compress any final partial batch.
      if (batch.length) {
        yield await gzipPromise(batch, { level: zlib.constants.Z_BEST_COMPRESSION });
      }
    },
    fs.createWriteStream(`${inFilePath}.batchChunkStream.gz`)
  );
}
/**
 * Like `compressionBatchChunks`, but separates the batching from the gzipping.
 * One stream outputs batches of 10,000 lines, and its output is consumed by a
 * normal gzip stream, rather than the single shot `gzip()` call for each batch.
 */
async function compressionBatchChunks2(batchSize) {
  await pipeline(
    fs.createReadStream(inFilePath),
    split2(),
    async function* (source) {
      for await (const line of source) {
        if (line) {
          yield JSON.parse(line);
        }
      }
    },
    async function* (source) {
      for await (const row of source) {
        yield JSON.stringify(row) + "\n";
      }
    },
    async function* (source) {
      let buffer = "";
      let bufferSize = 0;
      for await (const row of source) {
        bufferSize++;
        buffer += row;
        if (bufferSize === batchSize) {
          yield buffer;
          buffer = "";
          bufferSize = 0;
        }
      }
      // Emit any final partial batch.
      if (bufferSize > 0) {
        yield buffer;
      }
    },
    zlib.createGzip({ level: zlib.constants.Z_BEST_COMPRESSION }),
    fs.createWriteStream(`${inFilePath}.batchChunkStream2.gz`)
  );
}
/**
 * Like `compressionBatchChunks2`, but batches by bytes instead of by line.
 */
async function compressionBatchChunks2Bytes(batchSize) {
  await pipeline(
    fs.createReadStream(inFilePath),
    split2(),
    async function* (source) {
      for await (const line of source) {
        if (line) {
          yield JSON.parse(line);
        }
      }
    },
    async function* (source) {
      for await (const row of source) {
        yield Buffer.from(JSON.stringify(row) + "\n", "utf8");
      }
    },
    async function* (source) {
      let buffer = Buffer.allocUnsafe(batchSize);
      let bufferPosition = 0;
      for await (const input of source) {
        let inputPosition = 0;
        while (inputPosition < input.length) {
          // `copy` only copies as much as fits in `buffer`; loop to get the rest.
          const written = input.copy(buffer, bufferPosition, inputPosition);
          inputPosition += written;
          bufferPosition += written;
          if (bufferPosition === batchSize) {
            yield buffer;
            buffer = Buffer.allocUnsafe(batchSize);
            bufferPosition = 0;
          }
        }
      }
      // Emit any leftovers.
      if (bufferPosition > 0) {
        yield buffer.slice(0, bufferPosition);
      }
    },
    zlib.createGzip({ level: zlib.constants.Z_BEST_COMPRESSION }),
    fs.createWriteStream(`${inFilePath}.batchChunkStream2Bytes.gz`)
  );
}
/**
 * Like `compressionBatchChunks2Bytes`, but with proper streams instead of
 * async generators.
 */
async function compressionBatchChunks2BytesProper({ batchSize, setHighWaterMark = false, maxMemLevel = false, setChunkSize = false, setGzipHighWaterMark = false }) {
  // Couldn't find a good version of this on NPM (seems surprising, I'm probably
  // missing it). But the `stream-chunker` package performs *terribly* (it's
  // worse than no chunking at all!)
  class BufferedStream extends stream.Transform {
    constructor ({ size = 256 * 1024, setHighWaterMark = false } = {}) {
      const options = {};
      if (setHighWaterMark) options.readableHighWaterMark = size;
      super(options);
      this.size = size;
      this.resetBuffer();
    }

    resetBuffer () {
      this.buffer = Buffer.allocUnsafe(this.size);
      this.offset = 0;
    }

    _transform (input, encoding, callback) {
      if (typeof input === "string") {
        input = Buffer.from(input, encoding);
      } else if (!(input instanceof Buffer)) {
        callback(new TypeError(`BufferedStream input must be strings or buffers, not ${input.constructor.name}`));
        return;
      }

      let inputPosition = 0;
      while (inputPosition < input.length) {
        const written = input.copy(this.buffer, this.offset, inputPosition);
        inputPosition += written;
        this.offset += written;
        if (this.offset === this.size) {
          this.push(this.buffer);
          this.resetBuffer();
        }
      }
      callback();
    }

    _flush (callback) {
      if (this.offset > 0) {
        this.push(this.buffer.slice(0, this.offset));
      }
      callback();
    }

    _destroy (error, callback) {
      this.buffer = null;
      callback(error);
    }
  }

  await pipeline(
    fs.createReadStream(inFilePath),
    split2(),
    async function* (source) {
      for await (const line of source) {
        if (line) {
          yield JSON.parse(line);
        }
      }
    },
    JSONStream.stringify(false),
    new BufferedStream({ size: batchSize, setHighWaterMark }),
    zlib.createGzip({
      level: zlib.constants.Z_BEST_COMPRESSION,
      memLevel: maxMemLevel ? zlib.constants.Z_MAX_MEMLEVEL : zlib.constants.Z_DEFAULT_MEMLEVEL,
      chunkSize: setChunkSize ? batchSize : undefined,
      highWaterMark: setGzipHighWaterMark ? batchSize : undefined
    }),
    fs.createWriteStream(`${inFilePath}.batchChunkStream2BytesProper.gz`)
  );
}
/**
 * Like `compressionBatchChunks2Bytes`, but with a third-party component
 * (stream-chunker).
 */
async function compressionBatchChunks2Bytes3p(batchSize) {
  await pipeline(
    fs.createReadStream(inFilePath),
    split2(),
    async function* (source) {
      for await (const line of source) {
        if (line) {
          yield JSON.parse(line);
        }
      }
    },
    JSONStream.stringify(false),
    streamChunker(batchSize, { flush: true, align: false }),
    zlib.createGzip({ level: zlib.constants.Z_BEST_COMPRESSION }),
    fs.createWriteStream(`${inFilePath}.batchChunkStream2Bytes3p.gz`)
  );
}
async function main() {
  const sizeArgument = process.argv.find(x => x.startsWith("--size="));
  const batchSize = (sizeArgument && parseInt(sizeArgument.match(/=(.*)$/)?.[1], 10)) || 10_000;
  console.log("Batch size:", batchSize);

  const maxMemLevel = process.argv.includes("--max-mem-level");
  console.log("maxMemLevel:", maxMemLevel);

  const setHighWaterMark = process.argv.includes("--set-high-water-mark");
  console.log("setHighWaterMark:", setHighWaterMark);

  const setChunkSize = process.argv.includes("--set-chunk-size");
  console.log("setChunkSize:", setChunkSize);

  const setGzipHighWaterMark = process.argv.includes("--set-gzip-high-water-mark");
  console.log("setGzipHighWaterMark:", setGzipHighWaterMark);

  // Print memory usage every few seconds. This is optional so we can try a few
  // runs without it and make sure it's not impacting timing.
  if (process.argv.includes("--show-memory")) {
    const formatter = new Intl.NumberFormat();
    console.log("RSS\tHeap Total\tHeap Used\tExternal\tArrayBuffers");
    setInterval(() => {
      const usage = process.memoryUsage();
      console.log([
        formatter.format(usage.rss).padStart(11, " "),
        formatter.format(usage.heapTotal).padStart(11, " "),
        formatter.format(usage.heapUsed).padStart(11, " "),
        formatter.format(usage.external).padStart(11, " "),
        formatter.format(usage.arrayBuffers).padStart(11, " "),
      ].join("\t"));
    }, 5_000).unref();
  }

  if (process.argv.includes("single-stream")) {
    await compressionSingleStream();
  }
  if (process.argv.includes("batch-sub-stream")) {
    await compressionBatchSubStream();
  }
  if (process.argv.includes("batch-chunk-stream")) {
    await compressionBatchChunks();
  }
  if (process.argv.includes("batch-chunk-stream-2")) {
    await compressionBatchChunks2(batchSize);
  }
  if (process.argv.includes("batch-chunk-stream-2-bytes")) {
    await compressionBatchChunks2Bytes(batchSize);
  }
  if (process.argv.includes("batch-chunk-stream-2-bytes-proper")) {
    await compressionBatchChunks2BytesProper({
      batchSize,
      maxMemLevel,
      setHighWaterMark,
      setChunkSize,
      setGzipHighWaterMark
    });
  }
  if (process.argv.includes("batch-chunk-stream-2-bytes-3p")) {
    await compressionBatchChunks2Bytes3p(batchSize);
  }
}

main().catch(error => {
  console.log(error);
  process.exitCode = 1;
});
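For reference, the script is driven by positional method names plus the flags parsed in `main()`, e.g. `node gzip-test.js batch-chunk-stream-2-bytes-proper --size=65536 --set-chunk-size --show-memory` (the `gzip-test.js` filename here is just illustrative).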
Given my surprise at the performance of `compressionBatchChunks`, I added a `compressionBatchChunks2` routine that separates the batching and gzipping, so we have a stream that outputs large strings as batches and then a normal gzip stream that consumes it (instead of gzipping the whole chunk as one call and outputting that).
### `compressionBatchChunks2`

| Memory Type | Average | Min | Max |
|---|---|---|---|
| RSS | 112,496,091 | 98,324,480 | 148,488,192 |
| Heap Total | 54,839,696 | 42,377,216 | 87,490,560 |
| Heap Used | 26,598,015 | 8,233,048 | 63,777,840 |
| External | 87,834,419 | 2,938,557 | 392,328,303 |
| Array Buffers | 3,616,445 | 223,429 | 14,988,181 |

Rough timing over a few runs:
Real Time: 2m20s
User Time: 2m20s
System Time: 0m5s
It’s not directly comparable since I shut down and restarted the EC2 instance in between, but at least a rough comparison is reasonable. It performs pretty similarly, both in terms of memory and time (higher peak memory usage, but averages are quite close). It’s conceptually simpler to engineer, so it seems like a (small) universal improvement over `compressionBatchChunks`. But not monumentally different in any way.
I came back again for one more round of testing on this.
Since `compressionBatchChunks2` taught me that we really don’t need to do any chunking of the resulting gzips to get low-memory streaming and compression, it stands to reason that memory usage could be further optimized if we batch by bytes instead of by number of lines. The added tests here look at that, and (unsurprisingly) it improves memory usage in general and also makes the range between minimum and maximum memory usage much smaller, so the memory footprint stays generally consistent.
In the new code, I’ve added 3 methods:
- `compressionBatchChunks2Bytes` works similarly to `compressionBatchChunks2`, except it creates buffers of N bytes instead of strings of N lines.
- `compressionBatchChunks2BytesProper` is the same as the above, but the batching code is written as a transform stream instead of as an async generator. I was curious whether the async generators supported by `stream.pipeline()` imposed any extra overhead (or maybe even had less overhead) compared to normal streams, but writing the code as a stream also allowed me to test some optimizations around high water marks (which you can’t set for the generator). Those optimizations are:
    - Setting `highWaterMark` on the batching stream to match the size of the batches. This had no notable impact. (On the other hand, I can imagine how this might make a difference if data is coming into the batching stream more slowly.)
    - Setting `memLevel` on the gzip stream to the maximum value (that is, `9` instead of `8`). This maybe has some minor speed improvements when the batch size is 64 kB – 128 kB, but it seems as likely that this is just an artifact of the particular data file I was testing with. (I don’t think it’s a matter of too few runs to average out random jitter, since the same pattern repeats in the next test.)
    - Setting `highWaterMark` on the batching stream and `memLevel` on the gzip stream. This performs basically the same as just using `memLevel`.
    - Setting `chunkSize` to match the batch size (the default is 16 kB) in addition to the above optimizations. This had a small but consistent speed improvement for batch sizes above 64 kB. In reality, I think what’s happening here is that the `highWaterMark` for filesystem streams is 64 kB, so this is really more about how few reads are needed to fill the next stream’s buffer, rather than anything about gzipping. In any case, this does mean that optimizing the gzip stream for the high water mark of the next stream can have a pretty big impact on overall performance, which is good to keep in mind.
    - Setting `highWaterMark` on the gzip stream in addition to the previous optimization. This had no notable impact.

  It’s worth noting that none of the above optimizations made a consistent or notable impact on memory usage.

  One other minor advantage here is that the async generators don’t work with the old-style stream created by `JSONStream`, while an actual stream object does. This isn’t a huge deal, though.
- `compressionBatchChunks2Bytes3p` uses the `stream-chunker` package to batch up the data instead of custom code. It turns out to be incredibly inefficient, and actually makes the whole pipeline perform worse than anything else.
Overall, these approaches (excepting `compressionBatchChunks2Bytes3p`, which was just bad all around) improved both memory usage and time. I didn’t have a whole lot of time to dig into this, but I’m guessing most of the speed improvement between buffering by line and buffering by bytes comes down to doing more string operations in JS, which would generally be a bit more expensive than operating on bytes in buffers. It could also be that I made a poor estimate of average bytes per line, so the two aren’t as comparable as I’d hoped. In any case, the memory improvement between them is the clearer win.
One interesting thing here is that user time tends to be pretty consistent between the different optimizations, so the optimizations likely mainly affect a) how well zlib is able to spread its work across cores without bottlenecking and b) how much time is spent shuttling memory/data around between JS and zlib.
In most cases, performance seems pretty consistent with chunk sizes above 32 kB, and progressively slows down as the chunks get smaller. This makes sense, since the default `windowBits` for zlib is 15, which equates to a 32 kB data window to work with (note: the default is also the max; this can’t be made bigger). So as soon as the batches get smaller than that, we start wasting time waiting for data to fill zlib’s internal working buffer, and things really slow to a crawl.
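(For reference, the window size implied by `windowBits` is easy to compute; a quick sketch using the constants Node’s `zlib` module exposes:)

```js
const zlib = require("zlib");

// zlib's window is 2^windowBits bytes. The default windowBits is 15,
// which is also the maximum.
const windowBits = zlib.constants.Z_DEFAULT_WINDOWBITS; // 15
const windowSize = 1 << windowBits;
console.log(windowSize); // 32768 bytes = 32 kB
```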
Finally, memory usage between all the different byte buffering approaches here was pretty consistent, so I only included numbers from one of the methods in the tables below.
## Overall Lessons Here

- Gzip streams can work over large streams of data pretty efficiently and really don’t need much memory at all. You don’t need to manually break up your data into multiple gzip blocks to get efficient streaming output. (I’m not sure if this is due to an implementation change from the early days, if I was just remembering something incorrectly, or if I’d never rigorously tested what I’d read about this way back early on. 🤷)
- Batching data before it arrives at a gzip stream can massively improve both gzip speed and memory usage. Unless the timing of each byte on your stream is very inconsistent and very slow, you should probably always have a stream batch up data before piping/writing it to a gzip stream. (See the sketch after this list.)
- Batches should ideally be >= the window size you are using in zlib (by default, this is 32 kB). You can calculate this as `batchSizeBytes = 1 << windowBits`.
- Setting zlib’s `memLevel` to the max did not make an appreciable difference on the data set I was working with.
- Setting the high water mark on a zlib stream doesn’t make any noticeable difference.
- When possible, matching a zlib stream’s `chunkSize` to the `highWaterMark` of whatever stream is reading from it can give a small but consistent speed boost.
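To make that concrete, here’s a minimal sketch of the pipeline shape these lessons suggest. It assumes the `BufferedStream` class from the script above is in scope, and the 64 kB sizes are illustrative, chosen to sit above zlib’s 32 kB window and match the 64 kB `highWaterMark` of filesystem write streams:

```js
const fs = require("fs");
const zlib = require("zlib");
const { pipeline } = require("stream/promises");

async function gzipToFile(source, outPath) {
  await pipeline(
    source,
    // Batch incoming data into chunks comfortably above zlib's 32 kB window.
    new BufferedStream({ size: 64 * 1024 }),
    zlib.createGzip({
      level: zlib.constants.Z_BEST_COMPRESSION,
      // Match the output chunk size to the write stream's highWaterMark.
      chunkSize: 64 * 1024
    }),
    fs.createWriteStream(outPath)
  );
}
```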
## Some Tables with Performance Measurements

All data comes from samples across 4 runs of each method/optimization at each batch size. The timings in the table below are averages across all four runs.
Configurations in the timing table below:

- **A** = Batched Lines
- **B** = Batched Bytes
- **C** = Batched Bytes with HighWaterMark
- **D** = Batched Bytes with MemLevel
- **E** = Batched Bytes with HighWaterMark + MemLevel
- **F** = Batched Bytes with HighWaterMark + MemLevel + ChunkSize
- **G** = Batched Bytes with HighWaterMark + MemLevel + ChunkSize + Gzip HighWaterMark

| Batch Size | A Real | A User | A Sys | B Real | B User | B Sys | C Real | C User | C Sys | D Real | D User | D Sys | E Real | E User | E Sys | F Real | F User | F Sys | G Real | G User | G Sys |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 384 kB | 2m 20s 526ms | 2m 24s 22ms | 4s 866ms | 1m 44s 560ms | 2m 29s 972ms | 4s 485ms | 1m 43s 448ms | 2m 28s 370ms | 4s 438ms | 1m 42s 825ms | 2m 31s 257ms | 4s 340ms | 1m 42s 180ms | 2m 30s 183ms | 4s 237ms | 1m 23s 670ms | 2m 29s 53ms | 3s 387ms | 1m 23s 985ms | 2m 29s 760ms | 3s 197ms |
| 256 kB | 2m 21s 242ms | 2m 24s 882ms | 5s 72ms | 1m 43s 938ms | 2m 29s 910ms | 4s 455ms | 1m 43s 642ms | 2m 29s 520ms | 4s 480ms | 1m 45s 635ms | 2m 30s 167ms | 4s 598ms | 1m 45s 498ms | 2m 30s 278ms | 4s 242ms | 1m 26s 427ms | 2m 31s 428ms | 3s 310ms | 1m 25s 787ms | 2m 30s 268ms | 3s 188ms |
| 128 kB | 2m 23s 510ms | 2m 27s 320ms | 5s 840ms | 1m 48s 245ms | 2m 30s 265ms | 4s 912ms | 1m 46s 820ms | 2m 28s 357ms | 4s 603ms | 1m 38s 127ms | 2m 28s 260ms | 4s 482ms | 1m 38s 353ms | 2m 28s 507ms | 4s 465ms | 1m 29s 922ms | 2m 30s 255ms | 3s 575ms | 1m 29s 362ms | 2m 29s 590ms | 3s 485ms |
| 64 kB | 2m 20s 620ms | 2m 24s 998ms | 6s 408ms | 1m 44s 953ms | 2m 34s 810ms | 6s 423ms | 1m 40s 715ms | 2m 28s 390ms | 4s 978ms | 1m 31s 557ms | 2m 25s 590ms | 4s 487ms | 1m 32s 445ms | 2m 26s 870ms | 4s 713ms | 1m 31s 490ms | 2m 26s 605ms | 3s 975ms | 1m 31s 662ms | 2m 27s 198ms | 3s 925ms |
| 32 kB | 2m 23s 938ms | 2m 27s 446ms | 7s 226ms | 1m 39s 337ms | 2m 33s 993ms | 6s 737ms | 1m 34s 802ms | 2m 28s 97ms | 5s 648ms | 1m 32s 365ms | 2m 29s 35ms | 5s 480ms | 1m 32s 770ms | 2m 29s 490ms | 5s 848ms | 1m 29s 265ms | 2m 23s 547ms | 5s 112ms | 1m 29s 90ms | 2m 23s 748ms | 4s 875ms |
| 16 kB | 2m 28s 364ms | 2m 30s 806ms | 8s 576ms | 1m 54s 538ms | 2m 41s 300ms | 8s 488ms | 1m 48s 490ms | 2m 34s 162ms | 7s 725ms | 1m 46s 892ms | 2m 33s 405ms | 7s 158ms | 1m 46s 705ms | 2m 33s 268ms | 7s 178ms | 1m 44s 40ms | 2m 29s 938ms | 7s 32ms | 1m 42s 873ms | 2m 28s 690ms | 6s 702ms |
| 8 kB | 2m 17s 626ms | 2m 47s 722ms | 10s 930ms | 2m 16s 752ms | 2m 44s 192ms | 10s 720ms | 2m 16s 37ms | 2m 43s 570ms | 10s 705ms | 2m 11s 238ms | 2m 38s 775ms | 9s 830ms | 2m 15s 420ms | 2m 43s 232ms | 10s 477ms | 2m 10s 97ms | 2m 37s 523ms | 10s 43ms | 2m 5s 370ms | 2m 35s 235ms | 9s 987ms |
| 4 kB | 2m 45s 2ms | 3m 1s 602ms | 17s 54ms | 2m 36s 107ms | 2m 50s 768ms | 15s 790ms | 2m 38s 463ms | 2m 53s 37ms | 16s 12ms | 2m 38s 15ms | 2m 52s 218ms | 15s 825ms | 2m 39s 782ms | 2m 53s 953ms | 16s 157ms | 2m 30s 865ms | 2m 44s 75ms | 16s 655ms | 2m 26s 620ms | 2m 41s 398ms | 16s 450ms |
| 2 kB | - | - | - | 3m 5s 350ms | 3m 10s 32ms | 27s 670ms | 3m 3s 373ms | 3m 7s 813ms | 27s 480ms | 3m 5s 2ms | 3m 9s 303ms | 27s 490ms | 3m 7s 102ms | 3m 10s 857ms | 27s 960ms | 2m 59s 490ms | 3m 1s 750ms | 31s 335ms | 2m 55s 855ms | 2m 59s 183ms | 30s 680ms |
| 1 kB | 4m 7s 480ms | 3m 57s 560ms | 55s 32ms | 3m 57s 37ms | 3m 46s 927ms | 54s 477ms | 3m 57s 162ms | 3m 47s 25ms | 54s 760ms | 3m 53s 578ms | 3m 44s 618ms | 52s 545ms | 3m 54s 555ms | 3m 44s 322ms | 52s 935ms | 3m 47s 550ms | 3m 35s 745ms | 1m 0s 982ms | 3m 46s 415ms | 3m 34s 993ms | 1m 0s 417ms |
This is the memory usage from all 4 runs of `compressionBatchChunks2BytesProper` with all the optimizations on. It’s not appreciably different from any other combination of optimizations or from `compressionBatchChunks2Bytes`.
| Buffer Size | Memory Type | Average | Min | Max |
|---|---|---|---|---|
| 384 kB | RSS | 104,030,272 | 97,927,168 | 118,628,352 |
| | Heap Total | 40,648,576 | 39,706,624 | 43,089,920 |
| | Heap Used | 12,176,966 | 4,901,440 | 20,850,632 |
| | External | 57,372,070 | 9,660,953 | 213,398,241 |
| | ArrayBuffers | 8,165,564 | 2,582,725 | 21,719,237 |
| 256 kB | RSS | 100,179,486 | 96,157,696 | 106,569,728 |
| | Heap Total | 40,695,748 | 39,444,480 | 42,987,520 |
| | Heap Used | 11,943,601 | 5,173,792 | 20,279,872 |
| | External | 43,228,420 | 3,893,785 | 244,273,467 |
| | ArrayBuffers | 8,046,504 | 2,607,301 | 20,039,877 |
| 128 kB | RSS | 94,362,564 | 91,574,272 | 98,009,088 |
| | Heap Total | 41,277,801 | 39,444,480 | 44,130,304 |
| | Heap Used | 12,596,829 | 5,155,880 | 20,859,848 |
| | External | 38,256,011 | 4,393,497 | 128,831,350 |
| | ArrayBuffers | 3,938,731 | 1,468,613 | 8,360,145 |
| 64 kB | RSS | 90,714,804 | 83,144,704 | 94,695,424 |
| | Heap Total | 41,769,508 | 39,706,624 | 45,133,824 |
| | Heap Used | 13,905,169 | 5,619,832 | 23,282,328 |
| | External | 25,749,151 | 2,558,489 | 99,488,492 |
| | ArrayBuffers | 3,069,412 | 846,021 | 8,441,476 |
| 32 kB | RSS | 91,643,362 | 87,035,904 | 96,342,016 |
| | Heap Total | 42,865,784 | 39,968,768 | 50,642,944 |
| | Heap Used | 13,651,218 | 7,118,320 | 21,181,032 |
| | External | 15,561,678 | 1,788,441 | 41,385,533 |
| | ArrayBuffers | 2,523,312 | 714,949 | 6,515,831 |
| 16 kB | RSS | 92,032,307 | 88,014,848 | 98,107,392 |
| | Heap Total | 43,989,299 | 40,230,912 | 52,494,336 |
| | Heap Used | 15,607,441 | 6,186,752 | 27,250,312 |
| | External | 7,777,347 | 1,086,626 | 64,537,265 |
| | ArrayBuffers | 2,530,505 | 714,949 | 6,241,937 |
| 8 kB | RSS | 92,965,282 | 87,474,176 | 100,810,752 |
| | Heap Total | 44,862,025 | 40,493,056 | 51,908,608 |
| | Heap Used | 16,094,696 | 6,113,592 | 26,453,416 |
| | External | 5,992,805 | 936,473 | 35,004,788 |
| | ArrayBuffers | 2,063,636 | 411,845 | 6,147,329 |
| 4 kB | RSS | 92,521,303 | 86,949,888 | 100,409,344 |
| | Heap Total | 44,862,455 | 40,230,912 | 55,087,104 |
| | Heap Used | 16,232,215 | 7,344,272 | 27,337,672 |
| | External | 5,466,803 | 1,227,289 | 37,395,032 |
| | ArrayBuffers | 2,083,278 | 403,653 | 5,817,932 |
| 2 kB | RSS | 91,437,513 | 86,269,952 | 98,705,408 |
| | Heap Total | 43,407,227 | 40,755,200 | 50,028,544 |
| | Heap Used | 14,673,008 | 4,878,200 | 27,261,088 |
| | External | 8,205,030 | 576,025 | 27,599,519 |
| | ArrayBuffers | 1,943,753 | 174,277 | 7,317,701 |
| 1 kB | RSS | 90,928,378 | 85,712,896 | 95,481,856 |
| | Heap Total | 43,149,016 | 40,230,912 | 48,041,984 |
| | Heap Used | 13,615,629 | 5,325,600 | 23,040,816 |
| | External | 12,124,117 | 576,025 | 64,054,792 |
| | ArrayBuffers | 1,906,545 | 248,005 | 5,843,141 |
Kind of fascinating and somewhat unexpected results here. On a `t3.xlarge` AWS EC2 instance:

### `compressionSingleStream`

Rough timing over a few runs:
Real Time: 9m5s
User Time: 7m5s
System Time: 4m30s
### `compressionBatchSubStream`

Rough timing over a few runs:
Real Time: 9m17s
User Time: 7m30s
System Time: 4m22s
### `compressionBatchChunks`

Rough timing over a few runs:
Real Time: 2m23s
User Time: 2m23s
System Time: 0m6s
Unsurprisingly, `compressionBatchChunks` takes more memory (29% – 41% more than `compressionSingleStream`). This will probably depend a lot on the particular data on the stream, especially since this is chunking by line rather than by byte. That also matches up with the fact that memory usage was really unsteady over a 26 MB range, while `compressionSingleStream` was nice and steady in a 7 MB range.

On the other hand, the speed of `compressionBatchChunks` was a big surprise. There’s clearly a lot of overhead involved in streaming small chunks of data into zlib, and buffering up a big chunk for it to chew on gives an absurdly large speed boost.

Last time I worked with gzip in Node (long ago in the v0.x days), there were issues with actually streaming, and some manual chunking was needed. It’s clear that’s no longer the case, and all the fancy footwork I did to do it by hand was entirely pointless, performing about the same or slightly worse than just piping things through a built-in gzip stream. Yay!
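As a footnote on the multi-gzip property the manual approaches lean on: it’s easy to verify in Node that concatenated gzip members decompress as a single stream. A quick sketch (Node’s gunzip handles multi-member input):

```js
const zlib = require("zlib");
const { Buffer } = require("buffer");

// Two independently gzipped chunks, concatenated...
const combined = Buffer.concat([
  zlib.gzipSync("first chunk\n"),
  zlib.gzipSync("second chunk\n")
]);

// ...still decompress as one valid gzip stream.
console.log(zlib.gunzipSync(combined).toString());
// Prints: "first chunk\nsecond chunk\n"
```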
Tested that the output is valid for all of these with: