Skip to content

Instantly share code, notes, and snippets.

@MidnightLightning
Last active December 30, 2015 23:28
Show Gist options
  • Select an option

  • Save MidnightLightning/7900466 to your computer and use it in GitHub Desktop.

Select an option

Save MidnightLightning/7900466 to your computer and use it in GitHub Desktop.
Experimenting with Typed Arrays in Node.js

The Node.js documentation for Buffer objects says:

A Buffer object can also be used with typed arrays. The buffer object is cloned to an ArrayBuffer that is used as the backing store for the typed array. The memory of the buffer and the ArrayBuffer is not shared.

NOTE: Node.js v0.8 simply retained a reference to the buffer in array.buffer instead of cloning it.

While more efficient, it introduces subtle incompatibilities with the typed arrays specification. ArrayBuffer#slice() makes a copy of the slice while Buffer#slice() creates a view.

Running the benchmark.js test to compare native Buffer objects against Typed Arrays shows that Buffer objects, which are custom-designed for the Node.js/V8 engine, do outperform the generic Typed Arrays that are part of the JavaScript language, which makes sense on that particular platform. It would be nice if the same optimizations were applied to Typed Arrays so that they could be used as a cross-platform interface, but for now, if you are building specifically for Node, Buffer objects are more efficient.

However, if you use Browserify to convert a Node-based script for the browser, it converts Buffer objects into browser-friendly objects built on Typed Arrays (source), so in that case using Buffers as the universal interface works out better.

// Examples of how Typed Arrays (ArrayBuffer, ArrayBufferView, and DataView objects) work.
// Top-level `var` is kept on purpose: later snippets on this page re-declare the same names.

// Raw ArrayBuffer: 32 zero-filled bytes shared by the views created below.
var ab = new ArrayBuffer(32);

// View two unsigned 32-bit values, starting four bytes into the ArrayBuffer.
// The byte offset must be a multiple of Uint32Array.BYTES_PER_ELEMENT (4).
var i32 = new Uint32Array(ab, 4, 2);
i32.set([0x0102, 0x01020304]); // Index 0 of this view is byte 4 of the ArrayBuffer

// View eight bytes of the same ArrayBuffer, starting at byte 5.
var i8 = new Uint8Array(ab, 5, 8);
i8.set([0xFF], 1); // Index 1 of this view is byte 6 of the ArrayBuffer, which is also part of i32[0]

console.log('i32:', i32); // On a little-endian machine, index 0 and 1 of i32 are 0xFF0102 and 0x01020304
console.log('i8:', i8);

// Node Buffer
// Buffer.alloc(size, fill) replaces the deprecated `new Buffer(size)` followed by fill().
var nb = Buffer.alloc(32, 1);
var t = nb.slice(2, 4);
t[0] = 10; // Buffer#slice() returns a view of the same memory, so nb[2] changes too

// Building a typed array from a Buffer copies the values; memory is not shared with nb.
i32 = new Uint32Array(nb.slice(2, 4));
console.log('i32:', i32);
console.log('buffer:', nb); // "The memory of the buffer and the ArrayBuffer is not shared"

// Note the two bytes of the Buffer slice become two 32-bit elements (8 bytes) in the new Uint32Array.
// The number of elements in the slice determines the length of the Uint32Array, not its byte size.
// This is because the Buffer is treated like an Array, aka:
i32 = new Uint32Array([1, 2, 3, 4, 5]);
console.log('i32 size:', i32.byteLength); // five 32-bit numbers, so a 20-byte ArrayBuffer underneath

// DataView over the original ArrayBuffer
var dv = new DataView(ab);
console.log('dv:', dv);
// Simple class to walk through a set of raw data, pulling out sequential structured elements.

/**
 * Sequential reader over a block of bytes.
 *
 * Every read starts at `this.cursor` and advances the cursor by the number of
 * bytes consumed. Multi-byte reads use DataView's default byte order
 * (big-endian). Reading past the end of the data throws a RangeError from
 * DataView and leaves the cursor where it was.
 *
 * @constructor
 * @param {*} data - Value handed to `new Uint8Array(data)`, e.g. an Array of byte values.
 */
function seeker(data) {
  this.cursor = 0;
  this.load(data);
}

/**
 * Move the cursor back to the first byte.
 */
seeker.prototype.reset = function reset() {
  this.cursor = 0;
};

/**
 * Replace the data being read and rewind the cursor.
 *
 * @param {*} data - Value handed to `new Uint8Array(data)`.
 */
seeker.prototype.load = function load(data) {
  var i8 = new Uint8Array(data); // Use an ArrayBufferView to write the data
  // Wrap the underlying ArrayBuffer in a DataView. DataView must be invoked with `new`.
  this.data = new DataView(i8.buffer);
  this.reset();
};

/**
 * Read one unsigned 8-bit integer.
 *
 * @returns {number} The byte at the cursor.
 */
seeker.prototype.int = function int() {
  var num = this.data.getUint8(this.cursor);
  this.cursor += 1;
  return num;
};

/**
 * Read one unsigned 16-bit integer.
 *
 * @returns {number} The two bytes at the cursor as one number.
 */
seeker.prototype.int16 = function int16() {
  var num = this.data.getUint16(this.cursor);
  this.cursor += 2;
  return num;
};

/**
 * Read one unsigned 32-bit integer.
 *
 * @returns {number} The four bytes at the cursor as one number.
 */
seeker.prototype.int32 = function int32() {
  var num = this.data.getUint32(this.cursor);
  this.cursor += 4;
  return num;
};

/**
 * Read one unsigned 64-bit integer.
 *
 * DataView has no getUint64(); getBigUint64() is the 64-bit reader, and it
 * returns a BigInt because a Number cannot hold every 64-bit value exactly.
 *
 * @returns {bigint} The eight bytes at the cursor as one BigInt.
 */
seeker.prototype.int64 = function int64() {
  var num = this.data.getBigUint64(this.cursor);
  this.cursor += 8;
  return num;
};
// Load eight bytes, then parse them as 8-bit, 8-bit, 16-bit:
// 0x03 and 0x04 are read together as 0x0304 (772).
var s = new seeker([1, 2, 3, 4, 5, 6, 7, 8]);
var firstByte = s.int();
var secondByte = s.int();
var nextWord = s.int16();
console.log(firstByte, secondByte, nextWord);
// Dump the seeker itself to show where the cursor ended up.
console.log(s);
var crypto = require('crypto');

// Benchmark parameters: how many times each scenario is repeated, and how
// many random bytes every generated array holds.
var rounds = 5000;
var arraySize = 10000;

// Baseline: time only the generation of the random input arrays, since every
// scenario below pays this cost as well.
var overheadLabel = 'Random array generation (overhead)';
console.time(overheadLabel);
for (var i = 0; i < rounds; i += 1) {
  var buff = getRandomArray();
  buff = null; // Drop the reference straight away
}
console.timeEnd(overheadLabel);
console.log('');
console.log('Create and Load in one step:');

// Node Buffer built directly from a plain Array of byte values.
// Buffer.from(array) replaces the deprecated `new Buffer(array)` constructor.
console.time('Node Buffer');
for (var i = 0; i < rounds; i++) {
  var buff = Buffer.from(getRandomArray());
  buff = null;
}
console.timeEnd('Node Buffer');

// Typed array built directly from the same kind of plain Array.
console.time('ArrayBufferView (Uint8Array)');
for (var i = 0; i < rounds; i++) {
  var buff = new Uint8Array(getRandomArray());
  buff = null;
}
console.timeEnd('ArrayBufferView (Uint8Array)');
console.log('');
console.log('Create, then Load:');

// Node Buffer, filled one byte at a time through index assignment.
// Buffer.alloc(size) replaces the deprecated `new Buffer(size)` constructor and
// returns zero-filled memory, just like `new Uint8Array(size)` does below.
console.time('Node Buffer (Array access)');
for (var i = 0; i < rounds; i++) {
  var data = getRandomArray();
  var buff = Buffer.alloc(data.length);
  for (var x = 0; x < data.length; x++) {
    buff[x] = data[x];
  }
  data = null;
  buff = null;
}
console.timeEnd('Node Buffer (Array access)');

// Node Buffer, filled one byte at a time through writeUInt8().
console.time('Node Buffer (writeUInt8)');
for (var i = 0; i < rounds; i++) {
  var data = getRandomArray();
  var buff = Buffer.alloc(data.length);
  for (var x = 0; x < data.length; x++) {
    buff.writeUInt8(data[x], x);
  }
  data = null;
  buff = null;
}
console.timeEnd('Node Buffer (writeUInt8)');

// Uint8Array, filled in a single set() call.
console.time('ArrayBufferView (Uint8Array.set)');
for (var i = 0; i < rounds; i++) {
  var data = getRandomArray();
  var i8 = new Uint8Array(data.length);
  i8.set(data);
  data = null;
  i8 = null;
}
console.timeEnd('ArrayBufferView (Uint8Array.set)');

// Uint8Array, filled one byte at a time through index assignment.
console.time('ArrayBufferView (Uint8Array Array access)');
for (var i = 0; i < rounds; i++) {
  var data = getRandomArray();
  var i8 = new Uint8Array(data.length);
  for (var x = 0; x < data.length; x++) {
    i8[x] = data[x];
  }
  data = null;
  i8 = null;
}
console.timeEnd('ArrayBufferView (Uint8Array Array access)');

// DataView over a fresh Uint8Array's ArrayBuffer, filled through setUint8().
console.time('DataView');
for (var i = 0; i < rounds; i++) {
  var data = getRandomArray();
  var i8 = new Uint8Array(data.length);
  var dv = new DataView(i8.buffer);
  for (var x = 0; x < data.length; x++) {
    dv.setUint8(x, data[x]);
  }
  data = null;
  i8 = null;
  dv = null;
}
console.timeEnd('DataView');
console.log('');
console.log('32-bit Big-endian numbers:');

// Node Buffer: load the random bytes, then repeatedly copy a big-endian
// 32-bit value from five bytes ahead into the current position.
console.time('Node Buffer');
for (var i = 0; i < rounds; i++) {
  var data = getRandomArray();
  // Buffer.alloc(size) replaces the deprecated `new Buffer(size)` constructor.
  var buff = Buffer.alloc(data.length); // Create
  for (var x = 0; x < data.length; x++) {
    buff[x] = data[x]; // Load
  }
  // Manipulate; stopping 10 bytes early keeps the 4-byte read at x+5 in bounds
  for (var x = 0; x < buff.length - 10; x++) {
    buff.writeUInt32BE(buff.readUInt32BE(x + 5), x);
  }
  data = null;
  buff = null;
}
console.timeEnd('Node Buffer');

// DataView: the same load and manipulation on a Uint8Array's ArrayBuffer.
console.time('DataView');
for (var i = 0; i < rounds; i++) {
  var data = getRandomArray();
  var i8 = new Uint8Array(data.length); // Create
  for (var x = 0; x < data.length; x++) {
    i8[x] = data[x]; // Load
  }
  // Manipulate; `false` selects big-endian byte order for the write
  var buff = new DataView(i8.buffer);
  for (var x = 0; x < buff.byteLength - 10; x++) {
    buff.setUint32(x, buff.getUint32(x + 5), false);
  }
  data = null;
  buff = null;
}
console.timeEnd('DataView');
/**
 * Build a plain Array holding `arraySize` random byte values.
 *
 * The bytes come from crypto.randomBytes() and are copied out of the returned
 * Buffer into an ordinary Array.
 *
 * @returns {number[]} Array of `arraySize` integers, each between 0 and 255.
 */
function getRandomArray() {
  return Array.from(crypto.randomBytes(arraySize));
}
@MidnightLightning
Copy link
Author

Hmmm, I am seeing a slowdown doing it that way, but I took a deeper look at the internals and found something else interesting:

It seems the generation of a Typed Array by initializing it and loading it at the same time is quite inefficient:

var rounds = 5000;
var arraySize = 10000;
// Plain Array of random bytes, reused by the two "load" tests below.
// NOTE(review): bufToArray is not defined in this snippet — presumably a helper
// that converts a Buffer into a plain Array; confirm against benchmark.js.
var va = bufToArray(crypto.randomBytes(arraySize).slice(0));

// Create the typed array directly from a plain Array.
console.time('Array to ArrayBufferView');
for (var i = 0; i < rounds; i++) {
  var i8 = new Uint8Array(new Array(arraySize));
  i8 = null;
}
console.timeEnd('Array to ArrayBufferView');

// Create an empty typed array of the target size; nothing is loaded.
console.time('ArrayBufferView init');
for (var i = 0; i < rounds; i++) {
  var i8 = new Uint8Array(arraySize);
  i8 = null;
}
console.timeEnd('ArrayBufferView init');

// Create an empty typed array, then copy the values in one index at a time.
console.time('ArrayBufferView init, then load');
for (var i = 0; i < rounds; i++) {
  var i8 = new Uint8Array(arraySize);
  for (var x = 0; x < va.length; x++) {
    i8[x] = va[x];
  }
  i8 = null;
}
console.timeEnd('ArrayBufferView init, then load');

// Create an empty typed array, then copy the values in with a single set() call.
console.time('ArrayBufferView init, then set()');
for (var i = 0; i < rounds; i++) {
  var i8 = new Uint8Array(arraySize);
  i8.set(va);
  i8 = null;
}
console.timeEnd('ArrayBufferView init, then set()');

Initializing a Typed Array with an existing sequence object that's not another Typed Array (e.g. a vanilla Array or Buffer) seems to be a bottleneck. The "Array to ArrayBufferView" process (new Uint8Array(new Array(arraySize))) is really slow (12,000ms), and using a Buffer (new Uint8Array(new Buffer(arraySize))) isn't much faster (9,500 ms). Using the set() method after being initialized seems to have the same issue. However, initializing (new Uint8Array(arraySize)) and then loading using the Typed Array's index accessors is much faster (550 ms).

So, updating the tests to be:

console.log('8-bit numbers:');

// Every test copies the byte five positions ahead into the current position.
// Note the argument order differs between the two APIs:
//   Buffer#writeUInt8(value, offset)  vs.  DataView#setUint8(byteOffset, value)
// A DataView has no `length` property; its size is `byteLength`.

console.time('Node Buffer object');
for (var i = 0; i < rounds; i++) {
  var buf = crypto.randomBytes(arraySize).slice(0);
  for (var x = 0; x < buf.length - 10; x++) {
    buf.writeUInt8(buf.readUInt8(x + 5), x);
  }
}
console.timeEnd('Node Buffer object');

// Typed array created directly from the random Buffer.
console.time('DataView');
for (var i = 0; i < rounds; i++) {
  var i8 = new Uint8Array(crypto.randomBytes(arraySize).slice(0));
  var buf = new DataView(i8.buffer);
  for (var x = 0; x < buf.byteLength - 10; x++) {
    buf.setUint8(x, buf.getUint8(x + 5));
  }
}
console.timeEnd('DataView');

// Typed array created empty, then loaded through its index accessors.
console.time('DataView loaded by ArrayBufferView');
for (var i = 0; i < rounds; i++) {
  var nb = crypto.randomBytes(arraySize).slice(0);
  var i8 = new Uint8Array(nb.length);
  for (var x = 0; x < nb.length; x++) {
    i8[x] = nb[x];
  }
  var buf = new DataView(i8.buffer);
  for (var x = 0; x < buf.byteLength - 10; x++) {
    buf.setUint8(x, buf.getUint8(x + 5));
  }
}
console.timeEnd('DataView loaded by ArrayBufferView');

// Typed array created empty, then loaded through the DataView itself.
console.time('DataView loaded by DataView');
for (var i = 0; i < rounds; i++) {
  var nb = crypto.randomBytes(arraySize).slice(0);
  var i8 = new Uint8Array(nb.length);
  var buf = new DataView(i8.buffer);
  for (var x = 0; x < buf.byteLength; x++) {
    buf.setUint8(x, nb[x]);
  }
  for (var x = 0; x < buf.byteLength - 10; x++) {
    buf.setUint8(x, buf.getUint8(x + 5));
  }
}
console.timeEnd('DataView loaded by DataView');

I get results like:

8-bit numbers:
Node Buffer object: 4821ms
DataView: 7164ms
DataView loaded by ArrayBufferView: 2793ms
DataView loaded by DataView: 2733ms

So, I do see "DataView" taking almost twice as long as "Node Buffer object", like you mentioned, but using a different init/load strategy drops that to roughly half the "Node Buffer object" time instead. Do you think that's another optimization taking effect?

I updated the benchmark.js file in this gist with these updates if you want to re-run yourself!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment