Skip to content

Instantly share code, notes, and snippets.

@Azmisov
Last active March 29, 2025 09:43
Show Gist options
  • Save Azmisov/4d265e630ab068f945a72d3e59ea3cca to your computer and use it in GitHub Desktop.
Save Azmisov/4d265e630ab068f945a72d3e59ea3cca to your computer and use it in GitHub Desktop.
Benchmark javascript Set vs Array performance for inserting elements
import fs from "node:fs";
import { fileURLToPath } from 'url'
const __filename = fileURLToPath(import.meta.url)
import { Worker, isMainThread, parentPort } from 'node:worker_threads';
import { performance } from "node:perf_hooks";
/** Output filename */
const output_fname = "./results.csv";
/** Use a wrapper around Set, to make the comparison more even for inlined array operations */
const COMPARE_INLINED = true;
/** Approxzimate time each test should last for each size */
const TIME_PER_TEST = 500;
// global used to prevent possible optimization of unused results
let _x = 0;
/** Set implemented using an array internally */
class ArraySet{
constructor(){
this._arr = [];
}
add(value){
if (this._arr.indexOf(value) === -1)
this._arr.push(value);
return this;
}
delete(value){
const idx = this._arr.indexOf(value);
if (idx !== -1){
this._arr.splice(idx,1);
return true;
}
return false;
}
has(value){
return this._arr.indexOf(value) !== -1;
}
clear(){
this._arr.length = 0;
}
get size(){
return this._arr.length;
}
loop(){
for (const v of this._arr)
_x += v;
}
values(){
return this._arr.values();
}
[Symbol.iterator](){
return this._arr[Symbol.iterator]();
}
}
class WrappedSet{
constructor(){
this._set = new Set();
}
add(value){ return this._set.add(value); }
delete(value){ return this._set.delete(value); }
has(value){ return this._set.has(value); }
clear(){ return this._set.clear(); }
get size(){ return this._set.size; }
loop(){
for (const v of this._set)
_x += v;
}
values(){ return this._set.values(); }
[Symbol.iterator](){ return this._set[Symbol.iterator](); }
}
const test_cases = {
size(){ _x += this.size; },
add(v){ this.add(v); },
delete(v){ this.delete(v); },
has(v){ this.has(v); },
iter(){
for (const v of this)
_x += v;
},
values(){
for (const v of this.values())
_x += v;
},
loop(){ this.loop(); },
};
function shuffle(array){
for (let i = array.length - 1; i > 0; i--) {
const j = Math.floor(Math.random() * (i + 1));
[array[i], array[j]] = [array[j], array[i]];
}
}
/** Worker thread used to run tests
* @type {Worker}
*/
let worker;
/** Pending result from the worker
* @type {Promise}
*/
let pending_result;
let pending_resolve;
function worker_task(){
pending_result = new Promise(resolve => {
pending_resolve = resolve;
});
}
/** Flag which is set by parent to indicate the test should stop
* @type {SharedArrayBuffer}
*/
let stop_test_flag = new Uint8Array(new SharedArrayBuffer(1));
const test_modes = {
test: test,
avg_test: avg_test,
interlaced_avg_test: interlaced_avg_test
};
/** Run a test function on ArraySet or Set
* @param {function} fn test function; it will be bound to the ArraySet or Set and called with a single value arg
* @param {number} which 0 or 1, for ArraySet or Set respectively
* @param {number} size number of unique elements
* @param {number} modify fraction of size indicating how many times fn is called
* @param {number} duplicate when passing in a value to fn, how many of those values should
* be duplicates
* @param {number} samples how many samples to take
* @returns {object} `{duration: milliseconds, sample_speed: samples per millisecond}`
*/
function test({fn, which, size, modify=.5, duplicate=.2, samples=100000}={}){
const start = performance.now();
modify = Math.floor(size*modify);
const modify_old = Math.floor(duplicate*modify);
const modify_new = modify - modify_old;
let sample=0
for (; sample<samples; sample++){
// setup test case
let arr = [];
while (arr.length < size)
arr.push(Math.random());
// deduplicate
let set = new Set(arr);
arr = Array.from(set);
// get list of values to pass to function
let vals = [];
for (let i=0; i<modify_old; i++)
vals.push(arr[Math.floor(Math.random()*size)]);
for (let i=0; i<modify_new; i++)
vals.push(Math.random());
// shuffle to mess with branch prediction
shuffle(vals);
// run function on arrary or set, as specified by `which`
let arr_set = new ArraySet();
arr_set._arr = arr;
const objs = [arr_set, set];
if (COMPARE_INLINED){
const wset = new WrappedSet()
wset._set = set;
objs[1] = wset;
}
const obj = objs[which];
for (const val of vals)
fn.call(obj, val);
// prevent optimizing out unused code
_x += arr.length + set.size + arr_set.size + vals.length;
// parent thread indicates the test should stop
if (Atomics.compareExchange(stop_test_flag, 0, 1, 0))
break;
}
const end = performance.now();
const duration = end-start;
const sample_speed = sample/duration;
return {duration, sample_speed};
}
/** Take average of several tests, discarding the worst
* @param {number} runs number of runs to do
* @param {number} discard discard this many worst results
* @param opts forwarded to `test`
* @returns {number} average time taken in milliseconds
*/
function avg_test(runs, discard, opts){
let times = [];
for (let r=0; r<runs; r++)
times.push(test(opts).duration);
times.sort((a,b) => a-b);
let sum = 0;
let n = runs-discard;
for (let r=0; r<n; r++)
sum += times[r];
return sum/n;
}
/** Runs `avg_test` twice, interlacing whether we test ArraySet or Set
* @returns {number[]} average for [ArraySet, Set] respectively
*/
function interlaced_avg_test(runs, discard, opts){
const a_opts = {...opts, which:0};
const s_opts = {...opts, which:1}
// interlace, sometimes execution order seems to affect results
const aa = avg_test(runs, discard, a_opts);
const sa = avg_test(runs, discard, s_opts);
const ab = avg_test(runs, discard, a_opts);
const sb = avg_test(runs, discard, s_opts);
return [.5*(aa+ab), .5*(sa+sb)];
}
/** Run a test in the worker thread
* @param {string} mode which test function to run
* @param {array} args aruments to pass
* @returns {Promise} resolves when test has finished
*/
async function worker_test(mode, args){
worker_task();
// can't serialize reference to function
const opts = args.at(-1);
opts.fn = opts.fn.name;
worker.postMessage({mode, args});
return await pending_result;
}
/** Autodetect how many samples to take for a test to take a fixed amount of time
* @param {number} desired_time how many milliseconds the test should run for
* @param opts options for `test`
* @returns {number} samples to use
*/
async function autodetect_samples(desired_time, opts){
opts = Object.assign({}, opts);
opts.samples = Infinity;
setTimeout(() => {
Atomics.store(stop_test_flag, 0, 1);
}, desired_time*2);
const res = await worker_test("test", [opts]);
return res.sample_speed*desired_time;
}
const regression_x = [""];
const regression_y = [[]];
/** Run tests with progressively larger number of elements */
async function regression(fn){
const timeout = TIME_PER_TEST; // ms each test should take
const runs = 5;
const discard = 2;
let opts = {fn,size:0};
// increment array size and autodetect number of samples to do
async function get_next_opts(){
let size = opts.size;
// if (size < 15)
// size++;
// else if (size < 30)
// size += 5;
// else if (size < 100)
// size += 10;
// else if (size < 500)
// size += 50;
// else return null;
if (size < 100)
size++;
else return null;
// autodetect samples
const time_each = timeout/(2*runs);
const a_samples = await autodetect_samples(time_each, {...opts, size, which:0});
const s_samples = await autodetect_samples(time_each, {...opts, size, which:1});
const samples = Math.min(a_samples, s_samples);
return {...opts, size, samples};
}
let next_opts = get_next_opts();
let i = 1;
regression_y[0].push(fn.name);
console.log("testing "+fn.name);
while (true){
opts = await next_opts;
if (opts === null)
break;
// start finding next args in parallel
next_opts = get_next_opts();
// run test for current args
const [a_time, s_time] = interlaced_avg_test(runs, discard, opts);
// size -> how much faster set is than array (.5 = twice as fast, 1 = just as fast, 2 = twice as slow)
if (regression_x.length <= i){
regression_x.push(opts.size);
regression_y.push([]);
}
regression_y[i].push(s_time/a_time);
i++;
console.log(`${opts.size}\t${s_time/a_time}`);
}
}
async function main(){
for (const k in test_cases)
await regression(test_cases[k]);
let out = "";
for (let i=0; i<regression_x.length; i++){
let row = regression_y[i];
row.unshift(regression_x[i]);
out += row.join(",")+"\n";
}
console.log("Writing results to:", output_fname);
fs.writeFileSync(output_fname, out);
// cleanup and exit
worker.terminate();
if (_x === Infinity)
console.log("dummy:", _x);
}
// Thread setup
if (isMainThread){
// Setup worker thread to perform the test in
worker = new Worker(__filename);
worker.on("message", (result) => {
pending_resolve(result);
});
worker_task();
pending_result.then(main);
worker.postMessage({mode: "buffer", args: stop_test_flag});
}
else{
// worker thread that runs tests
parentPort.on("message", ({mode, args}) => {
if (mode === "buffer"){
stop_test_flag = args;
parentPort.postMessage("ready");
}
else{
const opts = args.at(-1);
opts.fn = test_cases[opts.fn];
parentPort.postMessage(test_modes[mode](...args));
}
});
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment