Last active
March 29, 2025 09:43
-
-
Save Azmisov/4d265e630ab068f945a72d3e59ea3cca to your computer and use it in GitHub Desktop.
Benchmark JavaScript Set vs Array performance for inserting elements
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import fs from "node:fs"; | |
import { fileURLToPath } from 'url' | |
const __filename = fileURLToPath(import.meta.url) | |
import { Worker, isMainThread, parentPort } from 'node:worker_threads'; | |
import { performance } from "node:perf_hooks"; | |
/** Output filename for the CSV results */
const output_fname = "./results.csv";
/** Use a wrapper around Set, to make the comparison more even for inlined array operations */
const COMPARE_INLINED = true;
/** Approximate time (milliseconds) each test should last for each size */
const TIME_PER_TEST = 500;
// global accumulator used to prevent possible optimization of unused results
let _x = 0;
/** Set implemented using an array internally */
class ArraySet{
	constructor(){
		// backing array; the test harness also assigns this property directly
		this._arr = [];
	}
	/** Insert `value` if absent; returns this, mirroring Set.add chaining */
	add(value){
		if (!this.has(value))
			this._arr.push(value);
		return this;
	}
	/** Remove `value`; returns true if it was present */
	delete(value){
		const idx = this._arr.indexOf(value);
		if (idx === -1)
			return false;
		this._arr.splice(idx, 1);
		return true;
	}
	/** Linear-scan membership test */
	has(value){
		return this._arr.indexOf(value) >= 0;
	}
	/** Remove all elements, keeping the same backing array */
	clear(){
		this._arr.splice(0, this._arr.length);
	}
	/** Number of stored elements */
	get size(){
		return this._arr.length;
	}
	/** Sum all elements into the global accumulator */
	loop(){
		for (let i = 0; i < this._arr.length; i++)
			_x += this._arr[i];
	}
	/** Iterator over the stored values */
	values(){
		return this._arr.values();
	}
	// Array.prototype.values is the same function as its Symbol.iterator
	[Symbol.iterator](){
		return this._arr.values();
	}
}
/** Forwarding wrapper around a native Set, so method-call overhead matches
 * ArraySet when COMPARE_INLINED is enabled */
class WrappedSet{
	constructor(){
		// underlying Set; the test harness also assigns this property directly
		this._set = new Set();
	}
	/** Forwarded Set.add; note this returns the inner Set, not the wrapper */
	add(value){
		return this._set.add(value);
	}
	/** Forwarded Set.delete */
	delete(value){
		return this._set.delete(value);
	}
	/** Forwarded Set.has */
	has(value){
		return this._set.has(value);
	}
	/** Forwarded Set.clear */
	clear(){
		return this._set.clear();
	}
	/** Forwarded Set.size */
	get size(){
		return this._set.size;
	}
	/** Sum all elements into the global accumulator */
	loop(){
		for (const value of this._set)
			_x += value;
	}
	/** Forwarded Set.values */
	values(){
		return this._set.values();
	}
	// Set.prototype.values is the same function as its Symbol.iterator
	[Symbol.iterator](){
		return this._set.values();
	}
}
/** The operations to benchmark; each is invoked with `this` bound to either
 * an ArraySet or a (possibly wrapped) Set. Key order determines CSV column order. */
const test_cases = {
	// read the size property
	size(){
		_x += this.size;
	},
	// insert a value
	add(v){
		this.add(v);
	},
	// remove a value
	delete(v){
		this.delete(v);
	},
	// membership test
	has(v){
		this.has(v);
	},
	// iterate via the Symbol.iterator protocol
	iter(){
		for (const value of this)
			_x += value;
	},
	// iterate via an explicit values() iterator
	values(){
		for (const value of this.values())
			_x += value;
	},
	// iterate internally, without crossing a method boundary per element
	loop(){
		this.loop();
	},
};
/** In-place Fisher-Yates shuffle of `array` */
function shuffle(array){
	let i = array.length;
	while (--i > 0){
		// pick a random index in [0, i] and swap it into position i
		const j = Math.floor(Math.random()*(i + 1));
		const tmp = array[i];
		array[i] = array[j];
		array[j] = tmp;
	}
}
/** Worker thread used to run tests
 * @type {Worker}
 */
let worker;
/** Pending result from the worker
 * @type {Promise}
 */
let pending_result;
/** Resolver for `pending_result` */
let pending_resolve;
/** Arm a fresh pending result before dispatching a task to the worker */
function worker_task(){
	let resolver;
	const promise = new Promise(res => {
		resolver = res;
	});
	// the executor runs synchronously, so resolver is set by this point
	pending_resolve = resolver;
	pending_result = promise;
}
/** Flag which is set by parent to indicate the test should stop.
 * A one-byte Uint8Array view over a SharedArrayBuffer, so both threads
 * can read/write it through Atomics.
 * @type {Uint8Array}
 */
let stop_test_flag = new Uint8Array(new SharedArrayBuffer(1));
/** Maps the mode string sent between threads to the test runner it names */
const test_modes = {
	test,
	avg_test,
	interlaced_avg_test,
};
/** Run a test function on ArraySet or Set
 * @param {function} fn test function; it will be bound to the ArraySet or Set and called with a single value arg
 * @param {number} which 0 or 1, for ArraySet or Set respectively
 * @param {number} size number of unique elements
 * @param {number} modify fraction of size indicating how many times fn is called
 * @param {number} duplicate fraction of those calls whose value is already in the container
 * @param {number} samples how many samples to take
 * @returns {object} `{duration: milliseconds, sample_speed: samples per millisecond}`
 */
function test({fn, which, size, modify=.5, duplicate=.2, samples=100000}={}){
	const start = performance.now();
	// convert the fractions into absolute call counts
	const n_calls = Math.floor(size*modify);
	const n_existing = Math.floor(duplicate*n_calls);
	const n_fresh = n_calls - n_existing;
	let sample = 0;
	while (sample < samples){
		// build `size` random values, then deduplicate (collisions are vanishingly rare)
		let arr = [];
		while (arr.length < size)
			arr.push(Math.random());
		const set = new Set(arr);
		arr = Array.from(set);
		// values to feed to fn: some already present, some brand new
		const vals = [];
		for (let i = 0; i < n_existing; i++)
			vals.push(arr[Math.floor(Math.random()*size)]);
		for (let i = 0; i < n_fresh; i++)
			vals.push(Math.random());
		// shuffle to mess with branch prediction
		shuffle(vals);
		// pick the container under test, as specified by `which`
		const arr_set = new ArraySet();
		arr_set._arr = arr;
		const objs = [arr_set, set];
		if (COMPARE_INLINED){
			const wset = new WrappedSet();
			wset._set = set;
			objs[1] = wset;
		}
		const obj = objs[which];
		for (const val of vals)
			fn.call(obj, val);
		// prevent optimizing out unused code
		_x += arr.length + set.size + arr_set.size + vals.length;
		// parent thread indicates the test should stop (flag is reset as it's read)
		if (Atomics.compareExchange(stop_test_flag, 0, 1, 0))
			break;
		sample++;
	}
	const end = performance.now();
	const duration = end - start;
	return {duration, sample_speed: sample/duration};
}
/** Take average of several tests, discarding the worst
 * @param {number} runs number of runs to do
 * @param {number} discard discard this many worst (slowest) results
 * @param opts forwarded to `test`
 * @returns {number} average time taken in milliseconds
 */
function avg_test(runs, discard, opts){
	const times = [];
	for (let r = 0; r < runs; r++)
		times.push(test(opts).duration);
	// keep only the fastest (runs - discard) timings
	const kept = times.sort((a, b) => a - b).slice(0, runs - discard);
	const total = kept.reduce((acc, t) => acc + t, 0);
	return total/(runs - discard);
}
/** Runs `avg_test` twice per container, interlacing whether we test ArraySet or Set
 * @returns {number[]} average for [ArraySet, Set] respectively
 */
function interlaced_avg_test(runs, discard, opts){
	const arr_opts = {...opts, which: 0};
	const set_opts = {...opts, which: 1};
	// alternate A S A S; execution order sometimes seems to affect results
	const a1 = avg_test(runs, discard, arr_opts);
	const s1 = avg_test(runs, discard, set_opts);
	const a2 = avg_test(runs, discard, arr_opts);
	const s2 = avg_test(runs, discard, set_opts);
	return [(a1 + a2)/2, (s1 + s2)/2];
}
/** Run a test in the worker thread
 * @param {string} mode which test function to run (a key of `test_modes`)
 * @param {array} args arguments to pass to that function
 * @returns {Promise} resolves with the worker's result when the test has finished
 */
async function worker_test(mode, args){
	worker_task();
	// function references can't be serialized across threads; send the name
	// instead (note: this mutates the caller's options object)
	const opts = args[args.length - 1];
	opts.fn = opts.fn.name;
	worker.postMessage({mode, args});
	return await pending_result;
}
/** Autodetect how many samples to take for a test to take a fixed amount of time
 * @param {number} desired_time how many milliseconds the test should run for
 * @param opts options for `test`
 * @returns {number} samples to use (possibly fractional)
 */
async function autodetect_samples(desired_time, opts){
	// probe with unlimited samples; the stop flag ends the run instead
	const probe_opts = {...opts, samples: Infinity};
	// safety net: signal the worker to stop once twice the desired time has elapsed
	setTimeout(() => {
		Atomics.store(stop_test_flag, 0, 1);
	}, 2*desired_time);
	const {sample_speed} = await worker_test("test", [probe_opts]);
	return sample_speed*desired_time;
}
/** CSV x-axis: element counts tested; index 0 is the blank corner cell of the header row */
const regression_x = [""];
/** CSV columns: regression_y[0] accumulates test names (the header row);
 * regression_y[i] for i >= 1 accumulates, for size regression_x[i], one ratio per test */
const regression_y = [[]];
/** Run tests with progressively larger number of elements, recording how Set
 * performs relative to ArraySet for the given test case
 * @param {function} fn one of the `test_cases` functions to benchmark
 */
async function regression(fn){
	const timeout = TIME_PER_TEST; // ms each test should take
	const runs = 5;
	const discard = 2;
	let opts = {fn,size:0};
	// increment array size and autodetect number of samples to do
	async function get_next_opts(){
		let size = opts.size;
		// alternative, coarser size schedule kept for reference:
		// if (size < 15)
		// size++;
		// else if (size < 30)
		// size += 5;
		// else if (size < 100)
		// size += 10;
		// else if (size < 500)
		// size += 50;
		// else return null;
		if (size < 100)
			size++;
		else return null;
		// autodetect samples; split the per-size time budget between the two probe runs
		const time_each = timeout/(2*runs);
		const a_samples = await autodetect_samples(time_each, {...opts, size, which:0});
		const s_samples = await autodetect_samples(time_each, {...opts, size, which:1});
		// use the smaller estimate so neither container overshoots the budget
		const samples = Math.min(a_samples, s_samples);
		return {...opts, size, samples};
	}
	let next_opts = get_next_opts();
	let i = 1;
	// column header for this test case
	regression_y[0].push(fn.name);
	console.log("testing "+fn.name);
	while (true){
		opts = await next_opts;
		if (opts === null)
			break;
		// start finding next args in parallel (worker probes while this thread measures)
		next_opts = get_next_opts();
		// run test for current args
		const [a_time, s_time] = interlaced_avg_test(runs, discard, opts);
		// size -> how much faster set is than array (.5 = twice as fast, 1 = just as fast, 2 = twice as slow)
		if (regression_x.length <= i){
			// first test case to reach this size allocates the row
			regression_x.push(opts.size);
			regression_y.push([]);
		}
		regression_y[i].push(s_time/a_time);
		i++;
		console.log(`${opts.size}\t${s_time/a_time}`);
	}
}
/** Run every test case, then write the collected ratios to a CSV file */
async function main(){
	for (const k in test_cases)
		await regression(test_cases[k]);
	// assemble the CSV: prepend each row's size (blank for the header row), one line per row
	let out = "";
	for (const [i, x] of regression_x.entries()){
		const row = regression_y[i];
		row.unshift(x);
		out += row.join(",") + "\n";
	}
	console.log("Writing results to:", output_fname);
	fs.writeFileSync(output_fname, out);
	// cleanup and exit
	worker.terminate();
	// reference _x so the accumulator (and the work feeding it) can't be optimized away
	if (_x === Infinity)
		console.log("dummy:", _x);
}
// Thread setup: this file runs twice — once as the main thread (drives the
// benchmark) and once as a worker (executes the actual timed tests)
if (isMainThread){
	// Setup worker thread to perform the test in; the worker re-executes this
	// same file with isMainThread === false
	worker = new Worker(__filename);
	// every worker reply resolves the currently pending task promise
	worker.on("message", (result) => {
		pending_resolve(result);
	});
	worker_task();
	// the first message back is the worker's "ready" ack; then start the benchmark
	pending_result.then(main);
	// hand the shared stop flag to the worker so this thread can interrupt tests
	worker.postMessage({mode: "buffer", args: stop_test_flag});
}
else{
	// worker thread that runs tests
	parentPort.on("message", ({mode, args}) => {
		if (mode === "buffer"){
			// adopt the parent's shared stop-flag view, then ack readiness
			stop_test_flag = args;
			parentPort.postMessage("ready");
		}
		else{
			// function references can't cross threads: the last arg's fn arrives
			// as a name; resolve it back to the actual test case before running
			const opts = args.at(-1);
			opts.fn = test_cases[opts.fn];
			parentPort.postMessage(test_modes[mode](...args));
		}
	});
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment