Skip to content

Instantly share code, notes, and snippets.

@rubensworks
Created September 20, 2024 14:17
Show Gist options
  • Save rubensworks/65eed6b1d9885565c6eae1ea53ef4694 to your computer and use it in GitHub Desktop.
Save rubensworks/65eed6b1d9885565c6eae1ea53ef4694 to your computer and use it in GitHub Desktop.
comunica-hash-perf-test.js
const BindingsFactory = require('./packages/bindings-factory').BindingsFactory;
const DataFactory = require('rdf-data-factory').DataFactory;
const { sha1 } = require('hash.js');
const canonicalize = require('canonicalize');
const { termToString } = require('rdf-string');
const MurmurHash3 = require('imurmurhash');
const xxHash32 = require('js-xxhash').xxHash32;
const DF = new DataFactory();
const BF = new BindingsFactory(DF);
function makeBinding(size, random, start = 0) {
let entries = [];
for (let i = start; i < size + start; i++) {
entries.push([DF.variable('a' + random + i), DF.namedNode('ex:a' + random + i) ])
}
return BF.bindings(entries);
}
function makeBindings(size, amount) {
const bindings = [];
for (let i = 0; i < amount; i++) {
bindings.push(makeBinding(size, Math.random(), i));
}
return bindings;
}
function runPhase(bindings, amount, hashFn) {
const index = new Set();
for (let i = 0; i < amount; i++) {
for (const binding of bindings) {
index.add(hashFn(binding));
}
}
}
function evaluateHashFn(name, bindings, hashFn, amount) {
console.log(`Evaluating ${name} with ${bindings.length} bindings with replication ${amount}...`); // TODO
runPhase(bindings, amount / 10, hashFn);
console.time(name);
runPhase(bindings, amount, hashFn);
console.timeEnd(name);
}
const hashFnSha1 = bindings => sha1()
.update([ ...bindings.values() ].map(value => value.value).join(''))
.digest('hex');
const hashFnSha1Number = bindings => sha1()
.update([ ...bindings.values() ].map(value => value.value).join(''))
.digest()[0];
const hashFnSimple = bindings => [ ...bindings.values() ]
.map(value => termToString(value))
.join('');
const hashFnSimpleClash = bindings => [ ...bindings.values() ]
.map(value => value.value)
.join('');
const hashFnMurMur = bindings => {
const h = MurmurHash3();
for (const v of bindings.values()) {
h.hash(v.value);
}
return h.result();
};
const hashFnMurMurConcat = bindings => {
return MurmurHash3([ ...bindings.values() ].map(value => value.value).join(''))
.result();
};
const hashFnxxHash = bindings =>
xxHash32([ ...bindings.values() ].map(value => value.value).join(''));
const REPLICATION = 1000;
console.log('Preparing...'); // TODO
const bindings = makeBindings(30, 1000);
evaluateHashFn('sha1', bindings, hashFnSha1, REPLICATION);
evaluateHashFn('sha1-number', bindings, hashFnSha1Number, REPLICATION);
evaluateHashFn('simple', bindings, hashFnSimple, REPLICATION);
evaluateHashFn('simple-clash', bindings, hashFnSimpleClash, REPLICATION);
evaluateHashFn('murmurhash', bindings, hashFnMurMur, REPLICATION);
evaluateHashFn('murmurhash-concat', bindings, hashFnMurMurConcat, REPLICATION);
evaluateHashFn('xxhash', bindings, hashFnxxHash, REPLICATION);
/*
Output:
Evaluating sha1 with 1000 bindings with replication 1000...
sha1: 24.584s
Evaluating sha1-number with 1000 bindings with replication 1000...
sha1-number: 22.567s
Evaluating simple with 1000 bindings with replication 1000...
simple: 5.829s
Evaluating simple-clash with 1000 bindings with replication 1000...
simple-clash: 5.757s
Evaluating murmurhash with 1000 bindings with replication 1000...
murmurhash: 4.747s
Evaluating murmurhash-concat with 1000 bindings with replication 1000...
murmurhash-concat: 4.957s
Evaluating xxhash with 1000 bindings with replication 1000...
xxhash: 5.305s
murmurhash-concat is fastest!
simple and simple-clash are faster if we're not indexing stuff.
So the number-based output of murmurhash matters in these cases.
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment