Created
September 20, 2024 14:17
-
-
Save rubensworks/65eed6b1d9885565c6eae1ea53ef4694 to your computer and use it in GitHub Desktop.
comunica-hash-perf-test.js
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const BindingsFactory = require('./packages/bindings-factory').BindingsFactory; | |
const DataFactory = require('rdf-data-factory').DataFactory; | |
const { sha1 } = require('hash.js'); | |
const canonicalize = require('canonicalize'); | |
const { termToString } = require('rdf-string'); | |
const MurmurHash3 = require('imurmurhash'); | |
const xxHash32 = require('js-xxhash').xxHash32; | |
const DF = new DataFactory(); | |
const BF = new BindingsFactory(DF); | |
function makeBinding(size, random, start = 0) { | |
let entries = []; | |
for (let i = start; i < size + start; i++) { | |
entries.push([DF.variable('a' + random + i), DF.namedNode('ex:a' + random + i) ]) | |
} | |
return BF.bindings(entries); | |
} | |
function makeBindings(size, amount) { | |
const bindings = []; | |
for (let i = 0; i < amount; i++) { | |
bindings.push(makeBinding(size, Math.random(), i)); | |
} | |
return bindings; | |
} | |
function runPhase(bindings, amount, hashFn) { | |
const index = new Set(); | |
for (let i = 0; i < amount; i++) { | |
for (const binding of bindings) { | |
index.add(hashFn(binding)); | |
} | |
} | |
} | |
function evaluateHashFn(name, bindings, hashFn, amount) { | |
console.log(`Evaluating ${name} with ${bindings.length} bindings with replication ${amount}...`); // TODO | |
runPhase(bindings, amount / 10, hashFn); | |
console.time(name); | |
runPhase(bindings, amount, hashFn); | |
console.timeEnd(name); | |
} | |
const hashFnSha1 = bindings => sha1() | |
.update([ ...bindings.values() ].map(value => value.value).join('')) | |
.digest('hex'); | |
const hashFnSha1Number = bindings => sha1() | |
.update([ ...bindings.values() ].map(value => value.value).join('')) | |
.digest()[0]; | |
const hashFnSimple = bindings => [ ...bindings.values() ] | |
.map(value => termToString(value)) | |
.join(''); | |
const hashFnSimpleClash = bindings => [ ...bindings.values() ] | |
.map(value => value.value) | |
.join(''); | |
const hashFnMurMur = bindings => { | |
const h = MurmurHash3(); | |
for (const v of bindings.values()) { | |
h.hash(v.value); | |
} | |
return h.result(); | |
}; | |
const hashFnMurMurConcat = bindings => { | |
return MurmurHash3([ ...bindings.values() ].map(value => value.value).join('')) | |
.result(); | |
}; | |
const hashFnxxHash = bindings => | |
xxHash32([ ...bindings.values() ].map(value => value.value).join('')); | |
const REPLICATION = 1000; | |
console.log('Preparing...'); // TODO | |
const bindings = makeBindings(30, 1000); | |
evaluateHashFn('sha1', bindings, hashFnSha1, REPLICATION); | |
evaluateHashFn('sha1-number', bindings, hashFnSha1Number, REPLICATION); | |
evaluateHashFn('simple', bindings, hashFnSimple, REPLICATION); | |
evaluateHashFn('simple-clash', bindings, hashFnSimpleClash, REPLICATION); | |
evaluateHashFn('murmurhash', bindings, hashFnMurMur, REPLICATION); | |
evaluateHashFn('murmurhash-concat', bindings, hashFnMurMurConcat, REPLICATION); | |
evaluateHashFn('xxhash', bindings, hashFnxxHash, REPLICATION); | |
/* | |
Output: | |
Evaluating sha1 with 1000 bindings with replication 1000... | |
sha1: 24.584s | |
Evaluating sha1-number with 1000 bindings with replication 1000... | |
sha1-number: 22.567s | |
Evaluating simple with 1000 bindings with replication 1000... | |
simple: 5.829s | |
Evaluating simple-clash with 1000 bindings with replication 1000... | |
simple-clash: 5.757s | |
Evaluating murmurhash with 1000 bindings with replication 1000... | |
murmurhash: 4.747s | |
Evaluating murmurhash-concat with 1000 bindings with replication 1000... | |
murmurhash-concat: 4.957s | |
Evaluating xxhash with 1000 bindings with replication 1000... | |
xxhash: 5.305s | |
murmurhash-concat is fastest! | |
simple and simple-clash are faster if we're not indexing stuff. | |
So the number-based output of murmurhash matters in these cases. | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment