Created
December 21, 2022 15:04
-
-
Save fabiospampinato/b434e799a50757c36613a4a3de176202 to your computer and use it in GitHub Desktop.
A little toy hash function that maybe works somewhat
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* IMPORT */ | |
const crypto = require ( 'crypto' ); | |
/* MAIN */ | |
// const hash = str => { | |
// const PRIMEn = 115792089237316195423570985008687907853269984665640564039457584007913129639349n; // 2**256-587 - https://primes.utm.edu/lists/2small/200bit.html | |
// const SEEDn = 340282366920938463463374607431768211099n; // 2**128-357 - https://primes.utm.edu/lists/2small/100bit.html | |
// const K_BYTES = 16; | |
// const K_BITS = K_BYTES * 8; | |
// const W_BYTES = K_BYTES * 2; | |
// const W_BITS = W_BYTES * 8; | |
// const K_BITSn = BigInt ( K_BITS ); | |
// const W_BITSn = BigInt ( W_BITS ); | |
// const K_MAXn = ( 2n ** K_BITSn ) - 1n; | |
// const W_MAXn = ( 2n ** W_BITSn ) - 1n; | |
// const KW_DELTA_BITSn = W_BITSn - K_BITSn; | |
// const encoder = new TextEncoder (); | |
// const bytes = encoder.encode ( str ); | |
// const bytesLength = bytes.length; | |
// let hash = SEEDn & K_MAXn; | |
// for ( let ci = 0, cl = Math.ceil ( bytesLength / K_BYTES ); ci < cl; ci++ ) { | |
// let chunk = 0n; | |
// for ( let bi = ci * K_BYTES, bl = bi + K_BYTES; bi < bl && bi < bytesLength; bi++ ) { | |
// chunk = ( chunk << 8n ) | BigInt ( bytes[bi] ); | |
// } | |
// let input = ( chunk << K_BITSn ) | hash; | |
// hash = ( ( input * PRIMEn ) & W_MAXn ) >> KW_DELTA_BITSn; | |
// } | |
// return hash; | |
// }; | |
// const hash = str => { | |
// const PRIMEn = 18446744073709551437n // 2**64-179 - https://primes.utm.edu/lists/2small/0bit.html | |
// const SEEDn = 0n; | |
// const K_BYTES = 8; | |
// const K_BITS = K_BYTES * 8; | |
// const W_BYTES = K_BYTES * 2; | |
// const W_BITS = W_BYTES * 8; | |
// const K_BITSn = BigInt ( K_BITS ); | |
// const W_BITSn = BigInt ( W_BITS ); | |
// const K_MAXn = ( 2n ** K_BITSn ) - 1n; | |
// const W_MAXn = ( 2n ** W_BITSn ) - 1n; | |
// const KW_DELTA_BITSn = W_BITSn - K_BITSn; | |
// const encoder = new TextEncoder (); | |
// let bytes = encoder.encode ( str ); | |
// const bytesLengthAligned = bytes.length + ( ( 8 - ( bytes.length % 8 ) ) % 8 ); | |
// if ( bytes.length !== bytesLengthAligned ) { | |
// const a = new ArrayBuffer ( bytesLengthAligned ); | |
// const bytes2 = new Uint8Array ( a ); | |
// bytes2.set ( bytes ); | |
// bytes = bytes2; | |
// } | |
// const bytesBigInt = new BigUint64Array ( bytes.buffer ); | |
// const bytesLength = bytesBigInt.length; | |
// let hash = SEEDn & K_MAXn; | |
// for ( let bi = 0, bl = bytesLength; bi < bl; bi++ ) { | |
// const chunk = bytesBigInt[bi]; | |
// const input = ( chunk << K_BITSn ) | hash; | |
// hash = ( ( input * PRIMEn ) & W_MAXn ) >> KW_DELTA_BITSn; | |
// } | |
// return hash; | |
// }; | |
const hash = str => { | |
const PRIME = 251; // 2**8-5 - https://primes.utm.edu/lists/2small/0bit.html | |
const SEED = 0; | |
const K_BYTES = 1; | |
const K_BITS = K_BYTES * 8; | |
const K_MAX = ( 2 ** K_BITS ) - 1; | |
const W_BYTES = K_BYTES * 2; | |
const W_BITS = W_BYTES * 8; | |
const W_MAX = ( 2 ** W_BITS ) - 1; | |
const KW_DELTA_BITS = W_BITS - K_BITS; | |
const encoder = new TextEncoder (); | |
let bytes = encoder.encode ( str ); | |
let bytesLength = bytes.length; | |
// const bytesLengthAligned = bytes.length + ( ( 8 - ( bytes.length % 8 ) ) % 8 ); | |
// if ( bytes.length !== bytesLengthAligned ) { | |
// const a = new ArrayBuffer ( bytesLengthAligned ); | |
// const bytes2 = new Uint8Array ( a ); | |
// bytes2.set ( bytes ); | |
// bytes = bytes2; | |
// } | |
// const bytesBigInt = new BigUint64Array ( bytes.buffer ); | |
// const bytesLength = bytesBigInt.length; | |
let hash = SEED & K_MAX; | |
for ( let bi = 0, bl = bytesLength; bi < bl; bi++ ) { | |
const chunk = bytes[bi]; | |
const input = ( chunk << K_BITS ) | hash; | |
hash = ( ( input * PRIME ) & W_MAX ) >> KW_DELTA_BITS; | |
} | |
return hash; | |
}; | |
/* BENCHMARK */ | |
{ // Hash | |
console.time('hash.small'); | |
const buckets = {}; | |
const bucketsSize = 1_000_000; | |
for ( let i = 0; i < bucketsSize; i++ ) { | |
const h = hash ( i.toString ( 32 ) ); | |
buckets[h] = ( buckets[h] || 0 ) + 1; | |
} | |
console.log ( `Collisions: ${bucketsSize - Object.values ( buckets ).length}` ); | |
console.timeEnd('hash.small'); | |
const big = 'a'.repeat ( 1000000 ); | |
console.time('hash.big'); | |
hash ( big ); | |
console.timeEnd('hash.big'); | |
} | |
{ // Sha1 | |
const sha1 = str => crypto.createHash ( 'sha1' ).update ( str ).digest ( 'hex' ); | |
console.time('sha1'); | |
const buckets = {}; | |
const bucketsSize = 1_000_000; | |
for ( let i = 0; i < bucketsSize; i++ ) { | |
const h = sha1 ( i.toString ( 32 ) ); | |
buckets[h] = ( buckets[h] || 0 ) + 1; | |
} | |
console.log ( `Collisions: ${bucketsSize - Object.values ( buckets ).length}` ); | |
console.timeEnd('sha1'); | |
const big = 'a'.repeat ( 1000000 ); | |
console.time('sha1.big'); | |
sha1 ( big ); | |
console.timeEnd('sha1.big'); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment