Skip to content

Instantly share code, notes, and snippets.

@fabiospampinato
Created December 21, 2022 15:04
Show Gist options
  • Save fabiospampinato/b434e799a50757c36613a4a3de176202 to your computer and use it in GitHub Desktop.
Save fabiospampinato/b434e799a50757c36613a4a3de176202 to your computer and use it in GitHub Desktop.
A little toy hash function that maybe works somewhat
/* IMPORT */
const crypto = require ( 'crypto' );
/* MAIN */
// const hash = str => {
// const PRIMEn = 115792089237316195423570985008687907853269984665640564039457584007913129639349n; // 2**256-587 - https://primes.utm.edu/lists/2small/200bit.html
// const SEEDn = 340282366920938463463374607431768211099n; // 2**128-357 - https://primes.utm.edu/lists/2small/100bit.html
// const K_BYTES = 16;
// const K_BITS = K_BYTES * 8;
// const W_BYTES = K_BYTES * 2;
// const W_BITS = W_BYTES * 8;
// const K_BITSn = BigInt ( K_BITS );
// const W_BITSn = BigInt ( W_BITS );
// const K_MAXn = ( 2n ** K_BITSn ) - 1n;
// const W_MAXn = ( 2n ** W_BITSn ) - 1n;
// const KW_DELTA_BITSn = W_BITSn - K_BITSn;
// const encoder = new TextEncoder ();
// const bytes = encoder.encode ( str );
// const bytesLength = bytes.length;
// let hash = SEEDn & K_MAXn;
// for ( let ci = 0, cl = Math.ceil ( bytesLength / K_BYTES ); ci < cl; ci++ ) {
// let chunk = 0n;
// for ( let bi = ci * K_BYTES, bl = bi + K_BYTES; bi < bl && bi < bytesLength; bi++ ) {
// chunk = ( chunk << 8n ) | BigInt ( bytes[bi] );
// }
// let input = ( chunk << K_BITSn ) | hash;
// hash = ( ( input * PRIMEn ) & W_MAXn ) >> KW_DELTA_BITSn;
// }
// return hash;
// };
// const hash = str => {
// const PRIMEn = 18446744073709551437n // 2**64-179 - https://primes.utm.edu/lists/2small/0bit.html
// const SEEDn = 0n;
// const K_BYTES = 8;
// const K_BITS = K_BYTES * 8;
// const W_BYTES = K_BYTES * 2;
// const W_BITS = W_BYTES * 8;
// const K_BITSn = BigInt ( K_BITS );
// const W_BITSn = BigInt ( W_BITS );
// const K_MAXn = ( 2n ** K_BITSn ) - 1n;
// const W_MAXn = ( 2n ** W_BITSn ) - 1n;
// const KW_DELTA_BITSn = W_BITSn - K_BITSn;
// const encoder = new TextEncoder ();
// let bytes = encoder.encode ( str );
// const bytesLengthAligned = bytes.length + ( ( 8 - ( bytes.length % 8 ) ) % 8 );
// if ( bytes.length !== bytesLengthAligned ) {
// const a = new ArrayBuffer ( bytesLengthAligned );
// const bytes2 = new Uint8Array ( a );
// bytes2.set ( bytes );
// bytes = bytes2;
// }
// const bytesBigInt = new BigUint64Array ( bytes.buffer );
// const bytesLength = bytesBigInt.length;
// let hash = SEEDn & K_MAXn;
// for ( let bi = 0, bl = bytesLength; bi < bl; bi++ ) {
// const chunk = bytesBigInt[bi];
// const input = ( chunk << K_BITSn ) | hash;
// hash = ( ( input * PRIMEn ) & W_MAXn ) >> KW_DELTA_BITSn;
// }
// return hash;
// };
const hash = str => {
const PRIME = 251; // 2**8-5 - https://primes.utm.edu/lists/2small/0bit.html
const SEED = 0;
const K_BYTES = 1;
const K_BITS = K_BYTES * 8;
const K_MAX = ( 2 ** K_BITS ) - 1;
const W_BYTES = K_BYTES * 2;
const W_BITS = W_BYTES * 8;
const W_MAX = ( 2 ** W_BITS ) - 1;
const KW_DELTA_BITS = W_BITS - K_BITS;
const encoder = new TextEncoder ();
let bytes = encoder.encode ( str );
let bytesLength = bytes.length;
// const bytesLengthAligned = bytes.length + ( ( 8 - ( bytes.length % 8 ) ) % 8 );
// if ( bytes.length !== bytesLengthAligned ) {
// const a = new ArrayBuffer ( bytesLengthAligned );
// const bytes2 = new Uint8Array ( a );
// bytes2.set ( bytes );
// bytes = bytes2;
// }
// const bytesBigInt = new BigUint64Array ( bytes.buffer );
// const bytesLength = bytesBigInt.length;
let hash = SEED & K_MAX;
for ( let bi = 0, bl = bytesLength; bi < bl; bi++ ) {
const chunk = bytes[bi];
const input = ( chunk << K_BITS ) | hash;
hash = ( ( input * PRIME ) & W_MAX ) >> KW_DELTA_BITS;
}
return hash;
};
/* BENCHMARK */
{ // Hash
console.time('hash.small');
const buckets = {};
const bucketsSize = 1_000_000;
for ( let i = 0; i < bucketsSize; i++ ) {
const h = hash ( i.toString ( 32 ) );
buckets[h] = ( buckets[h] || 0 ) + 1;
}
console.log ( `Collisions: ${bucketsSize - Object.values ( buckets ).length}` );
console.timeEnd('hash.small');
const big = 'a'.repeat ( 1000000 );
console.time('hash.big');
hash ( big );
console.timeEnd('hash.big');
}
{ // Sha1
const sha1 = str => crypto.createHash ( 'sha1' ).update ( str ).digest ( 'hex' );
console.time('sha1');
const buckets = {};
const bucketsSize = 1_000_000;
for ( let i = 0; i < bucketsSize; i++ ) {
const h = sha1 ( i.toString ( 32 ) );
buckets[h] = ( buckets[h] || 0 ) + 1;
}
console.log ( `Collisions: ${bucketsSize - Object.values ( buckets ).length}` );
console.timeEnd('sha1');
const big = 'a'.repeat ( 1000000 );
console.time('sha1.big');
sha1 ( big );
console.timeEnd('sha1.big');
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment