Created
November 26, 2018 15:55
-
-
Save vermorel/01446a53476d983988849726588156c6 to your computer and use it in GitHub Desktop.
Fast MurmurHash3 in C#
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using Xunit; | |
namespace MyMurmurHash | |
{ | |
/// <summary> | |
/// Fast hash - non-crypto secure. Intended to detect data storage | |
/// corruption. | |
/// </summary> | |
/// <remarks> | |
/// Based on an original implementation from Adam Horvath's | |
/// http://blog.teamleadnet.com/2012/08/murmurhash3-ultra-fast-hash-algorithm.html | |
/// </remarks> | |
public class MurmurHash3 | |
{ | |
// 128 bit output, 64 bit platform version | |
private const ulong ReadSize = 16; | |
private const ulong C1 = 0x87c37b91114253d5L; | |
private const ulong C2 = 0x4cf5ad432745937fL; | |
/// <remarks>A random default seed.</remarks> | |
private const uint DefaultSeed = 0xdbe22f02; | |
private readonly uint _seed; | |
public MurmurHash3() | |
{ | |
_seed = DefaultSeed; | |
} | |
public MurmurHash3(uint seed) | |
{ | |
_seed = seed; | |
} | |
public Hash128 ComputeHash(Span<byte> bb) | |
{ | |
ulong h1 = _seed; | |
ulong h2 = 0; | |
ulong length = 0; | |
int pos = 0; | |
ulong remaining = (ulong)bb.Length; | |
// read 128 bits, 16 bytes, 2 longs in eacy cycle | |
while (remaining >= ReadSize) | |
{ | |
ulong k1 = bb.GetUInt64(pos); | |
pos += 8; | |
ulong k2 = bb.GetUInt64(pos); | |
pos += 8; | |
length += ReadSize; | |
remaining -= ReadSize; | |
MixBody(k1, k2); | |
} | |
// if the input MOD 16 != 0 | |
if (remaining > 0) | |
{ | |
ProcessBytesRemaining(bb); | |
} | |
return CreateHash(); | |
void MixBody(ulong k1, ulong k2) | |
{ | |
h1 ^= MixKey1(k1); | |
h1 = h1.RotateLeft(27); | |
h1 += h2; | |
h1 = h1 * 5 + 0x52dce729; | |
h2 ^= MixKey2(k2); | |
h2 = h2.RotateLeft(31); | |
h2 += h1; | |
h2 = h2 * 5 + 0x38495ab5; | |
} | |
void ProcessBytesRemaining(Span<byte> rbb) | |
{ | |
ulong k1 = 0; | |
ulong k2 = 0; | |
length += remaining; | |
// little endian (x86) processing | |
switch (remaining) | |
{ | |
case 15: | |
k2 ^= (ulong)rbb[pos + 14] << 48; // fall through | |
goto case 14; | |
case 14: | |
k2 ^= (ulong)rbb[pos + 13] << 40; // fall through | |
goto case 13; | |
case 13: | |
k2 ^= (ulong)rbb[pos + 12] << 32; // fall through | |
goto case 12; | |
case 12: | |
k2 ^= (ulong)rbb[pos + 11] << 24; // fall through | |
goto case 11; | |
case 11: | |
k2 ^= (ulong)rbb[pos + 10] << 16; // fall through | |
goto case 10; | |
case 10: | |
k2 ^= (ulong)rbb[pos + 9] << 8; // fall through | |
goto case 9; | |
case 9: | |
k2 ^= rbb[pos + 8]; // fall through | |
goto case 8; | |
case 8: | |
k1 ^= rbb.GetUInt64(pos); | |
break; | |
case 7: | |
k1 ^= (ulong)rbb[pos + 6] << 48; // fall through | |
goto case 6; | |
case 6: | |
k1 ^= (ulong)rbb[pos + 5] << 40; // fall through | |
goto case 5; | |
case 5: | |
k1 ^= (ulong)rbb[pos + 4] << 32; // fall through | |
goto case 4; | |
case 4: | |
k1 ^= (ulong)rbb[pos + 3] << 24; // fall through | |
goto case 3; | |
case 3: | |
k1 ^= (ulong)rbb[pos + 2] << 16; // fall through | |
goto case 2; | |
case 2: | |
k1 ^= (ulong)rbb[pos + 1] << 8; // fall through | |
goto case 1; | |
case 1: | |
k1 ^= rbb[pos]; // fall through | |
break; | |
default: | |
throw new InvalidOperationException("Something went wrong with remaining bytes calculation."); | |
} | |
h1 ^= MixKey1(k1); | |
h2 ^= MixKey2(k2); | |
} | |
Hash128 CreateHash() | |
{ | |
h1 ^= length; | |
h2 ^= length; | |
h1 += h2; | |
h2 += h1; | |
h1 = MixFinal(h1); | |
h2 = MixFinal(h2); | |
h1 += h2; | |
h2 += h1; | |
return new Hash128(h1, h2); | |
} | |
} | |
private static ulong MixKey1(ulong k1) | |
{ | |
k1 *= C1; | |
k1 = k1.RotateLeft(31); | |
k1 *= C2; | |
return k1; | |
} | |
private static ulong MixKey2(ulong k2) | |
{ | |
k2 *= C2; | |
k2 = k2.RotateLeft(33); | |
k2 *= C1; | |
return k2; | |
} | |
private static ulong MixFinal(ulong k) | |
{ | |
// avalanche bits | |
k ^= k >> 33; | |
k *= 0xff51afd7ed558ccdL; | |
k ^= k >> 33; | |
k *= 0xc4ceb9fe1a85ec53L; | |
k ^= k >> 33; | |
return k; | |
} | |
} | |
static class IntHelpers | |
{ | |
// PERF: 'RotateRight' might be the idiomatic flavor that gets the speed-up | |
// https://github.com/dotnet/coreclr/issues/1619 | |
public static ulong RotateLeft(this ulong original, int bits) | |
{ | |
return (original << bits) | (original >> (64 - bits)); | |
} | |
public static ulong RotateRight(this ulong original, int bits) | |
{ | |
return (original >> bits) | (original << (64 - bits)); | |
} | |
public static ulong GetUInt64(this Span<byte> bb, int pos) | |
{ | |
// PERF: use 'MemoryMarshal.Cast' to speed up this method | |
return (ulong)( | |
bb[pos++] << 0 | bb[pos++] << 8 | bb[pos++] << 16 | bb[pos++] << 24 | | |
bb[pos++] << 32 | bb[pos++] << 40 | bb[pos++] << 48 | bb[pos] << 56); | |
} | |
// [vermorel] 2018-02-13, no Span overload available yet for BitConverter | |
//public static ulong GetUInt64(this byte[] bb, int pos) | |
//{ | |
// return BitConverter.ToUInt64(bb, pos); | |
//} | |
// [vermorel] 2018-02-13, we don't want to introduce unsafe code yet | |
//unsafe public static ulong GetUInt64(this byte[] bb, int pos) | |
//{ | |
// // we only read aligned longs, so a simple casting is enough | |
// fixed (byte* pbyte = &bb[pos]) | |
// { | |
// return *((ulong*)pbyte); | |
// } | |
//} | |
} | |
public class HashMurmurTests | |
{ | |
[Fact(Skip="Hash128 override not implemented.")] | |
public void TestMurmurCollisions() | |
{ | |
var murmur = new MurmurHash3(); | |
const int numHashes = 1_000_000; | |
// Value is seed that generated the byte buffer for that hash | |
var randoms = new Dictionary<Hash128, int>(); | |
for (var seed = 0; seed < numHashes; seed++) | |
{ | |
var random = FromSeed(seed); | |
var hash = murmur.ComputeHash(new Span<byte>(random)); | |
if (randoms.TryGetValue(hash, out var previous)) | |
{ | |
Assert.True(Equal(FromSeed(previous), random)); | |
} | |
else | |
{ | |
randoms.Add(hash, seed); | |
} | |
} | |
bool Equal(byte[] a, byte[] b) | |
{ | |
if (a.Length != b.Length) return false; | |
for (var i = 0; i < a.Length; ++i) | |
if (a[i] != b[i]) | |
return false; | |
return true; | |
} | |
byte[] FromSeed(int seed) | |
{ | |
var rnd = new Random(seed); | |
var length = rnd.Next(0, 1024); | |
var random = new byte[length]; | |
rnd.NextBytes(random); | |
return random; | |
} | |
} | |
[Fact] | |
public void ExplicitHashes() | |
{ | |
var murmur = new MurmurHash3(); | |
Assert.Equal( | |
new Hash128(0xE73861FF89CFCB94UL, 0x3D11EE9A4D2D8FDBUL), | |
murmur.ComputeHash(new Span<byte>(new byte[] | |
{ | |
0x0C, 0x46, 0x6F, 0x5D, 0x75, 0xE4, 0xD8, 0xAD, 0x67, 0x65, | |
0xD5, 0xF5, 0x19, 0xDB, 0xC8, 0x2B, 0x7C, 0x34, 0x3B, 0x37, | |
0xF8, 0x85, 0x00, 0xEC, 0x5E, 0x64, 0x00, 0x53, 0x93, 0xB3, | |
0x0D, 0x4A, 0x40, 0x29, 0x75, 0xCF, 0x74, 0x6C, 0x0F, 0xF4, | |
0xBE, 0xC2, 0x4D, 0x1E, 0x04, 0xEB | |
}))); | |
Assert.Equal( | |
new Hash128(0xEBE484D89481C7E5UL, 0xF3C4EC3250791AD4UL), | |
murmur.ComputeHash(new Span<byte>(new byte[] | |
{ | |
0xD0, 0x86, 0x82, 0x40, 0x97, 0xE4, 0xA3, 0x95, 0xCF, 0xFF, | |
0x46, 0x69, 0x9C, 0x73, 0xC4 | |
}))); | |
Assert.Equal( | |
new Hash128(0xDB5727E7A452A0C3UL, 0x3218FE120EF26A02UL), | |
murmur.ComputeHash(new Span<byte>(new byte[] | |
{ | |
0x93, 0xC6, 0x95, 0x24, 0xB9, 0xE3, 0x6F, 0x7C, 0x38, 0x98, | |
0xB7, 0xDD, 0x20, 0x0B, 0xC1, 0x16, 0x1E, 0xEC, 0x2E, 0xF0, | |
0xBD, 0x16, 0x46, 0xF2, 0xA9, 0xE6, 0x93, 0x6C, 0x62, 0x69, | |
0x33, 0xEF, 0x85, 0x5A, 0xB9, 0x0A, 0x53, 0xB0, 0xF5, 0xA8, | |
0x47, 0x81, 0x2F, 0xFF, 0x72, 0xBC, 0x65, 0x2B, 0xAE | |
}))); | |
Assert.Equal( | |
new Hash128(0x509D537B30B13CC6UL, 0x712F56AB51DC5499UL), | |
murmur.ComputeHash(new Span<byte>(new byte[] | |
{ | |
0x56, 0x05, 0xA9, 0x07, 0xDB, 0xE3, 0x3A, 0x63, 0xA0, 0x32, | |
0x27, 0x50, 0xA4, 0xA3, 0xBD, 0x8C, 0x6E, 0xC8 | |
}))); | |
Assert.Equal( | |
new Hash128(0x637EF97E412E4149UL, 0xEEA7C81F0FC34D1FUL), | |
murmur.ComputeHash(new Span<byte>(new byte[] | |
{ | |
0x19, 0x45, 0xBC, 0xEB, 0xFD, 0xE2, 0x06, 0x4A, 0x08, 0xCC, | |
0x98, 0xC4, 0x27, 0x3A, 0xBA, 0x01, 0xBF, 0xA4, 0x20, 0xA8, | |
0x81, 0xA6, 0x8D, 0xF9, 0xF5, 0x68, 0x27, 0x84, 0x32, 0x20, | |
0x5A, 0x93, 0xC9, 0x8B, 0xFD, 0x46, 0x33, 0xF5, 0xDA, 0x5B, | |
0xCF, 0x40, 0x11, 0xE1, 0xE0, 0x8C, 0x7F, 0xA1, 0xC4, 0xA6, | |
0x66, 0x59 | |
}))); | |
Assert.Equal( | |
new Hash128(0x7391E9B717793FDBUL, 0x7931A53A82C2BDDDUL), | |
murmur.ComputeHash(new Span<byte>(new byte[] | |
{ | |
0xDD, 0x85, 0xCF, 0xCE, 0x1E, 0xE2, 0xD1, 0x31, 0x71, 0x65, | |
0x09, 0x38, 0xAB, 0xD2, 0xB7, 0x76, 0x10, 0x80, 0x1A, 0x84, | |
0xE4 | |
}))); | |
Assert.Equal( | |
new Hash128(0xBDFE03BD2CADC0B3UL, 0xDEBCF6CB911681B0UL), | |
murmur.ComputeHash(new Span<byte>(new byte[] | |
{ | |
0xA0, 0xC4, 0xE2, 0xB1, 0x40, 0xE1, 0x9D, 0x18, 0xD9, 0xFF, | |
0x7A, 0xAB, 0x2E, 0x6A, 0xB3, 0xEC, 0x60, 0x5C, 0x13, 0x60, | |
0x46, 0x37, 0xD4, 0xFF, 0x40, 0xEA, 0xBA, 0x9D, 0x01, 0xD7, | |
0x81, 0x38, 0x0D, 0xBB, 0x42, 0x82, 0x12, 0x39, 0xBF, 0x0F, | |
0x58, 0xFF, 0xF3, 0xC2, 0x4E, 0x5D, 0x99, 0x17, 0xDB, 0xFB, | |
0x45, 0x62, 0xEE, 0x55, 0xD9 | |
}))); | |
Assert.Equal( | |
new Hash128(0xF3E1643FCE9EB048UL, 0xB66D2B9F3C14788AUL), | |
murmur.ComputeHash(new Span<byte>(new byte[] | |
{ | |
0x63, 0x04, 0xF6, 0x95, 0x62, 0xE1, 0x68, 0xFF, 0x41, 0x99, | |
0xEA, 0x1F, 0xB2, 0x01, 0xB0, 0x61, 0xB1, 0x38, 0x0D, 0x3C, | |
0xA8, 0xFF, 0x77, 0x82 | |
}))); | |
Assert.Equal( | |
new Hash128(0x9D9B12AEF8475FD5UL, 0xF2B6EE0739DBAE27UL), | |
murmur.ComputeHash(new Span<byte>(new byte[] | |
{ | |
0x27, 0x44, 0x09, 0x78, 0x84, 0xE0, 0x34, 0xE6, 0xAA, 0x33, | |
0x5B, 0x93, 0x35, 0x99, 0xAD, 0xD7, 0x01, 0x14, 0x06, 0x18, | |
0x0A, 0xC7, 0x1A, 0x05, 0x8B, 0x6B, 0x4D, 0xB5, 0xD1, 0x8E, | |
0xA7, 0xDD, 0x51, 0xEC, 0x86, 0xBD, 0xF1, 0x7D, 0xA4, 0xC2, | |
0xE0, 0xBE, 0xD6, 0xA3, 0xBB, 0x2E, 0xB3, 0x8D, 0xF1, 0x4F, | |
0x24, 0x6C, 0x57, 0xA3, 0x6C, 0x5F, 0x29 | |
}))); | |
Assert.Equal( | |
new Hash128(0xDAFC6D939D01D280UL, 0x64DA2389BE032CF0UL), | |
murmur.ComputeHash(new Span<byte>(new byte[] | |
{ | |
0xEA, 0x84, 0x1C, 0x5C, 0xA6, 0xDF, 0xFF, 0xCE, 0x12, 0xCC, | |
0xCC, 0x06, 0xB9, 0x31, 0xA9, 0x4C, 0x52, 0xF0, 0xFF, 0xF5, | |
0x6D, 0x8F, 0xBE, 0x88, 0xB1, 0xAC, 0x97 | |
}))); | |
// The code above was generated with: | |
//byte[] FromSeed(int seed) | |
//{ | |
// var rnd = new Random(seed); | |
// var length = rnd.Next(0, 64); | |
// var random = new byte[length]; | |
// rnd.NextBytes(random); | |
// return random; | |
//} | |
//var murmur = new MurmurHash3(); | |
//for (var i = 0; i < 10; ++i) | |
//{ | |
// var random = FromSeed(i); | |
// var hash = murmur.ComputeHash(new Span<byte>(random)); | |
// Console.WriteLine("Assert.Equal(\n new Hash128(0x{0:X16}UL, 0x{1:X16}UL),\n murmur.ComputeHash(new Span<byte>(new byte[]{{ {2} }})));", | |
// hash.Left, | |
// hash.Right, | |
// string.Join(",", random.Select(b => $"0x{b:X2}"))); | |
//} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment