Skip to content

Instantly share code, notes, and snippets.

@vermorel
Created November 26, 2018 15:55
Show Gist options
  • Save vermorel/01446a53476d983988849726588156c6 to your computer and use it in GitHub Desktop.
Save vermorel/01446a53476d983988849726588156c6 to your computer and use it in GitHub Desktop.
Fast MurmurHash3 in C#
using System;
using System.Collections.Generic;
using Xunit;
namespace MyMurmurHash
{
/// <summary>
/// Fast hash - non-crypto secure. Intended to detect data storage
/// corruption.
/// </summary>
/// <remarks>
/// Based on an original implementation from Adam Horvath's
/// http://blog.teamleadnet.com/2012/08/murmurhash3-ultra-fast-hash-algorithm.html
/// </remarks>
public class MurmurHash3
{
// 128 bit output, 64 bit platform version
private const ulong ReadSize = 16;
private const ulong C1 = 0x87c37b91114253d5L;
private const ulong C2 = 0x4cf5ad432745937fL;
/// <remarks>A random default seed.</remarks>
private const uint DefaultSeed = 0xdbe22f02;
private readonly uint _seed;
public MurmurHash3()
{
_seed = DefaultSeed;
}
public MurmurHash3(uint seed)
{
_seed = seed;
}
public Hash128 ComputeHash(Span<byte> bb)
{
ulong h1 = _seed;
ulong h2 = 0;
ulong length = 0;
int pos = 0;
ulong remaining = (ulong)bb.Length;
// read 128 bits, 16 bytes, 2 longs in eacy cycle
while (remaining >= ReadSize)
{
ulong k1 = bb.GetUInt64(pos);
pos += 8;
ulong k2 = bb.GetUInt64(pos);
pos += 8;
length += ReadSize;
remaining -= ReadSize;
MixBody(k1, k2);
}
// if the input MOD 16 != 0
if (remaining > 0)
{
ProcessBytesRemaining(bb);
}
return CreateHash();
void MixBody(ulong k1, ulong k2)
{
h1 ^= MixKey1(k1);
h1 = h1.RotateLeft(27);
h1 += h2;
h1 = h1 * 5 + 0x52dce729;
h2 ^= MixKey2(k2);
h2 = h2.RotateLeft(31);
h2 += h1;
h2 = h2 * 5 + 0x38495ab5;
}
void ProcessBytesRemaining(Span<byte> rbb)
{
ulong k1 = 0;
ulong k2 = 0;
length += remaining;
// little endian (x86) processing
switch (remaining)
{
case 15:
k2 ^= (ulong)rbb[pos + 14] << 48; // fall through
goto case 14;
case 14:
k2 ^= (ulong)rbb[pos + 13] << 40; // fall through
goto case 13;
case 13:
k2 ^= (ulong)rbb[pos + 12] << 32; // fall through
goto case 12;
case 12:
k2 ^= (ulong)rbb[pos + 11] << 24; // fall through
goto case 11;
case 11:
k2 ^= (ulong)rbb[pos + 10] << 16; // fall through
goto case 10;
case 10:
k2 ^= (ulong)rbb[pos + 9] << 8; // fall through
goto case 9;
case 9:
k2 ^= rbb[pos + 8]; // fall through
goto case 8;
case 8:
k1 ^= rbb.GetUInt64(pos);
break;
case 7:
k1 ^= (ulong)rbb[pos + 6] << 48; // fall through
goto case 6;
case 6:
k1 ^= (ulong)rbb[pos + 5] << 40; // fall through
goto case 5;
case 5:
k1 ^= (ulong)rbb[pos + 4] << 32; // fall through
goto case 4;
case 4:
k1 ^= (ulong)rbb[pos + 3] << 24; // fall through
goto case 3;
case 3:
k1 ^= (ulong)rbb[pos + 2] << 16; // fall through
goto case 2;
case 2:
k1 ^= (ulong)rbb[pos + 1] << 8; // fall through
goto case 1;
case 1:
k1 ^= rbb[pos]; // fall through
break;
default:
throw new InvalidOperationException("Something went wrong with remaining bytes calculation.");
}
h1 ^= MixKey1(k1);
h2 ^= MixKey2(k2);
}
Hash128 CreateHash()
{
h1 ^= length;
h2 ^= length;
h1 += h2;
h2 += h1;
h1 = MixFinal(h1);
h2 = MixFinal(h2);
h1 += h2;
h2 += h1;
return new Hash128(h1, h2);
}
}
private static ulong MixKey1(ulong k1)
{
k1 *= C1;
k1 = k1.RotateLeft(31);
k1 *= C2;
return k1;
}
private static ulong MixKey2(ulong k2)
{
k2 *= C2;
k2 = k2.RotateLeft(33);
k2 *= C1;
return k2;
}
private static ulong MixFinal(ulong k)
{
// avalanche bits
k ^= k >> 33;
k *= 0xff51afd7ed558ccdL;
k ^= k >> 33;
k *= 0xc4ceb9fe1a85ec53L;
k ^= k >> 33;
return k;
}
}
static class IntHelpers
{
// PERF: 'RotateRight' might be the idiomatic flavor that gets the speed-up
// https://github.com/dotnet/coreclr/issues/1619
public static ulong RotateLeft(this ulong original, int bits)
{
return (original << bits) | (original >> (64 - bits));
}
public static ulong RotateRight(this ulong original, int bits)
{
return (original >> bits) | (original << (64 - bits));
}
public static ulong GetUInt64(this Span<byte> bb, int pos)
{
// PERF: use 'MemoryMarshal.Cast' to speed up this method
return (ulong)(
bb[pos++] << 0 | bb[pos++] << 8 | bb[pos++] << 16 | bb[pos++] << 24 |
bb[pos++] << 32 | bb[pos++] << 40 | bb[pos++] << 48 | bb[pos] << 56);
}
// [vermorel] 2018-02-13, no Span overload available yet for BitConverter
//public static ulong GetUInt64(this byte[] bb, int pos)
//{
// return BitConverter.ToUInt64(bb, pos);
//}
// [vermorel] 2018-02-13, we don't want to introduce unsafe code yet
//unsafe public static ulong GetUInt64(this byte[] bb, int pos)
//{
// // we only read aligned longs, so a simple casting is enough
// fixed (byte* pbyte = &bb[pos])
// {
// return *((ulong*)pbyte);
// }
//}
}
public class HashMurmurTests
{
[Fact(Skip="Hash128 override not implemented.")]
public void TestMurmurCollisions()
{
var murmur = new MurmurHash3();
const int numHashes = 1_000_000;
// Value is seed that generated the byte buffer for that hash
var randoms = new Dictionary<Hash128, int>();
for (var seed = 0; seed < numHashes; seed++)
{
var random = FromSeed(seed);
var hash = murmur.ComputeHash(new Span<byte>(random));
if (randoms.TryGetValue(hash, out var previous))
{
Assert.True(Equal(FromSeed(previous), random));
}
else
{
randoms.Add(hash, seed);
}
}
bool Equal(byte[] a, byte[] b)
{
if (a.Length != b.Length) return false;
for (var i = 0; i < a.Length; ++i)
if (a[i] != b[i])
return false;
return true;
}
byte[] FromSeed(int seed)
{
var rnd = new Random(seed);
var length = rnd.Next(0, 1024);
var random = new byte[length];
rnd.NextBytes(random);
return random;
}
}
[Fact]
public void ExplicitHashes()
{
var murmur = new MurmurHash3();
Assert.Equal(
new Hash128(0xE73861FF89CFCB94UL, 0x3D11EE9A4D2D8FDBUL),
murmur.ComputeHash(new Span<byte>(new byte[]
{
0x0C, 0x46, 0x6F, 0x5D, 0x75, 0xE4, 0xD8, 0xAD, 0x67, 0x65,
0xD5, 0xF5, 0x19, 0xDB, 0xC8, 0x2B, 0x7C, 0x34, 0x3B, 0x37,
0xF8, 0x85, 0x00, 0xEC, 0x5E, 0x64, 0x00, 0x53, 0x93, 0xB3,
0x0D, 0x4A, 0x40, 0x29, 0x75, 0xCF, 0x74, 0x6C, 0x0F, 0xF4,
0xBE, 0xC2, 0x4D, 0x1E, 0x04, 0xEB
})));
Assert.Equal(
new Hash128(0xEBE484D89481C7E5UL, 0xF3C4EC3250791AD4UL),
murmur.ComputeHash(new Span<byte>(new byte[]
{
0xD0, 0x86, 0x82, 0x40, 0x97, 0xE4, 0xA3, 0x95, 0xCF, 0xFF,
0x46, 0x69, 0x9C, 0x73, 0xC4
})));
Assert.Equal(
new Hash128(0xDB5727E7A452A0C3UL, 0x3218FE120EF26A02UL),
murmur.ComputeHash(new Span<byte>(new byte[]
{
0x93, 0xC6, 0x95, 0x24, 0xB9, 0xE3, 0x6F, 0x7C, 0x38, 0x98,
0xB7, 0xDD, 0x20, 0x0B, 0xC1, 0x16, 0x1E, 0xEC, 0x2E, 0xF0,
0xBD, 0x16, 0x46, 0xF2, 0xA9, 0xE6, 0x93, 0x6C, 0x62, 0x69,
0x33, 0xEF, 0x85, 0x5A, 0xB9, 0x0A, 0x53, 0xB0, 0xF5, 0xA8,
0x47, 0x81, 0x2F, 0xFF, 0x72, 0xBC, 0x65, 0x2B, 0xAE
})));
Assert.Equal(
new Hash128(0x509D537B30B13CC6UL, 0x712F56AB51DC5499UL),
murmur.ComputeHash(new Span<byte>(new byte[]
{
0x56, 0x05, 0xA9, 0x07, 0xDB, 0xE3, 0x3A, 0x63, 0xA0, 0x32,
0x27, 0x50, 0xA4, 0xA3, 0xBD, 0x8C, 0x6E, 0xC8
})));
Assert.Equal(
new Hash128(0x637EF97E412E4149UL, 0xEEA7C81F0FC34D1FUL),
murmur.ComputeHash(new Span<byte>(new byte[]
{
0x19, 0x45, 0xBC, 0xEB, 0xFD, 0xE2, 0x06, 0x4A, 0x08, 0xCC,
0x98, 0xC4, 0x27, 0x3A, 0xBA, 0x01, 0xBF, 0xA4, 0x20, 0xA8,
0x81, 0xA6, 0x8D, 0xF9, 0xF5, 0x68, 0x27, 0x84, 0x32, 0x20,
0x5A, 0x93, 0xC9, 0x8B, 0xFD, 0x46, 0x33, 0xF5, 0xDA, 0x5B,
0xCF, 0x40, 0x11, 0xE1, 0xE0, 0x8C, 0x7F, 0xA1, 0xC4, 0xA6,
0x66, 0x59
})));
Assert.Equal(
new Hash128(0x7391E9B717793FDBUL, 0x7931A53A82C2BDDDUL),
murmur.ComputeHash(new Span<byte>(new byte[]
{
0xDD, 0x85, 0xCF, 0xCE, 0x1E, 0xE2, 0xD1, 0x31, 0x71, 0x65,
0x09, 0x38, 0xAB, 0xD2, 0xB7, 0x76, 0x10, 0x80, 0x1A, 0x84,
0xE4
})));
Assert.Equal(
new Hash128(0xBDFE03BD2CADC0B3UL, 0xDEBCF6CB911681B0UL),
murmur.ComputeHash(new Span<byte>(new byte[]
{
0xA0, 0xC4, 0xE2, 0xB1, 0x40, 0xE1, 0x9D, 0x18, 0xD9, 0xFF,
0x7A, 0xAB, 0x2E, 0x6A, 0xB3, 0xEC, 0x60, 0x5C, 0x13, 0x60,
0x46, 0x37, 0xD4, 0xFF, 0x40, 0xEA, 0xBA, 0x9D, 0x01, 0xD7,
0x81, 0x38, 0x0D, 0xBB, 0x42, 0x82, 0x12, 0x39, 0xBF, 0x0F,
0x58, 0xFF, 0xF3, 0xC2, 0x4E, 0x5D, 0x99, 0x17, 0xDB, 0xFB,
0x45, 0x62, 0xEE, 0x55, 0xD9
})));
Assert.Equal(
new Hash128(0xF3E1643FCE9EB048UL, 0xB66D2B9F3C14788AUL),
murmur.ComputeHash(new Span<byte>(new byte[]
{
0x63, 0x04, 0xF6, 0x95, 0x62, 0xE1, 0x68, 0xFF, 0x41, 0x99,
0xEA, 0x1F, 0xB2, 0x01, 0xB0, 0x61, 0xB1, 0x38, 0x0D, 0x3C,
0xA8, 0xFF, 0x77, 0x82
})));
Assert.Equal(
new Hash128(0x9D9B12AEF8475FD5UL, 0xF2B6EE0739DBAE27UL),
murmur.ComputeHash(new Span<byte>(new byte[]
{
0x27, 0x44, 0x09, 0x78, 0x84, 0xE0, 0x34, 0xE6, 0xAA, 0x33,
0x5B, 0x93, 0x35, 0x99, 0xAD, 0xD7, 0x01, 0x14, 0x06, 0x18,
0x0A, 0xC7, 0x1A, 0x05, 0x8B, 0x6B, 0x4D, 0xB5, 0xD1, 0x8E,
0xA7, 0xDD, 0x51, 0xEC, 0x86, 0xBD, 0xF1, 0x7D, 0xA4, 0xC2,
0xE0, 0xBE, 0xD6, 0xA3, 0xBB, 0x2E, 0xB3, 0x8D, 0xF1, 0x4F,
0x24, 0x6C, 0x57, 0xA3, 0x6C, 0x5F, 0x29
})));
Assert.Equal(
new Hash128(0xDAFC6D939D01D280UL, 0x64DA2389BE032CF0UL),
murmur.ComputeHash(new Span<byte>(new byte[]
{
0xEA, 0x84, 0x1C, 0x5C, 0xA6, 0xDF, 0xFF, 0xCE, 0x12, 0xCC,
0xCC, 0x06, 0xB9, 0x31, 0xA9, 0x4C, 0x52, 0xF0, 0xFF, 0xF5,
0x6D, 0x8F, 0xBE, 0x88, 0xB1, 0xAC, 0x97
})));
// The code above was generated with:
//byte[] FromSeed(int seed)
//{
// var rnd = new Random(seed);
// var length = rnd.Next(0, 64);
// var random = new byte[length];
// rnd.NextBytes(random);
// return random;
//}
//var murmur = new MurmurHash3();
//for (var i = 0; i < 10; ++i)
//{
// var random = FromSeed(i);
// var hash = murmur.ComputeHash(new Span<byte>(random));
// Console.WriteLine("Assert.Equal(\n new Hash128(0x{0:X16}UL, 0x{1:X16}UL),\n murmur.ComputeHash(new Span<byte>(new byte[]{{ {2} }})));",
// hash.Left,
// hash.Right,
// string.Join(",", random.Select(b => $"0x{b:X2}")));
//}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment