Created
July 6, 2018 12:18
-
-
Save iscgar/630cf5f5d8dbb8b8bd1b05b4f8796d21 to your computer and use it in GitHub Desktop.
A C++11 constexpr implementation of 32-bit MurmurHash3
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stddef.h> | |
#include <stdint.h> | |
#include <limits> | |
#include <type_traits> | |
namespace detail | |
{ | |
template<size_t R, class T> | |
static inline constexpr T rotl(const T v) | |
{ | |
static_assert(std::is_unsigned<T>::value, "Can only rotate unsigned integral types"); | |
return (v << (R % std::numeric_limits<T>::digits)) | | |
(v >> (std::numeric_limits<T>::digits - (R % std::numeric_limits<T>::digits))); | |
} | |
template<class T> | |
struct LittleEndianIndexer | |
{ | |
static inline constexpr size_t index_of(size_t v) { return v; } | |
}; | |
template<class T> | |
struct BigEndianIndexer | |
{ | |
static inline constexpr size_t index_of(size_t v) { return (sizeof(T) - v - 1); } | |
}; | |
template<class T, template<typename> class Indexer = LittleEndianIndexer> | |
struct IntegralByteView | |
{ | |
static_assert(std::is_integral<T>::value, "IntegralByteView supports only integral types"); | |
inline constexpr IntegralByteView(const T *const p, size_t n) : _p(p), _n(p ? n * sizeof(T) : 0) {} | |
inline constexpr size_t size() const { return _n; } | |
inline constexpr uint8_t byte_at(size_t n) const | |
{ | |
return n < _n ? | |
uint8_t(_p[n / sizeof(T)] >> (Indexer<T>::index_of(n % sizeof(T)) * std::numeric_limits<uint8_t>::digits)) : | |
throw int(); | |
} | |
private: | |
const T *const _p; | |
size_t _n; | |
}; | |
template<class T> struct MurmurHash3; | |
template<> | |
struct MurmurHash3<uint32_t> | |
{ | |
using value_type = uint32_t; | |
template<class T, template<typename> class Indexer> | |
inline constexpr MurmurHash3(const IntegralByteView<T, Indexer> &s, value_type seed) : | |
_value(fmix3(fmix2(fmix1(tail(body(seed, s, 0), s))))) | |
{ | |
} | |
inline constexpr operator value_type() const { return _value; } | |
private: | |
static constexpr const uint32_t C1 = 0xcc9e2d51; | |
static constexpr const uint32_t C2 = 0x1b873593; | |
static constexpr const size_t BODY_CHUNK_SIZE = 256; | |
template<class T, template<typename> class Indexer> | |
static inline constexpr value_type get_block(const IntegralByteView<T, Indexer> &s, size_t offset) | |
{ | |
return value_type(s.byte_at(offset)) | (value_type(s.byte_at(offset + 1)) << 8) | | |
(value_type(s.byte_at(offset + 2)) << 16) | (value_type(s.byte_at(offset + 3)) << 24); | |
} | |
template<class T, template<typename> class Indexer> | |
static inline constexpr value_type get_tail(const IntegralByteView<T, Indexer> &s, size_t offset) | |
{ | |
return offset >= s.size() ? 0 : | |
value_type(s.byte_at(offset)) | | |
(s.size() - offset > 1 ? (value_type(s.byte_at(offset + 1)) << 8) : 0) | | |
(s.size() - offset > 2 ? (value_type(s.byte_at(offset + 2)) << 16) : 0); | |
} | |
static inline constexpr value_type process_block(value_type k1, value_type h) | |
{ | |
return 0xe6546b64 + (5 * rotl<13>(h ^ (rotl<15>(k1 * C1) * C2))); | |
} | |
static inline constexpr value_type fmix1(value_type h) | |
{ | |
return (h ^ h >> 16) * 0x85ebca6b; | |
} | |
static inline constexpr value_type fmix2(value_type h) | |
{ | |
return (h ^ (h >> 13)) * 0xc2b2ae35; | |
} | |
static inline constexpr value_type fmix3(value_type h) | |
{ | |
return h ^ (h >> 16); | |
} | |
template<class T, template<typename> class Indexer> | |
static inline constexpr value_type body_chunk(value_type h, const IntegralByteView<T, Indexer> &s, size_t offset, size_t left) | |
{ | |
return left < 4 ? h : body_chunk(process_block(get_block(s, offset), h), s, offset + 4, left - 4); | |
} | |
template<class T, template<typename> class Indexer> | |
static inline constexpr value_type body(value_type h, const IntegralByteView<T, Indexer> &s, size_t offset) | |
{ | |
return (offset > s.size() || s.size() - offset < 4) ? h : | |
body(body_chunk(h, s, offset, s.size() - offset >= BODY_CHUNK_SIZE ? BODY_CHUNK_SIZE : s.size() - offset), | |
s, offset + BODY_CHUNK_SIZE); | |
} | |
template<class T, template<typename> class Indexer> | |
static inline constexpr value_type tail(value_type h, const IntegralByteView<T, Indexer> &s) | |
{ | |
return s.size() ^ h ^ (rotl<15>(get_tail(s, s.size() - (s.size() & 3)) * C1) * C2); | |
} | |
value_type _value; | |
}; | |
} | |
template<class T, class U> | |
inline constexpr T murmur_hash3(const U *s, size_t n, const T seed = 0) | |
{ | |
return detail::MurmurHash3<T>(detail::IntegralByteView<U>(s, n), seed); | |
} | |
namespace detail | |
{ | |
inline constexpr const char* _strnul_chunk(const char *s, size_t l) { return (!l || !s || !*s) ? s : _strnul_chunk(s + 1, l - 1); } | |
inline constexpr const char* strnul(const char *s) { return (!s || !*s) ? s : strnul(_strnul_chunk(s + 1, 250)); } | |
inline constexpr size_t strlen(const char *s) { return strnul(s) - s; } | |
} | |
template<class T> | |
inline constexpr T murmur_hash3(const char *s, const T seed = 0) | |
{ | |
return detail::MurmurHash3<T>(detail::IntegralByteView<char>(s, detail::strlen(s)), seed); | |
} | |
#include <stdio.h> | |
int main() | |
{ | |
static const struct | |
{ | |
uint8_t data[5]; | |
size_t length; | |
uint32_t seed; | |
uint32_t expected; | |
} test_vectors_u8[] = | |
{ | |
{ { }, 0, 0, 0 }, | |
{ { }, 0, 1, 0x514E28B7 }, | |
{ { }, 0, 0xffffffff, 0x81F16F39 }, | |
{ { 0xFF, 0xFF, 0xFF, 0xFF }, 4, 0, 0x76293B50 }, | |
{ { 0x21, 0x43, 0x65, 0x87 }, 4, 0, 0xF55B516B }, | |
{ { 0x21, 0x43, 0x65, 0x87 }, 4, 0x5082EDEE, 0x2362F9DE }, | |
{ { 0x21, 0x43, 0x65, }, 3, 0, 0x7E4A8634 }, | |
{ { 0x21, 0x43 }, 2, 0, 0xA0F7B07A }, | |
{ { 0x21 }, 1, 0, 0x72661CF4 }, | |
{ { 00, 00, 00, 00 }, 4, 0, 0x2362F9DE }, | |
{ { 00, 00, 00, }, 3, 0, 0x85F0B427 }, | |
{ { 00, 00 }, 2, 0, 0x30F4C306 }, | |
{ { 00 }, 1, 0, 0x514E28B7 }, | |
}; | |
for (auto &test : test_vectors_u8) | |
{ | |
uint32_t const hash = murmur_hash3<uint32_t>(test.data, test.length, test.seed); | |
printf("%d, %08x: %08x == %08x (%s)\n", test.length, test.seed, hash, test.expected, hash == test.expected ? "ok" : "fail"); | |
} | |
static const struct | |
{ | |
const char *s; | |
uint32_t expected; | |
} test_vectors_char[] = | |
{ | |
{"uhadrighea", 0x670f7f27}, | |
{"iuhaeruigh", 0x215f6cee}, | |
{"qeirugh", 0x3268ca3}, | |
{"iuqwehgiu", 0x243bd696}, | |
{"hwdfiug", 0xf3f4e343}, | |
{"h845gwhr89gh3j6w", 0xbdd359da}, | |
{"8978erthg", 0xf28f95a2}, | |
{"aes5y$%JU^WRS^&*EJ%YGHIA", 0x5fa8197c}, | |
{"jegAEJ%YJ", 0x31459048}, | |
{"WR(^HJ", 0x9b530e01}, | |
{"SFJN(", 0xc8381769}, | |
{"ERJ^HJKDR^YJHSDI*F", 0xff1580c3}, | |
{"JB*SEJTHI", 0x7e05a7f3}, | |
{"$^JK", 0xad57f96e}, | |
{"top7djSRTIJAS", 0xe699d7d3}, | |
{"E%TJCA#(ARHKFYO<<>PJKU", 0xc046b1d3}, | |
{"OZ,/TDKJSR56./,YJRTKJOPSJ$%HI", 0xee054684}, | |
{"SCJ%IGJAEFGJ", 0x11f310d8}, | |
{"ETH87,9\\[p[", 0xdcf11cd6}, | |
{"ISRTJHIS", 0x1fffa759}, | |
{"ROTNJH", 0x7826ea9b}, | |
{"ISEN%GIQ^", 0xf364917c}, | |
{"IURT^UIOEMR^Y", 0x64aafe4e}, | |
{"N3564w57E%&IDTK7mrynjwR^KJ", 0x4905f46f} | |
}; | |
for (auto &test : test_vectors_char) | |
{ | |
uint32_t const hash = murmur_hash3<uint32_t>(test.s); | |
printf("%-30s: %08x == %08x (%s)\n", test.s, hash, test.expected, hash == test.expected ? "ok" : "fail"); | |
} | |
static_assert(murmur_hash3<uint32_t>("Hello, world!") == 0xc0363e43, "fail"); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment