Last active
January 12, 2026 04:48
-
-
Save jweinst1/4fc762249cc1243effb41d816bd51637 to your computer and use it in GitHub Desktop.
hashmap for murmur3
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <iostream> | |
| #include <vector> | |
| #include <unordered_map> | |
| #include <random> | |
| #include <chrono> | |
| #include <cstdint> | |
| // ======================================================= | |
| // MurmurHash3 x64 → 64-bit | |
| // ======================================================= | |
| static inline uint32_t rotl32(uint32_t x, int8_t r) { | |
| return (x << r) | (x >> (32 - r)); | |
| } | |
| static inline uint32_t fmix32(uint32_t h) { | |
| h ^= h >> 16; | |
| h *= 0x85ebca6b; | |
| h ^= h >> 13; | |
| h *= 0xc2b2ae35; | |
| h ^= h >> 16; | |
| return h; | |
| } | |
| uint32_t murmur3_32(const void* key, size_t len, uint32_t seed = 0) { | |
| const uint8_t* data = static_cast<const uint8_t*>(key); | |
| const size_t nblocks = len / 4; | |
| uint32_t h1 = seed; | |
| const uint32_t c1 = 0xcc9e2d51; | |
| const uint32_t c2 = 0x1b873593; | |
| // body | |
| const uint32_t* blocks = reinterpret_cast<const uint32_t*>(data); | |
| for (size_t i = 0; i < nblocks; i++) { | |
| uint32_t k1 = blocks[i]; | |
| k1 *= c1; | |
| k1 = rotl32(k1, 15); | |
| k1 *= c2; | |
| h1 ^= k1; | |
| h1 = rotl32(h1, 13); | |
| h1 = h1 * 5 + 0xe6546b64; | |
| } | |
| // tail | |
| const uint8_t* tail = data + nblocks * 4; | |
| uint32_t k1 = 0; | |
| switch (len & 3) { | |
| case 3: k1 ^= uint32_t(tail[2]) << 16; | |
| case 2: k1 ^= uint32_t(tail[1]) << 8; | |
| case 1: k1 ^= uint32_t(tail[0]); | |
| k1 *= c1; | |
| k1 = rotl32(k1, 15); | |
| k1 *= c2; | |
| h1 ^= k1; | |
| } | |
| // finalization | |
| h1 ^= static_cast<uint32_t>(len); | |
| h1 = fmix32(h1); | |
| return h1; | |
| } | |
| // ======================================================= | |
| // Linear-probing hash-only map | |
| // ======================================================= | |
| template<typename V> | |
| class HashOnlyLinearMap { | |
| struct KVPair { | |
| uint32_t _hash = 0; | |
| V _value; | |
| inline bool add(uint32_t hash, const V& val) { | |
| if (_hash == 0) { | |
| _hash = hash; | |
| _value = val; | |
| return true; | |
| } else if (_hash == hash) { | |
| _value = val; | |
| return true; | |
| } | |
| return false; | |
| } | |
| inline bool find(uint32_t hash, const V** val) const { | |
| if (_hash == 0) { | |
| return false; | |
| } else if (_hash == hash) { | |
| *val = &_value; | |
| return true; | |
| } | |
| return false; | |
| } | |
| }; | |
| struct Slot { | |
| KVPair pairs[8]; | |
| inline bool add(uint32_t hash, const V& val) { | |
| return pairs[0].add(hash, val) || | |
| pairs[1].add(hash, val) || | |
| pairs[2].add(hash, val) || | |
| pairs[3].add(hash, val) || | |
| pairs[4].add(hash, val) || | |
| pairs[5].add(hash, val) || | |
| pairs[6].add(hash, val) || | |
| pairs[7].add(hash, val); | |
| } | |
| inline bool find(uint32_t hash, const V** val) const { | |
| return pairs[0].find(hash, val) || | |
| pairs[1].find(hash, val) || | |
| pairs[2].find(hash, val) || | |
| pairs[3].find(hash, val) || | |
| pairs[4].find(hash, val) || | |
| pairs[5].find(hash, val) || | |
| pairs[6].find(hash, val) || | |
| pairs[7].find(hash, val); | |
| } | |
| }; | |
| std::vector<Slot> table; | |
| size_t mask; | |
| public: | |
| explicit HashOnlyLinearMap(size_t power_of_two_capacity) { | |
| size_t cap = 1ULL << power_of_two_capacity; | |
| table.resize(cap); | |
| mask = cap - 1; | |
| } | |
| void insert(uint32_t hash, const V& value) { | |
| size_t idx = hash & mask; | |
| while (true) { | |
| Slot& s = table[idx]; | |
| if (s.add(hash, value)) { | |
| return; | |
| } | |
| idx = (idx + 1) & mask; // linear probe | |
| } | |
| } | |
| const V* find(uint32_t hash) const { | |
| size_t idx = hash & mask; | |
| const V* myPtr = nullptr; | |
| while (true) { | |
| const Slot& s = table[idx]; | |
| if (s.find(hash, &myPtr)) { | |
| return myPtr; | |
| } | |
| idx = (idx + 1) & mask; | |
| } | |
| } | |
| }; | |
| // ======================================================= | |
| // Benchmark | |
| // ======================================================= | |
| int main() { | |
| constexpr size_t N = 10'000'000; | |
| std::vector<uint32_t> keys(N); | |
| std::mt19937_64 rng(123); | |
| for (auto& k : keys) k = rng(); | |
| // Custom map | |
| HashOnlyLinearMap<uint32_t> custom_map(26); // ~2M slots | |
| /* | |
| * Custom linear-probe map: 69689us | |
| std::unordered_map: 298924us | |
| no hash, per insert / lookup half | |
| Custom linear-probe map: 18881us | |
| std::unordered_map: 278250us | |
| jweinstein@JOSWEINS-M-J60X cwork % ./hashbench | |
| Custom linear-probe map: 91792us | |
| std::unordered_map: 3308128us | |
| 32bit | |
| jweinstein@JOSWEINS-M-J60X cwork % ./hashbench | |
| Custom linear-probe map: 75551us | |
| std::unordered_map: 3468358us | |
| */ | |
| for (auto k : keys) { | |
| uint32_t h = murmur3_32(&k, sizeof(k)); | |
| custom_map.insert(k, k); | |
| } | |
| auto t1 = std::chrono::high_resolution_clock::now(); | |
| uint32_t sum1 = 0; | |
| for (const auto& k : keys) { | |
| uint32_t h = murmur3_32(&k, sizeof(k)); | |
| if (auto* v = custom_map.find(k)) { | |
| sum1 += *v; | |
| } | |
| } | |
| auto t2 = std::chrono::high_resolution_clock::now(); | |
| // std::unordered_map | |
| std::unordered_map<uint32_t, std::vector<uint32_t>> std_map; | |
| auto t3 = std::chrono::high_resolution_clock::now(); | |
| for (auto k : keys) { | |
| std_map[k].push_back(k); | |
| } | |
| uint32_t sum2 = 0; | |
| for (auto k : keys) { | |
| for (auto x : std_map[k]) sum2 += x; | |
| } | |
| auto t4 = std::chrono::high_resolution_clock::now(); | |
| std::cout << "Custom linear-probe map: " | |
| << std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count() << "us\n"; | |
| std::cout << "std::unordered_map: " | |
| << std::chrono::duration_cast<std::chrono::microseconds>(t4 - t3).count() << "us\n"; | |
| std::cout << "Ignore sums: " << sum1 << " " << sum2 << "\n"; | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <iostream> | |
| #include <vector> | |
| #include <unordered_map> | |
| #include <random> | |
| #include <chrono> | |
| #include <cstdint> | |
| // ======================================================= | |
| // MurmurHash3 x64 → 64-bit | |
| // ======================================================= | |
| static inline uint64_t rotl64(uint64_t x, int8_t r) { | |
| return (x << r) | (x >> (64 - r)); | |
| } | |
| static inline uint64_t fmix64(uint64_t k) { | |
| k ^= k >> 33; | |
| k *= 0xff51afd7ed558ccdULL; | |
| k ^= k >> 33; | |
| k *= 0xc4ceb9fe1a85ec53ULL; | |
| k ^= k >> 33; | |
| return k; | |
| } | |
| uint64_t murmur3_64(const void* key, size_t len, uint64_t seed = 0) { | |
| const uint8_t* data = static_cast<const uint8_t*>(key); | |
| size_t nblocks = len / 16; | |
| uint64_t h1 = seed; | |
| uint64_t h2 = seed; | |
| const uint64_t c1 = 0x87c37b91114253d5ULL; | |
| const uint64_t c2 = 0x4cf5ad432745937fULL; | |
| const uint64_t* blocks = (const uint64_t*)data; | |
| for (size_t i = 0; i < nblocks; i++) { | |
| uint64_t k1 = blocks[i * 2]; | |
| uint64_t k2 = blocks[i * 2 + 1]; | |
| k1 *= c1; k1 = rotl64(k1, 31); k1 *= c2; h1 ^= k1; | |
| h1 = rotl64(h1, 27); h1 += h2; h1 = h1 * 5 + 0x52dce729; | |
| k2 *= c2; k2 = rotl64(k2, 33); k2 *= c1; h2 ^= k2; | |
| h2 = rotl64(h2, 31); h2 += h1; h2 = h2 * 5 + 0x38495ab5; | |
| } | |
| const uint8_t* tail = data + nblocks * 16; | |
| uint64_t k1 = 0, k2 = 0; | |
| switch (len & 15) { | |
| case 15: k2 ^= uint64_t(tail[14]) << 48; | |
| case 14: k2 ^= uint64_t(tail[13]) << 40; | |
| case 13: k2 ^= uint64_t(tail[12]) << 32; | |
| case 12: k2 ^= uint64_t(tail[11]) << 24; | |
| case 11: k2 ^= uint64_t(tail[10]) << 16; | |
| case 10: k2 ^= uint64_t(tail[9]) << 8; | |
| case 9: k2 ^= uint64_t(tail[8]); | |
| k2 *= c2; k2 = rotl64(k2, 33); k2 *= c1; h2 ^= k2; | |
| case 8: k1 ^= uint64_t(tail[7]) << 56; | |
| case 7: k1 ^= uint64_t(tail[6]) << 48; | |
| case 6: k1 ^= uint64_t(tail[5]) << 40; | |
| case 5: k1 ^= uint64_t(tail[4]) << 32; | |
| case 4: k1 ^= uint64_t(tail[3]) << 24; | |
| case 3: k1 ^= uint64_t(tail[2]) << 16; | |
| case 2: k1 ^= uint64_t(tail[1]) << 8; | |
| case 1: k1 ^= uint64_t(tail[0]); | |
| k1 *= c1; k1 = rotl64(k1, 31); k1 *= c2; h1 ^= k1; | |
| } | |
| h1 ^= len; | |
| h2 ^= len; | |
| h1 += h2; | |
| h2 += h1; | |
| h1 = fmix64(h1); | |
| h2 = fmix64(h2); | |
| h1 += h2; | |
| return h1; | |
| } | |
| // ======================================================= | |
| // Linear-probing hash-only map | |
| // ======================================================= | |
| template<typename V> | |
| class HashOnlyLinearMap { | |
| struct KVPair { | |
| uint64_t _hash = 0; | |
| V _value; | |
| inline bool add(uint64_t hash, const V& val) { | |
| if (_hash == 0) { | |
| _hash = hash; | |
| _value = val; | |
| return true; | |
| } else if (_hash == hash) { | |
| _value = val; | |
| return true; | |
| } | |
| return false; | |
| } | |
| inline bool find(uint64_t hash, const V** val) const { | |
| if (_hash == 0) { | |
| return false; | |
| } else if (_hash == hash) { | |
| *val = &_value; | |
| return true; | |
| } | |
| return false; | |
| } | |
| }; | |
| struct Slot { | |
| KVPair pairs[8]; | |
| inline bool add(uint64_t hash, const V& val) { | |
| return pairs[0].add(hash, val) || | |
| pairs[1].add(hash, val) || | |
| pairs[2].add(hash, val) || | |
| pairs[3].add(hash, val) || | |
| pairs[4].add(hash, val) || | |
| pairs[5].add(hash, val) || | |
| pairs[6].add(hash, val) || | |
| pairs[7].add(hash, val); | |
| } | |
| inline bool find(uint64_t hash, const V** val) const { | |
| return pairs[0].find(hash, val) || | |
| pairs[1].find(hash, val) || | |
| pairs[2].find(hash, val) || | |
| pairs[3].find(hash, val) || | |
| pairs[4].find(hash, val) || | |
| pairs[5].find(hash, val) || | |
| pairs[6].find(hash, val) || | |
| pairs[7].find(hash, val); | |
| } | |
| }; | |
| std::vector<Slot> table; | |
| size_t mask; | |
| public: | |
| explicit HashOnlyLinearMap(size_t power_of_two_capacity) { | |
| size_t cap = 1ULL << power_of_two_capacity; | |
| table.resize(cap); | |
| mask = cap - 1; | |
| } | |
| void insert(uint64_t hash, const V& value) { | |
| size_t idx = hash & mask; | |
| while (true) { | |
| Slot& s = table[idx]; | |
| if (s.add(hash, value)) { | |
| return; | |
| } | |
| idx = (idx + 1) & mask; // linear probe | |
| } | |
| } | |
| const V* find(uint64_t hash) const { | |
| size_t idx = hash & mask; | |
| const V* myPtr = nullptr; | |
| while (true) { | |
| const Slot& s = table[idx]; | |
| if (s.find(hash, &myPtr)) { | |
| return myPtr; | |
| } | |
| idx = (idx + 1) & mask; | |
| } | |
| } | |
| }; | |
| // ======================================================= | |
| // Benchmark | |
| // ======================================================= | |
| int main() { | |
| constexpr size_t N = 2'000'000; | |
| std::vector<uint64_t> keys(N); | |
| std::mt19937_64 rng(123); | |
| for (auto& k : keys) k = rng(); | |
| // Custom map | |
| HashOnlyLinearMap<uint64_t> custom_map(24); // ~2M slots | |
| /* | |
| * Custom linear-probe map: 69689us | |
| std::unordered_map: 298924us | |
| no hash, per insert / lookup half | |
| Custom linear-probe map: 18881us | |
| std::unordered_map: 278250us | |
| */ | |
| for (auto k : keys) { | |
| //uint64_t h = murmur3_64(&k, sizeof(k)); | |
| custom_map.insert(k, k); | |
| } | |
| auto t1 = std::chrono::high_resolution_clock::now(); | |
| uint64_t sum1 = 0; | |
| for (const auto& k : keys) { | |
| //uint64_t h = murmur3_64(&k, sizeof(k)); | |
| if (auto* v = custom_map.find(k)) { | |
| sum1 += *v; | |
| } | |
| } | |
| auto t2 = std::chrono::high_resolution_clock::now(); | |
| // std::unordered_map | |
| std::unordered_map<uint64_t, std::vector<uint64_t>> std_map; | |
| std_map.reserve(N); | |
| auto t3 = std::chrono::high_resolution_clock::now(); | |
| for (auto k : keys) { | |
| std_map[k].push_back(k); | |
| } | |
| uint64_t sum2 = 0; | |
| for (auto k : keys) { | |
| for (auto x : std_map[k]) sum2 += x; | |
| } | |
| auto t4 = std::chrono::high_resolution_clock::now(); | |
| std::cout << "Custom linear-probe map: " | |
| << std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count() << "us\n"; | |
| std::cout << "std::unordered_map: " | |
| << std::chrono::duration_cast<std::chrono::microseconds>(t4 - t3).count() << "us\n"; | |
| std::cout << "Ignore sums: " << sum1 << " " << sum2 << "\n"; | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <iostream> | |
| #include <vector> | |
| #include <unordered_map> | |
| #include <random> | |
| #include <chrono> | |
| #include <cstdint> | |
| // ======================================================= | |
| // MurmurHash3 x64 → 64-bit | |
| // ======================================================= | |
| static inline uint64_t rotl64(uint64_t x, int8_t r) { | |
| return (x << r) | (x >> (64 - r)); | |
| } | |
| static inline uint64_t fmix64(uint64_t k) { | |
| k ^= k >> 33; | |
| k *= 0xff51afd7ed558ccdULL; | |
| k ^= k >> 33; | |
| k *= 0xc4ceb9fe1a85ec53ULL; | |
| k ^= k >> 33; | |
| return k; | |
| } | |
| uint64_t murmur3_64(const void* key, size_t len, uint64_t seed = 0) { | |
| const uint8_t* data = static_cast<const uint8_t*>(key); | |
| size_t nblocks = len / 16; | |
| uint64_t h1 = seed; | |
| uint64_t h2 = seed; | |
| const uint64_t c1 = 0x87c37b91114253d5ULL; | |
| const uint64_t c2 = 0x4cf5ad432745937fULL; | |
| const uint64_t* blocks = (const uint64_t*)data; | |
| for (size_t i = 0; i < nblocks; i++) { | |
| uint64_t k1 = blocks[i * 2]; | |
| uint64_t k2 = blocks[i * 2 + 1]; | |
| k1 *= c1; k1 = rotl64(k1, 31); k1 *= c2; h1 ^= k1; | |
| h1 = rotl64(h1, 27); h1 += h2; h1 = h1 * 5 + 0x52dce729; | |
| k2 *= c2; k2 = rotl64(k2, 33); k2 *= c1; h2 ^= k2; | |
| h2 = rotl64(h2, 31); h2 += h1; h2 = h2 * 5 + 0x38495ab5; | |
| } | |
| const uint8_t* tail = data + nblocks * 16; | |
| uint64_t k1 = 0, k2 = 0; | |
| switch (len & 15) { | |
| case 15: k2 ^= uint64_t(tail[14]) << 48; | |
| case 14: k2 ^= uint64_t(tail[13]) << 40; | |
| case 13: k2 ^= uint64_t(tail[12]) << 32; | |
| case 12: k2 ^= uint64_t(tail[11]) << 24; | |
| case 11: k2 ^= uint64_t(tail[10]) << 16; | |
| case 10: k2 ^= uint64_t(tail[9]) << 8; | |
| case 9: k2 ^= uint64_t(tail[8]); | |
| k2 *= c2; k2 = rotl64(k2, 33); k2 *= c1; h2 ^= k2; | |
| case 8: k1 ^= uint64_t(tail[7]) << 56; | |
| case 7: k1 ^= uint64_t(tail[6]) << 48; | |
| case 6: k1 ^= uint64_t(tail[5]) << 40; | |
| case 5: k1 ^= uint64_t(tail[4]) << 32; | |
| case 4: k1 ^= uint64_t(tail[3]) << 24; | |
| case 3: k1 ^= uint64_t(tail[2]) << 16; | |
| case 2: k1 ^= uint64_t(tail[1]) << 8; | |
| case 1: k1 ^= uint64_t(tail[0]); | |
| k1 *= c1; k1 = rotl64(k1, 31); k1 *= c2; h1 ^= k1; | |
| } | |
| h1 ^= len; | |
| h2 ^= len; | |
| h1 += h2; | |
| h2 += h1; | |
| h1 = fmix64(h1); | |
| h2 = fmix64(h2); | |
| h1 += h2; | |
| return h1; | |
| } | |
| // ======================================================= | |
| // Linear-probing hash-only map | |
| // ======================================================= | |
| template<typename V> | |
| class HashOnlyLinearMap { | |
| struct Slot { | |
| uint64_t hash = 0; | |
| bool occupied = false; | |
| V value; | |
| }; | |
| std::vector<Slot> table; | |
| size_t mask; | |
| public: | |
| explicit HashOnlyLinearMap(size_t power_of_two_capacity) { | |
| size_t cap = 1ULL << power_of_two_capacity; | |
| table.resize(cap); | |
| mask = cap - 1; | |
| } | |
| void insert(uint64_t hash, const V& value) { | |
| size_t idx = hash & mask; | |
| while (true) { | |
| Slot& s = table[idx]; | |
| if (!s.occupied) { | |
| s.occupied = true; | |
| s.hash = hash; | |
| s.value = value; | |
| return; | |
| } | |
| if (s.hash == hash) { | |
| s.value = value; | |
| return; | |
| } | |
| idx = (idx + 1) & mask; // linear probe | |
| } | |
| } | |
| const V* find(uint64_t hash) const { | |
| size_t idx = hash & mask; | |
| while (true) { | |
| const Slot& s = table[idx]; | |
| if (!s.occupied) { | |
| return nullptr; | |
| } | |
| if (s.hash == hash) { | |
| return &s.value; | |
| } | |
| idx = (idx + 1) & mask; | |
| } | |
| } | |
| }; | |
| // ======================================================= | |
| // Benchmark | |
| // ======================================================= | |
| int main() { | |
| constexpr size_t N = 2'000'000; | |
| std::vector<uint64_t> keys(N); | |
| std::mt19937_64 rng(123); | |
| for (auto& k : keys) k = rng(); | |
| // Custom map | |
| HashOnlyLinearMap<uint64_t> custom_map(24); // ~2M slots | |
| /* | |
| * Custom linear-probe map: 69689us | |
| std::unordered_map: 298924us | |
| */ | |
| auto t1 = std::chrono::high_resolution_clock::now(); | |
| for (auto k : keys) { | |
| uint64_t h = murmur3_64(&k, sizeof(k)); | |
| custom_map.insert(h, k); | |
| } | |
| uint64_t sum1 = 0; | |
| for (const auto& k : keys) { | |
| uint64_t h = murmur3_64(&k, sizeof(k)); | |
| if (auto* v = custom_map.find(h)) { | |
| sum1 += *v; | |
| } | |
| } | |
| auto t2 = std::chrono::high_resolution_clock::now(); | |
| // std::unordered_map | |
| std::unordered_map<uint64_t, std::vector<uint64_t>> std_map; | |
| std_map.reserve(N); | |
| auto t3 = std::chrono::high_resolution_clock::now(); | |
| for (auto k : keys) { | |
| std_map[k].push_back(k); | |
| } | |
| uint64_t sum2 = 0; | |
| for (auto k : keys) { | |
| for (auto x : std_map[k]) sum2 += x; | |
| } | |
| auto t4 = std::chrono::high_resolution_clock::now(); | |
| std::cout << "Custom linear-probe map: " | |
| << std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count() << "us\n"; | |
| std::cout << "std::unordered_map: " | |
| << std::chrono::duration_cast<std::chrono::microseconds>(t4 - t3).count() << "us\n"; | |
| std::cout << "Ignore sums: " << sum1 << " " << sum2 << "\n"; | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <iostream> | |
| #include <vector> | |
| #include <unordered_map> | |
| #include <random> | |
| #include <chrono> | |
| #include <cstdint> | |
| // ======================================================= | |
| // MurmurHash3 x64 → 64-bit | |
| // ======================================================= | |
| static inline uint64_t rotl64(uint64_t x, int8_t r) { | |
| return (x << r) | (x >> (64 - r)); | |
| } | |
| static inline uint64_t fmix64(uint64_t k) { | |
| k ^= k >> 33; | |
| k *= 0xff51afd7ed558ccdULL; | |
| k ^= k >> 33; | |
| k *= 0xc4ceb9fe1a85ec53ULL; | |
| k ^= k >> 33; | |
| return k; | |
| } | |
| uint64_t murmur3_64(const void* key, size_t len, uint64_t seed = 0) { | |
| const uint8_t* data = static_cast<const uint8_t*>(key); | |
| size_t nblocks = len / 16; | |
| uint64_t h1 = seed; | |
| uint64_t h2 = seed; | |
| const uint64_t c1 = 0x87c37b91114253d5ULL; | |
| const uint64_t c2 = 0x4cf5ad432745937fULL; | |
| const uint64_t* blocks = (const uint64_t*)data; | |
| for (size_t i = 0; i < nblocks; i++) { | |
| uint64_t k1 = blocks[i * 2]; | |
| uint64_t k2 = blocks[i * 2 + 1]; | |
| k1 *= c1; k1 = rotl64(k1, 31); k1 *= c2; h1 ^= k1; | |
| h1 = rotl64(h1, 27); h1 += h2; h1 = h1 * 5 + 0x52dce729; | |
| k2 *= c2; k2 = rotl64(k2, 33); k2 *= c1; h2 ^= k2; | |
| h2 = rotl64(h2, 31); h2 += h1; h2 = h2 * 5 + 0x38495ab5; | |
| } | |
| const uint8_t* tail = data + nblocks * 16; | |
| uint64_t k1 = 0, k2 = 0; | |
| switch (len & 15) { | |
| case 15: k2 ^= uint64_t(tail[14]) << 48; | |
| case 14: k2 ^= uint64_t(tail[13]) << 40; | |
| case 13: k2 ^= uint64_t(tail[12]) << 32; | |
| case 12: k2 ^= uint64_t(tail[11]) << 24; | |
| case 11: k2 ^= uint64_t(tail[10]) << 16; | |
| case 10: k2 ^= uint64_t(tail[9]) << 8; | |
| case 9: k2 ^= uint64_t(tail[8]); | |
| k2 *= c2; k2 = rotl64(k2, 33); k2 *= c1; h2 ^= k2; | |
| case 8: k1 ^= uint64_t(tail[7]) << 56; | |
| case 7: k1 ^= uint64_t(tail[6]) << 48; | |
| case 6: k1 ^= uint64_t(tail[5]) << 40; | |
| case 5: k1 ^= uint64_t(tail[4]) << 32; | |
| case 4: k1 ^= uint64_t(tail[3]) << 24; | |
| case 3: k1 ^= uint64_t(tail[2]) << 16; | |
| case 2: k1 ^= uint64_t(tail[1]) << 8; | |
| case 1: k1 ^= uint64_t(tail[0]); | |
| k1 *= c1; k1 = rotl64(k1, 31); k1 *= c2; h1 ^= k1; | |
| } | |
| h1 ^= len; | |
| h2 ^= len; | |
| h1 += h2; | |
| h2 += h1; | |
| h1 = fmix64(h1); | |
| h2 = fmix64(h2); | |
| h1 += h2; | |
| return h1; | |
| } | |
| // ======================================================= | |
| // Linear-probing hash-only map | |
| // ======================================================= | |
| template<typename V> | |
| class HashOnlyLinearMap { | |
| struct Slot { | |
| uint64_t hash = 0; | |
| bool occupied = false; | |
| V value; | |
| }; | |
| std::vector<Slot> table; | |
| size_t mask; | |
| public: | |
| explicit HashOnlyLinearMap(size_t power_of_two_capacity) { | |
| size_t cap = 1ULL << power_of_two_capacity; | |
| table.resize(cap); | |
| mask = cap - 1; | |
| } | |
| void insert(uint64_t hash, const V& value) { | |
| size_t idx = hash & mask; | |
| while (true) { | |
| Slot& s = table[idx]; | |
| if (!s.occupied) { | |
| s.occupied = true; | |
| s.hash = hash; | |
| s.value = value; | |
| return; | |
| } | |
| if (s.hash == hash) { | |
| s.value = value; | |
| return; | |
| } | |
| idx = (idx + 1) & mask; // linear probe | |
| } | |
| } | |
| const V* find(uint64_t hash) const { | |
| size_t idx = hash & mask; | |
| while (true) { | |
| const Slot& s = table[idx]; | |
| if (!s.occupied) { | |
| return nullptr; | |
| } | |
| if (s.hash == hash) { | |
| return &s.value; | |
| } | |
| idx = (idx + 1) & mask; | |
| } | |
| } | |
| }; | |
| // ======================================================= | |
| // Benchmark | |
| // ======================================================= | |
| int main() { | |
| constexpr size_t N = 2'000'000; | |
| std::vector<uint64_t> keys(N); | |
| std::mt19937_64 rng(123); | |
| for (auto& k : keys) k = rng(); | |
| // Custom map | |
| HashOnlyLinearMap<uint64_t> custom_map(24); // ~2M slots | |
| /* | |
| * Custom linear-probe map: 69689us | |
| std::unordered_map: 298924us | |
| no hash, per insert / lookup half | |
| Custom linear-probe map: 18881us | |
| std::unordered_map: 278250us | |
| */ | |
| for (auto k : keys) { | |
| //uint64_t h = murmur3_64(&k, sizeof(k)); | |
| custom_map.insert(k, k); | |
| } | |
| auto t1 = std::chrono::high_resolution_clock::now(); | |
| uint64_t sum1 = 0; | |
| for (const auto& k : keys) { | |
| //uint64_t h = murmur3_64(&k, sizeof(k)); | |
| if (auto* v = custom_map.find(k)) { | |
| sum1 += *v; | |
| } | |
| } | |
| auto t2 = std::chrono::high_resolution_clock::now(); | |
| // std::unordered_map | |
| std::unordered_map<uint64_t, std::vector<uint64_t>> std_map; | |
| std_map.reserve(N); | |
| auto t3 = std::chrono::high_resolution_clock::now(); | |
| for (auto k : keys) { | |
| std_map[k].push_back(k); | |
| } | |
| uint64_t sum2 = 0; | |
| for (auto k : keys) { | |
| for (auto x : std_map[k]) sum2 += x; | |
| } | |
| auto t4 = std::chrono::high_resolution_clock::now(); | |
| std::cout << "Custom linear-probe map: " | |
| << std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count() << "us\n"; | |
| std::cout << "std::unordered_map: " | |
| << std::chrono::duration_cast<std::chrono::microseconds>(t4 - t3).count() << "us\n"; | |
| std::cout << "Ignore sums: " << sum1 << " " << sum2 << "\n"; | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <iostream> | |
| #include <vector> | |
| #include <unordered_map> | |
| #include <random> | |
| #include <chrono> | |
| #include <cstdint> | |
| // ======================================================= | |
| // MurmurHash3 x64 → 64-bit | |
| // ======================================================= | |
| static inline uint64_t rotl64(uint64_t x, int8_t r) { | |
| return (x << r) | (x >> (64 - r)); | |
| } | |
| static inline uint64_t fmix64(uint64_t k) { | |
| k ^= k >> 33; | |
| k *= 0xff51afd7ed558ccdULL; | |
| k ^= k >> 33; | |
| k *= 0xc4ceb9fe1a85ec53ULL; | |
| k ^= k >> 33; | |
| return k; | |
| } | |
| uint64_t murmur3_64(const void* key, size_t len, uint64_t seed = 0) { | |
| const uint8_t* data = static_cast<const uint8_t*>(key); | |
| size_t nblocks = len / 16; | |
| uint64_t h1 = seed; | |
| uint64_t h2 = seed; | |
| const uint64_t c1 = 0x87c37b91114253d5ULL; | |
| const uint64_t c2 = 0x4cf5ad432745937fULL; | |
| const uint64_t* blocks = (const uint64_t*)data; | |
| for (size_t i = 0; i < nblocks; i++) { | |
| uint64_t k1 = blocks[i * 2]; | |
| uint64_t k2 = blocks[i * 2 + 1]; | |
| k1 *= c1; k1 = rotl64(k1, 31); k1 *= c2; h1 ^= k1; | |
| h1 = rotl64(h1, 27); h1 += h2; h1 = h1 * 5 + 0x52dce729; | |
| k2 *= c2; k2 = rotl64(k2, 33); k2 *= c1; h2 ^= k2; | |
| h2 = rotl64(h2, 31); h2 += h1; h2 = h2 * 5 + 0x38495ab5; | |
| } | |
| const uint8_t* tail = data + nblocks * 16; | |
| uint64_t k1 = 0, k2 = 0; | |
| switch (len & 15) { | |
| case 15: k2 ^= uint64_t(tail[14]) << 48; | |
| case 14: k2 ^= uint64_t(tail[13]) << 40; | |
| case 13: k2 ^= uint64_t(tail[12]) << 32; | |
| case 12: k2 ^= uint64_t(tail[11]) << 24; | |
| case 11: k2 ^= uint64_t(tail[10]) << 16; | |
| case 10: k2 ^= uint64_t(tail[9]) << 8; | |
| case 9: k2 ^= uint64_t(tail[8]); | |
| k2 *= c2; k2 = rotl64(k2, 33); k2 *= c1; h2 ^= k2; | |
| case 8: k1 ^= uint64_t(tail[7]) << 56; | |
| case 7: k1 ^= uint64_t(tail[6]) << 48; | |
| case 6: k1 ^= uint64_t(tail[5]) << 40; | |
| case 5: k1 ^= uint64_t(tail[4]) << 32; | |
| case 4: k1 ^= uint64_t(tail[3]) << 24; | |
| case 3: k1 ^= uint64_t(tail[2]) << 16; | |
| case 2: k1 ^= uint64_t(tail[1]) << 8; | |
| case 1: k1 ^= uint64_t(tail[0]); | |
| k1 *= c1; k1 = rotl64(k1, 31); k1 *= c2; h1 ^= k1; | |
| } | |
| h1 ^= len; | |
| h2 ^= len; | |
| h1 += h2; | |
| h2 += h1; | |
| h1 = fmix64(h1); | |
| h2 = fmix64(h2); | |
| h1 += h2; | |
| return h1; | |
| } | |
| // ======================================================= | |
| // Linear-probing hash-only map | |
| // ======================================================= | |
| template<typename V> | |
| class HashOnlyLinearMap { | |
| struct KVPair { | |
| uint64_t _hash = 0; | |
| V _value; | |
| inline bool add(uint64_t hash, const V& val) { | |
| if (_hash == 0) { | |
| _hash = hash; | |
| _value = val; | |
| return true; | |
| } else if (_hash == hash) { | |
| _value = val; | |
| return true; | |
| } | |
| return false; | |
| } | |
| inline bool find(uint64_t hash, const V** val) const { | |
| if (_hash == 0) { | |
| return false; | |
| } else if (_hash == hash) { | |
| *val = &_value; | |
| return true; | |
| } | |
| return false; | |
| } | |
| }; | |
| struct Slot { | |
| KVPair pairs[8]; | |
| inline bool add(uint64_t hash, const V& val) { | |
| return pairs[0].add(hash, val) || | |
| pairs[1].add(hash, val) || | |
| pairs[2].add(hash, val) || | |
| pairs[3].add(hash, val) || | |
| pairs[4].add(hash, val) || | |
| pairs[5].add(hash, val) || | |
| pairs[6].add(hash, val) || | |
| pairs[7].add(hash, val); | |
| } | |
| inline bool find(uint64_t hash, const V** val) const { | |
| return pairs[0].find(hash, val) || | |
| pairs[1].find(hash, val) || | |
| pairs[2].find(hash, val) || | |
| pairs[3].find(hash, val) || | |
| pairs[4].find(hash, val) || | |
| pairs[5].find(hash, val) || | |
| pairs[6].find(hash, val) || | |
| pairs[7].find(hash, val); | |
| } | |
| }; | |
| std::vector<Slot> table; | |
| size_t mask; | |
| public: | |
| explicit HashOnlyLinearMap(size_t power_of_two_capacity) { | |
| size_t cap = 1ULL << power_of_two_capacity; | |
| table.resize(cap); | |
| mask = cap - 1; | |
| } | |
| void insert(uint64_t hash, const V& value) { | |
| size_t idx = hash & mask; | |
| while (true) { | |
| Slot& s = table[idx]; | |
| if (s.add(hash, value)) { | |
| return; | |
| } | |
| idx = (idx + 1) & mask; // linear probe | |
| } | |
| } | |
| const V* find(uint64_t hash) const { | |
| size_t idx = hash & mask; | |
| const V* myPtr = nullptr; | |
| while (true) { | |
| const Slot& s = table[idx]; | |
| if (s.find(hash, &myPtr)) { | |
| return myPtr; | |
| } | |
| idx = (idx + 1) & mask; | |
| } | |
| } | |
| }; | |
| // ======================================================= | |
| // Benchmark | |
| // ======================================================= | |
| int main() { | |
| constexpr size_t N = 10'000'000; | |
| std::vector<uint64_t> keys(N); | |
| std::mt19937_64 rng(123); | |
| for (auto& k : keys) k = rng(); | |
| // Custom map | |
| HashOnlyLinearMap<uint64_t> custom_map(24); // ~2M slots | |
| /* | |
| * Custom linear-probe map: 69689us | |
| std::unordered_map: 298924us | |
| no hash, per insert / lookup half | |
| Custom linear-probe map: 18881us | |
| std::unordered_map: 278250us | |
| jweinstein@JOSWEINS-M-J60X cwork % ./hashbench | |
| Custom linear-probe map: 91792us | |
| std::unordered_map: 3308128us | |
| */ | |
| for (auto k : keys) { | |
| //uint64_t h = murmur3_64(&k, sizeof(k)); | |
| custom_map.insert(k, k); | |
| } | |
| auto t1 = std::chrono::high_resolution_clock::now(); | |
| uint64_t sum1 = 0; | |
| for (const auto& k : keys) { | |
| //uint64_t h = murmur3_64(&k, sizeof(k)); | |
| if (auto* v = custom_map.find(k)) { | |
| sum1 += *v; | |
| } | |
| } | |
| auto t2 = std::chrono::high_resolution_clock::now(); | |
| // std::unordered_map | |
| std::unordered_map<uint64_t, std::vector<uint64_t>> std_map; | |
| auto t3 = std::chrono::high_resolution_clock::now(); | |
| for (auto k : keys) { | |
| std_map[k].push_back(k); | |
| } | |
| uint64_t sum2 = 0; | |
| for (auto k : keys) { | |
| for (auto x : std_map[k]) sum2 += x; | |
| } | |
| auto t4 = std::chrono::high_resolution_clock::now(); | |
| std::cout << "Custom linear-probe map: " | |
| << std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count() << "us\n"; | |
| std::cout << "std::unordered_map: " | |
| << std::chrono::duration_cast<std::chrono::microseconds>(t4 - t3).count() << "us\n"; | |
| std::cout << "Ignore sums: " << sum1 << " " << sum2 << "\n"; | |
| } |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The 10 million run for inline chaining does around 91ms for 10 million keys lookup