Created
September 2, 2015 20:24
-
-
Save H2CO3/01ea74b6848743117458 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// hash_table.hpp | |
// The Shift-C compiler | |
// | |
// Created by Arpad Goretity (H2CO3) | |
// on 02/06/2015 | |
// | |
// Licensed under the 2-clause BSD License | |
// | |
#ifndef SHIFTC_HASH_TABLE_HPP | |
#define SHIFTC_HASH_TABLE_HPP | |
#include <vector> | |
#include <functional> | |
#include <cstdlib> | |
#include <cassert> | |
#include "util.hpp" | |
namespace shc { | |
template< | |
typename Key, | |
typename Value, | |
typename Hash = std::hash<Key>, | |
typename Equal = std::equal_to<Key> | |
> | |
struct hash_table { | |
protected: | |
struct Slot { | |
Key key; | |
Value value; | |
bool used; | |
Slot() : key {}, value {}, used(false) {} | |
Slot(Key k, Value v) : key(std::move(k)), value(std::move(v)), used(true) {} | |
Slot(const Slot &) = default; | |
Slot(Slot &&other) : | |
key(std::move(other.key)), | |
value(std::move(other.value)), | |
used(other.used) | |
{ | |
other.used = false; | |
} | |
friend void swap(Slot &lhs, Slot &rhs) { | |
using std::swap; // enable ADL just in case | |
swap(lhs.key, rhs.key); | |
swap(lhs.value, rhs.value); | |
swap(lhs.used, rhs.used); | |
} | |
Slot &operator=(Slot other) { | |
swap(*this, other); | |
return *this; | |
} | |
}; | |
std::vector<Slot> slots; | |
size_t count; | |
size_t max_hash_offset; | |
// the sole purpose of this function is that we can | |
// explicitly call const member functions on 'this'. | |
auto cthis() const { | |
return this; | |
} | |
size_t key_index(const Key &key) const { | |
return Hash{}(key) & mask(); | |
} | |
size_t mask() const { | |
// table size must be a power of two | |
assert(slots.size() && !(slots.size() & (slots.size() - 1))); | |
return slots.size() - 1; | |
} | |
bool should_rehash() const { | |
// keep load factor below 0.75 | |
// this ratio is chosen carefully so that it can be optimized well: | |
// it is equivalent with ((size << 1) + size) >> 2. | |
return slots.empty() || count > slots.size() * 3 / 4; | |
} | |
const Slot *get_slot(const Key &key) const { | |
// do not try modulo by 0. An empty table has no values. | |
if (slots.empty()) { | |
return nullptr; | |
} | |
size_t i = key_index(key); | |
size_t hash_offset = 0; | |
// linear probing using a cached maximal probe sequence length. | |
// This avoids the need to mark deleted slots as special and | |
// fixes the performance problem whereby searching for a key after | |
// having performed lots of deletions results in O(n) running time. | |
// (max_hash_offset is one less than the length of the longest sequence.) | |
do { | |
if (slots[i].used && Equal{}(slots[i].key, key)) { | |
return &slots[i]; | |
} | |
i = (i + 1) & mask(); | |
hash_offset++; | |
} while (hash_offset <= max_hash_offset); | |
return nullptr; | |
} | |
Slot *get_slot(const Key &key) { | |
return const_cast<Slot *>(cthis()->get_slot(key)); | |
} | |
void insert_nonexistent_norehash(Key key, Value value) { | |
assert(should_rehash() == false); | |
assert(size() < slots.size()); // requires empty slots | |
assert(get_slot(key) == nullptr); | |
size_t i = key_index(key); | |
size_t hash_offset = 0; | |
// first, find an empty (unused) slot | |
while (slots[i].used) { | |
i = (i + 1) & mask(); | |
hash_offset++; | |
} | |
// then, perform the actual insertion. | |
// this also marks the slot as used. | |
slots[i] = { std::move(key), std::move(value) }; | |
assert(slots[i].used); | |
// unconditionally increment the size because | |
// we know that the key didn't exist before. | |
count++; | |
// finally, update maximal length of probe sequences (minus one) | |
if (hash_offset > max_hash_offset) { | |
max_hash_offset = hash_offset; | |
} | |
} | |
void rehash() { | |
// compute new size. Must be a power of two. | |
const size_t new_size = slots.empty() ? 8 : slots.size() * 2; | |
// move original slot array out of *this and reset internal state | |
auto old_slots = std::move(slots); | |
// language lawyer: move() need not clear std::vector. | |
// this->clear() takes care of that, however | |
// (as well as zeroing out count and max_hash_offset.) | |
clear(); | |
// make room for new slots (need to default-construct | |
// in order for them to be in an 'unused'/free state) | |
slots.resize(new_size); | |
// re-insert each key-value pair | |
for (auto &slot : old_slots) { | |
if (slot.used) { | |
insert_nonexistent_norehash(std::move(slot.key), std::move(slot.value)); | |
} | |
} | |
} | |
public: | |
// "pointed-to" type of iterators; a Key + Value pair | |
typedef std::pair<const Key, Value> key_value; | |
///////////////// | |
// Constuctors // | |
///////////////// | |
hash_table() noexcept : slots {}, count(0), max_hash_offset(0) {} | |
hash_table(const hash_table &other) = default; | |
hash_table(hash_table &&other) noexcept : | |
slots(std::move(other.slots)), | |
count(other.count), | |
max_hash_offset(other.max_hash_offset) | |
{ | |
other.clear(); | |
} | |
// naive implementation, may be improved. not sure if worth the effort. | |
hash_table(std::initializer_list<key_value> elems) : hash_table() { | |
for (const auto &elem : elems) { | |
// cannot move from an initializer_list | |
set(elem.first, elem.second); | |
} | |
} | |
///////////////////////// | |
// Resource management // | |
///////////////////////// | |
hash_table &operator=(hash_table other) noexcept { | |
swap(*this, other); | |
return *this; | |
} | |
friend void swap(hash_table &lhs, hash_table &rhs) noexcept { | |
using std::swap; | |
swap(lhs.slots, rhs.slots); | |
swap(lhs.count, rhs.count); | |
swap(lhs.max_hash_offset, rhs.max_hash_offset); | |
} | |
void clear() noexcept { | |
slots.clear(); | |
count = 0; | |
max_hash_offset = 0; | |
} | |
/////////////////////////////////////////////////////////////// | |
// Actual hash table operations: Get, Insert/Replace, Delete // | |
/////////////////////////////////////////////////////////////// | |
const Value *get(const Key &key) const { | |
if (const Slot *slot = get_slot(key)) { | |
return &slot->value; | |
} | |
return nullptr; | |
} | |
Value *get(const Key &key) { | |
if (Slot *slot = get_slot(key)) { | |
return &slot->value; | |
} | |
return nullptr; | |
} | |
void set(Key key, Value value) { | |
// if the key is already in the table, just replace it and move on | |
if (Value *candidate = get(key)) { | |
*candidate = std::move(value); | |
return; | |
} | |
// else we need to insert it. First, check if we need to expand. | |
if (should_rehash()) { | |
rehash(); | |
} | |
// then we actually insert the key. | |
insert_nonexistent_norehash(std::move(key), std::move(value)); | |
} | |
void remove(const Key &key) { | |
if (Slot *slot = get_slot(key)) { | |
// destroy key and value (we don't want to surprise users of RAII) | |
// This also marks the slot as unused. | |
*slot = {}; | |
assert(slot->used == false); | |
// removing an existing key means we need to decrease the table size. | |
count--; | |
} | |
} | |
size_t size() const { | |
return count; | |
} | |
bool empty() const { | |
return size() == 0; | |
} | |
double load_factor() const { | |
return double(size()) / slots.size(); | |
} | |
////////////////// | |
// Iterator API // | |
////////////////// | |
struct const_iterator { | |
protected: | |
friend struct hash_table; | |
const hash_table *owner; | |
size_t slot_index; | |
const_iterator(const hash_table *p_owner, size_t p_slot_index) : | |
owner(p_owner), | |
slot_index(p_slot_index) | |
{} | |
const_iterator(const const_iterator &other) = default; | |
public: | |
const key_value *operator->() const { | |
shc_assert(slot_index < owner->slots.size(), "cannot dereference end iterator"); | |
auto &slot = owner->slots[slot_index]; | |
// this is safe since pair<const Key, Value> and Slot { Key, Value, Bool } | |
// have a (layout-compatible) common initial sequence. | |
return reinterpret_cast<const key_value *>(&slot); | |
} | |
const key_value &operator*() const { | |
return *operator->(); | |
} | |
const_iterator &operator++() { | |
shc_assert(slot_index < owner->slots.size(), "cannot increment end iterator"); | |
do { | |
slot_index++; | |
} while (slot_index < owner->slots.size() && owner->slots[slot_index].used == false); | |
return *this; | |
} | |
const_iterator operator++(int) { | |
auto prev(*this); | |
++*this; | |
return prev; | |
} | |
bool operator==(const const_iterator &other) const { | |
return owner == other.owner && slot_index == other.slot_index; | |
} | |
bool operator!=(const const_iterator &other) const { | |
return !operator==(other); | |
} | |
}; | |
struct iterator : public const_iterator { | |
protected: | |
friend struct hash_table; | |
iterator(const const_iterator &other) : const_iterator(other) {} | |
public: | |
key_value &operator*() { | |
return *operator->(); | |
} | |
key_value *operator->() { | |
return const_cast<key_value *>( | |
static_cast<const const_iterator *>(this)->operator->() | |
); | |
} | |
}; | |
const_iterator begin() const { | |
auto it = const_iterator(this, 0); | |
while (it.slot_index < slots.size() && slots[it.slot_index].used == false) { | |
it.slot_index++; | |
} | |
return it; | |
} | |
const_iterator end() const { | |
return const_iterator(this, slots.size()); | |
} | |
iterator begin() { | |
return iterator(cthis()->begin()); | |
} | |
iterator end() { | |
return iterator(cthis()->end()); | |
} | |
const_iterator cbegin() const { | |
return begin(); | |
} | |
const_iterator cend() const { | |
return end(); | |
} | |
const_iterator find(const Key &key) const { | |
if (const Slot *slot = get_slot(key)) { | |
return const_iterator(this, slot - slots.data()); | |
} | |
return end(); | |
} | |
iterator find(const Key &key) { | |
return iterator(cthis()->find(key)); | |
} | |
void erase(const const_iterator &it) { | |
assert(it.owner == this); // cannot erase an element of another instance | |
remove(it->first); | |
} | |
}; | |
} | |
#endif // SHIFTC_HASH_TABLE_HPP |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment