Created
September 26, 2015 23:12
-
-
Save goldsborough/5df674bd6f9b7ae5e781 to your computer and use it in GitHub Desktop.
A hash-map implementation using cuckoo-hashing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
template<typename Key, typename Value> | |
class Table | |
{ | |
public: | |
using size_t = std::size_t; | |
struct Item | |
{ | |
using hashes_t = std::pair<size_t, size_t>; | |
Item(const hashes_t& h, | |
const Key& k = Key(), | |
const Value& v = Value()) | |
: key(k) | |
, value(v) | |
, hashes(h) | |
{ } | |
Key key; | |
Value value; | |
hashes_t hashes; | |
}; | |
class Data | |
{ | |
public: | |
using item_ptr = Item*; | |
using data_t = item_ptr*; | |
using iterator = const data_t; | |
Data() | |
: _size(0) | |
, _items(nullptr) | |
{ } | |
Data(size_t size) | |
: _size(size) | |
, _items(new item_ptr[_size]) | |
{ } | |
Data(const Data& other) | |
: _size(other._size) | |
, _items(new item_ptr[_size]) | |
{ | |
std::copy(other.begin(), other.end(), begin()); | |
} | |
Data(Data&& other) noexcept | |
: Data() | |
{ | |
swap(other); | |
} | |
Data& operator=(Data other) | |
{ | |
swap(other); | |
return *this; | |
} | |
void swap(Data& other) noexcept | |
{ | |
using std::swap; | |
swap(_size, other._size); | |
swap(_items, other._items); | |
} | |
friend void swap(Data& first, Data& second) | |
{ | |
first.swap(second); | |
} | |
~Data() | |
{ | |
delete _items; | |
} | |
item_ptr& operator[](size_t index) const | |
{ | |
return _items[index]; | |
} | |
iterator begin() const | |
{ | |
return _items; | |
} | |
iterator end() const | |
{ | |
return _items + _size; | |
} | |
size_t size() const | |
{ | |
return _size; | |
} | |
void clear() | |
{ | |
for (auto& i : *this) delete i; | |
} | |
private: | |
size_t _size; | |
data_t _items; | |
}; | |
using item_ptr = typename Data::item_ptr; | |
using iterator = typename Data::iterator; | |
using constants_t = std::array<size_t, 3>; | |
enum Constants { A, B, PRIME }; | |
Table() = default; | |
Table(size_t table_size) | |
: _data(table_size) | |
{ | |
generate_constants(); | |
nullify(); | |
} | |
Table(const Table& other) | |
: _data(other._data) | |
, _constants(other._constants) | |
{ } | |
Table(Table&& other) noexcept | |
: Table() | |
{ | |
swap(other); | |
} | |
Table& operator=(Table other) | |
{ | |
swap(other); | |
return *this; | |
} | |
void swap(Table& other) noexcept | |
{ | |
using std::swap; | |
swap(_data, other._data); | |
swap(_constants, other._constants); | |
} | |
friend void swap(Table& first, Table& second) noexcept | |
{ | |
first.swap(second); | |
} | |
~Table() | |
{ | |
clear(); | |
} | |
iterator begin() const | |
{ | |
return _data.begin(); | |
} | |
iterator end() const | |
{ | |
return _data.end(); | |
} | |
item_ptr& operator[](size_t index) const | |
{ | |
return _data[index]; | |
} | |
size_t hash(size_t pre_hash) const | |
{ | |
size_t result = _constants[A] * pre_hash + _constants[B]; | |
return (result % _constants[PRIME]) % _data.size(); | |
} | |
void generate_constants() | |
{ | |
using distribution_t = std::uniform_int_distribution<size_t>; | |
static std::random_device seed; | |
static std::mt19937 generator(seed()); | |
static const size_t bit_width = sizeof(size_t) * 8; | |
distribution_t distribution(bit_width, 1E6); | |
_constants[A] = distribution(generator); | |
_constants[B] = distribution(generator); | |
do | |
{ | |
_constants[PRIME] = distribution(generator); | |
} while (! _is_prime(_constants[PRIME])); | |
} | |
const Data& items() const | |
{ | |
return _data; | |
} | |
void reset(size_t new_size) | |
{ | |
_data = Data(new_size); | |
nullify(); | |
} | |
void nullify() | |
{ | |
std::fill(begin(), end(), nullptr); | |
} | |
void clear() | |
{ | |
_data.clear(); | |
} | |
size_t size() const | |
{ | |
return _data.size(); | |
} | |
private: | |
static bool _is_prime(size_t value) | |
{ | |
if (value <= 1) return false; | |
if (value <= 3) return true; | |
if (value % 2 == 0 || value % 3 == 0) return false; | |
const size_t boundary = std::sqrt(value); | |
for (size_t prime = 5; prime <= boundary; prime += 6) | |
{ | |
if (value % prime == 0 || value % (prime + 2) == 0) | |
{ | |
return false; | |
} | |
} | |
return true; | |
} | |
Data _data; | |
constants_t _constants; | |
}; | |
template<typename Key, typename Value> | |
class CuckooHashMap | |
{ | |
public: | |
using size_t = std::size_t; | |
using pre_hash_t = std::function<size_t(const Key&)>; | |
static const size_t MINIMUM_CAPACITY = 16; | |
struct Pair | |
{ | |
Pair(const Key& k, Value& v) | |
: key(k) | |
, value(v) | |
{ } | |
const Key& key; | |
Value& value; | |
}; | |
CuckooHashMap(const pre_hash_t& pre_hash = std::hash<Key>(), | |
size_t capacity = MINIMUM_CAPACITY) | |
: _size(0) | |
, _capacity(capacity) | |
, _pre_hash(pre_hash) | |
, _tables({ | |
_capacity/2, | |
_capacity/2 | |
}) | |
{ } | |
CuckooHashMap(std::initializer_list<std::pair<Key, Value>> items, | |
const pre_hash_t& pre_hash = std::hash<Key>(), | |
size_t capacity = MINIMUM_CAPACITY) | |
: _size(0) | |
, _capacity(std::max(items.size(), capacity)) | |
, _pre_hash(pre_hash) | |
, _tables({ | |
_capacity/2, | |
_capacity/2 | |
}) | |
{ } | |
CuckooHashMap(const CuckooHashMap& other) | |
: _size(other._size) | |
, _capacity(other._capacity) | |
, _pre_hash(other._pre_hash) | |
, _tables(other._tables) | |
{ } | |
CuckooHashMap(CuckooHashMap&& other) noexcept | |
: CuckooHashMap() | |
{ | |
swap(other); | |
} | |
CuckooHashMap& operator=(CuckooHashMap other) | |
{ | |
swap(other); | |
return *this; | |
} | |
void swap(CuckooHashMap& other) noexcept | |
{ | |
using std::swap; | |
swap(_size, other._size); | |
swap(_capacity, other._capacity); | |
swap(_tables, other._tables); | |
swap(_pre_hash, other._pre_hash); | |
} | |
friend void swap(CuckooHashMap& first, CuckooHashMap& second) | |
{ | |
first.swap(second); | |
} | |
~CuckooHashMap() = default; | |
void insert(const Key& key, const Value& value) | |
{ | |
auto update = _try_update(key, value); | |
if (update.second) return; | |
_insert(new item_t(_hashes(key), key, value)); | |
} | |
template<typename Itr> | |
void insert(Itr begin, Itr end) | |
{ | |
for ( ; begin != end; ++begin) | |
{ | |
insert(*begin); | |
} | |
} | |
template<typename Itr> | |
void erase(Itr begin, Itr end) | |
{ | |
for ( ; begin != end; ++begin) | |
{ | |
erase(*begin); | |
} | |
} | |
void erase(const Key& key) | |
{ | |
if (! erase_if_found(key)) | |
{ | |
throw std::invalid_argument("No such key!"); | |
} | |
} | |
bool erase_if_found(const Key& key) | |
{ | |
auto hashes = _hashes(key); | |
auto& first = _first(hashes.first); | |
if (first && first->key == key) | |
{ | |
_erase(first); | |
return true; | |
} | |
auto& second = _second(hashes.second); | |
if (second && second->key == key) | |
{ | |
_erase(second); | |
return true; | |
} | |
return false; | |
} | |
void clear() | |
{ | |
_capacity = MINIMUM_CAPACITY; | |
for (auto& table : _tables) | |
{ | |
table.clear(); | |
table.reset(_capacity/2); | |
} | |
_size = 0; | |
} | |
Value& at(const Key& key) | |
{ | |
return _at(key); | |
} | |
const Value& at(const Key& key) const | |
{ | |
return _at(key); | |
} | |
bool contains(const Key& key) const | |
{ | |
auto hashes = _hashes(key); | |
auto first = _first(hashes.first); | |
if (first && first->key == key) | |
{ | |
return true; | |
} | |
auto second = _second(hashes.second); | |
if (second && second->key == key) | |
{ | |
return true; | |
} | |
return false; | |
} | |
Value& operator[](const Key& key) | |
{ | |
try | |
{ | |
return _at(key); | |
} | |
catch(std::invalid_argument&) | |
{ | |
auto p_item = new item_t(_hashes(key), key); | |
_insert(p_item); | |
return p_item->value; | |
} | |
} | |
bool insert_or_assign(const Key& key, Value&& value) | |
{ | |
try | |
{ | |
_at(key) = std::forward<Value>(value); | |
return false; | |
} | |
catch(std::invalid_argument&) | |
{ | |
_insert(new item_t(_hashes(key), key, value)); | |
return true; | |
} | |
} | |
size_t size() const | |
{ | |
return _size; | |
} | |
size_t capacity() const | |
{ | |
return _capacity; | |
} | |
bool is_empty() const | |
{ | |
return _size == 0; | |
} | |
const pre_hash_t& pre_hash() const | |
{ | |
return _pre_hash; | |
} | |
private: | |
static const size_t CYCLE_LIMIT = 16; | |
enum Index { FIRST, SECOND }; | |
using table_t = Table<Key, Value>; | |
using item_t = typename table_t::Item; | |
using item_ptr = item_t*; | |
using container_t = std::array<table_t, 2>; | |
using hashes_t = std::pair<size_t, size_t>; | |
using update_t = std::pair<hashes_t, bool>; | |
using data_t = std::array<typename table_t::Data, 2>; | |
void _insert(item_ptr p_item) | |
{ | |
size_t iterations = 0; | |
do | |
{ | |
if (++iterations > CYCLE_LIMIT) | |
{ | |
_rehash(_items()); | |
break; | |
} | |
std::swap(p_item, _first(p_item)); | |
if (! p_item) break; | |
std::swap(p_item, _second(p_item)); | |
} while(p_item); | |
if (++_size == _capacity/2) _resize(); | |
} | |
void _erase(item_ptr& p_item) | |
{ | |
delete p_item; | |
p_item = nullptr; | |
if (--_size == _capacity/8) _resize(); | |
} | |
Value& _at(const Key& key) const | |
{ | |
auto hashes = _hashes(key); | |
auto& first = _first(hashes.first); | |
if (first && first->key == key) | |
{ | |
return first->value; | |
} | |
auto& second = _second(hashes.second); | |
if (second && second->key == key) | |
{ | |
return second->value; | |
} | |
throw std::invalid_argument("No such key!"); | |
} | |
update_t _try_update(const Key& key, const Value& value) const | |
{ | |
auto hashes = _hashes(key); | |
auto& first = _first(hashes.first); | |
if (first && first->key == key) | |
{ | |
first->value = value; | |
return {hashes, true}; | |
} | |
auto& second = _second(hashes.second); | |
if (second && second->key == key) | |
{ | |
second->value = value; | |
return {hashes, true}; | |
} | |
return {hashes, false}; | |
} | |
hashes_t _hashes(const Key& key) const | |
{ | |
size_t pre_hash = _pre_hash(key); | |
size_t hash_1 = _tables[FIRST].hash(pre_hash); | |
size_t hash_2 = _tables[SECOND].hash(pre_hash); | |
return {hash_1, hash_2}; | |
} | |
inline item_ptr& _first(const item_ptr item) const | |
{ | |
return _first(item->hashes.first); | |
} | |
inline item_ptr& _second(const item_ptr item) const | |
{ | |
return _second(item->hashes.second); | |
} | |
inline item_ptr& _first(size_t hash) const | |
{ | |
return _tables[FIRST][hash]; | |
} | |
inline item_ptr& _second(size_t hash) const | |
{ | |
return _tables[SECOND][hash]; | |
} | |
data_t _items() | |
{ | |
auto& first = _tables[FIRST].items(); | |
auto& second = _tables[SECOND].items(); | |
return {first, second}; | |
} | |
void _resize() | |
{ | |
size_t new_capacity = _size * 4; | |
if (new_capacity < MINIMUM_CAPACITY) return; | |
size_t old_capacity = _capacity; | |
_capacity = new_capacity; | |
auto old = _items(); | |
for (auto& table : _tables) | |
{ | |
table.reset(_capacity); | |
} | |
_rehash(std::move(old), old_capacity); | |
} | |
void _rehash(data_t old, size_t old_capacity) | |
{ | |
size_t old_table_size = old_capacity/2; | |
do | |
{ | |
for (auto& table : _tables) | |
{ | |
table.nullify(); | |
table.generate_constants(); | |
} | |
} while (! _try_rehash(old, old_table_size)); | |
} | |
bool _try_rehash(data_t old, size_t old_table_size) | |
{ | |
for (auto& table : old) | |
{ | |
for (auto& p_item : table) | |
{ | |
if (! p_item) continue; | |
p_item->hashes = _hashes(p_item->key); | |
size_t iterations = 0; | |
do | |
{ | |
if (++iterations > CYCLE_LIMIT) return false; | |
std::swap(p_item, _first(p_item)); | |
if (! p_item) break; | |
std::swap(p_item, _second(p_item)); | |
} while(p_item); | |
} | |
} | |
return true; | |
} | |
void _rehash(data_t old) | |
{ | |
_rehash(std::move(old), _capacity); | |
} | |
size_t _size; | |
size_t _capacity; | |
container_t _tables; | |
pre_hash_t _pre_hash; | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment