Skip to content

Instantly share code, notes, and snippets.

@goldsborough
Created September 26, 2015 23:12
Show Gist options
  • Save goldsborough/5df674bd6f9b7ae5e781 to your computer and use it in GitHub Desktop.
Save goldsborough/5df674bd6f9b7ae5e781 to your computer and use it in GitHub Desktop.
A hash-map implementation using cuckoo-hashing.
template<typename Key, typename Value>
class Table
{
public:
using size_t = std::size_t;
struct Item
{
using hashes_t = std::pair<size_t, size_t>;
Item(const hashes_t& h,
const Key& k = Key(),
const Value& v = Value())
: key(k)
, value(v)
, hashes(h)
{ }
Key key;
Value value;
hashes_t hashes;
};
class Data
{
public:
using item_ptr = Item*;
using data_t = item_ptr*;
using iterator = const data_t;
Data()
: _size(0)
, _items(nullptr)
{ }
Data(size_t size)
: _size(size)
, _items(new item_ptr[_size])
{ }
Data(const Data& other)
: _size(other._size)
, _items(new item_ptr[_size])
{
std::copy(other.begin(), other.end(), begin());
}
Data(Data&& other) noexcept
: Data()
{
swap(other);
}
Data& operator=(Data other)
{
swap(other);
return *this;
}
void swap(Data& other) noexcept
{
using std::swap;
swap(_size, other._size);
swap(_items, other._items);
}
friend void swap(Data& first, Data& second)
{
first.swap(second);
}
~Data()
{
delete _items;
}
item_ptr& operator[](size_t index) const
{
return _items[index];
}
iterator begin() const
{
return _items;
}
iterator end() const
{
return _items + _size;
}
size_t size() const
{
return _size;
}
void clear()
{
for (auto& i : *this) delete i;
}
private:
size_t _size;
data_t _items;
};
using item_ptr = typename Data::item_ptr;
using iterator = typename Data::iterator;
using constants_t = std::array<size_t, 3>;
enum Constants { A, B, PRIME };
Table() = default;
Table(size_t table_size)
: _data(table_size)
{
generate_constants();
nullify();
}
Table(const Table& other)
: _data(other._data)
, _constants(other._constants)
{ }
Table(Table&& other) noexcept
: Table()
{
swap(other);
}
Table& operator=(Table other)
{
swap(other);
return *this;
}
void swap(Table& other) noexcept
{
using std::swap;
swap(_data, other._data);
swap(_constants, other._constants);
}
friend void swap(Table& first, Table& second) noexcept
{
first.swap(second);
}
~Table()
{
clear();
}
iterator begin() const
{
return _data.begin();
}
iterator end() const
{
return _data.end();
}
item_ptr& operator[](size_t index) const
{
return _data[index];
}
size_t hash(size_t pre_hash) const
{
size_t result = _constants[A] * pre_hash + _constants[B];
return (result % _constants[PRIME]) % _data.size();
}
void generate_constants()
{
using distribution_t = std::uniform_int_distribution<size_t>;
static std::random_device seed;
static std::mt19937 generator(seed());
static const size_t bit_width = sizeof(size_t) * 8;
distribution_t distribution(bit_width, 1E6);
_constants[A] = distribution(generator);
_constants[B] = distribution(generator);
do
{
_constants[PRIME] = distribution(generator);
} while (! _is_prime(_constants[PRIME]));
}
const Data& items() const
{
return _data;
}
void reset(size_t new_size)
{
_data = Data(new_size);
nullify();
}
void nullify()
{
std::fill(begin(), end(), nullptr);
}
void clear()
{
_data.clear();
}
size_t size() const
{
return _data.size();
}
private:
static bool _is_prime(size_t value)
{
if (value <= 1) return false;
if (value <= 3) return true;
if (value % 2 == 0 || value % 3 == 0) return false;
const size_t boundary = std::sqrt(value);
for (size_t prime = 5; prime <= boundary; prime += 6)
{
if (value % prime == 0 || value % (prime + 2) == 0)
{
return false;
}
}
return true;
}
Data _data;
constants_t _constants;
};
template<typename Key, typename Value>
class CuckooHashMap
{
public:
using size_t = std::size_t;
using pre_hash_t = std::function<size_t(const Key&)>;
static const size_t MINIMUM_CAPACITY = 16;
struct Pair
{
Pair(const Key& k, Value& v)
: key(k)
, value(v)
{ }
const Key& key;
Value& value;
};
CuckooHashMap(const pre_hash_t& pre_hash = std::hash<Key>(),
size_t capacity = MINIMUM_CAPACITY)
: _size(0)
, _capacity(capacity)
, _pre_hash(pre_hash)
, _tables({
_capacity/2,
_capacity/2
})
{ }
CuckooHashMap(std::initializer_list<std::pair<Key, Value>> items,
const pre_hash_t& pre_hash = std::hash<Key>(),
size_t capacity = MINIMUM_CAPACITY)
: _size(0)
, _capacity(std::max(items.size(), capacity))
, _pre_hash(pre_hash)
, _tables({
_capacity/2,
_capacity/2
})
{ }
CuckooHashMap(const CuckooHashMap& other)
: _size(other._size)
, _capacity(other._capacity)
, _pre_hash(other._pre_hash)
, _tables(other._tables)
{ }
CuckooHashMap(CuckooHashMap&& other) noexcept
: CuckooHashMap()
{
swap(other);
}
CuckooHashMap& operator=(CuckooHashMap other)
{
swap(other);
return *this;
}
void swap(CuckooHashMap& other) noexcept
{
using std::swap;
swap(_size, other._size);
swap(_capacity, other._capacity);
swap(_tables, other._tables);
swap(_pre_hash, other._pre_hash);
}
friend void swap(CuckooHashMap& first, CuckooHashMap& second)
{
first.swap(second);
}
~CuckooHashMap() = default;
void insert(const Key& key, const Value& value)
{
auto update = _try_update(key, value);
if (update.second) return;
_insert(new item_t(_hashes(key), key, value));
}
template<typename Itr>
void insert(Itr begin, Itr end)
{
for ( ; begin != end; ++begin)
{
insert(*begin);
}
}
template<typename Itr>
void erase(Itr begin, Itr end)
{
for ( ; begin != end; ++begin)
{
erase(*begin);
}
}
void erase(const Key& key)
{
if (! erase_if_found(key))
{
throw std::invalid_argument("No such key!");
}
}
bool erase_if_found(const Key& key)
{
auto hashes = _hashes(key);
auto& first = _first(hashes.first);
if (first && first->key == key)
{
_erase(first);
return true;
}
auto& second = _second(hashes.second);
if (second && second->key == key)
{
_erase(second);
return true;
}
return false;
}
void clear()
{
_capacity = MINIMUM_CAPACITY;
for (auto& table : _tables)
{
table.clear();
table.reset(_capacity/2);
}
_size = 0;
}
Value& at(const Key& key)
{
return _at(key);
}
const Value& at(const Key& key) const
{
return _at(key);
}
bool contains(const Key& key) const
{
auto hashes = _hashes(key);
auto first = _first(hashes.first);
if (first && first->key == key)
{
return true;
}
auto second = _second(hashes.second);
if (second && second->key == key)
{
return true;
}
return false;
}
Value& operator[](const Key& key)
{
try
{
return _at(key);
}
catch(std::invalid_argument&)
{
auto p_item = new item_t(_hashes(key), key);
_insert(p_item);
return p_item->value;
}
}
bool insert_or_assign(const Key& key, Value&& value)
{
try
{
_at(key) = std::forward<Value>(value);
return false;
}
catch(std::invalid_argument&)
{
_insert(new item_t(_hashes(key), key, value));
return true;
}
}
size_t size() const
{
return _size;
}
size_t capacity() const
{
return _capacity;
}
bool is_empty() const
{
return _size == 0;
}
const pre_hash_t& pre_hash() const
{
return _pre_hash;
}
private:
static const size_t CYCLE_LIMIT = 16;
enum Index { FIRST, SECOND };
using table_t = Table<Key, Value>;
using item_t = typename table_t::Item;
using item_ptr = item_t*;
using container_t = std::array<table_t, 2>;
using hashes_t = std::pair<size_t, size_t>;
using update_t = std::pair<hashes_t, bool>;
using data_t = std::array<typename table_t::Data, 2>;
void _insert(item_ptr p_item)
{
size_t iterations = 0;
do
{
if (++iterations > CYCLE_LIMIT)
{
_rehash(_items());
break;
}
std::swap(p_item, _first(p_item));
if (! p_item) break;
std::swap(p_item, _second(p_item));
} while(p_item);
if (++_size == _capacity/2) _resize();
}
void _erase(item_ptr& p_item)
{
delete p_item;
p_item = nullptr;
if (--_size == _capacity/8) _resize();
}
Value& _at(const Key& key) const
{
auto hashes = _hashes(key);
auto& first = _first(hashes.first);
if (first && first->key == key)
{
return first->value;
}
auto& second = _second(hashes.second);
if (second && second->key == key)
{
return second->value;
}
throw std::invalid_argument("No such key!");
}
update_t _try_update(const Key& key, const Value& value) const
{
auto hashes = _hashes(key);
auto& first = _first(hashes.first);
if (first && first->key == key)
{
first->value = value;
return {hashes, true};
}
auto& second = _second(hashes.second);
if (second && second->key == key)
{
second->value = value;
return {hashes, true};
}
return {hashes, false};
}
hashes_t _hashes(const Key& key) const
{
size_t pre_hash = _pre_hash(key);
size_t hash_1 = _tables[FIRST].hash(pre_hash);
size_t hash_2 = _tables[SECOND].hash(pre_hash);
return {hash_1, hash_2};
}
inline item_ptr& _first(const item_ptr item) const
{
return _first(item->hashes.first);
}
inline item_ptr& _second(const item_ptr item) const
{
return _second(item->hashes.second);
}
inline item_ptr& _first(size_t hash) const
{
return _tables[FIRST][hash];
}
inline item_ptr& _second(size_t hash) const
{
return _tables[SECOND][hash];
}
data_t _items()
{
auto& first = _tables[FIRST].items();
auto& second = _tables[SECOND].items();
return {first, second};
}
void _resize()
{
size_t new_capacity = _size * 4;
if (new_capacity < MINIMUM_CAPACITY) return;
size_t old_capacity = _capacity;
_capacity = new_capacity;
auto old = _items();
for (auto& table : _tables)
{
table.reset(_capacity);
}
_rehash(std::move(old), old_capacity);
}
void _rehash(data_t old, size_t old_capacity)
{
size_t old_table_size = old_capacity/2;
do
{
for (auto& table : _tables)
{
table.nullify();
table.generate_constants();
}
} while (! _try_rehash(old, old_table_size));
}
bool _try_rehash(data_t old, size_t old_table_size)
{
for (auto& table : old)
{
for (auto& p_item : table)
{
if (! p_item) continue;
p_item->hashes = _hashes(p_item->key);
size_t iterations = 0;
do
{
if (++iterations > CYCLE_LIMIT) return false;
std::swap(p_item, _first(p_item));
if (! p_item) break;
std::swap(p_item, _second(p_item));
} while(p_item);
}
}
return true;
}
void _rehash(data_t old)
{
_rehash(std::move(old), _capacity);
}
size_t _size;
size_t _capacity;
container_t _tables;
pre_hash_t _pre_hash;
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment