Skip to content

Instantly share code, notes, and snippets.

@goldsborough
Created September 23, 2015 01:07
Show Gist options
  • Save goldsborough/199149bfe954ae016872 to your computer and use it in GitHub Desktop.
Save goldsborough/199149bfe954ae016872 to your computer and use it in GitHub Desktop.
A hash-map implementation with iterator support and multiple hashing options.
template<typename Key, typename Value>
class HashMap
{
private:
struct Node
{
Node(const Key& k = Key(),
const Value& v = Value(),
Node* following = nullptr,
Node* prev = nullptr)
: key(k)
, value(v)
, next(following)
, previous(prev)
{ }
Key key;
Value value;
Node* next;
Node* previous;
};
public:
using size_t = std::size_t;
using hash_function_t = std::function<size_t(const Key&)>;
static const size_t minimum_capacity = 16;
struct Pair
{
Pair(const Key& k = Key(),
const Value& v = Value())
: key(k), value(v)
{ }
Key key;
Value value;
};
class Iterator :
public std::iterator<std::bidirectional_iterator_tag, Pair>
{
public:
Iterator(size_t index = 0,
Node* node = nullptr,
Node** nodes = nullptr)
: _index(index)
, _current(node)
, _nodes(nodes)
{
if (node)
{
_pair.key = node->key;
_pair.value = node->value;
}
}
Iterator(const Iterator& other)
: _index(other._index)
, _current(other._current)
, _nodes(other._nodes)
, _pair(other._pair)
{ }
Iterator(Iterator&& other)
: Iterator()
{
swap(other);
}
Iterator& operator=(Iterator other)
{
swap(other);
return *this;
}
void swap(Iterator& other) noexcept
{
using std::swap;
swap(_index, other._index);
swap(_current, other._current);
swap(_nodes, other._nodes);
}
friend void swap(Iterator& first, Iterator& second) noexcept
{
first.swap(second);
}
const Pair& operator*() const
{
return _pair;
}
const Pair* operator->() const
{
return &_pair;
}
Iterator& operator++()
{
if (_current && _current->next)
{
_current = _current->next;
}
else
{
do _current = _nodes[++_index];
while (! _current);
}
_pair.key = _current->key;
_pair.value = _current->value;
return *this;
}
Iterator& operator++(int)
{
auto previous = *this;
++*this;
return previous;
}
Iterator& operator--()
{
if (_current->previous) _current = _current->previous;
else
{
while (! _nodes[_index]) --_index;
while (_current->next) _current = _current->next;
}
_pair.key = _current->key;
_pair.value = _current->value;
return *this;
}
Iterator& operator--(int)
{
auto following = *this;
--*this;
return following;
}
bool operator==(const Iterator& other) const
{
return _current == other._current;
}
bool operator!=(const Iterator& other) const
{
return _current != other._current;
}
private:
size_t _index;
Node* _current;
Node** _nodes;
Pair _pair;
};
using ConstIterator = Iterator;
explicit HashMap(const hash_function_t& hash_function = std::hash<Key>(),
size_t capacity = minimum_capacity)
: _nodes(new Node*[capacity / _bin_size + 1])
, _hash_function(hash_function)
, _threshold(capacity)
, _capacity(_threshold/_bin_size)
, _size(0)
{
for (size_t i = 0; i < _capacity; ++i)
{
_nodes[i] = nullptr;
}
_nodes[_capacity] = new Node;
}
explicit HashMap(std::initializer_list<Pair> items,
const hash_function_t& hash_function = std::hash<Key>(),
size_t capacity = minimum_capacity)
: _hash_function(hash_function)
, _size(0)
{
_threshold = std::max(items.size(), minimum_capacity);
_capacity = _threshold / _bin_size;
_nodes = new Node*[_capacity + 1];
for (size_t i = 0, end = _capacity; i < end; ++i)
{
_nodes[i] = nullptr;
}
for (const auto& i : items) insert(i);
_nodes[_capacity] = new Node;
}
HashMap(const HashMap& other)
: _nodes(new Node*[other._capacity])
, _hash_function(other._hash_function)
, _capacity(other._capacity)
, _threshold(other._threshold)
{
for (size_t i = 0; i < _capacity; ++i)
{
_nodes[i] = nullptr;
for (Node* node = other._nodes[i]; node; node = node->next)
{
insert(node->item);
}
}
_nodes[_capacity] = new Node;
}
HashMap(HashMap&& other) noexcept
: HashMap()
{
swap(other);
}
HashMap& operator=(HashMap other)
{
swap(other);
}
void swap(HashMap& other) noexcept
{
// Enable ADL
using std::swap;
swap(_nodes, other._nodes);
swap(_hash_function, other._hash_function);
swap(_capacity, other._capacity);
swap(_size, other._size);
swap(_threshold, other._threshold);
}
friend void swap(HashMap& first, HashMap& second) noexcept
{
first.swap(second);
}
~HashMap()
{
clear();
}
Iterator begin() const
{
if (is_empty()) return end();
return {0, _nodes[0], _nodes};
}
Iterator end() const
{
return {_capacity, _nodes[_capacity], _nodes};
}
Iterator insert(const Key& key, const Value& value)
{
size_t index = _hash(key);
Node* node = _nodes[index];
while (node)
{
if (node->key == key)
{
node->value = value;
return {index, node, _nodes};
}
node = node->next;
}
node = new Node(key, value, _nodes[index]);
if (_nodes[index]) _nodes[index]->previous = node;
_nodes[index] = node;
if (++_size == _threshold) _resize();
return {index, node, _nodes};
}
Iterator insert(const Pair& pair)
{
return insert(pair.key, pair.value);
}
template<typename Itr>
void insert(Itr begin, Itr end)
{
for ( ; begin != end; ++begin)
{
insert(*begin);
}
}
std::pair<Iterator, bool> insert_or_assign(const Key& key,
const Value& value)
{
size_t index = _hash(key);
Node* node = _nodes[index];
for ( ; node; node = node->next)
{
if (node->key == key)
{
node->value = value;
return {{index, node, _nodes}, false};
}
}
node = new Node(key, value);
node->next = _nodes[index];
if(_nodes[index]) _nodes[index]->previous = node;
_nodes[index] = node;
if (++_size == _threshold) _resize();
return {{index, node, _nodes}, true};
}
bool contains(const Key& key) const
{
size_t index = _hash(key);
for (Node* node = _nodes[index]; node; node = node->next)
{
if (node->key == key) return true;
}
return false;
}
Value& at(const Key& key)
{
size_t index = _hash(key);
for (Node* node = _nodes[index]; node; node = node->next)
{
if (node->key == key) return node->value;
}
throw std::invalid_argument("No such key!");
}
const Value& at(const Key& key) const
{
size_t index = _hash(key);
for (Node* node = _nodes[index]; node; node = node->next)
{
if (node->key == key) return node->value;
}
throw std::invalid_argument("No such key!");
}
Iterator find(const Key& key) const
{
size_t index = _hash(key);
for (Node* node = _nodes[index]; node; node = node->next)
{
if (node->item == key)
{
return {index, node, _nodes};
}
}
return end();
}
size_t count(const Key& key) const
{
auto itr = find(key);
return itr != end();
}
Value& operator[](const Key& key)
{
size_t index = _hash(key);
Node* node = _nodes[index];
for ( ; node; node = node->next)
{
if (node->key == key) return node->value;
}
node = new Node(key);
node->next = _nodes[index];
if(_nodes[index]) _nodes[index]->previous = node;
_nodes[index] = node;
if (++_size == _threshold) _resize();
return node->value;
}
void erase(const Key& key)
{
size_t index = _hash(key);
for (Node* node = _nodes[index]; node; node = node->next)
{
if (node->key == key)
{
if (node->previous) node->previous->next = node->next;
else _nodes[index] = node->next;
delete node;
if (--_size == _threshold/4) _resize();
return;
}
}
throw std::invalid_argument("No such item!");
}
template<typename Itr>
void erase(Itr begin, Itr end)
{
for ( ; begin != end; ++begin)
{
erase(*begin);
}
}
void clear()
{
for (size_t i = 0; i < _capacity; ++i)
{
for (Node* node = _nodes[i], *next; node; node = next)
{
next = node->next;
delete node;
}
_nodes[i] = nullptr;
}
_size = 0;
}
size_t size() const
{
return _size;
}
bool is_empty() const
{
return _size == 0;
}
const hash_function_t& hash_function() const
{
return _hash_function;
}
private:
static const size_t _bin_size = 4;
size_t _hash(const Key& key) const
{
// Modulo hashing
return _hash_function(key) % _capacity;
}
size_t _universal_hash(const Key& key) const
{
// Universal hashing
static const size_t a = 123; // random value
static const size_t b = 13; // random value
static const size_t p = 69; // prime larger than number of bits
// hash(k) = [(a * k + b) mod p] mod m
// where m = size of the array used for separate-chaining
return ((a * _hash_function(key) + b) % p) % _capacity;
}
size_t _multiply_shift_hash(const Key& key) const
{
// Multiply-Shift scheme
// a is any integer (should be odd and not close to power of 2)
static const size_t a = 99;
// w is the word size (the number of bits of the integer)
static const size_t w = sizeof(size_t) * 8;
// r is the number of bits you want to return
// enough to hold max-number of values / _bin_size (4)
// Should compute during re-sizing if actually using
static const size_t r = std::log2(_capacity);
// hash(k) = [(a * k) mod 2^w] >> (w - r)
return (a * _hash_function(key)) >> (w - r);
}
void _resize()
{
size_t new_size = _size * 2;
if (new_size < minimum_capacity) return;
size_t old_capacity = _capacity;
_capacity = new_size / _bin_size;
_threshold = new_size;
Node** old = _nodes;
_nodes = new Node*[_capacity + 1];
// for the end node
_nodes[_capacity] = old[old_capacity];
for (size_t i = 0; i < _capacity; ++i)
{
_nodes[i] = nullptr;
}
_rehash(old, old_capacity);
delete [] old;
}
void _rehash(Node** old, size_t old_capacity)
{
for (size_t i = 0; i < old_capacity; ++i)
{
for (Node* node = old[i], *next; node; node = next)
{
size_t index = _hash(node->key);
next = node->next;
node->next = _nodes[index];
if (_nodes[index]) _nodes[index]->previous = node;
_nodes[index] = node;
}
old[i] = nullptr;
}
}
Node** _nodes;
size_t _threshold;
size_t _capacity;
size_t _size;
hash_function_t _hash_function;
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment