Created
September 23, 2015 01:07
-
-
Save goldsborough/199149bfe954ae016872 to your computer and use it in GitHub Desktop.
A hash-map implementation with iterator support and multiple hashing options.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
template<typename Key, typename Value> | |
class HashMap | |
{ | |
private: | |
struct Node | |
{ | |
Node(const Key& k = Key(), | |
const Value& v = Value(), | |
Node* following = nullptr, | |
Node* prev = nullptr) | |
: key(k) | |
, value(v) | |
, next(following) | |
, previous(prev) | |
{ } | |
Key key; | |
Value value; | |
Node* next; | |
Node* previous; | |
}; | |
public: | |
using size_t = std::size_t; | |
using hash_function_t = std::function<size_t(const Key&)>; | |
static const size_t minimum_capacity = 16; | |
struct Pair | |
{ | |
Pair(const Key& k = Key(), | |
const Value& v = Value()) | |
: key(k), value(v) | |
{ } | |
Key key; | |
Value value; | |
}; | |
class Iterator : | |
public std::iterator<std::bidirectional_iterator_tag, Pair> | |
{ | |
public: | |
Iterator(size_t index = 0, | |
Node* node = nullptr, | |
Node** nodes = nullptr) | |
: _index(index) | |
, _current(node) | |
, _nodes(nodes) | |
{ | |
if (node) | |
{ | |
_pair.key = node->key; | |
_pair.value = node->value; | |
} | |
} | |
Iterator(const Iterator& other) | |
: _index(other._index) | |
, _current(other._current) | |
, _nodes(other._nodes) | |
, _pair(other._pair) | |
{ } | |
Iterator(Iterator&& other) | |
: Iterator() | |
{ | |
swap(other); | |
} | |
Iterator& operator=(Iterator other) | |
{ | |
swap(other); | |
return *this; | |
} | |
void swap(Iterator& other) noexcept | |
{ | |
using std::swap; | |
swap(_index, other._index); | |
swap(_current, other._current); | |
swap(_nodes, other._nodes); | |
} | |
friend void swap(Iterator& first, Iterator& second) noexcept | |
{ | |
first.swap(second); | |
} | |
const Pair& operator*() const | |
{ | |
return _pair; | |
} | |
const Pair* operator->() const | |
{ | |
return &_pair; | |
} | |
Iterator& operator++() | |
{ | |
if (_current && _current->next) | |
{ | |
_current = _current->next; | |
} | |
else | |
{ | |
do _current = _nodes[++_index]; | |
while (! _current); | |
} | |
_pair.key = _current->key; | |
_pair.value = _current->value; | |
return *this; | |
} | |
Iterator& operator++(int) | |
{ | |
auto previous = *this; | |
++*this; | |
return previous; | |
} | |
Iterator& operator--() | |
{ | |
if (_current->previous) _current = _current->previous; | |
else | |
{ | |
while (! _nodes[_index]) --_index; | |
while (_current->next) _current = _current->next; | |
} | |
_pair.key = _current->key; | |
_pair.value = _current->value; | |
return *this; | |
} | |
Iterator& operator--(int) | |
{ | |
auto following = *this; | |
--*this; | |
return following; | |
} | |
bool operator==(const Iterator& other) const | |
{ | |
return _current == other._current; | |
} | |
bool operator!=(const Iterator& other) const | |
{ | |
return _current != other._current; | |
} | |
private: | |
size_t _index; | |
Node* _current; | |
Node** _nodes; | |
Pair _pair; | |
}; | |
using ConstIterator = Iterator; | |
explicit HashMap(const hash_function_t& hash_function = std::hash<Key>(), | |
size_t capacity = minimum_capacity) | |
: _nodes(new Node*[capacity / _bin_size + 1]) | |
, _hash_function(hash_function) | |
, _threshold(capacity) | |
, _capacity(_threshold/_bin_size) | |
, _size(0) | |
{ | |
for (size_t i = 0; i < _capacity; ++i) | |
{ | |
_nodes[i] = nullptr; | |
} | |
_nodes[_capacity] = new Node; | |
} | |
explicit HashMap(std::initializer_list<Pair> items, | |
const hash_function_t& hash_function = std::hash<Key>(), | |
size_t capacity = minimum_capacity) | |
: _hash_function(hash_function) | |
, _size(0) | |
{ | |
_threshold = std::max(items.size(), minimum_capacity); | |
_capacity = _threshold / _bin_size; | |
_nodes = new Node*[_capacity + 1]; | |
for (size_t i = 0, end = _capacity; i < end; ++i) | |
{ | |
_nodes[i] = nullptr; | |
} | |
for (const auto& i : items) insert(i); | |
_nodes[_capacity] = new Node; | |
} | |
HashMap(const HashMap& other) | |
: _nodes(new Node*[other._capacity]) | |
, _hash_function(other._hash_function) | |
, _capacity(other._capacity) | |
, _threshold(other._threshold) | |
{ | |
for (size_t i = 0; i < _capacity; ++i) | |
{ | |
_nodes[i] = nullptr; | |
for (Node* node = other._nodes[i]; node; node = node->next) | |
{ | |
insert(node->item); | |
} | |
} | |
_nodes[_capacity] = new Node; | |
} | |
HashMap(HashMap&& other) noexcept | |
: HashMap() | |
{ | |
swap(other); | |
} | |
HashMap& operator=(HashMap other) | |
{ | |
swap(other); | |
} | |
void swap(HashMap& other) noexcept | |
{ | |
// Enable ADL | |
using std::swap; | |
swap(_nodes, other._nodes); | |
swap(_hash_function, other._hash_function); | |
swap(_capacity, other._capacity); | |
swap(_size, other._size); | |
swap(_threshold, other._threshold); | |
} | |
friend void swap(HashMap& first, HashMap& second) noexcept | |
{ | |
first.swap(second); | |
} | |
~HashMap() | |
{ | |
clear(); | |
} | |
Iterator begin() const | |
{ | |
if (is_empty()) return end(); | |
return {0, _nodes[0], _nodes}; | |
} | |
Iterator end() const | |
{ | |
return {_capacity, _nodes[_capacity], _nodes}; | |
} | |
Iterator insert(const Key& key, const Value& value) | |
{ | |
size_t index = _hash(key); | |
Node* node = _nodes[index]; | |
while (node) | |
{ | |
if (node->key == key) | |
{ | |
node->value = value; | |
return {index, node, _nodes}; | |
} | |
node = node->next; | |
} | |
node = new Node(key, value, _nodes[index]); | |
if (_nodes[index]) _nodes[index]->previous = node; | |
_nodes[index] = node; | |
if (++_size == _threshold) _resize(); | |
return {index, node, _nodes}; | |
} | |
Iterator insert(const Pair& pair) | |
{ | |
return insert(pair.key, pair.value); | |
} | |
template<typename Itr> | |
void insert(Itr begin, Itr end) | |
{ | |
for ( ; begin != end; ++begin) | |
{ | |
insert(*begin); | |
} | |
} | |
std::pair<Iterator, bool> insert_or_assign(const Key& key, | |
const Value& value) | |
{ | |
size_t index = _hash(key); | |
Node* node = _nodes[index]; | |
for ( ; node; node = node->next) | |
{ | |
if (node->key == key) | |
{ | |
node->value = value; | |
return {{index, node, _nodes}, false}; | |
} | |
} | |
node = new Node(key, value); | |
node->next = _nodes[index]; | |
if(_nodes[index]) _nodes[index]->previous = node; | |
_nodes[index] = node; | |
if (++_size == _threshold) _resize(); | |
return {{index, node, _nodes}, true}; | |
} | |
bool contains(const Key& key) const | |
{ | |
size_t index = _hash(key); | |
for (Node* node = _nodes[index]; node; node = node->next) | |
{ | |
if (node->key == key) return true; | |
} | |
return false; | |
} | |
Value& at(const Key& key) | |
{ | |
size_t index = _hash(key); | |
for (Node* node = _nodes[index]; node; node = node->next) | |
{ | |
if (node->key == key) return node->value; | |
} | |
throw std::invalid_argument("No such key!"); | |
} | |
const Value& at(const Key& key) const | |
{ | |
size_t index = _hash(key); | |
for (Node* node = _nodes[index]; node; node = node->next) | |
{ | |
if (node->key == key) return node->value; | |
} | |
throw std::invalid_argument("No such key!"); | |
} | |
Iterator find(const Key& key) const | |
{ | |
size_t index = _hash(key); | |
for (Node* node = _nodes[index]; node; node = node->next) | |
{ | |
if (node->item == key) | |
{ | |
return {index, node, _nodes}; | |
} | |
} | |
return end(); | |
} | |
size_t count(const Key& key) const | |
{ | |
auto itr = find(key); | |
return itr != end(); | |
} | |
Value& operator[](const Key& key) | |
{ | |
size_t index = _hash(key); | |
Node* node = _nodes[index]; | |
for ( ; node; node = node->next) | |
{ | |
if (node->key == key) return node->value; | |
} | |
node = new Node(key); | |
node->next = _nodes[index]; | |
if(_nodes[index]) _nodes[index]->previous = node; | |
_nodes[index] = node; | |
if (++_size == _threshold) _resize(); | |
return node->value; | |
} | |
void erase(const Key& key) | |
{ | |
size_t index = _hash(key); | |
for (Node* node = _nodes[index]; node; node = node->next) | |
{ | |
if (node->key == key) | |
{ | |
if (node->previous) node->previous->next = node->next; | |
else _nodes[index] = node->next; | |
delete node; | |
if (--_size == _threshold/4) _resize(); | |
return; | |
} | |
} | |
throw std::invalid_argument("No such item!"); | |
} | |
template<typename Itr> | |
void erase(Itr begin, Itr end) | |
{ | |
for ( ; begin != end; ++begin) | |
{ | |
erase(*begin); | |
} | |
} | |
void clear() | |
{ | |
for (size_t i = 0; i < _capacity; ++i) | |
{ | |
for (Node* node = _nodes[i], *next; node; node = next) | |
{ | |
next = node->next; | |
delete node; | |
} | |
_nodes[i] = nullptr; | |
} | |
_size = 0; | |
} | |
size_t size() const | |
{ | |
return _size; | |
} | |
bool is_empty() const | |
{ | |
return _size == 0; | |
} | |
const hash_function_t& hash_function() const | |
{ | |
return _hash_function; | |
} | |
private: | |
static const size_t _bin_size = 4; | |
size_t _hash(const Key& key) const | |
{ | |
// Modulo hashing | |
return _hash_function(key) % _capacity; | |
} | |
size_t _universal_hash(const Key& key) const | |
{ | |
// Universal hashing | |
static const size_t a = 123; // random value | |
static const size_t b = 13; // random value | |
static const size_t p = 69; // prime larger than number of bits | |
// hash(k) = [(a * k + b) mod p] mod m | |
// where m = size of the array used for separate-chaining | |
return ((a * _hash_function(key) + b) % p) % _capacity; | |
} | |
size_t _multiply_shift_hash(const Key& key) const | |
{ | |
// Multiply-Shift scheme | |
// a is any integer (should be odd and not close to power of 2) | |
static const size_t a = 99; | |
// w is the word size (the number of bits of the integer) | |
static const size_t w = sizeof(size_t) * 8; | |
// r is the number of bits you want to return | |
// enough to hold max-number of values / _bin_size (4) | |
// Should compute during re-sizing if actually using | |
static const size_t r = std::log2(_capacity); | |
// hash(k) = [(a * k) mod 2^w] >> (w - r) | |
return (a * _hash_function(key)) >> (w - r); | |
} | |
void _resize() | |
{ | |
size_t new_size = _size * 2; | |
if (new_size < minimum_capacity) return; | |
size_t old_capacity = _capacity; | |
_capacity = new_size / _bin_size; | |
_threshold = new_size; | |
Node** old = _nodes; | |
_nodes = new Node*[_capacity + 1]; | |
// for the end node | |
_nodes[_capacity] = old[old_capacity]; | |
for (size_t i = 0; i < _capacity; ++i) | |
{ | |
_nodes[i] = nullptr; | |
} | |
_rehash(old, old_capacity); | |
delete [] old; | |
} | |
void _rehash(Node** old, size_t old_capacity) | |
{ | |
for (size_t i = 0; i < old_capacity; ++i) | |
{ | |
for (Node* node = old[i], *next; node; node = next) | |
{ | |
size_t index = _hash(node->key); | |
next = node->next; | |
node->next = _nodes[index]; | |
if (_nodes[index]) _nodes[index]->previous = node; | |
_nodes[index] = node; | |
} | |
old[i] = nullptr; | |
} | |
} | |
Node** _nodes; | |
size_t _threshold; | |
size_t _capacity; | |
size_t _size; | |
hash_function_t _hash_function; | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment