Created
September 22, 2015 23:34
-
-
Save goldsborough/46f1691a6ed0f492d5d6 to your computer and use it in GitHub Desktop.
A hash-set implementation with iterator support.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
template<typename T> | |
class HashSet | |
{ | |
private: | |
struct Node | |
{ | |
Node(const T& t = T(), | |
Node* following = nullptr, | |
Node* prev = nullptr) | |
: item(t) | |
, next(following) | |
, previous(prev) | |
{ } | |
T item; | |
Node* next; | |
Node* previous; | |
}; | |
public: | |
using size_t = std::size_t; | |
using hash_function_t = std::function<size_t(const T&)>; | |
static const size_t minimum_capacity = 16; | |
class iterator | |
{ | |
public: | |
iterator(size_t index = 0, | |
Node* node = nullptr, | |
Node** nodes = nullptr) | |
: _index(index) | |
, _current(node) | |
, _nodes(nodes) | |
{ } | |
iterator(const iterator& other) | |
: _index(other._index) | |
, _current(other._current) | |
, _nodes(other._nodes) | |
{ } | |
iterator(iterator&& other) | |
: iterator() | |
{ | |
swap(other); | |
} | |
iterator& operator=(iterator other) | |
{ | |
swap(other); | |
return *this; | |
} | |
void swap(iterator& other) noexcept | |
{ | |
using std::swap; | |
swap(_index, other._index); | |
swap(_current, other._current); | |
swap(_nodes, other._nodes); | |
} | |
friend void swap(iterator& first, iterator& second) noexcept | |
{ | |
first.swap(second); | |
} | |
const T& operator*() const | |
{ | |
return _current->item; | |
} | |
const T* operator->() const | |
{ | |
return &_current->item; | |
} | |
iterator& operator++() | |
{ | |
if (_current && _current->next) | |
{ | |
_current = _current->next; | |
} | |
else | |
{ | |
do _current = _nodes[++_index]; | |
while (! _current); | |
} | |
return *this; | |
} | |
iterator& operator++(int) | |
{ | |
auto previous = *this; | |
++*this; | |
return previous; | |
} | |
iterator& operator--() | |
{ | |
if (_current->previous) _current = _current->previous; | |
else | |
{ | |
while (! _nodes[_index]) --_index; | |
while (_current->next) _current = _current->next; | |
} | |
return *this; | |
} | |
iterator& operator--(int) | |
{ | |
auto following = *this; | |
--*this; | |
return following; | |
} | |
bool operator==(const iterator& other) const | |
{ | |
return _current == other._current; | |
} | |
bool operator!=(const iterator& other) const | |
{ | |
return _current != other._current; | |
} | |
private: | |
size_t _index; | |
Node* _current; | |
Node** _nodes; | |
}; | |
using const_iterator = iterator; | |
HashSet(const hash_function_t& hash_function = std::hash<T>(), | |
size_t capacity = minimum_capacity) | |
: _nodes(new Node*[capacity / _bin_size + 1]) | |
, _hash_function(hash_function) | |
, _threshold(capacity) | |
, _capacity(_threshold/_bin_size) | |
, _size(0) | |
{ | |
for (size_t i = 0; i < _capacity; ++i) | |
{ | |
_nodes[i] = nullptr; | |
} | |
_nodes[_capacity] = new Node; | |
} | |
HashSet(const std::initializer_list<T>& items, | |
const hash_function_t& hash_function = std::hash<T>(), | |
size_t capacity = minimum_capacity) | |
: _hash_function(hash_function) | |
, _size(0) | |
{ | |
_threshold = std::max(items.size(), capacity); | |
_capacity = _threshold / _bin_size; | |
_nodes = new Node*[_capacity + 1]; | |
for (size_t i = 0, end = _capacity; i < end; ++i) | |
{ | |
_nodes[i] = nullptr; | |
} | |
for (const auto& i : items) insert(i); | |
_nodes[_capacity] = new Node; | |
} | |
HashSet(const HashSet& other) | |
: _nodes(new Node*[other._capacity]) | |
, _hash_function(other._hash_function) | |
, _capacity(other._capacity) | |
, _threshold(other._threshold) | |
{ | |
for (size_t i = 0; i < _capacity; ++i) | |
{ | |
_nodes[i] = nullptr; | |
for (Node* node = other._nodes[i]; node; node = node->next) | |
{ | |
insert(node->item); | |
} | |
} | |
_nodes[_capacity] = new Node; | |
} | |
HashSet(HashSet&& other) noexcept | |
: HashSet() | |
{ | |
swap(other); | |
} | |
HashSet& operator=(HashSet other) | |
{ | |
swap(other); | |
} | |
void swap(HashSet& other) noexcept | |
{ | |
// Enable ADL | |
using std::swap; | |
swap(_nodes, other._nodes); | |
swap(_hash_function, other._hash_function); | |
swap(_capacity, other._capacity); | |
swap(_size, other._size); | |
swap(_threshold, other._threshold); | |
} | |
friend void swap(HashSet& first, HashSet& second) noexcept | |
{ | |
first.swap(second); | |
} | |
~HashSet() | |
{ | |
clear(); | |
} | |
iterator begin() const | |
{ | |
if (is_empty()) return end(); | |
return {0, _nodes[0], _nodes}; | |
} | |
iterator end() const | |
{ | |
return {_capacity, _nodes[_capacity], _nodes}; | |
} | |
void insert(const T& item) | |
{ | |
size_t index = _hash(item); | |
Node* node = _nodes[index]; | |
while (node) | |
{ | |
if (node->item == item) return; | |
node = node->next; | |
} | |
node = new Node(item, _nodes[index]); | |
if (_nodes[index]) _nodes[index]->previous = node; | |
_nodes[index] = node; | |
if (++_size == _threshold) _resize(); | |
} | |
template<typename Itr> | |
void insert(Itr begin, Itr end) | |
{ | |
for ( ; begin != end; ++begin) | |
{ | |
insert(*begin); | |
} | |
} | |
bool contains(const T& item) const | |
{ | |
size_t index = _hash(item); | |
for (Node* node = _nodes[index]; node; node = node->next) | |
{ | |
if (node->item == item) return true; | |
} | |
return false; | |
} | |
iterator find(const T& item) const | |
{ | |
size_t index = _hash(item); | |
for (Node* node = _nodes[index]; node; node = node->next) | |
{ | |
if (node->item == item) | |
{ | |
return {index, node, _nodes}; | |
} | |
} | |
return end(); | |
} | |
size_t count(const T& item) const | |
{ | |
auto itr = find(item); | |
return itr != end(); | |
} | |
void erase(const T& item) | |
{ | |
size_t index = _hash(item); | |
for (Node* node = _nodes[index]; node; node = node->next) | |
{ | |
if (node->item == item) | |
{ | |
if (node->previous) node->previous->next = node->next; | |
else _nodes[index] = node->next; | |
delete node; | |
if (--_size == _threshold/4) _resize(); | |
return; | |
} | |
} | |
throw std::invalid_argument("No such item!"); | |
} | |
template<typename Itr> | |
void erase(Itr begin, Itr end) | |
{ | |
for ( ; begin != end; ++begin) | |
{ | |
erase(*begin); | |
} | |
} | |
void clear() | |
{ | |
for (size_t i = 0; i < _capacity; ++i) | |
{ | |
for (Node* node = _nodes[i], *next; node; node = next) | |
{ | |
next = node->next; | |
delete node; | |
} | |
_nodes[i] = nullptr; | |
} | |
_size = 0; | |
} | |
size_t size() const | |
{ | |
return _size; | |
} | |
bool is_empty() const | |
{ | |
return _size == 0; | |
} | |
const hash_function_t& hash_function() const | |
{ | |
return _hash_function; | |
} | |
private: | |
static const size_t _bin_size = 4; | |
size_t _hash(const T& item) const | |
{ | |
return _hash_function(item) % _capacity; | |
} | |
void _resize() | |
{ | |
size_t new_size = _size * 2; | |
if (new_size < minimum_capacity) return; | |
size_t old_capacity = _capacity; | |
_capacity = new_size / _bin_size; | |
_threshold = new_size; | |
Node** old = _nodes; | |
_nodes = new Node*[_capacity + 1]; | |
// for the end node | |
_nodes[_capacity] = old[old_capacity]; | |
for (size_t i = 0; i < _capacity; ++i) | |
{ | |
_nodes[i] = nullptr; | |
} | |
_rehash(old, old_capacity); | |
delete [] old; | |
} | |
void _rehash(Node** old, size_t old_capacity) | |
{ | |
for (size_t i = 0; i < old_capacity; ++i) | |
{ | |
for (Node* node = old[i], *next; node; node = next) | |
{ | |
size_t index = _hash(node->item); | |
next = node->next; | |
node->next = _nodes[index]; | |
if (_nodes[index]) _nodes[index]->previous = node; | |
_nodes[index] = node; | |
} | |
old[i] = nullptr; | |
} | |
} | |
Node** _nodes; | |
size_t _threshold; | |
size_t _capacity; | |
size_t _size; | |
hash_function_t _hash_function; | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment