Skip to content

Instantly share code, notes, and snippets.

@goldsborough
Created September 22, 2015 23:34
Show Gist options
  • Save goldsborough/46f1691a6ed0f492d5d6 to your computer and use it in GitHub Desktop.
Save goldsborough/46f1691a6ed0f492d5d6 to your computer and use it in GitHub Desktop.
A hash-set implementation with iterator support.
template<typename T>
class HashSet
{
private:
struct Node
{
Node(const T& t = T(),
Node* following = nullptr,
Node* prev = nullptr)
: item(t)
, next(following)
, previous(prev)
{ }
T item;
Node* next;
Node* previous;
};
public:
using size_t = std::size_t;
using hash_function_t = std::function<size_t(const T&)>;
static const size_t minimum_capacity = 16;
class iterator
{
public:
iterator(size_t index = 0,
Node* node = nullptr,
Node** nodes = nullptr)
: _index(index)
, _current(node)
, _nodes(nodes)
{ }
iterator(const iterator& other)
: _index(other._index)
, _current(other._current)
, _nodes(other._nodes)
{ }
iterator(iterator&& other)
: iterator()
{
swap(other);
}
iterator& operator=(iterator other)
{
swap(other);
return *this;
}
void swap(iterator& other) noexcept
{
using std::swap;
swap(_index, other._index);
swap(_current, other._current);
swap(_nodes, other._nodes);
}
friend void swap(iterator& first, iterator& second) noexcept
{
first.swap(second);
}
const T& operator*() const
{
return _current->item;
}
const T* operator->() const
{
return &_current->item;
}
iterator& operator++()
{
if (_current && _current->next)
{
_current = _current->next;
}
else
{
do _current = _nodes[++_index];
while (! _current);
}
return *this;
}
iterator& operator++(int)
{
auto previous = *this;
++*this;
return previous;
}
iterator& operator--()
{
if (_current->previous) _current = _current->previous;
else
{
while (! _nodes[_index]) --_index;
while (_current->next) _current = _current->next;
}
return *this;
}
iterator& operator--(int)
{
auto following = *this;
--*this;
return following;
}
bool operator==(const iterator& other) const
{
return _current == other._current;
}
bool operator!=(const iterator& other) const
{
return _current != other._current;
}
private:
size_t _index;
Node* _current;
Node** _nodes;
};
using const_iterator = iterator;
HashSet(const hash_function_t& hash_function = std::hash<T>(),
size_t capacity = minimum_capacity)
: _nodes(new Node*[capacity / _bin_size + 1])
, _hash_function(hash_function)
, _threshold(capacity)
, _capacity(_threshold/_bin_size)
, _size(0)
{
for (size_t i = 0; i < _capacity; ++i)
{
_nodes[i] = nullptr;
}
_nodes[_capacity] = new Node;
}
HashSet(const std::initializer_list<T>& items,
const hash_function_t& hash_function = std::hash<T>(),
size_t capacity = minimum_capacity)
: _hash_function(hash_function)
, _size(0)
{
_threshold = std::max(items.size(), capacity);
_capacity = _threshold / _bin_size;
_nodes = new Node*[_capacity + 1];
for (size_t i = 0, end = _capacity; i < end; ++i)
{
_nodes[i] = nullptr;
}
for (const auto& i : items) insert(i);
_nodes[_capacity] = new Node;
}
HashSet(const HashSet& other)
: _nodes(new Node*[other._capacity])
, _hash_function(other._hash_function)
, _capacity(other._capacity)
, _threshold(other._threshold)
{
for (size_t i = 0; i < _capacity; ++i)
{
_nodes[i] = nullptr;
for (Node* node = other._nodes[i]; node; node = node->next)
{
insert(node->item);
}
}
_nodes[_capacity] = new Node;
}
HashSet(HashSet&& other) noexcept
: HashSet()
{
swap(other);
}
HashSet& operator=(HashSet other)
{
swap(other);
}
void swap(HashSet& other) noexcept
{
// Enable ADL
using std::swap;
swap(_nodes, other._nodes);
swap(_hash_function, other._hash_function);
swap(_capacity, other._capacity);
swap(_size, other._size);
swap(_threshold, other._threshold);
}
friend void swap(HashSet& first, HashSet& second) noexcept
{
first.swap(second);
}
~HashSet()
{
clear();
}
iterator begin() const
{
if (is_empty()) return end();
return {0, _nodes[0], _nodes};
}
iterator end() const
{
return {_capacity, _nodes[_capacity], _nodes};
}
void insert(const T& item)
{
size_t index = _hash(item);
Node* node = _nodes[index];
while (node)
{
if (node->item == item) return;
node = node->next;
}
node = new Node(item, _nodes[index]);
if (_nodes[index]) _nodes[index]->previous = node;
_nodes[index] = node;
if (++_size == _threshold) _resize();
}
template<typename Itr>
void insert(Itr begin, Itr end)
{
for ( ; begin != end; ++begin)
{
insert(*begin);
}
}
bool contains(const T& item) const
{
size_t index = _hash(item);
for (Node* node = _nodes[index]; node; node = node->next)
{
if (node->item == item) return true;
}
return false;
}
iterator find(const T& item) const
{
size_t index = _hash(item);
for (Node* node = _nodes[index]; node; node = node->next)
{
if (node->item == item)
{
return {index, node, _nodes};
}
}
return end();
}
size_t count(const T& item) const
{
auto itr = find(item);
return itr != end();
}
void erase(const T& item)
{
size_t index = _hash(item);
for (Node* node = _nodes[index]; node; node = node->next)
{
if (node->item == item)
{
if (node->previous) node->previous->next = node->next;
else _nodes[index] = node->next;
delete node;
if (--_size == _threshold/4) _resize();
return;
}
}
throw std::invalid_argument("No such item!");
}
template<typename Itr>
void erase(Itr begin, Itr end)
{
for ( ; begin != end; ++begin)
{
erase(*begin);
}
}
void clear()
{
for (size_t i = 0; i < _capacity; ++i)
{
for (Node* node = _nodes[i], *next; node; node = next)
{
next = node->next;
delete node;
}
_nodes[i] = nullptr;
}
_size = 0;
}
size_t size() const
{
return _size;
}
bool is_empty() const
{
return _size == 0;
}
const hash_function_t& hash_function() const
{
return _hash_function;
}
private:
static const size_t _bin_size = 4;
size_t _hash(const T& item) const
{
return _hash_function(item) % _capacity;
}
void _resize()
{
size_t new_size = _size * 2;
if (new_size < minimum_capacity) return;
size_t old_capacity = _capacity;
_capacity = new_size / _bin_size;
_threshold = new_size;
Node** old = _nodes;
_nodes = new Node*[_capacity + 1];
// for the end node
_nodes[_capacity] = old[old_capacity];
for (size_t i = 0; i < _capacity; ++i)
{
_nodes[i] = nullptr;
}
_rehash(old, old_capacity);
delete [] old;
}
void _rehash(Node** old, size_t old_capacity)
{
for (size_t i = 0; i < old_capacity; ++i)
{
for (Node* node = old[i], *next; node; node = next)
{
size_t index = _hash(node->item);
next = node->next;
node->next = _nodes[index];
if (_nodes[index]) _nodes[index]->previous = node;
_nodes[index] = node;
}
old[i] = nullptr;
}
}
Node** _nodes;
size_t _threshold;
size_t _capacity;
size_t _size;
hash_function_t _hash_function;
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment