Skip to content

Instantly share code, notes, and snippets.

@goldsborough
Created September 22, 2015 22:23
Show Gist options
  • Save goldsborough/ebe3ad64bd0642b00689 to your computer and use it in GitHub Desktop.
Save goldsborough/ebe3ad64bd0642b00689 to your computer and use it in GitHub Desktop.
A hash-set implementation.
template<typename T>
class HashSet
{
private:
using size_t = std::size_t;
using hash_function_t = std::function<size_t(const T&)>;
static const size_t minimum_capacity = 16;
HashSet(const std::initializer_list<T>& items,
const hash_function_t& hash_function = std::hash<T>(),
size_t capacity = minimum_capacity)
: _hash_function(hash_function)
, _size(0)
{
_threshold = std::max(items.size(), capacity);
_capacity = _threshold / _bin_size;
_nodes = new Node*[_capacity];
for (size_t i = 0, end = _capacity; i < end; ++i)
{
_nodes[i] = nullptr;
}
for (const auto& i : items) insert(i);
}
HashSet(const hash_function_t& hash_function = std::hash<T>(),
size_t capacity = minimum_capacity)
: _nodes(new Node*[capacity / _bin_size])
, _hash_function(hash_function)
, _threshold(capacity)
, _capacity(capacity/_bin_size)
, _size(0)
{
for (size_t i = 0, end = _capacity; i < end; ++i)
{
_nodes[i] = nullptr;
}
}
HashSet(const HashSet& other)
: _nodes(new Node*[other._capacity])
, _hash_function(other._hash_function)
, _capacity(other._capacity)
, _threshold(other._threshold)
{
for (size_t i = 0, end = _capacity/_bin_size; i < end; ++i)
{
for (Node* node = _nodes[i]; node; node = node->next)
{
insert(node->item);
}
}
}
HashSet(HashSet&& other) noexcept
: HashSet()
{
swap(other);
}
HashSet& operator=(HashSet other)
{
swap(other);
}
void swap(HashSet& other) noexcept
{
// Enable ADL
using std::swap;
swap(_nodes, other._nodes);
swap(_hash_function, other._hash_function);
swap(_capacity, other._capacity);
swap(_size, other._size);
swap(_threshold, other._threshold);
}
friend void swap(HashSet& first, HashSet& second) noexcept
{
first.swap(second);
}
~HashSet()
{
clear();
}
void insert(const T& item)
{
size_t index = _hash(item);
Node* node = _nodes[index];
while (node)
{
if (node->item == item) return;
node = node->next;
}
node = new Node(item, _nodes[index]);
_nodes[index] = node;
if (++_size == _threshold) _resize();
}
bool contains(const T& item)
{
size_t index = _hash(item);
for (Node* node = _nodes[index]; node; node = node->next)
{
if (node->item == item) return true;
}
return false;
}
void erase(const T& item)
{
size_t index = _hash(item);
Node* node = _nodes[index];
Node* previous = nullptr;
for ( ; node; previous = node, node = node->next)
{
if (node->item == item)
{
if (previous) previous->next = node->next;
else _nodes[index] = node->next;
delete node;
if (--_size == _threshold/4) _resize();
return;
}
}
throw std::invalid_argument("No such item!");
}
void clear()
{
for (size_t i = 0; i < _capacity; ++i)
{
for (Node* node = _nodes[i], *next; node; node = next)
{
next = node->next;
delete node;
}
_nodes[i] = nullptr;
}
_size = 0;
}
size_t size() const
{
return _size;
}
bool is_empty() const
{
return _size == 0;
}
private:
static const size_t _bin_size = 4;
struct Node
{
Node(const T& t, Node* node)
: item(t)
, next(node)
{ }
T item;
Node* next;
};
size_t _hash(const T& item) const
{
return _hash_function(item) % _capacity;
}
void _resize()
{
size_t new_size = _size * 2;
if (new_size < minimum_capacity) return;
_capacity = new_size / _bin_size;
Node** old = _nodes;
_nodes = new Node*[_capacity];
for (size_t i = 0; i < _capacity; ++i)
{
_nodes[i] = nullptr;
}
_rehash(old, _threshold / _bin_size);
_threshold = new_size;
delete [] old;
}
void _rehash(Node** old, size_t old_capacity)
{
for (size_t i = 0; i < old_capacity; ++i)
{
for (Node* node = old[i], *next; node; node = next)
{
size_t index = _hash(node->item);
next = node->next;
node->next = _nodes[index];
_nodes[index] = node;
}
old[i] = nullptr;
}
}
Node** _nodes;
size_t _capacity;
size_t _size;
size_t _threshold;
hash_function_t _hash_function;
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment