Created
September 22, 2015 22:23
-
-
Save goldsborough/ebe3ad64bd0642b00689 to your computer and use it in GitHub Desktop.
A hash-set implementation.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
template<typename T> | |
class HashSet | |
{ | |
private: | |
using size_t = std::size_t; | |
using hash_function_t = std::function<size_t(const T&)>; | |
static const size_t minimum_capacity = 16; | |
HashSet(const std::initializer_list<T>& items, | |
const hash_function_t& hash_function = std::hash<T>(), | |
size_t capacity = minimum_capacity) | |
: _hash_function(hash_function) | |
, _size(0) | |
{ | |
_threshold = std::max(items.size(), capacity); | |
_capacity = _threshold / _bin_size; | |
_nodes = new Node*[_capacity]; | |
for (size_t i = 0, end = _capacity; i < end; ++i) | |
{ | |
_nodes[i] = nullptr; | |
} | |
for (const auto& i : items) insert(i); | |
} | |
HashSet(const hash_function_t& hash_function = std::hash<T>(), | |
size_t capacity = minimum_capacity) | |
: _nodes(new Node*[capacity / _bin_size]) | |
, _hash_function(hash_function) | |
, _threshold(capacity) | |
, _capacity(capacity/_bin_size) | |
, _size(0) | |
{ | |
for (size_t i = 0, end = _capacity; i < end; ++i) | |
{ | |
_nodes[i] = nullptr; | |
} | |
} | |
HashSet(const HashSet& other) | |
: _nodes(new Node*[other._capacity]) | |
, _hash_function(other._hash_function) | |
, _capacity(other._capacity) | |
, _threshold(other._threshold) | |
{ | |
for (size_t i = 0, end = _capacity/_bin_size; i < end; ++i) | |
{ | |
for (Node* node = _nodes[i]; node; node = node->next) | |
{ | |
insert(node->item); | |
} | |
} | |
} | |
HashSet(HashSet&& other) noexcept | |
: HashSet() | |
{ | |
swap(other); | |
} | |
HashSet& operator=(HashSet other) | |
{ | |
swap(other); | |
} | |
void swap(HashSet& other) noexcept | |
{ | |
// Enable ADL | |
using std::swap; | |
swap(_nodes, other._nodes); | |
swap(_hash_function, other._hash_function); | |
swap(_capacity, other._capacity); | |
swap(_size, other._size); | |
swap(_threshold, other._threshold); | |
} | |
friend void swap(HashSet& first, HashSet& second) noexcept | |
{ | |
first.swap(second); | |
} | |
~HashSet() | |
{ | |
clear(); | |
} | |
void insert(const T& item) | |
{ | |
size_t index = _hash(item); | |
Node* node = _nodes[index]; | |
while (node) | |
{ | |
if (node->item == item) return; | |
node = node->next; | |
} | |
node = new Node(item, _nodes[index]); | |
_nodes[index] = node; | |
if (++_size == _threshold) _resize(); | |
} | |
bool contains(const T& item) | |
{ | |
size_t index = _hash(item); | |
for (Node* node = _nodes[index]; node; node = node->next) | |
{ | |
if (node->item == item) return true; | |
} | |
return false; | |
} | |
void erase(const T& item) | |
{ | |
size_t index = _hash(item); | |
Node* node = _nodes[index]; | |
Node* previous = nullptr; | |
for ( ; node; previous = node, node = node->next) | |
{ | |
if (node->item == item) | |
{ | |
if (previous) previous->next = node->next; | |
else _nodes[index] = node->next; | |
delete node; | |
if (--_size == _threshold/4) _resize(); | |
return; | |
} | |
} | |
throw std::invalid_argument("No such item!"); | |
} | |
void clear() | |
{ | |
for (size_t i = 0; i < _capacity; ++i) | |
{ | |
for (Node* node = _nodes[i], *next; node; node = next) | |
{ | |
next = node->next; | |
delete node; | |
} | |
_nodes[i] = nullptr; | |
} | |
_size = 0; | |
} | |
size_t size() const | |
{ | |
return _size; | |
} | |
bool is_empty() const | |
{ | |
return _size == 0; | |
} | |
private: | |
static const size_t _bin_size = 4; | |
struct Node | |
{ | |
Node(const T& t, Node* node) | |
: item(t) | |
, next(node) | |
{ } | |
T item; | |
Node* next; | |
}; | |
size_t _hash(const T& item) const | |
{ | |
return _hash_function(item) % _capacity; | |
} | |
void _resize() | |
{ | |
size_t new_size = _size * 2; | |
if (new_size < minimum_capacity) return; | |
_capacity = new_size / _bin_size; | |
Node** old = _nodes; | |
_nodes = new Node*[_capacity]; | |
for (size_t i = 0; i < _capacity; ++i) | |
{ | |
_nodes[i] = nullptr; | |
} | |
_rehash(old, _threshold / _bin_size); | |
_threshold = new_size; | |
delete [] old; | |
} | |
void _rehash(Node** old, size_t old_capacity) | |
{ | |
for (size_t i = 0; i < old_capacity; ++i) | |
{ | |
for (Node* node = old[i], *next; node; node = next) | |
{ | |
size_t index = _hash(node->item); | |
next = node->next; | |
node->next = _nodes[index]; | |
_nodes[index] = node; | |
} | |
old[i] = nullptr; | |
} | |
} | |
Node** _nodes; | |
size_t _capacity; | |
size_t _size; | |
size_t _threshold; | |
hash_function_t _hash_function; | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment