Last active
August 16, 2017 17:33
-
-
Save musoftware/114af452665309c7070aba4aa82447c1 to your computer and use it in GitHub Desktop.
C# BigHashSet Class Greater than 2Gb.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public class BigHashSet<T> : IEnumerable<T> | |
{ | |
private const ulong StartSize = 1 << 19; | |
private BigArray<ulong> _hashBuckets; | |
private ulong _hashCode; | |
private BigArray<Slot> _slots; | |
public ulong Count { get; private set; } | |
public ulong Length {get{ | |
return _hashBuckets.Length; | |
}} | |
public ulong DuplicateAddAttempts { get; private set; } | |
public ulong Resizes { get; private set; } | |
public ulong LastFindItemComparisons { get; private set; } | |
public float GrowthDelta = 2.0f; | |
public bool IsDirty { | |
get{ | |
return Count > GetTrueCount() || Count < _hashBuckets.Length; | |
} | |
} | |
public T this[ulong index] | |
{ | |
get | |
{ | |
if (index > Count) | |
throw new Exception("Getter: Index out of bounds " + index + " must be less than " + Count); | |
return _slots[index].value; | |
} | |
} | |
public BigHashSet(ulong size = 0) | |
{ | |
Initialize(size); | |
} | |
public BigHashSet(IEnumerable<T> collection) | |
{ | |
if (collection == null) | |
Initialize(); | |
else | |
{ | |
var coll = collection as ICollection<T>; | |
if (coll == null) | |
{ | |
Initialize(); | |
return; | |
} | |
Initialize((ulong) coll.Count); | |
foreach (var item in coll) | |
Add(item); | |
} | |
} | |
IEnumerator<T> IEnumerable<T>.GetEnumerator() | |
{ | |
return new Enumerator(this); | |
} | |
IEnumerator IEnumerable.GetEnumerator() | |
{ | |
return new Enumerator(this); | |
} | |
public Enumerator GetEnumerator() | |
{ | |
return new Enumerator(this); | |
} | |
public bool Add(T item) | |
{ | |
return Insert(item); | |
} | |
public bool AddRange(IEnumerable<T> collection) | |
{ | |
var coll = collection as ICollection<T>; | |
if (coll == null) | |
return false; | |
foreach (var item in coll) | |
Add(item); | |
return true; | |
} | |
public bool Contains(T item) | |
{ | |
return FindItem(item) != ulong.MaxValue; | |
} | |
public bool ContainsAllElements(IEnumerable<T> elemcol) | |
{ | |
return elemcol.All(Contains); | |
} | |
public void Clear() | |
{ | |
if (Count == 0) return; | |
Initialize(); | |
} | |
public void Release() | |
{ | |
if (Count == 0) return; | |
Clear(); | |
Compact(); | |
} | |
public void Compact() | |
{ | |
if (Count == 0) return; | |
var tc = GetTrueCount(); | |
var tset = new BigHashSet<T>(tc); | |
for (ulong i = 0; i < tc; i++) | |
if (_slots[i].hashCode > 0) | |
tset.Add(_slots[i].value); | |
Count = tc; | |
_slots = tset._slots; | |
_hashBuckets = tset._hashBuckets; | |
} | |
private bool Insert(T item) | |
{ | |
if (ItemExistsGenerateHashCode(item) == ulong.MaxValue) | |
{ | |
try | |
{ | |
if (Count >= _slots.Length) | |
Resize(); | |
} | |
catch (Exception e) | |
{ | |
throw new Exception("Error resizing hash bucket list " + e.Message); | |
} | |
var hashPos = _hashCode % _hashBuckets.Length; | |
var nextPos = _hashBuckets[hashPos]; | |
_hashBuckets[hashPos] = Count; | |
var news = new Slot(); | |
news.next = nextPos; | |
news.value = item; | |
news.hashCode = _hashCode; | |
_slots[Count] = news; | |
Count++; | |
return true; | |
} | |
DuplicateAddAttempts++; | |
return false; | |
} | |
private void Resize(ulong newsize = 0) | |
{ | |
Resizes++; | |
if (newsize == 0) | |
newsize = GetNewSize(); | |
var newhashebuckets = new BigArray<ulong>(newsize); | |
var newslots = new BigArray<Slot>(newsize); | |
for (ulong i = 0; i < _slots.Length; i++) | |
newslots[i] = _slots[i]; | |
for (ulong i = 0; i < newhashebuckets.Length; i++) | |
newhashebuckets[i] = ulong.MaxValue; | |
for (ulong i = 0; i < Count; i++) | |
{ | |
var pos = newslots[i].hashCode % newsize; | |
var prevpos = newhashebuckets[pos]; | |
newhashebuckets[pos] = i; | |
if (prevpos != ulong.MaxValue) | |
{ | |
var news = new Slot(); | |
news.next = prevpos; | |
news.value = newslots[i].value; | |
news.hashCode = newslots[i].hashCode; | |
newslots[i] = news; | |
} | |
} | |
_hashBuckets = newhashebuckets; | |
_slots = newslots; | |
} | |
private ulong GetNewSize() | |
{ | |
return (ulong) (Count * GrowthDelta); | |
} | |
public ulong FindItem(T item) | |
{ | |
var hashCode = (uint) item.GetHashCode(); | |
LastFindItemComparisons = 0; | |
for (var position = _hashBuckets[hashCode % _hashBuckets.Length]; position != ulong.MaxValue; position = _slots[position].next) | |
{ | |
LastFindItemComparisons++; | |
if (_slots[position].hashCode == hashCode && _slots[position].value.Equals(item)) | |
return position; | |
} | |
return ulong.MaxValue; | |
} | |
private ulong ItemExistsGenerateHashCode(T item) | |
{ | |
_hashCode = (uint) item.GetHashCode(); | |
for (var position = _hashBuckets[_hashCode % _hashBuckets.Length]; position != ulong.MaxValue; position = _slots[position].next) | |
if (_slots[position].hashCode == _hashCode && _slots[position].value.Equals(item)) | |
return position; | |
return ulong.MaxValue; | |
} | |
private void Initialize(ulong size = 0) | |
{ | |
if (size == 0) | |
size = StartSize; | |
_hashBuckets = new BigArray<ulong>(size); | |
_slots = new BigArray<Slot>(size); | |
Count = 0; | |
for (ulong i = 0; i < _hashBuckets.Length; i++) | |
_hashBuckets[i] = ulong.MaxValue; | |
} | |
public void CopyTo(T[] array) | |
{ | |
CopyTo(array, 0, Count); | |
} | |
public void CopyTo(T[] array, ulong arrayIndex, ulong count) | |
{ | |
ulong Copied = 0; | |
for (ulong i = 0; i < Count && Copied < count; i++) | |
if (_slots[i].hashCode > 0) | |
array[arrayIndex + Copied++] = _slots[i].value; | |
} | |
private ulong GetTrueCount() | |
{ | |
ulong tCount = 0; | |
for (ulong i = 0; i < Count; i++) | |
if (_slots[i].hashCode > 0) | |
tCount++; | |
return tCount; | |
} | |
public T[] ToArray() | |
{ | |
var newArray = new T[GetTrueCount()]; | |
CopyTo(newArray); | |
return newArray; | |
} | |
private struct Slot | |
{ | |
public ulong hashCode; | |
public ulong next; | |
public T value; | |
} | |
[Serializable] | |
public struct Enumerator : IEnumerator<T> | |
{ | |
private readonly BigHashSet<T> set; | |
private ulong index; | |
public T Current { get; private set; } | |
object IEnumerator.Current | |
{ | |
get | |
{ | |
if (index == 0 || index == set.Count + 1) | |
throw new InvalidOperationException("Enumerator out of range: " + index); | |
return Current; | |
} | |
} | |
internal Enumerator(BigHashSet<T> set) | |
{ | |
this.set = set; | |
index = 0; | |
Current = default(T); | |
} | |
public void Dispose() | |
{ | |
} | |
public bool MoveNext() | |
{ | |
if (index < set.Count) | |
{ | |
Current = set._slots[index].value; | |
index++; | |
return true; | |
} | |
index = set.Count + 1; | |
Current = default(T); | |
return false; | |
} | |
void IEnumerator.Reset() | |
{ | |
index = 0; | |
Current = default(T); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment