madelson · January 1, 2016 15:35
diff --git a/CollectionEquals.cs b/CollectionEquals.cs
 using System;
 using System.Collections;
 using System.Collections.Generic;
 using System.Diagnostics;
 using System.Linq;
 using System.Text;
 using System.Threading.Tasks;

 namespace Medallion.Collections
 {
    public static class CollectionHelper
    {
        #region ---- CollectionEquals ----
        /// <summary>
        /// Determines whether <paramref name="this"/> and <paramref name="that"/> are equal in the sense of having the exact same
        /// elements. Unlike <see cref="Enumerable.SequenceEqual{TSource}(IEnumerable{TSource}, IEnumerable{TSource})"/>,
        /// this method disregards order. Unlike <see cref="ISet{T}.SetEquals(IEnumerable{T})"/>, this method does not disregard duplicates.
        /// An optional <paramref name="comparer"/> allows the equality semantics for the elements to be specified
        /// </summary>
        public static bool CollectionEquals<TElement>(this IEnumerable<TElement> @this, IEnumerable<TElement> that, IEqualityComparer<TElement> comparer = null)
        {
            Throw.IfNull(@this, "this");
            Throw.IfNull(that, "that");

            // FastCount optimization: If both of the collections are materialized and have counts, 
            // we can exit very quickly if those counts differ
            int thisCount, thatCount;
            var hasThisCount = TryFastCount(@this, out thisCount);
            bool hasThatCount;
            if (hasThisCount)
            {
                hasThatCount = TryFastCount(that, out thatCount);
                if (hasThatCount)
                {
                    if (thisCount != thatCount)
                    {
                        return false;
                    }
                    if (thisCount == 0)
                    {
                        return true;
                    }
                }
            }
            else
            {
                hasThatCount = false;
            }

            var cmp = comparer ?? EqualityComparer<TElement>.Default;

            var itemsEnumerated = 0;
            // SequenceEqual optimization: we reduce/avoid hashing
            // the collections have common prefixes, at the cost of only one
            // extra Equals() call in the case where the prefixes are not common
            using (var thisEnumerator = @this.GetEnumerator())
            using (var thatEnumerator = that.GetEnumerator())
            {
                while (true)
                {
                    var thisFinished = !thisEnumerator.MoveNext();
                    var thatFinished = !thatEnumerator.MoveNext();

                    if (thisFinished)
                    {
                        // either this shorter than that, or the two were sequence-equal
                        return thatFinished;
                    }
                    if (thatFinished)
                    {
                        // that shorter than this
                        return false;
                    }

                    // keep track of this so that we can factor it into count-based 
                    // logic below
                    ++itemsEnumerated;

                    if (!cmp.Equals(thisEnumerator.Current, thatEnumerator.Current))
                    {
                        break; // prefixes were not equal
                    }
                }

                // now, build a dictionary of item => count out of one collection and then
                // probe it with the other collection to look for mismatches

                // Build/Probe Choice optimization: if we know the count of one collection, we should
                // use the other collection to build the dictionary. That way we can bail immediately if
                // we see too few or too many items
                CountingSet<TElement> elementCounts;
                IEnumerator<TElement> probeSide;
                if (hasThisCount)
                {
                    // we know this's count => use that as the build side
                    probeSide = thisEnumerator;
                    var remaining = thisCount - itemsEnumerated;
                    if (hasThatCount)
                    {
                        // if we have both counts, that means they must be equal or we would have already
                        // exited. However, in this case, we know exactly the capacity needed for the dictionary
                        // so we can avoid resizing
                        elementCounts = new CountingSet<TElement>(capacity: remaining, comparer: cmp);
                        do
                        {
                            elementCounts.Increment(thatEnumerator.Current);
                        }
                        while (thatEnumerator.MoveNext());
                    }
                    else
                    {
                        elementCounts = TryBuildElementCountsWithKnownCount(thatEnumerator, remaining, cmp);
                    }
                }
                else if (TryFastCount(that, out thatCount))
                {
                    // we know that's count => use this as the build side
                    probeSide = thatEnumerator;
                    var remaining = thatCount - itemsEnumerated;
                    elementCounts = TryBuildElementCountsWithKnownCount(thisEnumerator, remaining, cmp);
                }
                else
                {
                    // when we don't know either count, just use that as the build side arbitrarily
                    probeSide = thisEnumerator;
                    elementCounts = new CountingSet<TElement>(cmp);
                    do
                    {
                        elementCounts.Increment(thatEnumerator.Current);
                    }
                    while (thatEnumerator.MoveNext());
                }

                // check whether we failed to construct a dictionary. This happens when we know
                // one of the counts and we detect, during construction, that the counts are unequal
                if (elementCounts == null)
                {
                    return false;
                }
                
                // probe the dictionary with the probe side enumerator
                do
                {
                    if (!elementCounts.TryDecrement(probeSide.Current))
                    {
                        // element in probe not in build => not equal
                        return false;
                    }
                }
                while (probeSide.MoveNext());

                // we are equal only if the loop above completely cleared out the dictionary
                return elementCounts.IsEmpty;
            }
        }

        /// <summary>
        /// Constructs a count dictionary, staying mindful of the known number of elements
        /// so that we bail early (returning null) if we detect a count mismatch
        /// </summary>
        private static CountingSet<TKey> TryBuildElementCountsWithKnownCount<TKey>(
            IEnumerator<TKey> elements, 
            int remaining,
            IEqualityComparer<TKey> comparer)
        {
            if (remaining == 0)
            {
                // don't build the dictionary at all if nothing should be in it
                return null;
            }

            const int MaxInitialElementCountsCapacity = 1024;
            var elementCounts = new CountingSet<TKey>(capacity: Math.Min(remaining, MaxInitialElementCountsCapacity), comparer: comparer);
            elementCounts.Increment(elements.Current);
            while (elements.MoveNext())
            {
                if (--remaining < 0)
                {
                    // too many elements
                    return null;
                }
                elementCounts.Increment(elements.Current);
            }

            if (remaining > 0)
            {
                // too few elements
                return null;
            }

            return elementCounts;
        }
        
        /// <summary>
        /// Key Lookup Reduction optimization: this custom datastructure halves the number of <see cref="IEqualityComparer{T}.GetHashCode(T)"/>
        /// and <see cref="IEqualityComparer{T}.Equals(T, T)"/> operations by building in the increment/decrement operations of a counting dictionary.
        /// This also solves <see cref="Dictionary{TKey, TValue}"/>'s issues with null keys
        /// </summary>
        private sealed class CountingSet<T>
        {
            // picked based on observing unit test performance
            private const double MaxLoad = .62;

            private readonly IEqualityComparer<T> comparer;
            private Bucket[] buckets;
            private int populatedBucketCount;
            /// <summary>
            /// When we reach this count, we need to resize
            /// </summary>
            private int nextResizeCount;            

            public CountingSet(IEqualityComparer<T> comparer, int capacity = 0)
            {
                this.comparer = comparer;
                // we pick the initial length by assuming our current table is one short of the desired
                // capacity and then using our standard logic of picking the next valid table size
                this.buckets = new Bucket[GetNextTableSize((int)(capacity / MaxLoad) - 1)];
                this.nextResizeCount = this.CalculateNextResizeCount();
            }

            public bool IsEmpty { get { return this.populatedBucketCount == 0; } }

            public void Increment(T item)
            {
                int bucketIndex;
                uint hashCode;
                if (this.TryFindBucket(item, out bucketIndex, out hashCode))
                {
                    // if a bucket already existed, just update it's count
                    ++this.buckets[bucketIndex].Count;
                }
                else
                {
                    // otherwise, claim a new bucket
                    this.buckets[bucketIndex].HashCode = hashCode;
                    this.buckets[bucketIndex].Value = item;
                    this.buckets[bucketIndex].Count = 1;
                    ++this.populatedBucketCount;

                    // resize the table if we've grown too full
                    if (this.populatedBucketCount == this.nextResizeCount)
                    {
                        var newBuckets = new Bucket[GetNextTableSize(this.buckets.Length)];

                        // rehash
                        for (var i = 0; i < this.buckets.Length; ++i)
                        {
                            var oldBucket = this.buckets[i];
                            if (oldBucket.HashCode != 0)
                            {
                                var newBucketIndex = oldBucket.HashCode % newBuckets.Length;
                                while (true)
                                {
                                    if (newBuckets[newBucketIndex].HashCode == 0)
                                    {
                                        newBuckets[newBucketIndex] = oldBucket;
                                        break;
                                    }

                                    newBucketIndex = (newBucketIndex + 1) % newBuckets.Length;
                                }
                            } 
                        }

                        this.buckets = newBuckets;
                        this.nextResizeCount = this.CalculateNextResizeCount();
                    }
                }
            }

            public bool TryDecrement(T item)
            {
                int bucketIndex;
                uint ignored;
                if (this.TryFindBucket(item, out bucketIndex, out ignored)
                    && this.buckets[bucketIndex].Count > 0)
                {
                    if (--this.buckets[bucketIndex].Count == 0)
                    {
                        // Note: we can't do this because it messes up our try-find logic
                        //// mark as unpopulated. Not strictly necessary because CollectionEquals always does all increments
                        //// before all decrements currently. However, this is very cheap to do and allowing the collection to
                        //// "just work" in any situation is a nice benefit
                        //// this.buckets[bucketIndex].HashCode = 0;

                        --this.populatedBucketCount;
                    }
                    return true;
                }

                return false;
            }

            private bool TryFindBucket(T item, out int index, out uint hashCode)
            {
                // we convert the raw hash code to a uint to get correctly-signed mod operations
                // and get rid of the zero value so that we can use 0 to mean "unoccupied"
                var rawHashCode = this.comparer.GetHashCode(item);
                hashCode = rawHashCode == 0 ? uint.MaxValue : unchecked((uint)rawHashCode);

                var bestBucketIndex = (int)(hashCode % this.buckets.Length);

                var bucketIndex = bestBucketIndex;
                while (true) // guaranteed to terminate because of how we set load factor
                {
                    var bucket = this.buckets[bucketIndex];
                    if (bucket.HashCode == 0)
                    {
                        // found unoccupied bucket
                        index = bucketIndex;
                        return false;
                    }
                    if (bucket.HashCode == hashCode && this.comparer.Equals(bucket.Value, item))
                    {
                        // found matching bucket
                        index = bucketIndex;
                        return true;
                    }

                    // otherwise march on to the next adjacent bucket
                    bucketIndex = (bucketIndex + 1) % this.buckets.Length;
                }
            }

            private int CalculateNextResizeCount()
            {
                return (int)(MaxLoad * this.buckets.Length) + 1;
            }

            private static readonly int[] HashTableSizes = new[]
            {
                // hash table primes from http://planetmath.org/goodhashtableprimes
                23, 53, 97, 193, 389, 769, 1543, 3079, 6151, 12289,
                24593, 49157, 98317, 196613, 393241, 786433, 1572869,
                3145739, 6291469, 12582917, 25165843, 50331653, 100663319,
                201326611, 402653189, 805306457, 1610612741,
                // the first two values are (1) a prime roughly half way between the previous value and int.MaxValue
                // and (2) the prime closest too, but not above, int.MaxValue. The maximum size is, of course, int.MaxValue
                1879048201, 2147483629, int.MaxValue
            };

            private static int GetNextTableSize(int currentSize)
            {
                for (var i = 0; i < HashTableSizes.Length; ++i)
                {
                    var nextSize = HashTableSizes[i];
                    if (nextSize > currentSize) { return nextSize; }
                }

                throw new InvalidOperationException("Hash table cannot expand further");
            }

            [DebuggerDisplay("{Value}, {Count}, {HashCode}")]
            private struct Bucket
            {
                // note: 0 (default) means the bucket is unoccupied
                internal uint HashCode;
                internal T Value;
                internal int Count;
            }
        }
        #endregion

        private static bool TryFastCount<T>(IEnumerable<T> @this, out int count)
        {
            var collection = @this as ICollection<T>;
            if (collection != null)
            {
                count = collection.Count;
                return true;
            }

            var readOnlyCollection = @this as IReadOnlyCollection<T>;
            if (readOnlyCollection != null)
            {
                count = readOnlyCollection.Count;
                return true;
            }

            count = -1;
            return false;
        }
    }
 }
	using System;
	using System.Collections;
	using System.Collections.Generic;
	using System.Diagnostics;
	using System.Linq;
	using System.Text;
	using System.Threading.Tasks;

	namespace Medallion.Collections
	{
	public static class CollectionHelper
	{
	#region ---- CollectionEquals ----
	/// <summary>
	/// Determines whether <paramref name="this"/> and <paramref name="that"/> are equal in the sense of having the exact same
	/// elements. Unlike <see cref="Enumerable.SequenceEqual{TSource}(IEnumerable{TSource}, IEnumerable{TSource})"/>,
	/// this method disregards order. Unlike <see cref="ISet{T}.SetEquals(IEnumerable{T})"/>, this method does not disregard duplicates.
	/// An optional <paramref name="comparer"/> allows the equality semantics for the elements to be specified
	/// </summary>
	public static bool CollectionEquals<TElement>(this IEnumerable<TElement> @this, IEnumerable<TElement> that, IEqualityComparer<TElement> comparer = null)
	{
	Throw.IfNull(@this, "this");
	Throw.IfNull(that, "that");

	// FastCount optimization: If both of the collections are materialized and have counts,
	// we can exit very quickly if those counts differ
	int thisCount, thatCount;
	var hasThisCount = TryFastCount(@this, out thisCount);
	bool hasThatCount;
	if (hasThisCount)
	{
	hasThatCount = TryFastCount(that, out thatCount);
	if (hasThatCount)
	{
	if (thisCount != thatCount)
	{
	return false;
	}
	if (thisCount == 0)
	{
	return true;
	}
	}
	}
	else
	{
	hasThatCount = false;
	}

	var cmp = comparer ?? EqualityComparer<TElement>.Default;

	var itemsEnumerated = 0;
	// SequenceEqual optimization: we reduce/avoid hashing
	// the collections have common prefixes, at the cost of only one
	// extra Equals() call in the case where the prefixes are not common
	using (var thisEnumerator = @this.GetEnumerator())
	using (var thatEnumerator = that.GetEnumerator())
	{
	while (true)
	{
	var thisFinished = !thisEnumerator.MoveNext();
	var thatFinished = !thatEnumerator.MoveNext();

	if (thisFinished)
	{
	// either this shorter than that, or the two were sequence-equal
	return thatFinished;
	}
	if (thatFinished)
	{
	// that shorter than this
	return false;
	}

	// keep track of this so that we can factor it into count-based
	// logic below
	++itemsEnumerated;

	if (!cmp.Equals(thisEnumerator.Current, thatEnumerator.Current))
	{
	break; // prefixes were not equal
	}
	}

	// now, build a dictionary of item => count out of one collection and then
	// probe it with the other collection to look for mismatches

	// Build/Probe Choice optimization: if we know the count of one collection, we should
	// use the other collection to build the dictionary. That way we can bail immediately if
	// we see too few or too many items
	CountingSet<TElement> elementCounts;
	IEnumerator<TElement> probeSide;
	if (hasThisCount)
	{
	// we know this's count => use that as the build side
	probeSide = thisEnumerator;
	var remaining = thisCount - itemsEnumerated;
	if (hasThatCount)
	{
	// if we have both counts, that means they must be equal or we would have already
	// exited. However, in this case, we know exactly the capacity needed for the dictionary
	// so we can avoid resizing
	elementCounts = new CountingSet<TElement>(capacity: remaining, comparer: cmp);
	do
	{
	elementCounts.Increment(thatEnumerator.Current);
	}
	while (thatEnumerator.MoveNext());
	}
	else
	{
	elementCounts = TryBuildElementCountsWithKnownCount(thatEnumerator, remaining, cmp);
	}
	}
	else if (TryFastCount(that, out thatCount))
	{
	// we know that's count => use this as the build side
	probeSide = thatEnumerator;
	var remaining = thatCount - itemsEnumerated;
	elementCounts = TryBuildElementCountsWithKnownCount(thisEnumerator, remaining, cmp);
	}
	else
	{
	// when we don't know either count, just use that as the build side arbitrarily
	probeSide = thisEnumerator;
	elementCounts = new CountingSet<TElement>(cmp);
	do
	{
	elementCounts.Increment(thatEnumerator.Current);
	}
	while (thatEnumerator.MoveNext());
	}

	// check whether we failed to construct a dictionary. This happens when we know
	// one of the counts and we detect, during construction, that the counts are unequal
	if (elementCounts == null)
	{
	return false;
	}

	// probe the dictionary with the probe side enumerator
	do
	{
	if (!elementCounts.TryDecrement(probeSide.Current))
	{
	// element in probe not in build => not equal
	return false;
	}
	}
	while (probeSide.MoveNext());

	// we are equal only if the loop above completely cleared out the dictionary
	return elementCounts.IsEmpty;
	}
	}

	/// <summary>
	/// Constructs a count dictionary, staying mindful of the known number of elements
	/// so that we bail early (returning null) if we detect a count mismatch
	/// </summary>
	private static CountingSet<TKey> TryBuildElementCountsWithKnownCount<TKey>(
	IEnumerator<TKey> elements,
	int remaining,
	IEqualityComparer<TKey> comparer)
	{
	if (remaining == 0)
	{
	// don't build the dictionary at all if nothing should be in it
	return null;
	}

	const int MaxInitialElementCountsCapacity = 1024;
	var elementCounts = new CountingSet<TKey>(capacity: Math.Min(remaining, MaxInitialElementCountsCapacity), comparer: comparer);
	elementCounts.Increment(elements.Current);
	while (elements.MoveNext())
	{
	if (--remaining < 0)
	{
	// too many elements
	return null;
	}
	elementCounts.Increment(elements.Current);
	}

	if (remaining > 0)
	{
	// too few elements
	return null;
	}

	return elementCounts;
	}

	/// <summary>
	/// Key Lookup Reduction optimization: this custom datastructure halves the number of <see cref="IEqualityComparer{T}.GetHashCode(T)"/>
	/// and <see cref="IEqualityComparer{T}.Equals(T, T)"/> operations by building in the increment/decrement operations of a counting dictionary.
	/// This also solves <see cref="Dictionary{TKey, TValue}"/>'s issues with null keys
	/// </summary>
	private sealed class CountingSet<T>
	{
	// picked based on observing unit test performance
	private const double MaxLoad = .62;

	private readonly IEqualityComparer<T> comparer;
	private Bucket[] buckets;
	private int populatedBucketCount;
	/// <summary>
	/// When we reach this count, we need to resize
	/// </summary>
	private int nextResizeCount;

	public CountingSet(IEqualityComparer<T> comparer, int capacity = 0)
	{
	this.comparer = comparer;
	// we pick the initial length by assuming our current table is one short of the desired
	// capacity and then using our standard logic of picking the next valid table size
	this.buckets = new Bucket[GetNextTableSize((int)(capacity / MaxLoad) - 1)];
	this.nextResizeCount = this.CalculateNextResizeCount();
	}

	public bool IsEmpty { get { return this.populatedBucketCount == 0; } }

	public void Increment(T item)
	{
	int bucketIndex;
	uint hashCode;
	if (this.TryFindBucket(item, out bucketIndex, out hashCode))
	{
	// if a bucket already existed, just update it's count
	++this.buckets[bucketIndex].Count;
	}
	else
	{
	// otherwise, claim a new bucket
	this.buckets[bucketIndex].HashCode = hashCode;
	this.buckets[bucketIndex].Value = item;
	this.buckets[bucketIndex].Count = 1;
	++this.populatedBucketCount;

	// resize the table if we've grown too full
	if (this.populatedBucketCount == this.nextResizeCount)
	{
	var newBuckets = new Bucket[GetNextTableSize(this.buckets.Length)];

	// rehash
	for (var i = 0; i < this.buckets.Length; ++i)
	{
	var oldBucket = this.buckets[i];
	if (oldBucket.HashCode != 0)
	{
	var newBucketIndex = oldBucket.HashCode % newBuckets.Length;
	while (true)
	{
	if (newBuckets[newBucketIndex].HashCode == 0)
	{
	newBuckets[newBucketIndex] = oldBucket;
	break;
	}

	newBucketIndex = (newBucketIndex + 1) % newBuckets.Length;
	}
	}
	}

	this.buckets = newBuckets;
	this.nextResizeCount = this.CalculateNextResizeCount();
	}
	}
	}

	public bool TryDecrement(T item)
	{
	int bucketIndex;
	uint ignored;
	if (this.TryFindBucket(item, out bucketIndex, out ignored)
	&& this.buckets[bucketIndex].Count > 0)
	{
	if (--this.buckets[bucketIndex].Count == 0)
	{
	// Note: we can't do this because it messes up our try-find logic
	//// mark as unpopulated. Not strictly necessary because CollectionEquals always does all increments
	//// before all decrements currently. However, this is very cheap to do and allowing the collection to
	//// "just work" in any situation is a nice benefit
	//// this.buckets[bucketIndex].HashCode = 0;

	--this.populatedBucketCount;
	}
	return true;
	}

	return false;
	}

	private bool TryFindBucket(T item, out int index, out uint hashCode)
	{
	// we convert the raw hash code to a uint to get correctly-signed mod operations
	// and get rid of the zero value so that we can use 0 to mean "unoccupied"
	var rawHashCode = this.comparer.GetHashCode(item);
	hashCode = rawHashCode == 0 ? uint.MaxValue : unchecked((uint)rawHashCode);

	var bestBucketIndex = (int)(hashCode % this.buckets.Length);

	var bucketIndex = bestBucketIndex;
	while (true) // guaranteed to terminate because of how we set load factor
	{
	var bucket = this.buckets[bucketIndex];
	if (bucket.HashCode == 0)
	{
	// found unoccupied bucket
	index = bucketIndex;
	return false;
	}
	if (bucket.HashCode == hashCode && this.comparer.Equals(bucket.Value, item))
	{
	// found matching bucket
	index = bucketIndex;
	return true;
	}

	// otherwise march on to the next adjacent bucket
	bucketIndex = (bucketIndex + 1) % this.buckets.Length;
	}
	}

	private int CalculateNextResizeCount()
	{
	return (int)(MaxLoad * this.buckets.Length) + 1;
	}

	private static readonly int[] HashTableSizes = new[]
	{
	// hash table primes from http://planetmath.org/goodhashtableprimes
	23, 53, 97, 193, 389, 769, 1543, 3079, 6151, 12289,
	24593, 49157, 98317, 196613, 393241, 786433, 1572869,
	3145739, 6291469, 12582917, 25165843, 50331653, 100663319,
	201326611, 402653189, 805306457, 1610612741,
	// the first two values are (1) a prime roughly half way between the previous value and int.MaxValue
	// and (2) the prime closest too, but not above, int.MaxValue. The maximum size is, of course, int.MaxValue
	1879048201, 2147483629, int.MaxValue
	};

	private static int GetNextTableSize(int currentSize)
	{
	for (var i = 0; i < HashTableSizes.Length; ++i)
	{
	var nextSize = HashTableSizes[i];
	if (nextSize > currentSize) { return nextSize; }
	}

	throw new InvalidOperationException("Hash table cannot expand further");
	}

	[DebuggerDisplay("{Value}, {Count}, {HashCode}")]
	private struct Bucket
	{
	// note: 0 (default) means the bucket is unoccupied
	internal uint HashCode;
	internal T Value;
	internal int Count;
	}
	}
	#endregion

	private static bool TryFastCount<T>(IEnumerable<T> @this, out int count)
	{
	var collection = @this as ICollection<T>;
	if (collection != null)
	{
	count = collection.Count;
	return true;
	}

	var readOnlyCollection = @this as IReadOnlyCollection<T>;
	if (readOnlyCollection != null)
	{
	count = readOnlyCollection.Count;
	return true;
	}

	count = -1;
	return false;
	}
	}
	}