Last active
September 26, 2024 18:59
-
-
Save jnorthrup/b829e50dc7de4a810fae762fb505f9b3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
typealias Bucket<T> = Series<T> | |
typealias HashStrategy<T> = (T) -> Int | |
typealias HeapBucket<T> = Join<Range<T>, Bucket<T>> | |
class VersatileSet<T>( | |
private var elements: Bucket<T> = emptySeries(), | |
private val hashStrategy: HashStrategy<T> = { it.hashCode() } | |
) : MutableSet<T> { | |
companion object { | |
const val CACHE_LINE_SIZE = 16 | |
const val BINARY_SEARCH_THRESHOLD = 256 | |
const val HEAP_BUCKET_SIZE = 64 | |
} | |
private var strategy: SetStrategy = SetStrategy.SINGLE_BUCKET | |
private var buckets: Series<Bucket<T>> = emptySeries() | |
private var heapBuckets: Series<HeapBucket<T>> = emptySeries() | |
private var isComparable: Boolean? = null | |
override val size: Int get() = when (strategy) { | |
SetStrategy.SINGLE_BUCKET, SetStrategy.BINARY_SEARCH -> elements.size | |
SetStrategy.HASH_BUCKETS -> buckets.fold(0) { acc, bucket -> acc + bucket.size } | |
SetStrategy.HEAP_SET -> heapBuckets.fold(0) { acc, bucket -> acc + bucket.b.size } | |
} | |
@Suppress("UNCHECKED_CAST") | |
override fun add(element: T): Boolean { | |
if (isComparable == null) { | |
isComparable = element is Comparable<*> | |
if (isComparable == true) { | |
strategy = SetStrategy.HEAP_SET | |
transitionToHeapSet() | |
} | |
} | |
return when (strategy) { | |
SetStrategy.SINGLE_BUCKET -> addToSingleBucket(element) | |
SetStrategy.BINARY_SEARCH -> addToBinarySearch(element) | |
SetStrategy.HASH_BUCKETS -> addToHashBuckets(element) | |
SetStrategy.HEAP_SET -> addToHeapSet(element as T & Comparable<T>) | |
} | |
} | |
private fun addToSingleBucket(element: T): Boolean { | |
if (element in elements) return false | |
elements += element | |
if (elements.size > CACHE_LINE_SIZE * 2) { | |
if (isComparable == true) transitionToHeapSet() | |
else transitionToBinarySearch() | |
} | |
return true | |
} | |
private fun addToBinarySearch(element: T): Boolean { | |
val index = (elements as Series<Comparable<Any>>).binarySearch(element as Comparable<Any>) | |
if (index >= 0) return false | |
val insertionPoint = -(index + 1) | |
elements = elements.size.inc() j { i -> | |
when { | |
i < insertionPoint -> elements[i] | |
i == insertionPoint -> element | |
else -> elements[i - 1] | |
} | |
} | |
if (elements.size > BINARY_SEARCH_THRESHOLD) { | |
transitionToHashBuckets() | |
} | |
return true | |
} | |
private fun addToHashBuckets(element: T): Boolean { | |
val bucketIndex = Math.abs(hashStrategy(element)) % buckets.size | |
val bucket = buckets[bucketIndex] | |
if (element in bucket) return false | |
buckets = buckets.size j { i -> | |
if (i == bucketIndex) bucket + element | |
else buckets[i] | |
} | |
return true | |
} | |
@Suppress("UNCHECKED_CAST") | |
private fun addToHeapSet(element: T & Comparable<T>): Boolean { | |
val bucketIndex = heapBuckets.binarySearch { it.a.compareTo(element) } | |
val insertionBucketIndex = if (bucketIndex >= 0) bucketIndex else -(bucketIndex + 1) | |
if (insertionBucketIndex < heapBuckets.size) { | |
val bucket = heapBuckets[insertionBucketIndex] | |
if (element in bucket.b) return false | |
heapBuckets = heapBuckets.size j { i -> | |
if (i == insertionBucketIndex) { | |
val newBucket = bucket.b + element | |
(min(bucket.a.start, element)..max(bucket.a.endInclusive, element)) j newBucket | |
} else heapBuckets[i] | |
} | |
} else { | |
heapBuckets += (element..element) j Series(1) { element } | |
} | |
if (heapBuckets[insertionBucketIndex].b.size > HEAP_BUCKET_SIZE) { | |
splitHeapBucket(insertionBucketIndex) | |
} | |
return true | |
} | |
private fun splitHeapBucket(index: Int) { | |
val bucket = heapBuckets[index] | |
val midIndex = bucket.b.size / 2 | |
val leftBucket = bucket.a.start..bucket.b[midIndex - 1] j bucket.b.take(midIndex) | |
val rightBucket = bucket.b[midIndex]..bucket.a.endInclusive j bucket.b.drop(midIndex) | |
heapBuckets = heapBuckets.size.inc() j { i -> | |
when { | |
i < index -> heapBuckets[i] | |
i == index -> leftBucket | |
i == index + 1 -> rightBucket | |
else -> heapBuckets[i - 1] | |
} | |
} | |
} | |
override fun clear() { | |
elements = emptySeries() | |
buckets = emptySeries() | |
heapBuckets = emptySeries() | |
strategy = SetStrategy.SINGLE_BUCKET | |
} | |
override fun iterator(): MutableIterator<T> = when (strategy) { | |
SetStrategy.SINGLE_BUCKET, SetStrategy.BINARY_SEARCH -> elements.toList().iterator() as MutableIterator<T> | |
SetStrategy.HASH_BUCKETS -> buckets.flatMap { it.toList() }.iterator() as MutableIterator<T> | |
SetStrategy.HEAP_SET -> heapBuckets.flatMap { it.b.toList() }.iterator() as MutableIterator<T> | |
} | |
override fun remove(element: T): Boolean = when (strategy) { | |
SetStrategy.SINGLE_BUCKET -> elements.toList().remove(element).also { if (it) elements = elements.filter { e -> e != element }.toSeries() } | |
SetStrategy.BINARY_SEARCH -> { | |
val index = (elements as Series<Comparable<Any>>).binarySearch(element as Comparable<Any>) | |
if (index < 0) false | |
else { | |
elements = elements.size.dec() j { i -> | |
when { | |
i < index -> elements[i] | |
else -> elements[i + 1] | |
} | |
} | |
true | |
} | |
} | |
SetStrategy.HASH_BUCKETS -> { | |
val bucketIndex = Math.abs(hashStrategy(element)) % buckets.size | |
val bucket = buckets[bucketIndex] | |
val newBucket = bucket.filter { it != element }.toSeries() | |
if (newBucket.size == bucket.size) false | |
else { | |
buckets = buckets.size j { i -> | |
if (i == bucketIndex) newBucket | |
else buckets[i] | |
} | |
true | |
} | |
} | |
SetStrategy.HEAP_SET -> { | |
val bucketIndex = heapBuckets.binarySearch { it.a.compareTo(element as Comparable<Any>) } | |
if (bucketIndex < 0) false | |
else { | |
val bucket = heapBuckets[bucketIndex] | |
val newBucketElements = bucket.b.filter { it != element }.toSeries() | |
if (newBucketElements.size == bucket.b.size) false | |
else { | |
heapBuckets = if (newBucketElements.isEmpty()) { | |
heapBuckets.size.dec() j { i -> | |
when { | |
i < bucketIndex -> heapBuckets[i] | |
else -> heapBuckets[i + 1] | |
} | |
} | |
} else { | |
heapBuckets.size j { i -> | |
if (i == bucketIndex) { | |
val newRange = if (newBucketElements.size == 1) { | |
newBucketElements[0] as Comparable<Any>..newBucketElements[0] as Comparable<Any> | |
} else { | |
newBucketElements[0] as Comparable<Any>..newBucketElements.last() as Comparable<Any> | |
} | |
newRange j newBucketElements | |
} else heapBuckets[i] | |
} | |
} | |
true | |
} | |
} | |
} | |
} | |
override fun contains(element: T): Boolean = when (strategy) { | |
SetStrategy.SINGLE_BUCKET -> element in elements | |
SetStrategy.BINARY_SEARCH -> (elements as Series<Comparable<Any>>).binarySearch(element as Comparable<Any>) >= 0 | |
SetStrategy.HASH_BUCKETS -> { | |
val bucketIndex = Math.abs(hashStrategy(element)) % buckets.size | |
element in buckets[bucketIndex] | |
} | |
SetStrategy.HEAP_SET -> { | |
val bucketIndex = heapBuckets.binarySearch { it.a.compareTo(element as Comparable<Any>) } | |
bucketIndex >= 0 && element in heapBuckets[bucketIndex].b | |
} | |
} | |
override fun containsAll(elements: Collection<T>): Boolean = elements.all { contains(it) } | |
override fun isEmpty(): Boolean = size == 0 | |
override fun retainAll(elements: Collection<T>): Boolean = removeAll(this - elements.toSet()) | |
override fun removeAll(elements: Collection<T>): Boolean = elements.map(::remove).any { it } | |
override fun addAll(elements: Collection<T>): Boolean = elements.map(::add).any { it } | |
private fun transitionToBinarySearch() { | |
elements = elements.toList().sortedBy { it as Comparable<Any> }.toSeries() | |
strategy = SetStrategy.BINARY_SEARCH | |
} | |
private fun transitionToHashBuckets() { | |
val bucketCount = size / 10 + 1 | |
buckets = Series(bucketCount) { emptySeries() } | |
elements.forEach { element -> | |
val bucketIndex = Math.abs(hashStrategy(element)) % bucketCount | |
buckets = buckets.size j { i -> | |
if (i == bucketIndex) buckets[i] + element | |
else buckets[i] | |
} | |
} | |
elements = emptySeries() | |
strategy = SetStrategy.HASH_BUCKETS | |
} | |
private fun transitionToHeapSet() { | |
heapBuckets = if (elements.isEmpty()) { | |
emptySeries() | |
} else { | |
val sortedElements = elements.toList().sortedBy { it as Comparable<Any> } | |
Series(1) { (sortedElements.first() as Comparable<Any>..sortedElements.last() as Comparable<Any>) j sortedElements.toSeries() } | |
} | |
elements = emptySeries() | |
strategy = SetStrategy.HEAP_SET | |
} | |
private enum class SetStrategy { | |
SINGLE_BUCKET, BINARY_SEARCH, HASH_BUCKETS, HEAP_SET | |
} | |
} | |
// Factory function | |
fun <T> versatileSetOf(vararg elements: T): VersatileSet<T> = | |
VersatileSet<T>().apply { addAll(elements.toList()) } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment