Skip to content

Instantly share code, notes, and snippets.

@jnorthrup
Last active September 26, 2024 18:59
Show Gist options
  • Save jnorthrup/b829e50dc7de4a810fae762fb505f9b3 to your computer and use it in GitHub Desktop.
Save jnorthrup/b829e50dc7de4a810fae762fb505f9b3 to your computer and use it in GitHub Desktop.
typealias Bucket<T> = Series<T>
typealias HashStrategy<T> = (T) -> Int
typealias HeapBucket<T> = Join<Range<T>, Bucket<T>>
class VersatileSet<T>(
private var elements: Bucket<T> = emptySeries(),
private val hashStrategy: HashStrategy<T> = { it.hashCode() }
) : MutableSet<T> {
companion object {
const val CACHE_LINE_SIZE = 16
const val BINARY_SEARCH_THRESHOLD = 256
const val HEAP_BUCKET_SIZE = 64
}
private var strategy: SetStrategy = SetStrategy.SINGLE_BUCKET
private var buckets: Series<Bucket<T>> = emptySeries()
private var heapBuckets: Series<HeapBucket<T>> = emptySeries()
private var isComparable: Boolean? = null
override val size: Int get() = when (strategy) {
SetStrategy.SINGLE_BUCKET, SetStrategy.BINARY_SEARCH -> elements.size
SetStrategy.HASH_BUCKETS -> buckets.fold(0) { acc, bucket -> acc + bucket.size }
SetStrategy.HEAP_SET -> heapBuckets.fold(0) { acc, bucket -> acc + bucket.b.size }
}
@Suppress("UNCHECKED_CAST")
override fun add(element: T): Boolean {
if (isComparable == null) {
isComparable = element is Comparable<*>
if (isComparable == true) {
strategy = SetStrategy.HEAP_SET
transitionToHeapSet()
}
}
return when (strategy) {
SetStrategy.SINGLE_BUCKET -> addToSingleBucket(element)
SetStrategy.BINARY_SEARCH -> addToBinarySearch(element)
SetStrategy.HASH_BUCKETS -> addToHashBuckets(element)
SetStrategy.HEAP_SET -> addToHeapSet(element as T & Comparable<T>)
}
}
private fun addToSingleBucket(element: T): Boolean {
if (element in elements) return false
elements += element
if (elements.size > CACHE_LINE_SIZE * 2) {
if (isComparable == true) transitionToHeapSet()
else transitionToBinarySearch()
}
return true
}
private fun addToBinarySearch(element: T): Boolean {
val index = (elements as Series<Comparable<Any>>).binarySearch(element as Comparable<Any>)
if (index >= 0) return false
val insertionPoint = -(index + 1)
elements = elements.size.inc() j { i ->
when {
i < insertionPoint -> elements[i]
i == insertionPoint -> element
else -> elements[i - 1]
}
}
if (elements.size > BINARY_SEARCH_THRESHOLD) {
transitionToHashBuckets()
}
return true
}
private fun addToHashBuckets(element: T): Boolean {
val bucketIndex = Math.abs(hashStrategy(element)) % buckets.size
val bucket = buckets[bucketIndex]
if (element in bucket) return false
buckets = buckets.size j { i ->
if (i == bucketIndex) bucket + element
else buckets[i]
}
return true
}
@Suppress("UNCHECKED_CAST")
private fun addToHeapSet(element: T & Comparable<T>): Boolean {
val bucketIndex = heapBuckets.binarySearch { it.a.compareTo(element) }
val insertionBucketIndex = if (bucketIndex >= 0) bucketIndex else -(bucketIndex + 1)
if (insertionBucketIndex < heapBuckets.size) {
val bucket = heapBuckets[insertionBucketIndex]
if (element in bucket.b) return false
heapBuckets = heapBuckets.size j { i ->
if (i == insertionBucketIndex) {
val newBucket = bucket.b + element
(min(bucket.a.start, element)..max(bucket.a.endInclusive, element)) j newBucket
} else heapBuckets[i]
}
} else {
heapBuckets += (element..element) j Series(1) { element }
}
if (heapBuckets[insertionBucketIndex].b.size > HEAP_BUCKET_SIZE) {
splitHeapBucket(insertionBucketIndex)
}
return true
}
private fun splitHeapBucket(index: Int) {
val bucket = heapBuckets[index]
val midIndex = bucket.b.size / 2
val leftBucket = bucket.a.start..bucket.b[midIndex - 1] j bucket.b.take(midIndex)
val rightBucket = bucket.b[midIndex]..bucket.a.endInclusive j bucket.b.drop(midIndex)
heapBuckets = heapBuckets.size.inc() j { i ->
when {
i < index -> heapBuckets[i]
i == index -> leftBucket
i == index + 1 -> rightBucket
else -> heapBuckets[i - 1]
}
}
}
override fun clear() {
elements = emptySeries()
buckets = emptySeries()
heapBuckets = emptySeries()
strategy = SetStrategy.SINGLE_BUCKET
}
override fun iterator(): MutableIterator<T> = when (strategy) {
SetStrategy.SINGLE_BUCKET, SetStrategy.BINARY_SEARCH -> elements.toList().iterator() as MutableIterator<T>
SetStrategy.HASH_BUCKETS -> buckets.flatMap { it.toList() }.iterator() as MutableIterator<T>
SetStrategy.HEAP_SET -> heapBuckets.flatMap { it.b.toList() }.iterator() as MutableIterator<T>
}
override fun remove(element: T): Boolean = when (strategy) {
SetStrategy.SINGLE_BUCKET -> elements.toList().remove(element).also { if (it) elements = elements.filter { e -> e != element }.toSeries() }
SetStrategy.BINARY_SEARCH -> {
val index = (elements as Series<Comparable<Any>>).binarySearch(element as Comparable<Any>)
if (index < 0) false
else {
elements = elements.size.dec() j { i ->
when {
i < index -> elements[i]
else -> elements[i + 1]
}
}
true
}
}
SetStrategy.HASH_BUCKETS -> {
val bucketIndex = Math.abs(hashStrategy(element)) % buckets.size
val bucket = buckets[bucketIndex]
val newBucket = bucket.filter { it != element }.toSeries()
if (newBucket.size == bucket.size) false
else {
buckets = buckets.size j { i ->
if (i == bucketIndex) newBucket
else buckets[i]
}
true
}
}
SetStrategy.HEAP_SET -> {
val bucketIndex = heapBuckets.binarySearch { it.a.compareTo(element as Comparable<Any>) }
if (bucketIndex < 0) false
else {
val bucket = heapBuckets[bucketIndex]
val newBucketElements = bucket.b.filter { it != element }.toSeries()
if (newBucketElements.size == bucket.b.size) false
else {
heapBuckets = if (newBucketElements.isEmpty()) {
heapBuckets.size.dec() j { i ->
when {
i < bucketIndex -> heapBuckets[i]
else -> heapBuckets[i + 1]
}
}
} else {
heapBuckets.size j { i ->
if (i == bucketIndex) {
val newRange = if (newBucketElements.size == 1) {
newBucketElements[0] as Comparable<Any>..newBucketElements[0] as Comparable<Any>
} else {
newBucketElements[0] as Comparable<Any>..newBucketElements.last() as Comparable<Any>
}
newRange j newBucketElements
} else heapBuckets[i]
}
}
true
}
}
}
}
override fun contains(element: T): Boolean = when (strategy) {
SetStrategy.SINGLE_BUCKET -> element in elements
SetStrategy.BINARY_SEARCH -> (elements as Series<Comparable<Any>>).binarySearch(element as Comparable<Any>) >= 0
SetStrategy.HASH_BUCKETS -> {
val bucketIndex = Math.abs(hashStrategy(element)) % buckets.size
element in buckets[bucketIndex]
}
SetStrategy.HEAP_SET -> {
val bucketIndex = heapBuckets.binarySearch { it.a.compareTo(element as Comparable<Any>) }
bucketIndex >= 0 && element in heapBuckets[bucketIndex].b
}
}
override fun containsAll(elements: Collection<T>): Boolean = elements.all { contains(it) }
override fun isEmpty(): Boolean = size == 0
override fun retainAll(elements: Collection<T>): Boolean = removeAll(this - elements.toSet())
override fun removeAll(elements: Collection<T>): Boolean = elements.map(::remove).any { it }
override fun addAll(elements: Collection<T>): Boolean = elements.map(::add).any { it }
private fun transitionToBinarySearch() {
elements = elements.toList().sortedBy { it as Comparable<Any> }.toSeries()
strategy = SetStrategy.BINARY_SEARCH
}
private fun transitionToHashBuckets() {
val bucketCount = size / 10 + 1
buckets = Series(bucketCount) { emptySeries() }
elements.forEach { element ->
val bucketIndex = Math.abs(hashStrategy(element)) % bucketCount
buckets = buckets.size j { i ->
if (i == bucketIndex) buckets[i] + element
else buckets[i]
}
}
elements = emptySeries()
strategy = SetStrategy.HASH_BUCKETS
}
private fun transitionToHeapSet() {
heapBuckets = if (elements.isEmpty()) {
emptySeries()
} else {
val sortedElements = elements.toList().sortedBy { it as Comparable<Any> }
Series(1) { (sortedElements.first() as Comparable<Any>..sortedElements.last() as Comparable<Any>) j sortedElements.toSeries() }
}
elements = emptySeries()
strategy = SetStrategy.HEAP_SET
}
private enum class SetStrategy {
SINGLE_BUCKET, BINARY_SEARCH, HASH_BUCKETS, HEAP_SET
}
}
// Factory function
fun <T> versatileSetOf(vararg elements: T): VersatileSet<T> =
VersatileSet<T>().apply { addAll(elements.toList()) }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment