Skip to content

Instantly share code, notes, and snippets.

@xfthhxk
Last active July 9, 2024 12:34
Show Gist options
  • Save xfthhxk/cd1c63cef701e233a40f2040a998c852 to your computer and use it in GitHub Desktop.
Save xfthhxk/cd1c63cef701e233a40f2040a998c852 to your computer and use it in GitHub Desktop.
RoaringBitmap Clojure wrapper
(ns roaring-bitmap
(:refer-clojure :exclude [contains? empty? min max seq])
(:import (java.io
DataInput
DataOutput)
(java.nio
ByteBuffer)
(org.roaringbitmap
Container
ContainerPointer
FastAggregation
IntConsumer
PeekableIntIterator
RoaringBitmap
RoaringBitmapWriter)
(org.roaringbitmap.buffer
MutableRoaringBitmap)))
;; requires in deps.edn
;; org.roaringbitmap/RoaringBitmap {:mvn/version "0.9.31"}
(set! *warn-on-reflection* true)
(defn roaring-bitmap?
[x]
(instance? RoaringBitmap x))
(defn ->bitmap
(^RoaringBitmap [] (RoaringBitmap.))
(^RoaringBitmap [xs]
(let [w (-> (RoaringBitmapWriter/writer)
(.initialCapacity Short/MAX_VALUE)
(.optimiseForRuns)
(.get))]
(doseq [x xs]
(when x
(.add w x)))
(.get w))))
(defn from-ordered
^RoaringBitmap [xs]
(RoaringBitmap/bitmapOf (int-array xs)))
(defn from-unordered
^RoaringBitmap [xs]
(RoaringBitmap/bitmapOfUnordered (int-array xs)))
(defn from-range
^RoaringBitmap [^long start ^long end]
(RoaringBitmap/bitmapOfRange start end))
(defn add!
"Add all value or values, modifying `rb` in place"
^RoaringBitmap [^RoaringBitmap rb x-or-xs]
(doseq [x (if (int? x-or-xs) [x-or-xs] x-or-xs)]
(.add rb ^int x))
rb)
(defn add-range!
"Add range by modifying `rb` in place."
^RoaringBitmap [^RoaringBitmap rb ^long range-start ^long range-end]
(.add rb range-start range-end)
rb)
(defn add-range
"Add range to `rb` returning a new `RoaringBitmap`"
^RoaringBitmap [^RoaringBitmap rb ^long range-start ^long range-end]
(RoaringBitmap/add rb range-start range-end))
(defn add-n!
"Set the specified values to true, within given boundaries"
^RoaringBitmap [^RoaringBitmap rb xs ^long offset ^long n]
(.addN rb (int-array xs) offset n)
rb)
(defn add-offset
"Generates a new bitmap whtat has the same cardinality as `rb` but with all its values incremented by offset."
^RoaringBitmap [^RoaringBitmap rb ^long offset]
(RoaringBitmap/addOffset rb offset))
(defn append!
^RoaringBitmap [^RoaringBitmap rb key ^Container container]
(.append rb key container)
rb)
(defn array
^ints [^RoaringBitmap rb]
(.toArray rb))
(defn cardinality
^long [^RoaringBitmap rb]
(.getLongCardinality rb))
(defn cardinality-exceeds?
[^RoaringBitmap rb ^long threshold]
(.cardinalityExceeds rb threshold))
(defn clear!
^RoaringBitmap [^RoaringBitmap rb]
(.clear rb)
rb)
(defn clone
^RoaringBitmap [^RoaringBitmap rb]
(.clone rb))
(defn compression-run?
[^RoaringBitmap rb]
(.hasRunCompression rb))
(defn contains?
([^RoaringBitmap rb ^long x]
(.contains rb x))
([^RoaringBitmap rb ^long minimum ^long supremum]
(.contains rb minimum supremum)))
(defn container-pointer
^ContainerPointer [^RoaringBitmap rb]
(.getContainerPointer rb))
(defn deserialize-from-data-input!
^RoaringBitmap [^RoaringBitmap rb ^DataInput in]
(.deserialize rb in)
rb)
(defn deserialize-from-byte-buffer!
^RoaringBitmap [^RoaringBitmap rb ^ByteBuffer buf]
(.deserialize rb buf)
rb)
(defn deserialize!
(^RoaringBitmap [^bytes bs]
(let [^ByteBuffer buf (ByteBuffer/wrap bs)]
(deserialize! (->bitmap) buf)))
(^RoaringBitmap [^RoaringBitmap rb ^ByteBuffer buf]
(.deserialize rb buf)
rb))
(defn difference
"Returns a new `RoaringBitmap` after doing a set difference"
(^RoaringBitmap [] (->bitmap))
(^RoaringBitmap [^RoaringBitmap a ^RoaringBitmap b]
(RoaringBitmap/andNot a b)))
(defn difference!
"In-place difference"
^RoaringBitmap [^RoaringBitmap a ^RoaringBitmap b]
(.andNot a b)
a)
(defn difference-cardinality
^RoaringBitmap [^RoaringBitmap a ^RoaringBitmap b]
(RoaringBitmap/andNotCardinality a b))
(defn empty?
[^RoaringBitmap rb]
(.isEmpty rb))
(defn flip
^RoaringBitmap [^RoaringBitmap rb ^long range-start ^long range-end]
(RoaringBitmap/flip rb range-start range-end))
(defn flip!
"Add the value if it is not already set otherwise remove it. Modifies in place."
^RoaringBitmap [^RoaringBitmap rb ^long x]
(.flip rb x)
rb)
(defn flip-range
^RoaringBitmap [^RoaringBitmap rb ^long range-start ^long range-end]
(RoaringBitmap/flip rb range-start range-end))
(defn flip-range!
^RoaringBitmap [^RoaringBitmap rb ^long range-start ^long range-end]
(.flip rb range-start range-end)
rb)
(defn for-each
[^RoaringBitmap rb on-int-fn]
(.forEach rb (reify IntConsumer
(accept
[_this i]
(on-int-fn i)))))
(defn hamming-similar?
[^RoaringBitmap a ^RoaringBitmap b ^long tolerance]
(.isHammingSimilar a b tolerance))
(defn int-iterator
^PeekableIntIterator [^RoaringBitmap rb]
(.getIntIterator rb))
(defn intersection!
"In-place insersection"
^RoaringBitmap [^RoaringBitmap a ^RoaringBitmap b]
(.and a b)
a)
(defn intersection
"Returns a new RoaringBitmap after doing a set intersection"
(^RoaringBitmap [] (intersection []))
(^RoaringBitmap [xs]
(FastAggregation/and ^"[Lorg.roaringbitmap.RoaringBitmap;" (into-array RoaringBitmap xs)))
(^RoaringBitmap [^RoaringBitmap a ^RoaringBitmap b] (RoaringBitmap/and a b))
(^RoaringBitmap [a b & more]
(intersection (into [a b] more))))
(defn intersection-cardinality
[^RoaringBitmap a ^RoaringBitmap b]
(RoaringBitmap/andCardinality a b))
(defn intersects?
([^RoaringBitmap a ^RoaringBitmap b]
(RoaringBitmap/intersects a b))
([^RoaringBitmap rb ^long minimum ^long supremum]
(.intersects rb minimum supremum)))
(defn iterator
[^RoaringBitmap rb]
(.iterator rb))
(defn limit
^RoaringBitmap [^RoaringBitmap rb ^long max-cardinality]
(.limit rb max-cardinality))
(defn max
"Get the largest integer in `rb`."
[^RoaringBitmap rb]
(.last rb))
(defn maximum-serialized-size
[^long cardinality ^long universe-size]
(RoaringBitmap/maximumSerializedSize cardinality universe-size))
(defn memory-usage
"Estimated memory usage in bytes"
[^RoaringBitmap rb]
(.getLongSizeInBytes rb))
(defn min
"Get the smallest integer in `rb`."
[^RoaringBitmap rb]
(.first rb))
(defn mutable
(^MutableRoaringBitmap [] (MutableRoaringBitmap.))
(^MutableRoaringBitmap [^RoaringBitmap rb]
(.toMutableRoaringBitmap rb)))
(defn next-value
"Returns the first value equal to or larger than the provided value."
[^RoaringBitmap rb ^long from-value]
(.nextValue rb from-value))
(defn next-absent-value
[^RoaringBitmap rb ^long from-value]
(.nextAbsentValue rb from-value))
(defn optimize!
[^RoaringBitmap rb]
(.runOptimize rb))
(defn previous-absent-value
[^RoaringBitmap rb ^long from-value]
(.previousAbsentValue rb from-value))
(defn rank
[^RoaringBitmap rb ^long x]
(.rankLong rb x))
(defn range-cardinality
[^RoaringBitmap rb ^long start ^long end]
(.rangeCardinality rb start end))
(defn remove!
^RoaringBitmap [^RoaringBitmap rb x-or-xs]
(doseq [x (if (int? x-or-xs) [x-or-xs] x-or-xs)]
(.remove rb ^int x))
rb)
(defn remove-range!
"Remove range by modifying `rb` in place."
^RoaringBitmap [^RoaringBitmap rb ^long range-start ^long range-end]
(.remove rb range-start range-end)
rb)
(defn remove-range
"Remove range to `rb` returning a new `RoaringBitmap`"
^RoaringBitmap [^RoaringBitmap rb ^long range-start ^long range-end]
(RoaringBitmap/remove rb range-start range-end))
(defn remove-run-compression!
[^RoaringBitmap rb]
(.removeRunCompression rb))
(defn select
[^RoaringBitmap rb ^long i]
(.select rb i))
(defn seq
[^RoaringBitmap rb]
(-> rb .iterator iterator-seq))
(defn serialized-size
[^RoaringBitmap rb]
(.serializedSizeInBytes rb))
(defn serialize-to-data-output!
[^RoaringBitmap rb ^DataOutput out]
(.serialize rb out))
(defn serialize-to-byte-buffer!
[^RoaringBitmap rb ^ByteBuffer buf]
(.serialize rb buf))
(defn serialize!
"Serializes and returns bytes"
(^bytes [^RoaringBitmap rb]
(let [buf (ByteBuffer/allocate (serialized-size rb))]
(.serialize rb buf)
(.array buf)))
(^bytes [^RoaringBitmap rb ^ByteBuffer buf]
(.serialize rb buf)
(let [n (.position buf)
dest (byte-array n)]
(.get buf dest)
dest)))
(defn size
[^RoaringBitmap rb]
(.getSizeInBytes rb))
(defn subset?
"Is `a` a subset of `b`"
[^RoaringBitmap a ^RoaringBitmap b]
(.contains b a))
(defn trim!
^RoaringBitmap [^RoaringBitmap rb]
(.trim rb)
rb)
(defn union!
^RoaringBitmap [^RoaringBitmap a ^RoaringBitmap b]
(.or a b)
a)
(defn union
"Returns a new RoaringBitmap after doing a set union"
(^RoaringBitmap [] (union []))
(^RoaringBitmap [xs]
(FastAggregation/or ^"[Lorg.roaringbitmap.RoaringBitmap;" (into-array RoaringBitmap xs)))
(^RoaringBitmap [^RoaringBitmap a ^RoaringBitmap b] (RoaringBitmap/or a b))
(^RoaringBitmap [a b & more]
(union (into [a b] more))))
(defn union-cardinality
[^RoaringBitmap a ^RoaringBitmap b]
(RoaringBitmap/orCardinality a b))
(defn xor!
"In-place xor"
^RoaringBitmap [^RoaringBitmap a ^RoaringBitmap b]
(.xor a b)
a)
(defn xor
"Returns a new RoaringBitmap after doing a set xor"
(^RoaringBitmap [] (xor []))
(^RoaringBitmap [xs]
(FastAggregation/xor ^"[Lorg.roaringbitmap.RoaringBitmap;" (into-array RoaringBitmap xs)))
(^RoaringBitmap [^RoaringBitmap a ^RoaringBitmap b] (RoaringBitmap/xor a b))
(^RoaringBitmap [a b & more]
(xor (into [a b] more))))
(defn xor-cardinality
[^RoaringBitmap a ^RoaringBitmap b]
(RoaringBitmap/xorCardinality a b))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment