Skip to content

Instantly share code, notes, and snippets.

@brentp
Created January 6, 2017 23:34
Show Gist options
  • Save brentp/fecdc6d1c16cde4992d96d7c7a205166 to your computer and use it in GitHub Desktop.
Save brentp/fecdc6d1c16cde4992d96d7c7a205166 to your computer and use it in GitHub Desktop.
comparing interval trees

compare different interval tree methods in go.

The parameters are:

// Number of intervals in the tree
var n_intervals = 30000
// length of intervals in the tree
var i_length = 1000
// length of the chromosome (max start of simulated intervals)
var chrom_length = 500000000
// for every query on an interval in the tree, we do 10 queries that are likely to return 0 results.
var missed = 10

This compares:

A roaring bitmap is useful only when we want to test for overlap with a given set of bases and there is no need to return the actual intervals. However, as implemented, it performs poorly when most queries are misses because it has to check each base against the bitmap.

On my machine, this gives:

BenchmarkBuildSTree-4     	     100	  39890326 ns/op
BenchmarkBuildRBTree-4    	     300	  15441800 ns/op
BenchmarkBuildRoaring-4   	     300	  16679602 ns/op
BenchmarkQuerySRTree-4    	      20	 247207683 ns/op
BenchmarkQueryRBTree-4    	      20	 263191114 ns/op
BenchmarkQueryRoaring-4   	      10	 396755688 ns/op

for

var n_intervals = 30000
var i_length = 1000
var chrom_length = 500000000
var missed = 30

and

BenchmarkBuildSTree-4     	     100	  42074482 ns/op
BenchmarkBuildRBTree-4    	     200	  16274626 ns/op
BenchmarkBuildRoaring-4   	     300	  17870629 ns/op
BenchmarkQuerySRTree-4    	     100	  51416598 ns/op
BenchmarkQueryRBTree-4    	     100	  45728452 ns/op
BenchmarkQueryRoaring-4   	     100	  49655717 ns/op

for `var missed = 3

Note that roaring starts approaching the interval trees at this point.

package tree_test
import (
"fmt"
"math/rand"
"testing"
"github.com/RoaringBitmap/roaring"
"github.com/biogo/store/interval"
"github.com/toberndo/go-stree/stree"
)
// Integer-specific intervals
type Interval struct {
Start, End int
UID uintptr
}
func (i Interval) Overlap(b interval.IntRange) bool {
// Half-open interval indexing.
return i.End > b.Start && i.Start < b.End
}
func (i Interval) ID() uintptr { return i.UID }
func (i Interval) Range() interval.IntRange { return interval.IntRange{i.Start, i.End} }
func (i Interval) String() string { return fmt.Sprintf("[%d,%d)#%d", i.Start, i.End, i.UID) }
var n_intervals = 30000
var i_length = 1000
var chrom_length = 500000000
var missed = 3
func genIntervals(length int, n int, imax int) []Interval {
ivs := make([]Interval, n)
for i := 0; i < n; i++ {
s := rand.Intn(imax)
e := s + length
ivs[i] = Interval{Start: s, End: e}
}
return ivs
}
func BenchmarkBuildSTree(b *testing.B) {
ivs := genIntervals(i_length, n_intervals, chrom_length)
b.ResetTimer()
for i := 0; i < b.N; i++ {
t := stree.NewTree()
for _, iv := range ivs {
t.Push(iv.Start, iv.End)
}
t.BuildTree()
}
}
func BenchmarkBuildRBTree(b *testing.B) {
ivs := genIntervals(i_length, n_intervals, chrom_length)
b.ResetTimer()
for i := 0; i < b.N; i++ {
tree := &interval.IntTree{}
for i, iv := range ivs {
iv.UID = uintptr(i)
tree.Insert(iv, false)
}
}
}
func BenchmarkBuildRoaring(b *testing.B) {
ivs := genIntervals(i_length, n_intervals, chrom_length)
b.ResetTimer()
for i := 0; i < b.N; i++ {
tree := roaring.New()
for i, iv := range ivs {
iv.UID = uintptr(i)
tree.AddRange(uint64(iv.Start), uint64(iv.End))
}
}
}
func BenchmarkQuerySRTree(b *testing.B) {
tree := stree.NewTree()
ivs := genIntervals(i_length, n_intervals, chrom_length)
for _, iv := range ivs {
tree.Push(iv.Start, iv.End)
}
tree.BuildTree()
b.ResetTimer()
for i := 0; i < b.N; i++ {
for _, iv := range ivs {
if len(tree.Query(iv.Start, iv.End)) == 0 {
b.Fatalf("expected result for %d-%d", iv.Start, iv.End)
}
for k := 0; k < missed; k++ {
tree.Query(iv.End+3, iv.End+3+i_length)
}
}
}
}
func BenchmarkQueryRBTree(b *testing.B) {
tree := &interval.IntTree{}
ivs := genIntervals(i_length, n_intervals, chrom_length)
for i, iv := range ivs {
iv.UID = uintptr(i)
tree.Insert(iv, false)
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
for _, iv := range ivs {
if len(tree.Get(Interval{Start: iv.Start, End: iv.End})) == 0 {
b.Fatalf("expected result for %d-%d", iv.Start, iv.End)
}
for k := 0; k < missed; k++ {
tree.Get(&Interval{Start: iv.End + 3, End: iv.End + 3 + i_length})
}
}
}
}
func BenchmarkQueryRoaring(b *testing.B) {
tree := roaring.New()
ivs := genIntervals(i_length, n_intervals, chrom_length)
for _, iv := range ivs {
tree.AddRange(uint64(iv.Start), uint64(iv.End))
}
tree.RunOptimize()
//fmt.Printf("%+v\n", tree.Stats())
b.ResetTimer()
for i := 0; i < b.N; i++ {
for _, iv := range ivs {
found := false
for p := iv.Start; p < iv.End; p++ {
if tree.Contains(uint32(p)) {
found = true
break
}
}
if !found {
b.Fatalf("expected result for %d-%d", iv.Start, iv.End)
}
for k := 0; k < missed; k++ {
o := roaring.New()
o.AddRange(uint64(iv.End+3), uint64(iv.End+3+i_length))
o.Intersects(tree)
/*
for p := iv.End + 3; p < iv.End+3+i_length; p++ {
if tree.Contains(uint32(p)) {
break
}
}
*/
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment