Skip to content

Instantly share code, notes, and snippets.

@matheusd
Last active October 11, 2025 10:57
Show Gist options
  • Select an option

  • Save matheusd/42d5517a3d8c9676e82a6b7af2dce5b4 to your computer and use it in GitHub Desktop.

Select an option

Save matheusd/42d5517a3d8c9676e82a6b7af2dce5b4 to your computer and use it in GitHub Desktop.
package main
import (
"math/bits"
"math/rand/v2"
"slices"
"sort"
"testing"
)
func CountCondition(data []int) int {
count := 0
for _, v := range data {
if v > 128 { // Random data = 50% misprediction
count++
}
}
return count
}
func CountConditionBranchless(data []int) int {
count := 0
for _, v := range data {
// No branch, just arithmetic
count += int((uint(v) >> 7) & 1)
}
return count
}
func CountConditionArith(data []int) int {
count := 0
for _, v := range data {
temp := int64(v) - 129
count += 1 + int(temp>>63)
}
return count
}
func CountConditionSorted(data []int) int {
sort.Ints(data) // Now branches are predictable!
count := 0
for _, v := range data {
if v > 128 { // First all false, then all true
count++
}
}
return count
}
func onesCountLoop(i uint) int {
return bits.OnesCount(uint(bits.OnesCount(uint(bits.OnesCount(uint(bits.OnesCount(i)))))))
}
func CountConditionBits(data []int) int {
count := 0
for _, v := range data {
// No branch, just arithmetic
count += onesCountLoop(uint(v) >> 7)
}
return count
}
func CountConditionMin(data []int) int {
count := 0
for _, v := range data {
// No branch, just arithmetic
count += int(min(uint(v)>>7, 1))
}
return count
}
func TestMy(t *testing.T) {
data := []int{0x10000, 1, 0}
res := CountConditionBranchless(data)
if res != 1 {
t.Fatal("wrong")
}
// t.Fatal("not implemented")
}
func BenchmarkCount(b *testing.B) {
rng := rand.New(rand.NewPCG(0x010203, 0x040506))
data := make([]int, 1000000)
for i := range data {
if rng.IntN(100) < 50 {
data[i] = rng.IntN(128)
} else {
data[i] = rng.Int()
}
}
// Make it hot.
want := CountCondition(data)
// Ensure every benchmark does the exact same thing.
dataCopy := slices.Clone(data)
b.Run("condition", func(b *testing.B) {
var got int
for b.Loop() {
b.StopTimer()
copy(dataCopy, data)
b.StartTimer()
got = CountCondition(dataCopy)
}
if got != want {
b.Errorf("wrong counts : %d %d", got, want)
}
})
b.Run("bits", func(b *testing.B) {
var got int
for b.Loop() {
got = CountConditionBits(data)
}
if got != want {
b.Errorf("wrong counts : %d %d", got, want)
}
})
b.Run("arith", func(b *testing.B) {
var got int
for b.Loop() {
b.StopTimer()
copy(dataCopy, data)
b.StartTimer()
got = CountConditionArith(dataCopy)
}
if got != want {
b.Errorf("wrong counts : %d %d", got, want)
}
})
b.Run("min", func(b *testing.B) {
var got int
for b.Loop() {
b.StopTimer()
copy(dataCopy, data)
b.StartTimer()
got = CountConditionMin(dataCopy)
}
if got != want {
b.Errorf("wrong counts : %d %d", got, want)
}
})
b.Run("sorted-every-iter", func(b *testing.B) {
var got int
for b.Loop() {
b.StopTimer()
copy(dataCopy, data)
b.StartTimer()
got = CountConditionSorted(dataCopy)
}
if got != want {
b.Errorf("wrong counts : %d %d", got, want)
}
})
b.Run("sorted-once", func(b *testing.B) {
var got int
sort.Ints(data)
for b.Loop() {
b.StopTimer()
copy(dataCopy, data)
b.StartTimer()
got = CountCondition(dataCopy) // Already sorted.
}
if got != want {
b.Errorf("wrong counts : %d %d", got, want)
}
})
b.Run("branchless", func(b *testing.B) {
var got int
for b.Loop() {
b.StopTimer()
copy(dataCopy, data)
b.StartTimer()
got = CountConditionBranchless(dataCopy)
}
if got != want {
b.Errorf("wrong counts : %d %d", got, want)
}
})
}
goos: linux
goarch: amd64
pkg: main2
cpu: AMD Ryzen 3 2200G with Radeon Vega Graphics
BenchmarkCount/condition-6 1506 795226 ns/op
BenchmarkCount/bits-6 591 2009529 ns/op
BenchmarkCount/arith-6 1531 775753 ns/op
BenchmarkCount/min-6 1419 859400 ns/op
BenchmarkCount/sorted-every-iter-6 16 70181151 ns/op
BenchmarkCount/sorted-once-6 1468 795274 ns/op
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment