Last active
October 11, 2025 10:57
-
-
Save matheusd/42d5517a3d8c9676e82a6b7af2dce5b4 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package main | |
| import ( | |
| "math/bits" | |
| "math/rand/v2" | |
| "slices" | |
| "sort" | |
| "testing" | |
| ) | |
| func CountCondition(data []int) int { | |
| count := 0 | |
| for _, v := range data { | |
| if v > 128 { // Random data = 50% misprediction | |
| count++ | |
| } | |
| } | |
| return count | |
| } | |
| func CountConditionBranchless(data []int) int { | |
| count := 0 | |
| for _, v := range data { | |
| // No branch, just arithmetic | |
| count += int((uint(v) >> 7) & 1) | |
| } | |
| return count | |
| } | |
| func CountConditionArith(data []int) int { | |
| count := 0 | |
| for _, v := range data { | |
| temp := int64(v) - 129 | |
| count += 1 + int(temp>>63) | |
| } | |
| return count | |
| } | |
| func CountConditionSorted(data []int) int { | |
| sort.Ints(data) // Now branches are predictable! | |
| count := 0 | |
| for _, v := range data { | |
| if v > 128 { // First all false, then all true | |
| count++ | |
| } | |
| } | |
| return count | |
| } | |
| func onesCountLoop(i uint) int { | |
| return bits.OnesCount(uint(bits.OnesCount(uint(bits.OnesCount(uint(bits.OnesCount(i))))))) | |
| } | |
| func CountConditionBits(data []int) int { | |
| count := 0 | |
| for _, v := range data { | |
| // No branch, just arithmetic | |
| count += onesCountLoop(uint(v) >> 7) | |
| } | |
| return count | |
| } | |
| func CountConditionMin(data []int) int { | |
| count := 0 | |
| for _, v := range data { | |
| // No branch, just arithmetic | |
| count += int(min(uint(v)>>7, 1)) | |
| } | |
| return count | |
| } | |
| func TestMy(t *testing.T) { | |
| data := []int{0x10000, 1, 0} | |
| res := CountConditionBranchless(data) | |
| if res != 1 { | |
| t.Fatal("wrong") | |
| } | |
| // t.Fatal("not implemented") | |
| } | |
| func BenchmarkCount(b *testing.B) { | |
| rng := rand.New(rand.NewPCG(0x010203, 0x040506)) | |
| data := make([]int, 1000000) | |
| for i := range data { | |
| if rng.IntN(100) < 50 { | |
| data[i] = rng.IntN(128) | |
| } else { | |
| data[i] = rng.Int() | |
| } | |
| } | |
| // Make it hot. | |
| want := CountCondition(data) | |
| // Ensure every benchmark does the exact same thing. | |
| dataCopy := slices.Clone(data) | |
| b.Run("condition", func(b *testing.B) { | |
| var got int | |
| for b.Loop() { | |
| b.StopTimer() | |
| copy(dataCopy, data) | |
| b.StartTimer() | |
| got = CountCondition(dataCopy) | |
| } | |
| if got != want { | |
| b.Errorf("wrong counts : %d %d", got, want) | |
| } | |
| }) | |
| b.Run("bits", func(b *testing.B) { | |
| var got int | |
| for b.Loop() { | |
| got = CountConditionBits(data) | |
| } | |
| if got != want { | |
| b.Errorf("wrong counts : %d %d", got, want) | |
| } | |
| }) | |
| b.Run("arith", func(b *testing.B) { | |
| var got int | |
| for b.Loop() { | |
| b.StopTimer() | |
| copy(dataCopy, data) | |
| b.StartTimer() | |
| got = CountConditionArith(dataCopy) | |
| } | |
| if got != want { | |
| b.Errorf("wrong counts : %d %d", got, want) | |
| } | |
| }) | |
| b.Run("min", func(b *testing.B) { | |
| var got int | |
| for b.Loop() { | |
| b.StopTimer() | |
| copy(dataCopy, data) | |
| b.StartTimer() | |
| got = CountConditionMin(dataCopy) | |
| } | |
| if got != want { | |
| b.Errorf("wrong counts : %d %d", got, want) | |
| } | |
| }) | |
| b.Run("sorted-every-iter", func(b *testing.B) { | |
| var got int | |
| for b.Loop() { | |
| b.StopTimer() | |
| copy(dataCopy, data) | |
| b.StartTimer() | |
| got = CountConditionSorted(dataCopy) | |
| } | |
| if got != want { | |
| b.Errorf("wrong counts : %d %d", got, want) | |
| } | |
| }) | |
| b.Run("sorted-once", func(b *testing.B) { | |
| var got int | |
| sort.Ints(data) | |
| for b.Loop() { | |
| b.StopTimer() | |
| copy(dataCopy, data) | |
| b.StartTimer() | |
| got = CountCondition(dataCopy) // Already sorted. | |
| } | |
| if got != want { | |
| b.Errorf("wrong counts : %d %d", got, want) | |
| } | |
| }) | |
| b.Run("branchless", func(b *testing.B) { | |
| var got int | |
| for b.Loop() { | |
| b.StopTimer() | |
| copy(dataCopy, data) | |
| b.StartTimer() | |
| got = CountConditionBranchless(dataCopy) | |
| } | |
| if got != want { | |
| b.Errorf("wrong counts : %d %d", got, want) | |
| } | |
| }) | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| goos: linux | |
| goarch: amd64 | |
| pkg: main2 | |
| cpu: AMD Ryzen 3 2200G with Radeon Vega Graphics | |
| BenchmarkCount/condition-6 1506 795226 ns/op | |
| BenchmarkCount/bits-6 591 2009529 ns/op | |
| BenchmarkCount/arith-6 1531 775753 ns/op | |
| BenchmarkCount/min-6 1419 859400 ns/op | |
| BenchmarkCount/sorted-every-iter-6 16 70181151 ns/op | |
| BenchmarkCount/sorted-once-6 1468 795274 ns/op |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment