-
-
Save smyrman/f7fd1734f9ea20d4648ed359bbcc6ac7 to your computer and use it in GitHub Desktop.
package benchcp_test | |
import ( | |
"fmt" | |
"testing" | |
) | |
import ( | |
"fmt" | |
"testing" | |
) | |
// benchSinkAppend receives each iteration's result so the compiler cannot
// prove the allocation + append are dead code and eliminate the very work
// this benchmark is trying to measure.
var benchSinkAppend []int64

// BenchmarkAppendAllocInline measures allocating a zero-length slice with
// capacity size and filling it from an existing slice via append, for
// sizes 100, 1000, and 10000.
func BenchmarkAppendAllocInline(b *testing.B) {
	for _, size := range []int{100, 1000, 10000} {
		size := size // capture per-iteration copy for the closure (required before Go 1.22)
		existing := make([]int64, size)
		b.Run(fmt.Sprintf("size=%d", size), func(b *testing.B) {
			// No setup inside the sub-benchmark, so no b.ResetTimer() is needed:
			// the timer starts fresh when b.Run invokes this function.
			for i := 0; i < b.N; i++ {
				target := make([]int64, 0, size)
				target = append(target, existing...)
				benchSinkAppend = target // sink: keep the result observable
			}
		})
	}
}
// benchSinkCopy receives each iteration's result so the compiler cannot
// prove the allocation + copy are dead code and eliminate the very work
// this benchmark is trying to measure.
var benchSinkCopy []int64

// BenchmarkCopyInline measures allocating a full-length slice and filling
// it from an existing slice via the built-in copy, for sizes 100, 1000,
// and 10000. Compare against BenchmarkAppendAllocInline.
func BenchmarkCopyInline(b *testing.B) {
	for _, size := range []int{100, 1000, 10000} {
		size := size // capture per-iteration copy for the closure (required before Go 1.22)
		existing := make([]int64, size)
		b.Run(fmt.Sprintf("size=%d", size), func(b *testing.B) {
			// No setup inside the sub-benchmark, so no b.ResetTimer() is needed:
			// the timer starts fresh when b.Run invokes this function.
			for i := 0; i < b.N; i++ {
				target := make([]int64, size)
				copy(target, existing)
				benchSinkCopy = target // sink: keep the result observable
			}
		})
	}
}
$ go version | |
go version go1.18.3 darwin/amd64 | |
$ go test -bench=. -benchmem | |
goos: darwin | |
goarch: amd64 | |
pkg: play/bench | |
cpu: Intel(R) Core(TM) i7-6567U CPU @ 3.30GHz | |
BenchmarkAppendAllocInline/size=100-4 7442314 146.4 ns/op 896 B/op 1 allocs/op | |
BenchmarkAppendAllocInline/size=1000-4 929802 1397 ns/op 8192 B/op 1 allocs/op | |
BenchmarkAppendAllocInline/size=10000-4 105584 10860 ns/op 81920 B/op 1 allocs/op | |
BenchmarkCopyInline/size=100-4 7351207 150.4 ns/op 896 B/op 1 allocs/op | |
BenchmarkCopyInline/size=1000-4 920683 1296 ns/op 8192 B/op 1 allocs/op | |
BenchmarkCopyInline/size=10000-4 107530 10376 ns/op 81920 B/op 1 allocs/op | |
PASS | |
ok play/bench 9.644s |
That's because in `BenchmarkAppendAllocInline`, you're allocating a 0-length slice for `existing`. Meaning when you do `target = append(target, existing...)`, you're actually appending 0 elements. Changing the creation of `existing` to `existing := make([]int64, size, size)` brings both methods in line with each other in terms of performance.
@deitrix, I don't know how you came across this Gist, but thanks for finding this and pointing out the error. This faulty benchmark has actually led me to assume that appends are probably slightly faster, and if you hadn't pointed it out, I would have continued to believe so.
I have updated and re-run the benchmark. There is now, as you say, no conclusive answer on which one is faster.
For some reason, with my tests using Go 1.23.2, `BenchmarkAppendAllocInline` appears to consistently win by a very thin margin with the benchmark code as is here. But also for some reason, if the `make([]int64, size, size)` in `BenchmarkCopyInline` is changed to `make([]int64, size)`, it starts to consistently win, by a slightly larger margin than it was the other way around.
Copy shows to be ever so slightly slower. Inspecting the assembly might help answer why. If I were to guess, the difference is probably not in the append vs. copy operation itself, but in the allocation step we do before the operation:
(`target := make([]int64, 0, size)` in the append benchmark vs. `target := make([]int64, size, size)` in the copy benchmark). Again, we could inspect the assembly to know for sure.