Created January 27, 2015 18:04
Benchmark for append vs. copy in Golang
package copy_vs_append

import (
	"testing"
)

func TestCopy(t *testing.T) {
	y := doCopy(true, false)
	if len(y) != 1000 {
		t.Fatalf("Expected len(y) to be 1000 but was %d", len(y))
	}
}

func TestAppend(t *testing.T) {
	y := doCopy(false, false)
	if len(y) != 1000 {
		t.Fatalf("Expected len(y) to be 1000 but was %d", len(y))
	}
}

func TestAppendAlloc(t *testing.T) {
	y := doCopy(false, true)
	if len(y) != 1000 {
		t.Fatalf("Expected len(y) to be 1000 but was %d", len(y))
	}
}

// doCopy builds a 1000-element slice from an existing one, either with copy
// (useCopy) or with append, optionally pre-allocating capacity (preAlloc).
func doCopy(useCopy bool, preAlloc bool) []int64 {
	existing := make([]int64, 1000, 1000)
	var y []int64
	if useCopy {
		y = make([]int64, 1000, 1000)
		copy(y, existing)
	} else {
		var init []int64
		if preAlloc {
			init = make([]int64, 0, 1000)
		} else {
			init = []int64{}
		}
		y = append(init, existing...)
	}
	return y
}

func BenchmarkAppend(b *testing.B) {
	for i := 0; i < b.N; i++ {
		doCopy(false, false)
	}
}

func BenchmarkAppendAlloc(b *testing.B) {
	for i := 0; i < b.N; i++ {
		doCopy(false, true)
	}
}

func BenchmarkAppendAllocInline(b *testing.B) {
	for i := 0; i < b.N; i++ {
		existing := make([]int64, 1000, 1000)
		init := make([]int64, 0, 1000)
		_ = append(init, existing...)
	}
}

func BenchmarkCopy(b *testing.B) {
	for i := 0; i < b.N; i++ {
		doCopy(true, true)
	}
}
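To reproduce the numbers in the comments below, save the file as a test file in the package (a name like copy_vs_append_test.go is an assumption; any *_test.go name works) and run the command used by the commenters:

go test -bench=. -benchmem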
On my Ubuntu 18.04 machine (Go 1.13, 8 CPUs), copy is the fastest:
goos: linux
goarch: amd64
pkg: github.com/alexyslozada/pruebas
BenchmarkAppend-8 846910 1284 ns/op
BenchmarkAppendAlloc-8 3836578 319 ns/op
BenchmarkAppendAllocInline-8 3863030 306 ns/op
BenchmarkCopy-8 3975591 300 ns/op
PASS
Debian 10, 4.19.0-8-amd64, Intel(R) Core(TM) i7-3930K CPU @ 3.20GHz
❯ go version
go version go1.12.14 linux/amd64
❯ go test -bench=. -benchmem
goos: linux
goarch: amd64
BenchmarkAppend-12 500000 2754 ns/op 8192 B/op 1 allocs/op
BenchmarkAppendAlloc-12 2000000 671 ns/op 0 B/op 0 allocs/op
BenchmarkAppendAllocInline-12 2000000 687 ns/op 0 B/op 0 allocs/op
BenchmarkCopy-12 2000000 697 ns/op 0 B/op 0 allocs/op
PASS
ok _/tmp/copy_vs_append 7.632s
❯ go version
go version go1.14 linux/amd64
❯ go test -bench=. -benchmem
goos: linux
goarch: amd64
BenchmarkAppend-12 363030 3560 ns/op 8192 B/op 1 allocs/op
BenchmarkAppendAlloc-12 1696290 680 ns/op 0 B/op 0 allocs/op
BenchmarkAppendAllocInline-12 1697086 706 ns/op 0 B/op 0 allocs/op
BenchmarkCopy-12 1710579 696 ns/op 0 B/op 0 allocs/op
PASS
ok _/tmp/copy_vs_append 6.917s
Ubuntu 20.04.1 LTS, 5.4.0-42-generic, Intel® Core™ i7-8665U CPU @ 1.90GHz × 8
> go version
go version go1.14.4 linux/amd64
> go test -bench=. -benchmem
goos: linux
goarch: amd64
BenchmarkAppend-8 1000000 1026 ns/op 8192 B/op 1 allocs/op
BenchmarkAppendAlloc-8 4462958 275 ns/op 0 B/op 0 allocs/op
BenchmarkAppendAllocInline-8 4360440 275 ns/op 0 B/op 0 allocs/op
BenchmarkCopy-8 4355202 275 ns/op 0 B/op 0 allocs/op
PASS
ok _/tmp/copy_vs_append 5.493s
I made some modifications to the benchmark to measure inline copy vs. inline append for the pre-allocated case only (not counting the allocation cost of the 'existing' data to copy):
https://gist.github.com/smyrman/f7fd1734f9ea20d4648ed359bbcc6ac7
I believe this may be a more relevant comparison. The benchmark code here includes a number of branches (if statements), which may themselves be expensive (if mispredicted), or at least expensive enough to affect the results.
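For context, a minimal sketch of what such an inline, branch-free benchmark pair could look like. This is a reconstruction of the idea described above, not the code from the linked gist: the source slice is allocated during setup and b.ResetTimer() is called, so only the copy or append itself is timed.

package copy_vs_append

import "testing"

// Sketch only: inline, branch-free benchmarks. The source slice is allocated
// once during setup and the timer is reset, so the cost of allocating
// 'existing' is excluded from the measurement.
func BenchmarkInlineCopy(b *testing.B) {
	existing := make([]int64, 1000)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		y := make([]int64, 1000)
		copy(y, existing)
	}
}

func BenchmarkInlineAppend(b *testing.B) {
	existing := make([]int64, 1000)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		y := make([]int64, 0, 1000)
		y = append(y, existing...)
		_ = y
	}
}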
Anyone arriving here should run the benchmark on their own system. On the latest Go, pre-allocating and copying is faster.
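For anyone who just wants the resulting idiom, a minimal, self-contained sketch of the two slice-duplication patterns being compared (variable names are illustrative only); which is faster can vary by Go version and hardware, so benchmark on your own system before choosing.

package main

import "fmt"

func main() {
	src := []int64{1, 2, 3}

	// Pre-allocate the destination and copy into it.
	dst1 := make([]int64, len(src))
	copy(dst1, src)

	// Append into a slice with pre-allocated capacity.
	dst2 := append(make([]int64, 0, len(src)), src...)

	fmt.Println(dst1, dst2)
}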