Skip to content

Instantly share code, notes, and snippets.

@masih
Created January 28, 2025 14:45
Show Gist options
  • Save masih/2272b1e0c7f2e4ffb9303390a3b2db0e to your computer and use it in GitHub Desktop.
Save masih/2272b1e0c7f2e4ffb9303390a3b2db0e to your computer and use it in GitHub Desktop.
Zstandard (zstd) decompression benchmark in Go: cgo binding (DataDog/zstd) vs. pure-Go decoder (klauspost/compress/zstd)
│ datadog │ klauspost │ klauspostSync │
│ sec/op │ sec/op vs base │ sec/op vs base │
1MiB 187.8µ ± 14% 353.5µ ± 1% +88.22% (p=0.002 n=6) 202.9µ ± 5% ~ (p=0.394 n=6)
10MiB 1.240m ± 3% 1.508m ± 0% +21.57% (p=0.002 n=6) 1.158m ± 3% -6.60% (p=0.002 n=6)
100MiB 12.97m ± 7% 15.58m ± 2% +20.19% (p=0.002 n=6) 13.94m ± 4% +7.48% (p=0.004 n=6)
calib 4.471 ± 3% 22.065 ± 0% +393.48% (p=0.002 n=6) 13.362 ± 2% +198.85% (p=0.002 n=6)
geomean 10.78m 20.69m +91.93% 14.46m +34.18%
│ datadog │ klauspost │ klauspostSync │
│ B/s │ B/s vs base │ B/s vs base │
1MiB 5.200Gi ± 13% 2.762Gi ± 1% -46.87% (p=0.002 n=6) 4.812Gi ± 5% ~ (p=0.394 n=6)
10MiB 7.875Gi ± 3% 6.478Gi ± 0% -17.74% (p=0.002 n=6) 8.433Gi ± 3% +7.09% (p=0.002 n=6)
100MiB 7.533Gi ± 7% 6.267Gi ± 2% -16.80% (p=0.002 n=6) 7.008Gi ± 4% -6.97% (p=0.004 n=6)
calib 1753.5Mi ± 3% 355.3Mi ± 0% -79.74% (p=0.002 n=6) 586.8Mi ± 2% -66.54% (p=0.002 n=6)
geomean 4.794Gi 2.498Gi -47.90% 3.573Gi -25.47%
│ datadog │ klauspost │ klauspostSync │
│ B/op │ B/op vs base │ B/op vs base │
1MiB 265.1Ki ± 0% 3621.9Ki ± 0% +1266.08% (p=0.002 n=6) 3210.1Ki ± 0% +1110.75% (p=0.002 n=6)
10MiB 265.1Ki ± 0% 3621.9Ki ± 0% +1266.04% (p=0.002 n=6) 3210.1Ki ± 0% +1110.72% (p=0.002 n=6)
100MiB 265.3Ki ± 0% 3621.9Ki ± 0% +1265.30% (p=0.002 n=6) 3210.1Ki ± 0% +1110.06% (p=0.002 n=6)
calib 177.226Mi ± 0% 254.683Mi ± 0% +43.70% (p=0.002 n=6) 3.442Mi ± 0% -98.06% (p=0.002 n=6)
geomean 1.325Mi 10.30Mi +677.87% 3.209Mi +142.27%
│ datadog │ klauspost │ klauspostSync │
│ allocs/op │ allocs/op vs base │ allocs/op vs base │
1MiB 11.00 ± 0% 40.00 ± 0% +263.64% (p=0.002 n=6) 15.00 ± 0% +36.36% (p=0.002 n=6)
10MiB 11.00 ± 0% 40.00 ± 0% +263.64% (p=0.002 n=6) 15.00 ± 0% +36.36% (p=0.002 n=6)
100MiB 11.00 ± 0% 40.00 ± 0% +263.64% (p=0.002 n=6) 15.00 ± 0% +36.36% (p=0.002 n=6)
calib 24.00 ± 8% 1198968.00 ± 0% +4995600.00% (p=0.002 n=6) 85.00 ± 9% +254.17% (p=0.002 n=6)
geomean 13.37 526.3 +3836.86% 23.14 +73.11%
goos: darwin
goarch: arm64
pkg: github.com/filecoin-project/lotus
BenchmarkZstdDecompression/lib=datadog/size=1MiB-12 6537 184130 ns/op 5694.77 MB/s 271456 B/op 11 allocs/op
BenchmarkZstdDecompression/lib=datadog/size=1MiB-12 6609 187220 ns/op 5600.76 MB/s 271500 B/op 11 allocs/op
BenchmarkZstdDecompression/lib=datadog/size=1MiB-12 6488 186428 ns/op 5624.57 MB/s 271508 B/op 11 allocs/op
BenchmarkZstdDecompression/lib=datadog/size=1MiB-12 6488 188419 ns/op 5565.14 MB/s 271525 B/op 11 allocs/op
BenchmarkZstdDecompression/lib=datadog/size=1MiB-12 6256 210423 ns/op 4983.19 MB/s 271490 B/op 11 allocs/op
BenchmarkZstdDecompression/lib=datadog/size=1MiB-12 6453 214802 ns/op 4881.59 MB/s 271459 B/op 11 allocs/op
BenchmarkZstdDecompression/lib=datadog/size=10MiB-12 1023 1220457 ns/op 8591.67 MB/s 271508 B/op 11 allocs/op
BenchmarkZstdDecompression/lib=datadog/size=10MiB-12 973 1240480 ns/op 8452.99 MB/s 271498 B/op 11 allocs/op
BenchmarkZstdDecompression/lib=datadog/size=10MiB-12 945 1278491 ns/op 8201.67 MB/s 271541 B/op 11 allocs/op
BenchmarkZstdDecompression/lib=datadog/size=10MiB-12 961 1240009 ns/op 8456.20 MB/s 271475 B/op 11 allocs/op
BenchmarkZstdDecompression/lib=datadog/size=10MiB-12 972 1233977 ns/op 8497.53 MB/s 271482 B/op 11 allocs/op
BenchmarkZstdDecompression/lib=datadog/size=10MiB-12 962 1240259 ns/op 8454.50 MB/s 271525 B/op 11 allocs/op
BenchmarkZstdDecompression/lib=datadog/size=100MiB-12 93 12844269 ns/op 8163.77 MB/s 271443 B/op 11 allocs/op
BenchmarkZstdDecompression/lib=datadog/size=100MiB-12 93 12814099 ns/op 8182.99 MB/s 271621 B/op 11 allocs/op
BenchmarkZstdDecompression/lib=datadog/size=100MiB-12 93 12787690 ns/op 8199.89 MB/s 271532 B/op 11 allocs/op
BenchmarkZstdDecompression/lib=datadog/size=100MiB-12 92 13295125 ns/op 7886.92 MB/s 271718 B/op 11 allocs/op
BenchmarkZstdDecompression/lib=datadog/size=100MiB-12 90 13869239 ns/op 7560.44 MB/s 271678 B/op 11 allocs/op
BenchmarkZstdDecompression/lib=datadog/size=100MiB-12 92 13087046 ns/op 8012.32 MB/s 271810 B/op 11 allocs/op
BenchmarkZstdDecompression/lib=datadog/size=calib-12 1 4471782458 ns/op 1838.51 MB/s 185835160 B/op 24 allocs/op
BenchmarkZstdDecompression/lib=datadog/size=calib-12 1 4458900833 ns/op 1843.82 MB/s 185835160 B/op 24 allocs/op
BenchmarkZstdDecompression/lib=datadog/size=calib-12 1 4448838542 ns/op 1847.99 MB/s 185835160 B/op 24 allocs/op
BenchmarkZstdDecompression/lib=datadog/size=calib-12 1 4609361666 ns/op 1783.63 MB/s 185826944 B/op 22 allocs/op
BenchmarkZstdDecompression/lib=datadog/size=calib-12 1 4516228750 ns/op 1820.41 MB/s 185835160 B/op 24 allocs/op
BenchmarkZstdDecompression/lib=datadog/size=calib-12 1 4470698375 ns/op 1838.95 MB/s 185826944 B/op 22 allocs/op
BenchmarkZstdDecompression/lib=klauspost/size=1MiB-12 3428 358039 ns/op 2928.67 MB/s 3708877 B/op 40 allocs/op
BenchmarkZstdDecompression/lib=klauspost/size=1MiB-12 3338 354863 ns/op 2954.88 MB/s 3708840 B/op 40 allocs/op
BenchmarkZstdDecompression/lib=klauspost/size=1MiB-12 3476 352028 ns/op 2978.67 MB/s 3708819 B/op 40 allocs/op
BenchmarkZstdDecompression/lib=klauspost/size=1MiB-12 3334 352788 ns/op 2972.25 MB/s 3708828 B/op 40 allocs/op
BenchmarkZstdDecompression/lib=klauspost/size=1MiB-12 3510 354233 ns/op 2960.13 MB/s 3708823 B/op 40 allocs/op
BenchmarkZstdDecompression/lib=klauspost/size=1MiB-12 3405 352501 ns/op 2974.68 MB/s 3708820 B/op 40 allocs/op
BenchmarkZstdDecompression/lib=klauspost/size=10MiB-12 787 1506753 ns/op 6959.18 MB/s 3708823 B/op 40 allocs/op
BenchmarkZstdDecompression/lib=klauspost/size=10MiB-12 800 1507858 ns/op 6954.08 MB/s 3708820 B/op 40 allocs/op
BenchmarkZstdDecompression/lib=klauspost/size=10MiB-12 802 1502535 ns/op 6978.71 MB/s 3708838 B/op 40 allocs/op
BenchmarkZstdDecompression/lib=klauspost/size=10MiB-12 793 1507363 ns/op 6956.36 MB/s 3708836 B/op 40 allocs/op
BenchmarkZstdDecompression/lib=klauspost/size=10MiB-12 793 1514635 ns/op 6922.96 MB/s 3708832 B/op 40 allocs/op
BenchmarkZstdDecompression/lib=klauspost/size=10MiB-12 790 1514478 ns/op 6923.68 MB/s 3708806 B/op 40 allocs/op
BenchmarkZstdDecompression/lib=klauspost/size=100MiB-12 75 15573807 ns/op 6732.95 MB/s 3708787 B/op 40 allocs/op
BenchmarkZstdDecompression/lib=klauspost/size=100MiB-12 76 15583338 ns/op 6728.83 MB/s 3708829 B/op 40 allocs/op
BenchmarkZstdDecompression/lib=klauspost/size=100MiB-12 76 15630582 ns/op 6708.49 MB/s 3708832 B/op 40 allocs/op
BenchmarkZstdDecompression/lib=klauspost/size=100MiB-12 76 15574263 ns/op 6732.75 MB/s 3708812 B/op 40 allocs/op
BenchmarkZstdDecompression/lib=klauspost/size=100MiB-12 76 15582373 ns/op 6729.24 MB/s 3708783 B/op 40 allocs/op
BenchmarkZstdDecompression/lib=klauspost/size=100MiB-12 76 15951276 ns/op 6573.62 MB/s 3708841 B/op 40 allocs/op
BenchmarkZstdDecompression/lib=klauspost/size=calib-12 1 22094323458 ns/op 372.11 MB/s 266996536 B/op 1198950 allocs/op
BenchmarkZstdDecompression/lib=klauspost/size=calib-12 1 22089419584 ns/op 372.19 MB/s 267007352 B/op 1198986 allocs/op
BenchmarkZstdDecompression/lib=klauspost/size=calib-12 1 22061964250 ns/op 372.65 MB/s 267139448 B/op 1198989 allocs/op
BenchmarkZstdDecompression/lib=klauspost/size=calib-12 1 22066948917 ns/op 372.57 MB/s 267097176 B/op 1198948 allocs/op
BenchmarkZstdDecompression/lib=klauspost/size=calib-12 1 22062594542 ns/op 372.64 MB/s 267107160 B/op 1199049 allocs/op
BenchmarkZstdDecompression/lib=klauspost/size=calib-12 1 22056633375 ns/op 372.74 MB/s 267011160 B/op 1198928 allocs/op
BenchmarkZstdDecompression/lib=klauspostSync/size=1MiB-12 6150 209914 ns/op 4995.27 MB/s 3287130 B/op 15 allocs/op
BenchmarkZstdDecompression/lib=klauspostSync/size=1MiB-12 5869 203293 ns/op 5157.94 MB/s 3287130 B/op 15 allocs/op
BenchmarkZstdDecompression/lib=klauspostSync/size=1MiB-12 5485 203342 ns/op 5156.70 MB/s 3287129 B/op 15 allocs/op
BenchmarkZstdDecompression/lib=klauspostSync/size=1MiB-12 6082 202582 ns/op 5176.07 MB/s 3287127 B/op 15 allocs/op
BenchmarkZstdDecompression/lib=klauspostSync/size=1MiB-12 6394 193444 ns/op 5420.57 MB/s 3287131 B/op 15 allocs/op
BenchmarkZstdDecompression/lib=klauspostSync/size=1MiB-12 5775 197111 ns/op 5319.74 MB/s 3287131 B/op 15 allocs/op
BenchmarkZstdDecompression/lib=klauspostSync/size=10MiB-12 1012 1175126 ns/op 8923.09 MB/s 3287137 B/op 15 allocs/op
BenchmarkZstdDecompression/lib=klauspostSync/size=10MiB-12 969 1198416 ns/op 8749.69 MB/s 3287138 B/op 15 allocs/op
BenchmarkZstdDecompression/lib=klauspostSync/size=10MiB-12 990 1174873 ns/op 8925.01 MB/s 3287137 B/op 15 allocs/op
BenchmarkZstdDecompression/lib=klauspostSync/size=10MiB-12 993 1140693 ns/op 9192.45 MB/s 3287134 B/op 15 allocs/op
BenchmarkZstdDecompression/lib=klauspostSync/size=10MiB-12 986 1132716 ns/op 9257.18 MB/s 3287136 B/op 15 allocs/op
BenchmarkZstdDecompression/lib=klauspostSync/size=10MiB-12 1015 1141662 ns/op 9184.65 MB/s 3287136 B/op 15 allocs/op
BenchmarkZstdDecompression/lib=klauspostSync/size=100MiB-12 81 14442378 ns/op 7260.41 MB/s 3287129 B/op 15 allocs/op
BenchmarkZstdDecompression/lib=klauspostSync/size=100MiB-12 84 13959976 ns/op 7511.30 MB/s 3287133 B/op 15 allocs/op
BenchmarkZstdDecompression/lib=klauspostSync/size=100MiB-12 87 13767161 ns/op 7616.50 MB/s 3287126 B/op 15 allocs/op
BenchmarkZstdDecompression/lib=klauspostSync/size=100MiB-12 87 13911591 ns/op 7537.43 MB/s 3287131 B/op 15 allocs/op
BenchmarkZstdDecompression/lib=klauspostSync/size=100MiB-12 86 14114520 ns/op 7429.06 MB/s 3287126 B/op 15 allocs/op
BenchmarkZstdDecompression/lib=klauspostSync/size=100MiB-12 85 13884299 ns/op 7552.24 MB/s 3287124 B/op 15 allocs/op
BenchmarkZstdDecompression/lib=klauspostSync/size=calib-12 1 13421704917 ns/op 612.55 MB/s 3617496 B/op 93 allocs/op
BenchmarkZstdDecompression/lib=klauspostSync/size=calib-12 1 13620439375 ns/op 603.61 MB/s 3613280 B/op 85 allocs/op
BenchmarkZstdDecompression/lib=klauspostSync/size=calib-12 1 13354270583 ns/op 615.64 MB/s 3606528 B/op 79 allocs/op
BenchmarkZstdDecompression/lib=klauspostSync/size=calib-12 1 13264474125 ns/op 619.81 MB/s 3609952 B/op 86 allocs/op
BenchmarkZstdDecompression/lib=klauspostSync/size=calib-12 1 13370659042 ns/op 614.88 MB/s 3607728 B/op 82 allocs/op
BenchmarkZstdDecompression/lib=klauspostSync/size=calib-12 1 13301791250 ns/op 618.07 MB/s 3608928 B/op 85 allocs/op
PASS
ok github.com/filecoin-project/lotus 313.235s
package lotus
import (
"io"
"math/rand"
"os"
"path/filepath"
"testing"
datadog_zstd "github.com/DataDog/zstd"
klauspost_zstd "github.com/klauspost/compress/zstd"
"github.com/stretchr/testify/require"
)
func BenchmarkZstdDecompression(b *testing.B) {
const seed = 1413
var (
rng = rand.New(rand.NewSource(seed))
zstd1MiB = generateZstdCompressedFile(b, rng, 1<<20)
zstd10MiB = generateZstdCompressedFile(b, rng, 10<<20)
zstd100MiB = generateZstdCompressedFile(b, rng, 100<<20)
datadog = func(_ *testing.B, source io.Reader) io.Reader {
return datadog_zstd.NewReader(source)
}
klauspost = func(b *testing.B, source io.Reader) io.Reader {
subject, err := klauspost_zstd.NewReader(source)
require.NoError(b, err)
return subject
}
klauspostSync = func(b *testing.B, source io.Reader) io.Reader {
subject, err := klauspost_zstd.NewReader(source,
klauspost_zstd.WithDecoderConcurrency(1),
)
require.NoError(b, err)
return subject
}
)
calibnetSnapshot := compressedSample{
path: "/tmp/forest_snapshot_calibnet_2025-01-28_height_2357735.forest.car.zst",
size: 8221409415,
}
for _, benchmark := range []struct {
name string
target compressedSample
subject func(*testing.B, io.Reader) io.Reader
}{
{name: "lib=datadog/size=1MiB", subject: datadog, target: zstd1MiB},
{name: "lib=datadog/size=10MiB", subject: datadog, target: zstd10MiB},
{name: "lib=datadog/size=100MiB", subject: datadog, target: zstd100MiB},
{name: "lib=datadog/size=calib", subject: datadog, target: calibnetSnapshot},
{name: "lib=klauspost/size=1MiB", subject: klauspost, target: zstd1MiB},
{name: "lib=klauspost/size=10MiB", subject: klauspost, target: zstd10MiB},
{name: "lib=klauspost/size=100MiB", subject: klauspost, target: zstd100MiB},
{name: "lib=klauspost/size=calib", subject: klauspost, target: calibnetSnapshot},
{name: "lib=klauspostSync/size=1MiB", subject: klauspostSync, target: zstd1MiB},
{name: "lib=klauspostSync/size=10MiB", subject: klauspostSync, target: zstd10MiB},
{name: "lib=klauspostSync/size=100MiB", subject: klauspostSync, target: zstd100MiB},
{name: "lib=klauspostSync/size=calib", subject: klauspostSync, target: calibnetSnapshot},
} {
b.Run(benchmark.name, func(b *testing.B) {
b.SetBytes(benchmark.target.size)
b.ReportAllocs()
b.ResetTimer()
for range b.N {
source, err := os.Open(benchmark.target.path)
require.NoError(b, err)
subject := benchmark.subject(b, source)
read, err := io.Copy(io.Discard, subject)
require.NoError(b, err)
require.Equal(b, benchmark.target.size, read)
require.NoError(b, source.Close())
}
})
}
}
// generateZstdCompressedFile writes size bytes drawn from rng through a
// DataDog zstd writer into a file named "bench-test.zstd" inside a directory
// obtained from b.TempDir, and returns the file's path together with size
// (the uncompressed byte count).
//
// Any failure while creating, writing or closing the file fails b through the
// require assertions.
func generateZstdCompressedFile(b *testing.B, rng *rand.Rand, size int64) compressedSample {
	b.Helper()

	sample := compressedSample{
		path: filepath.Join(b.TempDir(), "bench-test.zstd"),
		size: size,
	}

	file, err := os.Create(sample.path)
	require.NoError(b, err)
	// Deferred so the file is closed after the encoder has been closed below.
	defer func() {
		closeErr := file.Close()
		require.NoError(b, closeErr)
	}()

	encoder := datadog_zstd.NewWriter(file)
	copied, err := io.CopyN(encoder, &randomReader{rng: rng}, size)
	require.NoError(b, err)
	require.Equal(b, size, copied)

	// Close the encoder explicitly so its error is checked before returning.
	require.NoError(b, encoder.Close())

	return sample
}
// randomReader exposes a *rand.Rand as an io.Reader.
type randomReader struct {
	rng *rand.Rand // generator that supplies the bytes
}

// Compile-time check that *randomReader implements io.Reader.
var _ io.Reader = (*randomReader)(nil)

// Read delegates to the wrapped generator's Read method and returns its
// results unchanged.
func (r *randomReader) Read(p []byte) (int, error) {
	filled, err := r.rng.Read(p)
	return filled, err
}
// compressedSample identifies a zstd-compressed file used as benchmark input.
type compressedSample struct {
	// path is the location of the compressed file on disk.
	path string
	// size is the number of bytes the file holds once decompressed. The
	// benchmark reports throughput against it and asserts the decoded
	// length equals it.
	size int64
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment