Skip to content

Instantly share code, notes, and snippets.

@raharper
Last active October 26, 2023 16:49
Show Gist options
  • Save raharper/3828b89e2c32239ac96b8be6a80aca1f to your computer and use it in GitHub Desktop.
Save raharper/3828b89e2c32239ac96b8be6a80aca1f to your computer and use it in GitHub Desktop.
Test how the pgzip block size and the umoci copy buffer size affect the size and sha256sum of the compressed output
module main
go 1.21.3
require (
github.com/apex/log v1.9.0
github.com/klauspost/pgzip v1.2.6
golang.org/x/sys v0.13.0
)
require (
github.com/klauspost/compress v1.17.2 // indirect
github.com/pkg/errors v0.9.1 // indirect
)
package main
import (
"bytes"
"crypto/sha256"
"encoding/hex"
"errors"
"fmt"
"io"
"os"
"runtime"
"golang.org/x/sys/unix"
"github.com/apex/log"
gzip "github.com/klauspost/pgzip"
)
// Compressor abstracts a compression type so that users can plug in their own
// implementations.
type Compressor interface {
	// Compress sets up the streaming compressor for this compression type
	// and returns the stream to read from. The two integers are the write
	// buffer size and the read buffer size, in that order.
	Compress(reader io.Reader, writeBufSize, readBufSize int) (io.ReadCloser, error)

	// MediaTypeSuffix reports the suffix appended to the layer to signal
	// which compression type is in use, e.g. "gzip", or "" when there is no
	// compression.
	MediaTypeSuffix() string
}
// GzipCompressor provides gzip compression.
var GzipCompressor Compressor = gzipCompressor{}

// gzipCompressor is the field-less Compressor implementation behind
// GzipCompressor.
type gzipCompressor struct{}
// Compress starts gzip-compressing reader in a background goroutine and
// returns the read end of a pipe from which the compressed stream can be
// consumed.
//
// writeBufSize is handed to the gzip writer's SetConcurrency as its first
// argument, alongside 2*runtime.NumCPU() as the second. readBufSize is the
// buffer size passed to Copy when feeding reader into the gzip writer.
//
// A non-nil error is returned only when SetConcurrency rejects the settings.
// Failures during compression are logged and, where possible, propagated to
// the consumer by closing the pipe with the (wrapped) error, so they surface
// on a later Read of the returned stream. Underlying errors are wrapped with
// %w so that callers can inspect them with errors.Is / errors.As.
func (gz gzipCompressor) Compress(reader io.Reader, writeBufSize, readBufSize int) (io.ReadCloser, error) {
	pipeReader, pipeWriter := io.Pipe()

	gzw := gzip.NewWriter(pipeWriter)
	if err := gzw.SetConcurrency(writeBufSize, 2*runtime.NumCPU()); err != nil {
		return nil, fmt.Errorf("Error setting concurrency level to %v blocks: %w", 2*runtime.NumCPU(), err)
	}

	go func() {
		if _, err := Copy(gzw, reader, readBufSize); err != nil {
			log.Warnf("gzip compress: could not compress layer: %v", err)
			// #nosec G104
			_ = pipeWriter.CloseWithError(fmt.Errorf("Error compressing layer: %w", err))
			return
		}
		// Closing the gzip writer flushes the remaining compressed data into
		// the pipe; it must happen before the pipe itself is closed.
		if err := gzw.Close(); err != nil {
			log.Warnf("gzip compress: could not close gzip writer: %v", err)
			// #nosec G104
			_ = pipeWriter.CloseWithError(fmt.Errorf("Error close gzip writer: %w", err))
			return
		}
		if err := pipeWriter.Close(); err != nil {
			log.Warnf("gzip compress: could not close pipe: %v", err)
			// We don't CloseWithError because we cannot override the Close.
			return
		}
	}()

	return pipeReader, nil
}
// MediaTypeSuffix reports the media-type suffix used for gzip compression.
func (gzipCompressor) MediaTypeSuffix() string {
	const suffix = "gzip"
	return suffix
}
// Copy behaves like io.Copy with a caller-chosen buffer size, except that it
// automatically resumes the copy after it receives an EINTR error.
//
// size is the length in bytes of the scratch buffer handed to io.CopyBuffer.
// A size of zero or less falls back to 32 KiB rather than panicking (a
// zero-length buffer makes io.CopyBuffer panic, and a negative length makes
// make panic). If src is an *io.LimitedReader with fewer than size bytes
// remaining, the buffer is shrunk to the remaining count, but never below one
// byte.
//
// It returns the total number of bytes written across all attempts together
// with the first error that is not EINTR, if any.
func Copy(dst io.Writer, src io.Reader, size int) (int64, error) {
	const defaultSize = 32 * 1024
	if size <= 0 {
		size = defaultSize
	}
	if lr, ok := src.(*io.LimitedReader); ok && lr.N < int64(size) {
		if lr.N < 1 {
			size = 1
		} else {
			size = int(lr.N)
		}
	}

	// Allocate the buffer once so that it is reused across EINTR retries.
	buf := make([]byte, size)

	var written int64
	for {
		n, err := io.CopyBuffer(dst, src, buf)
		written += n // n is always non-negative
		if errors.Is(err, unix.EINTR) {
			continue
		}
		return written, err
	}
}
// doCompress compresses data through GzipCompressor with the given write and
// read buffer sizes, drains the compressed stream into memory with Copy, and
// logs both buffer sizes, the number of compressed bytes and the hex SHA-256
// digest of the compressed output. It panics if compression setup or the copy
// fails.
func doCompress(data []byte, writeBufSize, readBufSize int) {
	stream, err := GzipCompressor.Compress(bytes.NewReader(data), writeBufSize, readBufSize)
	if err != nil {
		panic(err.Error())
	}

	var compressed bytes.Buffer
	n, err := Copy(&compressed, stream, readBufSize)
	if err != nil {
		panic(err.Error())
	}
	stream.Close()

	digest := sha256.Sum256(compressed.Bytes())
	log.Infof("writeSize: %d readSize: %d resultSize: %d sha256: %s",
		writeBufSize, readBufSize, n, hex.EncodeToString(digest[:]))
}
// main runs the buffer-size experiment: a tiny "hello, world" payload and the
// contents of yes.img are each compressed with every write/read buffer size
// combination listed below, and doCompress logs the resulting size and
// SHA-256 so the runs can be compared. The program exits with status 1 if
// yes.img cannot be read.
func main() {
	small := []byte("hello, world\n")
	large, err := os.ReadFile("yes.img")
	if err != nil {
		log.Errorf("Missing yes.img file, generate with: yes | dd iflag=fullblock bs=1M count=2 of=yes.img")
		os.Exit(1)
	}

	// Each entry is {writeBufSize, readBufSize}.
	configs := [][2]int{
		{256 << 10, 32 * 1024}, // umoci defaults
		{256 << 10, 64 * 1024}, // umoci defaults + larger copy buffer
		{256 << 12, 32 * 1024}, // umoci+stacker defaults
		{256 << 12, 64 * 1024}, // umoci+stacker defaults + larger copy buffer
	}

	log.Infof("Testing Hello, World buffer")
	for _, cfg := range configs {
		doCompress(small, cfg[0], cfg[1])
	}

	fmt.Printf("\n")

	log.Infof("Testing Yes 2M image buffer")
	for _, cfg := range configs {
		doCompress(large, cfg[0], cfg[1])
	}
}
@raharper
Copy link
Author

To run, copy both files into a directory, then run:

go get main
yes | dd iflag=fullblock bs=1M count=2 of=yes.img
go run .

Locally, this produces the following output:

2023/10/26 11:48:19  info Testing Hello, World buffer
2023/10/26 11:48:19  info writeSize: 262144 readSize: 32768 resultSize: 43 sha256: 327f58cb624bd3867b0216444ba93edaeebb889b55033b3e9d3750cd2307083a
2023/10/26 11:48:19  info writeSize: 262144 readSize: 65536 resultSize: 43 sha256: 327f58cb624bd3867b0216444ba93edaeebb889b55033b3e9d3750cd2307083a
2023/10/26 11:48:19  info writeSize: 1048576 readSize: 32768 resultSize: 43 sha256: 327f58cb624bd3867b0216444ba93edaeebb889b55033b3e9d3750cd2307083a
2023/10/26 11:48:19  info writeSize: 1048576 readSize: 65536 resultSize: 43 sha256: 327f58cb624bd3867b0216444ba93edaeebb889b55033b3e9d3750cd2307083a

2023/10/26 11:48:19  info Testing Yes 2M image buffer
2023/10/26 11:48:19  info writeSize: 262144 readSize: 32768 resultSize: 5156 sha256: 950cd8076819c74ca59a97446b8544fa898083ef184b5983875423244793455f
2023/10/26 11:48:19  info writeSize: 262144 readSize: 65536 resultSize: 5156 sha256: 950cd8076819c74ca59a97446b8544fa898083ef184b5983875423244793455f
2023/10/26 11:48:19  info writeSize: 1048576 readSize: 32768 resultSize: 2600 sha256: 5856f0398751abd8ee6a1a187b27bfeda0f4bd800950b55e254ca72947b7f4b5
2023/10/26 11:48:19  info writeSize: 1048576 readSize: 65536 resultSize: 2600 sha256: 5856f0398751abd8ee6a1a187b27bfeda0f4bd800950b55e254ca72947b7f4b5

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment