Last active
August 29, 2015 14:26
-
-
Save arnehormann/65421048f56ac108f6b5 to your computer and use it in GitHub Desktop.
Check speed and compression of `compress/flate` and `compress/gzip` against the `github.com/klauspost/compress` variants. Please read the comment below!
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"encoding/binary" | |
"flag" | |
"fmt" | |
"io" | |
"os" | |
"runtime" | |
"sort" | |
"strconv" | |
"strings" | |
"time" | |
flstd "compress/flate" | |
gzstd "compress/gzip" | |
flkp "github.com/klauspost/compress/flate" | |
gzkp "github.com/klauspost/compress/gzip" | |
pgz "github.com/klauspost/pgzip" | |
) | |
// Compile-time proof that readCloser implements io.ReadCloser.
var _ io.ReadCloser = (*readCloser)(nil)

// readCloser glues an arbitrary Reader to an arbitrary Closer.
// It is used in main to attach the original source's Close method
// to an io.LimitedReader, which has no Close of its own.
type readCloser struct {
	io.Reader
	io.Closer
}
var _ io.ReadCloser = (*StatReader)(nil) | |
type StatReader struct { | |
R io.Reader | |
N int64 | |
NoClose bool | |
} | |
func mapReader(r io.Reader, err error) (*StatReader, error) { | |
if err != nil { | |
return nil, err | |
} | |
return &StatReader{R: r}, nil | |
} | |
func (r *StatReader) Read(d []byte) (int, error) { | |
n, err := r.R.Read(d) | |
r.N += int64(n) | |
return n, err | |
} | |
func (r *StatReader) Close() error { | |
if c, ok := r.R.(io.Closer); !r.NoClose && ok { | |
return c.Close() | |
} | |
return nil | |
} | |
var _ io.WriteCloser = (*StatWriter)(nil) | |
type StatWriter struct { | |
W io.Writer | |
N int64 | |
NoClose bool | |
} | |
func mapWriter(w io.Writer, err error) (*StatWriter, error) { | |
if err != nil { | |
return nil, err | |
} | |
return &StatWriter{W: w}, nil | |
} | |
func (w *StatWriter) Write(d []byte) (int, error) { | |
n, err := w.W.Write(d) | |
w.N += int64(n) | |
return n, err | |
} | |
func (w *StatWriter) Close() error { | |
if c, ok := w.W.(io.Closer); !w.NoClose && ok { | |
return c.Close() | |
} | |
return nil | |
} | |
// NoOp is a do-nothing data source and sink: Read claims to have
// filled the buffer without writing to it, and Write discards its
// input. Both always report full success.
type NoOp struct{}

// Read reports len(v) bytes read; the buffer is left untouched.
func (NoOp) Read(v []byte) (int, error) { return len(v), nil }

// Write reports len(v) bytes written; the data goes nowhere.
func (NoOp) Write(v []byte) (int, error) { return len(v), nil }
// SeqGen is an endless reader producing the repeating byte sequence
// 0,1,...,255,0,1,... that continues across successive Read calls.
type SeqGen struct {
	i int // next byte value to emit, persisted between reads
}

// Read fills v with consecutive byte values, starting where the previous
// call stopped. It never fails.
func (s *SeqGen) Read(v []byte) (int, error) {
	b := byte(s.i)
	for i := range v {
		v[i] = b
		b++
	}
	// BUGFIX: persist the position; the original dropped it, so every
	// Read restarted the sequence at the same value and the i field
	// was effectively dead state.
	s.i = int(b)
	return len(v), nil
}
// Rand is a deterministic pseudo-random byte stream.
// uses PCG (http://www.pcg-random.org/)
type Rand struct {
	state uint64 // current generator state
	inc   uint64 // stream increment (always odd)
}

// pcgmult64 is the 64-bit LCG multiplier used by PCG.
const pcgmult64 = 6364136223846793005

// NewRand creates a generator seeded with seed; equal seeds produce
// identical byte streams on every run.
func NewRand(seed uint64) *Rand {
	// bootstrap mirrors pcg32_srandom_r: advance, mix in the seed, advance
	state := uint64(0)
	inc := uint64(seed<<1) | 1
	state = state*pcgmult64 + (inc | 1)
	state += uint64(seed)
	state = state*pcgmult64 + (inc | 1)
	return &Rand{
		state: state,
		inc:   inc,
	}
}

// Read fills v with pseudo-random bytes and never fails.
//
// BUGFIX: the original assumed len(v)%4 == 0 and panicked otherwise
// (out-of-range PutUint32 / reslice). The tail of fewer than 4 bytes is
// now handled via a scratch word; output for 4-multiple lengths is
// byte-identical to the original.
func (r *Rand) Read(v []byte) (int, error) {
	for w := v; len(w) > 0; {
		old := r.state
		r.state = r.state*pcgmult64 + (r.inc | 1)
		// PCG-XSH-RR output function: xorshift, then random rotation
		xorshifted := uint32(((old >> 18) ^ old) >> 27)
		rot := uint32(old >> 59)
		rnd := (xorshifted >> rot) | (xorshifted << ((-rot) & 31))
		if len(w) >= 4 {
			binary.LittleEndian.PutUint32(w, rnd)
			w = w[4:]
		} else {
			var tmp [4]byte
			binary.LittleEndian.PutUint32(tmp[:], rnd)
			copy(w, tmp[:])
			w = w[:0]
		}
	}
	return len(v), nil
}
// Compile-time proof that *NumBytes implements flag.Value.
var _ flag.Value = (*NumBytes)(nil)

// NumBytes is a byte count usable as a command line flag value.
// 0 means "all"; the suffixes k/m/g/t/p/e (case-insensitive) scale the
// number by powers of 1024.
type NumBytes int64

// String renders the value for help text; 0 is shown as "all".
func (n *NumBytes) String() string {
	if *n == 0 {
		return "all"
	}
	return strconv.FormatInt(int64(*n), 10)
}

// Set parses v as "all", a plain integer, or an integer with a binary
// size suffix (k, m, g, t, p, e), storing the resulting byte count.
func (n *NumBytes) Set(v string) error {
	if v == "all" {
		*n = 0
		return nil
	}
	f := int64(1)
	if len(v) > 0 {
		hasSuffix := true
		switch v[len(v)-1] {
		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			// plain number, nothing to strip
			hasSuffix = false
		case 'k', 'K':
			f = 1024
		case 'm', 'M':
			f = 1024 * 1024
		case 'g', 'G':
			f = 1024 * 1024 * 1024
		case 't', 'T':
			f = 1024 * 1024 * 1024 * 1024
		case 'p', 'P':
			f = 1024 * 1024 * 1024 * 1024 * 1024
		case 'e', 'E':
			f = 1024 * 1024 * 1024 * 1024 * 1024 * 1024
		default:
			return fmt.Errorf("invalid byte size %q, available postfixes kilo..exa (k,m,g,t,p,e)", v)
		}
		// BUGFIX: only strip the trailing character when it was a suffix
		// letter; the original stripped it unconditionally, so e.g.
		// "100" was parsed as 10.
		if hasSuffix {
			v = v[:len(v)-1]
		}
	}
	i, err := strconv.ParseInt(v, 10, 64)
	if err != nil {
		return err
	}
	*n = NumBytes(f * i)
	return nil
}
// sources maps the -s flag value to a constructor for the data source.
// The string argument is the input file name; only "file" uses it.
var sources = map[string]func(string) (*StatReader, error){
	"seq":  func(string) (*StatReader, error) { return mapReader(&SeqGen{}, nil) },
	// NoOp.Read never writes to the buffer; "zero" presumably relies on
	// the copy buffer in main starting (and staying) zeroed — TODO confirm.
	"zero": func(string) (*StatReader, error) { return mapReader(NoOp{}, nil) },
	// fixed seed: the "random" stream is identical on every run
	"rand": func(string) (*StatReader, error) { return mapReader(NewRand(0xdeadbeef), nil) },
	"file": func(src string) (*StatReader, error) {
		if src == "-" {
			// stdin must survive pipeline teardown, hence NoClose
			return &StatReader{
				R: os.Stdin,
				NoClose: true,
			}, nil
		}
		return mapReader(os.Open(src))
	},
}
// extractors maps the -x flag value to a constructor that wraps the
// source in the chosen decompressor. The flate constructors cannot
// fail, hence the explicit nil error; the gzip/pgzip ones read a
// header and therefore return an error.
var extractors = map[string]func(*StatReader) (*StatReader, error){
	"flatekp": func(r *StatReader) (*StatReader, error) { return mapReader(flkp.NewReader(r), nil) },
	"flatestd": func(r *StatReader) (*StatReader, error) { return mapReader(flstd.NewReader(r), nil) },
	"gzkp": func(r *StatReader) (*StatReader, error) { return mapReader(gzkp.NewReader(r)) },
	"gzstd": func(r *StatReader) (*StatReader, error) { return mapReader(gzstd.NewReader(r)) },
	"pgzip": func(r *StatReader) (*StatReader, error) { return mapReader(pgz.NewReader(r)) },
	// pass-through; NoClose prevents closing the source a second time
	"none": func(r *StatReader) (*StatReader, error) { return &StatReader{R: r, NoClose: true}, nil },
}
// compressors maps the -c flag value to a constructor that wraps the
// sink in the chosen compressor at level l (-1 for the package default).
var compressors = map[string]func(*StatWriter, int) (*StatWriter, error){
	"flatekp": func(w *StatWriter, l int) (*StatWriter, error) { return mapWriter(flkp.NewWriter(w, l)) },
	"flatestd": func(w *StatWriter, l int) (*StatWriter, error) { return mapWriter(flstd.NewWriter(w, l)) },
	"gzkp": func(w *StatWriter, l int) (*StatWriter, error) { return mapWriter(gzkp.NewWriterLevel(w, l)) },
	"gzstd": func(w *StatWriter, l int) (*StatWriter, error) { return mapWriter(gzstd.NewWriterLevel(w, l)) },
	"pgzip": func(w *StatWriter, l int) (*StatWriter, error) { return mapWriter(pgz.NewWriterLevel(w, l)) },
	// pass-through; the level is ignored and the sink is not closed twice
	"none": func(w *StatWriter, l int) (*StatWriter, error) { return &StatWriter{W: w, NoClose: true}, nil },
}
// sinks maps the -d flag value to a constructor for the destination.
// The string argument is the output file name; only "file" uses it.
var sinks = map[string]func(string) (*StatWriter, error){
	"none": func(string) (*StatWriter, error) { return mapWriter(NoOp{}, nil) },
	"file": func(dest string) (*StatWriter, error) {
		if dest == "-" {
			// stdout must survive pipeline teardown, hence NoClose
			return &StatWriter{
				W: os.Stdout,
				NoClose: true,
			}, nil
		}
		return mapWriter(os.Create(dest))
	},
}
var sourceOpts string | |
var extractorOpts string | |
var compressorOpts string | |
var sinkOpts string | |
func init() { | |
var tmp []string | |
for k, _ := range sources { | |
tmp = append(tmp, k) | |
} | |
sort.Strings(tmp) | |
sourceOpts, tmp = strings.Join(tmp, ","), tmp[:0] | |
for k, _ := range extractors { | |
tmp = append(tmp, k) | |
} | |
sort.Strings(tmp) | |
extractorOpts, tmp = strings.Join(tmp, ","), tmp[:0] | |
for k, _ := range compressors { | |
tmp = append(tmp, k) | |
} | |
sort.Strings(tmp) | |
compressorOpts, tmp = strings.Join(tmp, ","), tmp[:0] | |
for k, _ := range sinks { | |
tmp = append(tmp, k) | |
} | |
sort.Strings(tmp) | |
sinkOpts, tmp = strings.Join(tmp, ","), tmp[:0] | |
} | |
// helpAndQuit prints an optional error message followed by the program
// help text and flag defaults to stderr, then terminates the process
// with a non-zero exit status.
//
// v may be an error, a message string, or a bool that is true when
// unknown command line arguments were left over after parsing.
func helpAndQuit(fs *flag.FlagSet, v interface{}) {
	var msg string
	hasErr := true
	switch err := v.(type) {
	case error:
		msg = err.Error()
	case string:
		msg = err
	case bool:
		// from help; no error
		if err {
			// BUGFIX: list leftovers from the FlagSet that actually parsed
			// the arguments; the package-level flag.CommandLine was never
			// parsed, so flag.Args() was always empty here.
			msg = "unknown flags: \n\t" + strings.Join(fs.Args(), "\n\t")
		} else {
			hasErr = false
		}
	default:
		msg = "unknown type in panic"
	}
	if hasErr {
		// BUGFIX: pass msg as an argument, not as the format string —
		// a '%' inside msg would otherwise garble the output.
		fmt.Fprintf(os.Stderr, "ERROR: %s\n\n", msg)
	}
	fmt.Fprintln(os.Stderr, os.Args[0]+`:
This program helps to test the differences between implementations of
flate and gzip in the standard library and in github.com/klauspost/compress.
By default, it reads data from standard input and writes it to standard
output compressed with github.com/klauspost/compress/gzip.
Data is processed in a pipeline; each step is configurable and tracks the
number of bytes processed.
source(-s) => extract(-x) => compress(-c) => destination(-d)
The source can be a file (-s=file -i=FILENAME), the standard input stream
(-s=file -i=-), a repeating sequence of bytes between 0 and 255 (-s=seq),
a sequence of random bytes repeating each run (-s=rand), or an endless
stream of zeroes (-s=zero).
The amount of data read can be limited (-n=NUMBYTES).
The destination is either a file (-d=file -o=FILENAME), the standard output
stream (-d=file -o=-), or it is discarded (-d=none).
Compression (-c=...) and decompression/extraction (-x=...) can be disabled
(-c=none / -x=none) or set to a compression package.
- flatestd: compress/flate
- flatekp: github.com/klauspost/compress/flate
- gzstd: compress/gzip
- gzkp: github.com/klauspost/compress/gzip
- pgzip: github.com/klauspost/pgzip
For compression, the level is configurable, e.g. for fastest (-l=1) or for
best compression (-l=9).
The number of cpu cores set is also configurable (-cpus=2).
Statistics can be shown with (-stats), (-noheaders) suppresses the headers.
Available parameters:`)
	fs.PrintDefaults()
	os.Exit(-1)
}
// main wires up and runs the pipeline
//
//	source (-s) => extract (-x) => compress (-c) => destination (-d)
//
// from command line flags, pumps the data through it, and optionally
// prints per-stage byte counts and throughput to stderr.
func main() {
	// pipeline stages; also closed by the deferred cleanup below
	var (
		r   *StatReader // source stage
		x   *StatReader // extraction stage (wraps r)
		c   *StatWriter // compression stage (wraps w)
		w   *StatWriter // destination stage
		err error
	)
	// flag configuration variables
	var (
		infile    = "-"
		outfile   = "-"
		src       = "file"
		dest      = "file"
		extmode   = "none"
		compmode  = "gzkp"
		complevel = -1
		rmax      = NumBytes(-1)
		cpus      = runtime.GOMAXPROCS(0)
		stats     = false
		noHeaders = false
		help      = false
	)
	// PanicOnError: bad flag values abort via panic, unwinding through
	// the deferred cleanup below
	fs := flag.NewFlagSet(os.Args[0], flag.PanicOnError)
	fs.StringVar(&infile, "i", infile, "input file; For stdin or non-file: '','-'")
	fs.StringVar(&outfile, "o", outfile, "output file; For stdout or non-file: '','-'")
	fs.StringVar(&src, "s", src, "source; requires '-i' for 'file'. One of "+sourceOpts)
	fs.StringVar(&dest, "d", dest, "destination; requires '-o' for 'file'. One of "+sinkOpts)
	fs.StringVar(&extmode, "x", extmode, "extract; One of "+extractorOpts)
	fs.StringVar(&compmode, "c", compmode, "compress; One of "+compressorOpts)
	fs.IntVar(&complevel, "l", complevel, "compression level (-1|0..9)")
	fs.Var(&rmax, "n", "max bytes read, 0 for all")
	fs.IntVar(&cpus, "cpus", cpus, "number of cpu cores used (< 0 for all)")
	fs.BoolVar(&stats, "stats", stats, "show stats")
	fs.BoolVar(&noHeaders, "noheaders", noHeaders, "suppress stats headers (ignored when stats is not set)")
	fs.BoolVar(&help, "h", help, "show this help text")
	fs.Parse(os.Args[1:])
	// leftover positional arguments are treated as unknown flags
	if unknownFlags := fs.NArg() > 0; unknownFlags || help {
		helpAndQuit(fs, unknownFlags)
	}
	if src == "" {
		// map to stdin
		src = "-"
	}
	if dest == "" {
		// map to stdout
		dest = "-"
	}
	if compmode == "none" {
		// the level is meaningless without a compressor
		complevel = 0
	}
	if cpus < 0 {
		cpus = runtime.NumCPU()
	}
	runtime.GOMAXPROCS(cpus)
	// basic sanity checks
	if src != "file" && infile != "-" {
		panic("input file must be '-' for non file source")
	}
	if dest != "file" && outfile != "-" {
		panic("output file must be '-' for non file destination")
	}
	if complevel < -1 || 9 < complevel {
		panic("compression level -l=x must be (-1,0..9)")
	}
	// NOTE(review): rmax defaults to -1, so this panics whenever -n is
	// omitted — presumably intended to force an explicit size (0 = all);
	// confirm, or consider defaulting rmax to 0.
	if rmax < 0 {
		panic("max bytes read is too small")
	}
	// close all open readers and writers
	// Order matters: extractor before its source, compressor before its
	// sink, so compressed stream trailers are flushed before the sink
	// closes.
	// NOTE(review): a stage that was never assigned is a typed-nil
	// pointer, which is non-nil as io.Closer; if a panic occurs before
	// all stages exist, Close on such a value dereferences nil — confirm
	// this is acceptable during unwinding.
	defer func() {
		for _, closer := range []io.Closer{x, c, r, w} {
			if closer != nil {
				closer.Close()
			}
		}
	}()
	if f, ok := sources[src]; ok {
		if r, err = f(infile); err != nil {
			panic("could not create reader: " + err.Error())
		}
	} else {
		panic("source reader of type '" + src + "' is unsupported")
	}
	if f, ok := extractors[extmode]; ok {
		if x, err = f(r); err != nil {
			panic("could not create extractor: " + err.Error())
		}
	} else {
		panic("source extractor of type '" + extmode + "' is unsupported")
	}
	if f, ok := sinks[dest]; ok {
		if w, err = f(outfile); err != nil {
			panic("could not create writer: " + err.Error())
		}
	} else {
		panic("source writer of type '" + dest + "' is unsupported")
	}
	if f, ok := compressors[compmode]; ok {
		if c, err = f(w, complevel); err != nil {
			panic("could not create compressor: " + err.Error())
		}
	} else {
		panic("compressor of type '" + compmode + "' is unsupported")
	}
	// optionally limit data
	// NOTE(review): this replaces x.R wholesale. That is correct for
	// -x=none (where x.R is r), but when an extractor is active x.R is
	// the decompressor, so the limit silently bypasses extraction —
	// verify whether the limit was meant to apply to raw source bytes
	// underneath the extractor instead.
	if rmax > -1 {
		x.R = &readCloser{
			Reader: &io.LimitedReader{
				R: r, // r is x.R
				N: int64(rmax),
			},
			Closer: r,
		}
	}
	start := time.Now()
	// adapted from io.Copy()
	buf := make([]byte, 32*1024)
	for {
		nr, er := x.Read(buf)
		if nr > 0 {
			nw, ew := c.Write(buf[:nr])
			if ew != nil {
				panic(ew)
			}
			if nr != nw {
				panic(io.ErrShortWrite)
			}
		}
		if er == io.EOF {
			break
		}
		if er != nil {
			panic(er)
		}
	}
	if stats {
		took := time.Since(start)
		// throughput in MiB/s measured at the raw source and final sink
		mbpsIn := (float64(r.N) / (1024 * 1024)) / took.Seconds()
		mbpsOut := (float64(w.N) / (1024 * 1024)) / took.Seconds()
		var format string
		if !noHeaders {
			// tab-separated header row, emitted once ahead of the values
			format = "" +
				"type in\tfile in\tbytes in\t" +
				"type extract\tbytes extracted\t" +
				"type compress\tlevel\tbytes compressed\t" +
				"type out\tfile out\tbytes out\t" +
				"cpus\tmillis\t" +
				"mb/s in\tmb/s out\n"
		}
		format += "" +
			"%s\t%s\t%d\t" +
			"%s\t%d\t" +
			"%s\t%d\t%d\t" +
			"%s\t%s\t%d\t" +
			"%d\t%.03f\t" +
			"%.02f\t%.02f\n"
		// stats go to stderr so they never mix with data on stdout
		fmt.Fprintf(os.Stderr, format,
			src, infile, r.N,
			extmode, x.N,
			compmode, complevel, c.N,
			dest, outfile, w.N,
			runtime.GOMAXPROCS(0), took.Seconds()*1000,
			mbpsIn, mbpsOut,
		)
	}
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
UPDATE 2
Instructions below are for an old version. The program prints its help text with -h. It provides more detailed statistics and better control - and it's easier to adapt now.
It always works as a pipeline: read from a source, pass the data through a decompressor, then through a compressor, and write it to a destination. Each step is configurable and tracks the number of processed bytes.
UPDATE 1
There's an adapted version friendlier to measurements on Windows, it can be found at https://gist.github.com/klauspost/00f7c9a19e56581f5ead
This is intended to be used with e.g. `pv` and `time` to measure the throughput. It always reads from `stdin` and writes to `stderr`. The default setting is to pass raw input to klauspost gzip with default compression and write it to stdout.
Unknown arguments print a help text:
Available producers / consumers:

- `raw`: stdin / stdout
- `flatekp`: stdin / stdout compressed with github.com/klauspost/compress/flate
- `flatestd`: stdin / stdout compressed with compress/flate
- `gzkp`: stdin / stdout compressed with github.com/klauspost/compress/gzip
- `gzstd`: stdin / stdout compressed with compress/gzip
- `zero`: an infinite stream of zero bytes
- `seq`: a repeating stream of bytes 0..255
- `rand`: a deterministic stream of random numbers (the same sequence each call)
- `none`: data sink (/dev/null)

Some examples
You can also pipe in a file and write it to .gz - and compare the contents and sizes after decompression.
Or you can do so on the fly. Here's an example for a file named testdata: