Last active
March 28, 2016 13:13
-
-
Save klauspost/00f7c9a19e56581f5ead to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
// Adapted from: https://gist.github.com/arnehormann/65421048f56ac108f6b5
import ( | |
"bufio" | |
"encoding/binary" | |
"flag" | |
"fmt" | |
"io" | |
"io/ioutil" | |
"os" | |
"runtime" | |
"time" | |
flstd "compress/flate" | |
gzstd "compress/gzip" | |
"github.com/biogo/hts/bgzf" | |
flkp "github.com/klauspost/compress/flate" | |
gzkp "github.com/klauspost/compress/gzip" | |
pgz "github.com/klauspost/pgzip" | |
"golang.org/x/build/pargzip" | |
"github.com/golang/snappy" | |
"github.com/klauspost/dedup" | |
"github.com/klauspost/readahead" | |
//"github.com/rasky/go-lzo" | |
"github.com/youtube/vitess/go/cgzip" | |
) | |
// NoOp is a stateless io.Reader and io.Writer that moves no data.
//
// As a reader it reports every buffer as completely filled without touching
// its contents; as a writer it reports every slice as completely written
// without storing it.
type NoOp struct{}

// Compile-time checks that NoOp satisfies both interfaces.
var (
	_ io.Reader = NoOp{}
	_ io.Writer = NoOp{}
)

// Read leaves v untouched and reports len(v) bytes read with a nil error.
// It never returns io.EOF, so the stream is endless.
func (n NoOp) Read(v []byte) (int, error) {
	return len(v), nil
}

// Write discards v and reports len(v) bytes written with a nil error.
func (n NoOp) Write(v []byte) (int, error) {
	return len(v), nil
}
// SeqGen is an endless io.Reader producing the repeating byte sequence
// 0, 1, 2, ..., 255, 0, 1, ...
type SeqGen struct {
	// i is the value of the next byte to emit; only its low 8 bits matter.
	i int
}

// Read fills v with the next len(v) bytes of the sequence and reports
// len(v) with a nil error. It never returns io.EOF.
//
// The position is stored back into the generator so that consecutive calls
// continue the sequence instead of restarting at 0 on every call.
func (s *SeqGen) Read(v []byte) (int, error) {
	b := byte(s.i)
	for i := range v {
		v[i] = b
		b++ // wraps from 255 back to 0
	}
	s.i = int(b)
	return len(v), nil
}
// Rand is an endless io.Reader of deterministic pseudo-random bytes.
// It uses PCG (http://www.pcg-random.org/).
type Rand struct {
	state uint64 // current generator state
	inc   uint64 // stream increment; forced odd whenever it is used
}

// pcgmult64 is the multiplier applied to the state on every step.
const pcgmult64 = 6364136223846793005

// NewRand returns a generator seeded with seed. Equal seeds yield equal
// byte streams.
func NewRand(seed uint64) *Rand {
	inc := seed<<1 | 1
	// Seeding: start from state 0, step once (which leaves state == inc),
	// add the seed, then step once more.
	state := inc + seed
	state = state*pcgmult64 + inc
	return &Rand{
		state: state,
		inc:   inc,
	}
}

// next advances the generator by one step and returns 32 output bits
// derived from the previous state.
func (r *Rand) next() uint32 {
	old := r.state
	r.state = old*pcgmult64 + (r.inc | 1)
	xorshifted := uint32(((old >> 18) ^ old) >> 27)
	rot := uint32(old >> 59)
	// Rotate xorshifted right by rot bits.
	return (xorshifted >> rot) | (xorshifted << ((-rot) & 31))
}

// Read fills v with pseudo-random bytes and reports len(v) with a nil error.
// It never returns io.EOF.
//
// Output words are written little-endian, four bytes at a time. A buffer
// whose length is not a multiple of four gets its last 1-3 bytes from the
// low-order bytes of one additional word, rather than panicking on the
// short tail.
func (r *Rand) Read(v []byte) (int, error) {
	w := v
	for len(w) >= 4 {
		binary.LittleEndian.PutUint32(w, r.next())
		w = w[4:]
	}
	if len(w) > 0 {
		var tail [4]byte
		binary.LittleEndian.PutUint32(tail[:], r.next())
		copy(w, tail[:])
	}
	return len(v), nil
}
// wcounter is an io.Writer that forwards every write to out and keeps a
// running total of the bytes out reported as written.
type wcounter struct {
	// n is the total byte count so far. It is 64 bits wide so that outputs
	// larger than 2 GiB do not overflow on 32-bit platforms.
	n   int64
	out io.Writer
}

// Write passes p to the wrapped writer and adds the number of bytes it
// accepted to the total, including on a short or failed write. The wrapped
// writer's result is returned unchanged.
func (w *wcounter) Write(p []byte) (n int, err error) {
	n, err = w.out.Write(p)
	w.n += int64(n)
	return n, err
}
func main() { | |
/* defer func() { | |
if p := recover(); p != nil { | |
var msg string | |
switch err := p.(type) { | |
case error: | |
msg = err.Error() | |
case string: | |
msg = err | |
default: | |
os.Stderr.WriteString("unknown type in panic") | |
os.Exit(1) | |
} | |
os.Stderr.WriteString(msg) | |
os.Exit(1) | |
} | |
}() | |
*/ | |
rmode := "raw" | |
wmode := "gzkp" | |
wlevel := -1 | |
in := "-" | |
out := "-" | |
cpu := 0 | |
stats := false | |
header := true | |
flag.StringVar(&rmode, "r", rmode, "read mode (raw|flatekp|flatestd|gzkp|pgzip|cgzip|gzstd|zero|seq|rand)") | |
flag.StringVar(&wmode, "w", wmode, "write mode (raw|flatekp|flatestd|gzkp|pgzip|gzstd|cgzip|none)") | |
flag.StringVar(&in, "in", rmode, "input file name, default is '-', stdin") | |
flag.StringVar(&out, "out", rmode, "input file name, default is '-', stdin") | |
flag.IntVar(&wlevel, "l", wlevel, "compression level (-2|-1|0..9)") | |
flag.IntVar(&cpu, "cpu", cpu, "GOMAXPROCS number (0|1...)") | |
flag.BoolVar(&stats, "stats", false, "show stats") | |
flag.BoolVar(&header, "header", true, "show stats header") | |
flag.Parse() | |
if flag.NArg() > 0 { | |
flag.PrintDefaults() | |
} | |
if cpu <= 0 { | |
cpu = runtime.NumCPU() | |
} | |
runtime.GOMAXPROCS(cpu) | |
if wlevel < -2 || 9 < wlevel { | |
panic("compression level -l=x must be (-2,0..9)") | |
} | |
var err error | |
var r io.Reader | |
if in == "-" { | |
r = os.Stdin | |
} else { | |
r, err = os.Open(in) | |
if err != nil { | |
panic(err) | |
} | |
r, _ = readahead.NewReaderSize(r, 10, 10<<20) | |
} | |
var source bool | |
switch rmode { | |
case "zero": | |
// NoOp writes what the original buffer contained unchanged. | |
// As that buffer is initialized with 0 and not changed, | |
// NoOp is usable as a very fast zero-reader. | |
r = NoOp{} | |
source = true | |
case "seq": | |
r = &SeqGen{} | |
source = true | |
case "rand": | |
r = NewRand(0xdeadbeef) | |
source = true | |
case "raw": | |
case "gzkp": | |
var gzr *gzkp.Reader | |
if gzr, err = gzkp.NewReader(r); err == nil { | |
defer gzr.Close() | |
r = gzr | |
} | |
case "bgzf": | |
var gzr *bgzf.Reader | |
if gzr, err = bgzf.NewReader(r, cpu); err == nil { | |
defer gzr.Close() | |
r = gzr | |
} | |
case "pgzip": | |
var gzr *pgz.Reader | |
if gzr, err = pgz.NewReader(r); err == nil { | |
defer gzr.Close() | |
r = gzr | |
} | |
case "cgzip": | |
var gzr io.ReadCloser | |
if gzr, err = cgzip.NewReader(r); err == nil { | |
defer gzr.Close() | |
r = gzr | |
} | |
case "gzstd": | |
var gzr *gzstd.Reader | |
if gzr, err = gzstd.NewReader(r); err == nil { | |
defer gzr.Close() | |
r = gzr | |
} | |
case "flatekp": | |
fr := flkp.NewReader(r) | |
defer fr.Close() | |
r = fr | |
case "flatestd": | |
fr := flstd.NewReader(r) | |
defer fr.Close() | |
r = fr | |
default: | |
panic("read mode -r=x must be (raw|flatekp|flatestd|gzkp|gzstd|zero|seq|rand)") | |
} | |
if err != nil { | |
panic(err) | |
} | |
var w io.Writer | |
if out == "-" { | |
w = os.Stdout | |
} else if out == "*" { | |
w = ioutil.Discard | |
out = "discard" | |
} else { | |
f, err := os.Create(out) | |
if err != nil { | |
panic(err) | |
} | |
w = bufio.NewWriter(f) | |
} | |
outSize := &wcounter{out: w} | |
w = outSize | |
var sink bool | |
switch wmode { | |
case "none": | |
w = NoOp{} | |
sink = true | |
case "raw": | |
case "gzkp": | |
var gzw *gzkp.Writer | |
if gzw, err = gzkp.NewWriterLevel(w, wlevel); err == nil { | |
defer gzw.Close() | |
w = gzw | |
} | |
case "pgzip": | |
var gzw *pgz.Writer | |
if gzw, err = pgz.NewWriterLevel(w, wlevel); err == nil { | |
defer gzw.Close() | |
w = gzw | |
} | |
case "bgzf": | |
var gzw *bgzf.Writer | |
if gzw, err = bgzf.NewWriterLevel(w, wlevel, cpu); err == nil { | |
defer gzw.Close() | |
w = gzw | |
} | |
case "pargzip": | |
var gzw *pargzip.Writer | |
gzw = pargzip.NewWriter(w) | |
gzw.UseSystemGzip = false | |
defer gzw.Close() | |
w = gzw | |
case "cgzip": | |
var gzw *cgzip.Writer | |
if gzw, err = cgzip.NewWriterLevel(w, wlevel); err == nil { | |
defer gzw.Close() | |
w = gzw | |
} | |
case "gzstd": | |
var gzw *gzstd.Writer | |
if gzw, err = gzstd.NewWriterLevel(w, wlevel); err == nil { | |
defer gzw.Close() | |
w = gzw | |
} | |
case "dedup": | |
var ddw dedup.Writer | |
if ddw, err = dedup.NewStreamWriter(w, dedup.ModeDynamic, 1024, 0); err == nil { | |
defer ddw.Close() | |
w = ddw | |
} | |
case "snappy": | |
sw := snappy.NewWriter(w) | |
w = sw | |
/* case "lzo1x": | |
sw := lzo.NewWriter(w, wlevel) | |
w = sw*/ | |
case "flatekp": | |
var fw *flkp.Writer | |
if fw, err = flkp.NewWriter(w, wlevel); err == nil { | |
defer fw.Close() | |
w = fw | |
} | |
case "flatestd": | |
var fw *flstd.Writer | |
if fw, err = flstd.NewWriter(w, wlevel); err == nil { | |
defer fw.Close() | |
w = fw | |
} | |
default: | |
panic("write mode -w=x must be (raw|flatekp|flatestd|gzkp|pgzip|gzstd|none)") | |
} | |
if err != nil { | |
panic(err) | |
} | |
if source && sink { | |
return | |
} | |
inSize := int64(0) | |
start := time.Now() | |
func() { | |
for _, mc := range []interface{}{r, w} { | |
if c, ok := mc.(io.Closer); ok { | |
defer c.Close() | |
} | |
} | |
nr, err := io.Copy(w, r) | |
inSize += nr | |
if err != nil && err != io.EOF { | |
panic(err) | |
} | |
}() | |
if stats { | |
elapsed := time.Since(start) | |
if header { | |
fmt.Printf("file\tin\tout\tlevel\tcpu\tinsize\toutsize\tmillis\tmb/s\n") | |
} | |
mbpersec := (float64(inSize) / (1024 * 1024)) / (float64(elapsed) / (float64(time.Second))) | |
fmt.Printf("%s\t%s\t%s\t%d\t%d\t%d\t%d\t%d\t%.02f\n", in, rmode, wmode, wlevel, cpu, inSize, outSize.n, elapsed/time.Millisecond, mbpersec) | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
REM Benchmark the gzip-style writers of compress.exe on the file given as %1.
REM Each recorded run appends one stats row to results.txt.
REM Usage: <this-script> <input-file>
REM "%~1" strips any quotes from the argument and re-quotes it, so paths
REM containing spaces are passed as a single -in value.

REM Unrecorded first run (presumably a warm-up; its row goes to the console).
compress -in="%~1" -out=* -stats -header=false -w="pgzip" -l=-2

REM Level 1: the very first recorded row also prints the column header.
compress -in="%~1" -out=* -stats -header=true -w="gzstd" -l=1 >>results.txt
for %%W in (gzkp pgzip bgzf cgzip) do compress -in="%~1" -out=* -stats -header=false -w="%%W" -l=1 >>results.txt

REM Levels 2..9: a blank separator line, then the same five writers.
for /L %%L in (2,1,9) do (
  echo.>>results.txt
  for %%W in (gzstd gzkp pgzip bgzf cgzip) do compress -in="%~1" -out=* -stats -header=false -w="%%W" -l=%%L >>results.txt
)

REM Level -2 is run for gzkp and pgzip only; pargzip is run once with -l=0.
echo.>>results.txt
for %%W in (gzkp pgzip) do compress -in="%~1" -out=* -stats -header=false -w="%%W" -l=-2 >>results.txt
compress -in="%~1" -out=* -stats -header=false -w="pargzip" -l=0 >>results.txt
echo.>>results.txt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
REM Build compress.exe, then benchmark write mode %2 on file %1 at level -2
REM and at levels 1..9. Each run appends one stats row to results.txt.
REM Usage: <this-script> <input-file> <write-mode>
go build compress.go
REM Stop here if the build failed, rather than benchmarking a stale or
REM missing binary.
if errorlevel 1 exit /b 1

REM The first row carries the column header.
compress -in="%~1" -out=* -stats -header=true -w=%2 -l=-2 >>results.txt
for /L %%L in (1,1,9) do compress -in="%~1" -out=* -stats -header=false -w=%2 -l=%%L >>results.txt
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment