Skip to content

Instantly share code, notes, and snippets.

@vbatts
Last active August 29, 2015 14:16
Show Gist options
  • Save vbatts/43fc209acf37ff21dd87 to your computer and use it in GitHub Desktop.
Save vbatts/43fc209acf37ff21dd87 to your computer and use it in GitHub Desktop.
checksum consistency of gzip (golang and gnu utils)

Overview

Choose or make a simple tar archive to work with.

Layout

$ ls -sh t.tar
24K t.tar

Build the golang tool

$ go build ./c.go

compare the golang vs gnu gzip

Regular compression

vbatts@noyee ~ (master *) $ ./c t.tar
2015/03/11 22:09:20 two passes of gzipping "t.tar" match! (f6a2c0441248a179c8a4b3c4ab7a05b6bba002ba)
vbatts@noyee ~ (master *) $ cat t.tar | gzip --no-name | sha1sum
303354da22a0062936fe8218d7ede539bc95aa4b  -
vbatts@noyee ~ (master *) $ cat t.tar | gzip --no-name | sha1sum
303354da22a0062936fe8218d7ede539bc95aa4b  -

Fastest Compression

vbatts@noyee ~ (master *) $ ./c -fast t.tar
2015/03/11 22:09:30 two passes of gzipping "t.tar" match! (7e2803957206d91cb2e0c8229ef0047a58def31c)
vbatts@noyee ~ (master *) $ cat t.tar | gzip --no-name --fast | sha1sum
afb1cfef4f435b5323fd876beaaa1011f0c5fece  -
vbatts@noyee ~ (master *) $ cat t.tar | gzip --no-name --fast | sha1sum
afb1cfef4f435b5323fd876beaaa1011f0c5fece  -

Best Compression

vbatts@noyee ~ (master *) $ ./c -best t.tar
2015/03/11 22:09:26 two passes of gzipping "t.tar" match! (4c14fcefe569e6c9407cd00dd18dce1dfa4559cb)
vbatts@noyee ~ (master *) $ cat t.tar | gzip --no-name --best | sha1sum
e0b2923d4f5ddaaaf42f71220226a4c269ab78ed  -
vbatts@noyee ~ (master *) $ cat t.tar | gzip --no-name --best | sha1sum
e0b2923d4f5ddaaaf42f71220226a4c269ab78ed  -

Finding

Each implementation is consistent with itself across runs, but the Go and GNU gzip outputs do not match each other.

Visual Comparison

You'll see the magic headers are the same (1f 8b 08 00), then they diverge.

Generate compressed artifacts

$ ./c -out-suffix .1.gz ./t.tar
2015/03/11 22:22:39 two passes of gzipping "./t.tar" match! (f6a2c0441248a179c8a4b3c4ab7a05b6bba002ba)
see also "t.tar.1.gz"
$ cat ./t.tar | gzip --no-name > t.tar.2.gz

View them

$ vimdiff -b ./t.tar.1.gz ./t.tar.2.gz 

See the vimdiff.png images below

package main
import (
"compress/gzip"
"crypto/sha1"
"flag"
"fmt"
"io"
"log"
"os"
"path"
)
var (
	// flOutput, when non-empty, is appended to each argument's cleaned path to
	// name a file that receives a copy of the second pass's compressed stream.
	flOutput = flag.String("out-suffix", "", "output a compressed instance of arg, with this suffix")
	// flBest selects gzip.BestCompression; it takes precedence over -fast.
	flBest = flag.Bool("best", false, "best compression")
	// flFast selects gzip.BestSpeed.
	flFast = flag.Bool("fast", false, "fastest compression")
)

// main gzips every file named on the command line twice at the selected
// compression level and compares the SHA-1 of the two compressed streams.
// Any I/O error, or a mismatch between the two passes, terminates the
// program through log.Fatal.
func main() {
	flag.Parse()

	level := gzip.DefaultCompression
	switch {
	case *flBest:
		level = gzip.BestCompression
	case *flFast:
		level = gzip.BestSpeed
	}

	for _, arg := range flag.Args() {
		if err := verifyFile(arg, level, *flOutput); err != nil {
			log.Fatal(err)
		}
	}
}

// verifyFile compresses the file called name twice at the given gzip level and
// checks that both compressed streams hash to the same SHA-1 digest.
//
// When suffix is non-empty the second pass is also written to
// path.Clean(name)+suffix. It returns the first I/O error encountered
// (including a failure to close the output file) or an error describing the
// digest mismatch; on success it logs the matching digest and returns nil.
func verifyFile(name string, level int, suffix string) error {
	fh, err := os.Open(name)
	if err != nil {
		return err
	}
	// The input is opened read-only, so a Close error cannot lose data.
	defer fh.Close()

	h := sha1.New()

	// First pass: the compressed bytes go only into the hash.
	if err := gzipCopy(h, fh, level); err != nil {
		return err
	}
	sum1 := h.Sum(nil)

	// Rewind the input (whence 0 = relative to the start of the file) and
	// clear the hash so the second pass starts from the same state.
	h.Reset()
	if _, err := fh.Seek(0, 0); err != nil {
		return err
	}

	var (
		w   io.Writer = h
		out *os.File
	)
	if suffix != "" {
		out, err = os.Create(fmt.Sprintf("%s%s", path.Clean(name), suffix))
		if err != nil {
			return err
		}
		w = io.MultiWriter(out, h)
	}

	// Second pass: same input and level, optionally teeing into the file.
	err = gzipCopy(w, fh, level)
	if out != nil {
		// Close explicitly and keep its error: a failed close on a file we
		// just wrote means the artifact on disk may be incomplete.
		if cerr := out.Close(); err == nil {
			err = cerr
		}
	}
	if err != nil {
		return err
	}
	sum2 := h.Sum(nil)

	if string(sum1) != string(sum2) {
		return fmt.Errorf("two passes of gzipping %q did not match! (%x and %x)", name, sum1, sum2)
	}
	log.Printf("two passes of gzipping %q match! (%x)", name, sum1)
	if out != nil {
		fmt.Printf("see also %q\n", out.Name())
	}
	return nil
}

// gzipCopy streams src through a gzip writer at the given level into dst.
// The gzip writer is closed before returning so that all buffered data and
// the gzip footer have reached dst.
func gzipCopy(dst io.Writer, src io.Reader, level int) error {
	gz, err := gzip.NewWriterLevel(dst, level)
	if err != nil {
		return err
	}
	if _, err := io.Copy(gz, src); err != nil {
		return err
	}
	return gz.Close()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment