Skip to content

Instantly share code, notes, and snippets.

@Hafthor
Last active November 11, 2021 21:34
Show Gist options
  • Select an option

  • Save Hafthor/9e0f76f5d0f6497f33c7a5b78a86672e to your computer and use it in GitHub Desktop.

Select an option

Save Hafthor/9e0f76f5d0f6497f33c7a5b78a86672e to your computer and use it in GitHub Desktop.
Slow implementation of inflate in Go (zlib ~2.5x faster)
package main
import (
"errors"
"fmt"
"io"
"os"
"time"
)
func main() {
file, _ := os.Open("somefile.gz")
defer file.Close()
stat, _ := file.Stat()
sz := stat.Size()
buf := make([]byte, sz)
io.ReadFull(file, buf)
outl := buf[sz-1]*16777216+buf[sz-2]*65536+buf[sz-3]*256+buf[sz-4]
out := make([]byte, outl)
infl := NewInflatey(buf, out)
infl.inputBitPos = 80
infl.Inflate()
}
type Inflatey struct {
input []byte
output []byte
inputBitPos uint
outputPos uint
}
func NewInflatey(_input, _output []byte) *Inflatey {
return &Inflatey{
input: _input,
output: _output,
inputBitPos: 0,
outputPos: 0,
}
}
func init() {
lCode := 0
length := uint(3)
for _, extraBits := range lengthCodeExtraBits {
lengthCode[lCode] = length
lCode++
length += 1 << extraBits
}
lengthCode[lCode] = length - 1 // yes, we repeat this code here. /shrug?
dCode := 0
dist := uint(1)
for _, extraBits := range distanceCodeExtraBits {
distanceCode[dCode] = dist
dCode++
dist += 1 << extraBits
}
for i := range bitReverse {
bitReverse[i] = (byte)(((i & 1) << 7) | ((i & 2) << 5) | ((i & 4) << 3) | ((i & 8) << 1) | ((i & 16) >> 1) | ((i & 32) >> 3) | ((i & 64) >> 5) | ((i & 128) >> 7))
}
}
var lengthCodeExtraBits = []byte{0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5}
var distanceCodeExtraBits = []byte{0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13}
var codeLengthsOrder = []byte{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
var lengthCode = make([]uint, len(lengthCodeExtraBits)+1)
var distanceCode = make([]uint, len(distanceCodeExtraBits))
var bitReverse = make([]byte, 256)
const blockEnd = 256
func (infl *Inflatey) Inflate() error {
for {
finalBlock := infl.readForwardBits(1) // =1 if this is the last block in the stream
blockType := infl.readForwardBits(2)
var err error = nil
if blockType == 0 {
err = infl.storedBlockInflate()
} else if blockType == 1 {
err = infl.fixedHuffmanBlockInflate()
} else if blockType == 2 {
err = infl.dynamicHuffmanBlockInflate()
} else {
err = errors.New("unrecognized deflate block type 3")
}
if err != nil {
return err
}
if finalBlock == 1 {
break
}
}
return nil
}
func (infl *Inflatey) storedBlockInflate() error {
for (infl.inputBitPos & 7) != 0 {
infl.inputBitPos++ // advance to whole byte boundary
}
ulen := infl.readForwardBits(16)
nlen := infl.readForwardBits(16)
if (nlen ^ 0xFFFF) != ulen {
return errors.New("~nlen != len in stored block")
}
bytePos := infl.inputBitPos >> 3
input, output := infl.input, infl.output
outputPos := infl.outputPos
outputPosEnd := outputPos + ulen
for outputPos < outputPosEnd {
output[outputPos] = input[bytePos]
outputPos++
bytePos++
}
infl.inputBitPos = bytePos << 3
return nil
}
func (infl *Inflatey) fixedHuffmanBlockInflate() error {
for {
code7 := infl.readReverseBits(7) // code is 7 to 9 bits, but the first 7 will tell us how many more we need
var code uint
if code7 < 0x60 {
if code7 < 0x18 {
if code7 == 0 {
break // BLOCK_END
}
code = code7 + 256 // 256 - 0x00
} else {
code = ((code7 << 1) | infl.readReverseBits(1)) - 0x30 // 0 - 0x18*2
infl.output[infl.outputPos] = byte(code)
infl.outputPos++
continue
}
} else if code7 < 0x64 {
code = ((code7 << 1) | infl.readReverseBits(1)) + 88 // 280 - 0x60*2
} else {
code = ((code7 << 2) | infl.readReverseBits(2)) - 256 // 144 - 0x64*4
infl.output[infl.outputPos] = byte(code)
infl.outputPos++
continue
}
// length/distance code
lCode := code - 257
length := lengthCode[lCode] + infl.readForwardBits(lengthCodeExtraBits[lCode])
// fixedHuffmanBlockInflate just reads 5 bits to figure out distanceIndex
dCode := infl.readReverseBits(5)
distance := distanceCode[dCode] + infl.readForwardBits(distanceCodeExtraBits[dCode])
// copy length bytes start from -distance (length may be more than distance!)
infl.outputCopy(infl.outputPos-distance, length)
}
return nil
}
func (infl *Inflatey) dynamicHuffmanBlockInflate() error {
// read the huffman info for the huffman info, lol
lengthCodeCount := infl.readForwardBits(5)
literalAndLengthsCodeCount := lengthCodeCount + 257 // 256 literals + 1 stop code
distanceCodesCount := infl.readForwardBits(5) + 1 // +1 because we know we will have at least 1
codeLengthsCount := infl.readForwardBits(4) + 4 // +4 because we know we will need to have code length 0
// build a code lengths huffman tree
codeLengths := make([]byte, len(codeLengthsOrder))
for codeLengthsIndex := uint(0); codeLengthsIndex < codeLengthsCount; codeLengthsIndex++ {
codeLengths[codeLengthsOrder[codeLengthsIndex]] = byte(infl.readForwardBits(3))
}
huffCodeLens, err := infl.buildHuffmanMap(codeLengths, 0, uint(len(codeLengths)))
if err != nil {
return err
}
// gather code lengths for literals/length and distance codes
allCodeLengths := make([]byte, literalAndLengthsCodeCount+distanceCodesCount)
for allCodeLengthsIndex, l := uint(0), literalAndLengthsCodeCount+distanceCodesCount; allCodeLengthsIndex < l; {
codeLengthCode := infl.readCodeWithHuffmanMap(huffCodeLens)
switch codeLengthCode {
case 18:
allCodeLengthsIndex += infl.readForwardBits(7) + 11
break
case 17:
allCodeLengthsIndex += infl.readForwardBits(3) + 3
break
case 16:
copyCount := infl.readForwardBits(2) + 3
src := allCodeLengths[allCodeLengthsIndex-1]
for i := uint(0); i < copyCount; i++ {
allCodeLengths[allCodeLengthsIndex] = src
allCodeLengthsIndex++
}
break
default:
allCodeLengths[allCodeLengthsIndex] = byte(codeLengthCode)
allCodeLengthsIndex++
}
}
huffmanLiteralsAndLengths, err := infl.buildHuffmanMap(allCodeLengths, 0, literalAndLengthsCodeCount)
if err != nil {
return err
}
huffmanDistanceLengths, err := infl.buildHuffmanMap(allCodeLengths, literalAndLengthsCodeCount, uint(len(allCodeLengths)))
if err != nil {
return err
}
output := infl.output
for {
code := infl.readCodeWithHuffmanMap(huffmanLiteralsAndLengths)
if code == blockEnd {
break
} else if code < blockEnd { // literal
output[infl.outputPos] = byte(code)
infl.outputPos++
} else { // length/distance code
lCode := code - 257
length := lengthCode[lCode] + infl.readForwardBits(lengthCodeExtraBits[lCode])
dCode := infl.readCodeWithHuffmanMap(huffmanDistanceLengths)
distance := distanceCode[dCode] + infl.readForwardBits(distanceCodeExtraBits[dCode])
// copy length bytes start from -distance (length may be more than distance!)
infl.outputCopy(infl.outputPos-distance, length)
}
}
return nil
}
func (infl *Inflatey) buildHuffmanMap(bitLengths []byte, start, end uint) ([]uint16, error) {
// scan for largest bit length
maxLengths := byte(0)
for i := start; i < end; i++ {
bitLength := bitLengths[i]
if bitLength > maxLengths {
maxLengths = bitLength
}
}
repeats := 1 << maxLengths
huffmanMap := make([]uint16, repeats+1)
c := 0
huffmanMap[c] = uint16(maxLengths)
c++
for bits:=byte(1);bits<=maxLengths;bits++ {
repeats >>= 1
xbits := uint16(maxLengths - bits) << 9
for i:=start;i<end;i++ {
if bitLengths[i] == bits {
x := uint16(i-start)+xbits
for e:=c+repeats; c < e; c++ {
huffmanMap[c] = x
}
}
}
}
return huffmanMap, nil
}
func (infl *Inflatey) readCodeWithHuffmanMap(huffmanMap []uint16) uint16 {
maxBits := huffmanMap[0]
v := infl.readReverseBits(byte(maxBits))
xc := huffmanMap[v+1]
infl.inputBitPos -= uint(xc) >> 9
return xc & 0x1FF
}
func (infl *Inflatey) readReverseBits(bitCount byte) uint {
ibp := infl.inputBitPos
bytePos := ibp >> 3
ibp += uint(bitCount)
infl.inputBitPos = ibp
input := infl.input
mask := uint(1<<bitCount) - 1
n := uint(bitReverse[input[bytePos]])
bytePosEnd := ibp >> 3
bitPosEnd := ibp & 7
if bytePos < bytePosEnd {
bytePos++
n = (n << 8) | uint(bitReverse[input[bytePos]])
if bytePos < bytePosEnd {
bytePos++
n = (n << 8) | uint(bitReverse[input[bytePos]])
if bytePos < bytePosEnd {
bytePos++
n = (n << 8) | uint(bitReverse[input[bytePos]])
}
}
}
return (n >> (8 - bitPosEnd)) & mask
}
func (infl *Inflatey) readForwardBits(bitCount byte) uint {
ibp, input := infl.inputBitPos, infl.input
bytePos, bitPos := ibp>>3, ibp&7
mask := uint(1<<bitCount) - 1
ibp += uint(bitCount)
infl.inputBitPos = ibp
n := uint(input[bytePos])
bytePosEnd := ibp >> 3
if bytePos < bytePosEnd {
bytePos++
n |= uint(input[bytePos]) << 8
if bytePos < bytePosEnd {
bytePos++
n |= uint(input[bytePos]) << 16
if bytePos < bytePosEnd {
bytePos++
n |= uint(input[bytePos]) << 24
}
}
}
return (n >> bitPos) & mask
}
func (infl *Inflatey) outputCopy(sourceOffset, length uint) {
si, di, edi := sourceOffset, infl.outputPos, infl.outputPos+length
infl.outputPos = edi
o := infl.output
for di < edi {
o[di] = o[si]
di++
si++
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment