Last active
November 11, 2021 21:34
-
-
Save Hafthor/9e0f76f5d0f6497f33c7a5b78a86672e to your computer and use it in GitHub Desktop.
Slow implementation of inflate in Go (zlib ~2.5x faster)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package main | |
| import ( | |
| "errors" | |
| "fmt" | |
| "io" | |
| "os" | |
| "time" | |
| ) | |
| func main() { | |
| file, _ := os.Open("somefile.gz") | |
| defer file.Close() | |
| stat, _ := file.Stat() | |
| sz := stat.Size() | |
| buf := make([]byte, sz) | |
| io.ReadFull(file, buf) | |
| outl := buf[sz-1]*16777216+buf[sz-2]*65536+buf[sz-3]*256+buf[sz-4] | |
| out := make([]byte, outl) | |
| infl := NewInflatey(buf, out) | |
| infl.inputBitPos = 80 | |
| infl.Inflate() | |
| } | |
| type Inflatey struct { | |
| input []byte | |
| output []byte | |
| inputBitPos uint | |
| outputPos uint | |
| } | |
| func NewInflatey(_input, _output []byte) *Inflatey { | |
| return &Inflatey{ | |
| input: _input, | |
| output: _output, | |
| inputBitPos: 0, | |
| outputPos: 0, | |
| } | |
| } | |
| func init() { | |
| lCode := 0 | |
| length := uint(3) | |
| for _, extraBits := range lengthCodeExtraBits { | |
| lengthCode[lCode] = length | |
| lCode++ | |
| length += 1 << extraBits | |
| } | |
| lengthCode[lCode] = length - 1 // yes, we repeat this code here. /shrug? | |
| dCode := 0 | |
| dist := uint(1) | |
| for _, extraBits := range distanceCodeExtraBits { | |
| distanceCode[dCode] = dist | |
| dCode++ | |
| dist += 1 << extraBits | |
| } | |
| for i := range bitReverse { | |
| bitReverse[i] = (byte)(((i & 1) << 7) | ((i & 2) << 5) | ((i & 4) << 3) | ((i & 8) << 1) | ((i & 16) >> 1) | ((i & 32) >> 3) | ((i & 64) >> 5) | ((i & 128) >> 7)) | |
| } | |
| } | |
| var lengthCodeExtraBits = []byte{0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5} | |
| var distanceCodeExtraBits = []byte{0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13} | |
| var codeLengthsOrder = []byte{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} | |
| var lengthCode = make([]uint, len(lengthCodeExtraBits)+1) | |
| var distanceCode = make([]uint, len(distanceCodeExtraBits)) | |
| var bitReverse = make([]byte, 256) | |
| const blockEnd = 256 | |
| func (infl *Inflatey) Inflate() error { | |
| for { | |
| finalBlock := infl.readForwardBits(1) // =1 if this is the last block in the stream | |
| blockType := infl.readForwardBits(2) | |
| var err error = nil | |
| if blockType == 0 { | |
| err = infl.storedBlockInflate() | |
| } else if blockType == 1 { | |
| err = infl.fixedHuffmanBlockInflate() | |
| } else if blockType == 2 { | |
| err = infl.dynamicHuffmanBlockInflate() | |
| } else { | |
| err = errors.New("unrecognized deflate block type 3") | |
| } | |
| if err != nil { | |
| return err | |
| } | |
| if finalBlock == 1 { | |
| break | |
| } | |
| } | |
| return nil | |
| } | |
| func (infl *Inflatey) storedBlockInflate() error { | |
| for (infl.inputBitPos & 7) != 0 { | |
| infl.inputBitPos++ // advance to whole byte boundary | |
| } | |
| ulen := infl.readForwardBits(16) | |
| nlen := infl.readForwardBits(16) | |
| if (nlen ^ 0xFFFF) != ulen { | |
| return errors.New("~nlen != len in stored block") | |
| } | |
| bytePos := infl.inputBitPos >> 3 | |
| input, output := infl.input, infl.output | |
| outputPos := infl.outputPos | |
| outputPosEnd := outputPos + ulen | |
| for outputPos < outputPosEnd { | |
| output[outputPos] = input[bytePos] | |
| outputPos++ | |
| bytePos++ | |
| } | |
| infl.inputBitPos = bytePos << 3 | |
| return nil | |
| } | |
| func (infl *Inflatey) fixedHuffmanBlockInflate() error { | |
| for { | |
| code7 := infl.readReverseBits(7) // code is 7 to 9 bits, but the first 7 will tell us how many more we need | |
| var code uint | |
| if code7 < 0x60 { | |
| if code7 < 0x18 { | |
| if code7 == 0 { | |
| break // BLOCK_END | |
| } | |
| code = code7 + 256 // 256 - 0x00 | |
| } else { | |
| code = ((code7 << 1) | infl.readReverseBits(1)) - 0x30 // 0 - 0x18*2 | |
| infl.output[infl.outputPos] = byte(code) | |
| infl.outputPos++ | |
| continue | |
| } | |
| } else if code7 < 0x64 { | |
| code = ((code7 << 1) | infl.readReverseBits(1)) + 88 // 280 - 0x60*2 | |
| } else { | |
| code = ((code7 << 2) | infl.readReverseBits(2)) - 256 // 144 - 0x64*4 | |
| infl.output[infl.outputPos] = byte(code) | |
| infl.outputPos++ | |
| continue | |
| } | |
| // length/distance code | |
| lCode := code - 257 | |
| length := lengthCode[lCode] + infl.readForwardBits(lengthCodeExtraBits[lCode]) | |
| // fixedHuffmanBlockInflate just reads 5 bits to figure out distanceIndex | |
| dCode := infl.readReverseBits(5) | |
| distance := distanceCode[dCode] + infl.readForwardBits(distanceCodeExtraBits[dCode]) | |
| // copy length bytes start from -distance (length may be more than distance!) | |
| infl.outputCopy(infl.outputPos-distance, length) | |
| } | |
| return nil | |
| } | |
| func (infl *Inflatey) dynamicHuffmanBlockInflate() error { | |
| // read the huffman info for the huffman info, lol | |
| lengthCodeCount := infl.readForwardBits(5) | |
| literalAndLengthsCodeCount := lengthCodeCount + 257 // 256 literals + 1 stop code | |
| distanceCodesCount := infl.readForwardBits(5) + 1 // +1 because we know we will have at least 1 | |
| codeLengthsCount := infl.readForwardBits(4) + 4 // +4 because we know we will need to have code length 0 | |
| // build a code lengths huffman tree | |
| codeLengths := make([]byte, len(codeLengthsOrder)) | |
| for codeLengthsIndex := uint(0); codeLengthsIndex < codeLengthsCount; codeLengthsIndex++ { | |
| codeLengths[codeLengthsOrder[codeLengthsIndex]] = byte(infl.readForwardBits(3)) | |
| } | |
| huffCodeLens, err := infl.buildHuffmanMap(codeLengths, 0, uint(len(codeLengths))) | |
| if err != nil { | |
| return err | |
| } | |
| // gather code lengths for literals/length and distance codes | |
| allCodeLengths := make([]byte, literalAndLengthsCodeCount+distanceCodesCount) | |
| for allCodeLengthsIndex, l := uint(0), literalAndLengthsCodeCount+distanceCodesCount; allCodeLengthsIndex < l; { | |
| codeLengthCode := infl.readCodeWithHuffmanMap(huffCodeLens) | |
| switch codeLengthCode { | |
| case 18: | |
| allCodeLengthsIndex += infl.readForwardBits(7) + 11 | |
| break | |
| case 17: | |
| allCodeLengthsIndex += infl.readForwardBits(3) + 3 | |
| break | |
| case 16: | |
| copyCount := infl.readForwardBits(2) + 3 | |
| src := allCodeLengths[allCodeLengthsIndex-1] | |
| for i := uint(0); i < copyCount; i++ { | |
| allCodeLengths[allCodeLengthsIndex] = src | |
| allCodeLengthsIndex++ | |
| } | |
| break | |
| default: | |
| allCodeLengths[allCodeLengthsIndex] = byte(codeLengthCode) | |
| allCodeLengthsIndex++ | |
| } | |
| } | |
| huffmanLiteralsAndLengths, err := infl.buildHuffmanMap(allCodeLengths, 0, literalAndLengthsCodeCount) | |
| if err != nil { | |
| return err | |
| } | |
| huffmanDistanceLengths, err := infl.buildHuffmanMap(allCodeLengths, literalAndLengthsCodeCount, uint(len(allCodeLengths))) | |
| if err != nil { | |
| return err | |
| } | |
| output := infl.output | |
| for { | |
| code := infl.readCodeWithHuffmanMap(huffmanLiteralsAndLengths) | |
| if code == blockEnd { | |
| break | |
| } else if code < blockEnd { // literal | |
| output[infl.outputPos] = byte(code) | |
| infl.outputPos++ | |
| } else { // length/distance code | |
| lCode := code - 257 | |
| length := lengthCode[lCode] + infl.readForwardBits(lengthCodeExtraBits[lCode]) | |
| dCode := infl.readCodeWithHuffmanMap(huffmanDistanceLengths) | |
| distance := distanceCode[dCode] + infl.readForwardBits(distanceCodeExtraBits[dCode]) | |
| // copy length bytes start from -distance (length may be more than distance!) | |
| infl.outputCopy(infl.outputPos-distance, length) | |
| } | |
| } | |
| return nil | |
| } | |
| func (infl *Inflatey) buildHuffmanMap(bitLengths []byte, start, end uint) ([]uint16, error) { | |
| // scan for largest bit length | |
| maxLengths := byte(0) | |
| for i := start; i < end; i++ { | |
| bitLength := bitLengths[i] | |
| if bitLength > maxLengths { | |
| maxLengths = bitLength | |
| } | |
| } | |
| repeats := 1 << maxLengths | |
| huffmanMap := make([]uint16, repeats+1) | |
| c := 0 | |
| huffmanMap[c] = uint16(maxLengths) | |
| c++ | |
| for bits:=byte(1);bits<=maxLengths;bits++ { | |
| repeats >>= 1 | |
| xbits := uint16(maxLengths - bits) << 9 | |
| for i:=start;i<end;i++ { | |
| if bitLengths[i] == bits { | |
| x := uint16(i-start)+xbits | |
| for e:=c+repeats; c < e; c++ { | |
| huffmanMap[c] = x | |
| } | |
| } | |
| } | |
| } | |
| return huffmanMap, nil | |
| } | |
| func (infl *Inflatey) readCodeWithHuffmanMap(huffmanMap []uint16) uint16 { | |
| maxBits := huffmanMap[0] | |
| v := infl.readReverseBits(byte(maxBits)) | |
| xc := huffmanMap[v+1] | |
| infl.inputBitPos -= uint(xc) >> 9 | |
| return xc & 0x1FF | |
| } | |
| func (infl *Inflatey) readReverseBits(bitCount byte) uint { | |
| ibp := infl.inputBitPos | |
| bytePos := ibp >> 3 | |
| ibp += uint(bitCount) | |
| infl.inputBitPos = ibp | |
| input := infl.input | |
| mask := uint(1<<bitCount) - 1 | |
| n := uint(bitReverse[input[bytePos]]) | |
| bytePosEnd := ibp >> 3 | |
| bitPosEnd := ibp & 7 | |
| if bytePos < bytePosEnd { | |
| bytePos++ | |
| n = (n << 8) | uint(bitReverse[input[bytePos]]) | |
| if bytePos < bytePosEnd { | |
| bytePos++ | |
| n = (n << 8) | uint(bitReverse[input[bytePos]]) | |
| if bytePos < bytePosEnd { | |
| bytePos++ | |
| n = (n << 8) | uint(bitReverse[input[bytePos]]) | |
| } | |
| } | |
| } | |
| return (n >> (8 - bitPosEnd)) & mask | |
| } | |
| func (infl *Inflatey) readForwardBits(bitCount byte) uint { | |
| ibp, input := infl.inputBitPos, infl.input | |
| bytePos, bitPos := ibp>>3, ibp&7 | |
| mask := uint(1<<bitCount) - 1 | |
| ibp += uint(bitCount) | |
| infl.inputBitPos = ibp | |
| n := uint(input[bytePos]) | |
| bytePosEnd := ibp >> 3 | |
| if bytePos < bytePosEnd { | |
| bytePos++ | |
| n |= uint(input[bytePos]) << 8 | |
| if bytePos < bytePosEnd { | |
| bytePos++ | |
| n |= uint(input[bytePos]) << 16 | |
| if bytePos < bytePosEnd { | |
| bytePos++ | |
| n |= uint(input[bytePos]) << 24 | |
| } | |
| } | |
| } | |
| return (n >> bitPos) & mask | |
| } | |
| func (infl *Inflatey) outputCopy(sourceOffset, length uint) { | |
| si, di, edi := sourceOffset, infl.outputPos, infl.outputPos+length | |
| infl.outputPos = edi | |
| o := infl.output | |
| for di < edi { | |
| o[di] = o[si] | |
| di++ | |
| si++ | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment