Last active
August 29, 2015 14:13
-
-
Save mwgamera/39f86c31467723beb263 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env perl | |
| # Examine DEFLATE streams. | |
| # klg, Jan 2015 | |
| use strict; | |
| use open IN => ':bytes'; | |
| #### Containers ############################################################### | |
| sub extract_zlib { # RFC 1950 | |
| (my ($cmf, $flg), local $_) = unpack 'CCa*', shift; | |
| die 'Bad ZLIB header' unless ($cmf * 256 + $flg) % 31 == 0; | |
| printf("ZLIB: CMF=0x%02x FLG=0x%02x (...) ADLER32=0x%08x\n", | |
| $cmf, $flg, unpack 'N', substr $_, length($_)-4); | |
| die 'Unknown compression method' unless ($cmf & 0b1111) == 8; | |
| printf("# Window size: %u, level: %u (%s)\n", | |
| 2 ** (8 + ($cmf >> 4)), $flg >> 6, { | |
| 0 => 'fastest', 1 => 'fast', 2 => 'default', | |
| 3 => 'maximum compression' | |
| }->{$flg >> 6}); | |
| if ($flg >> 5 & 1) { | |
| (my $dict, $_) = unpack 'Na*'; | |
| printf "# Preset dictionary ID: 0x%08x\n", $dict; | |
| } | |
| substr $_, 0, length($_)-4; | |
| } | |
| sub extract_gzip { # RFC 1952 | |
| (my ($id, $cm, $flg, $mtime, | |
| $xfl, $os), local $_) = unpack 'vCCVCCa*', shift; | |
| die 'Bad GZIP header' unless $id == 0x8b1f; | |
| printf("GZIP: FLG=0x%02x MTIME=%u (...) CRC32=0x%08x ISIZE=%u\n", | |
| $flg, $mtime, unpack 'V2', substr $_, length($_)-8); | |
| die 'Unknown compression method' unless $cm == 8; | |
| if ($flg & 4) { # FEXTRA | |
| (my $xtra, $_) = unpack 'v/aa*'; | |
| printf "# Extra field: %u bytes\n", length $xtra; | |
| } | |
| if ($flg & 8) { # FNAME | |
| (my $name, $_) = unpack 'Z*a*'; | |
| printf "# File name: \"%s\"\n", | |
| $name =~ s/([^ -\[\]-~])/sprintf"\\%03o",ord$1/ger; | |
| } | |
| if ($flg & 16) { # FCOMMENT | |
| (my $com, $_) = unpack 'Z*a*'; | |
| printf "# Comment: \"%s\"\n", | |
| $com =~ s/([^ -\[\]-~])/sprintf"\\%03o",ord$1/ger; | |
| } | |
| if ($flg & 2) { # FHCRC | |
| (my $hcrc, $_) = unpack 'va*'; | |
| } | |
| substr $_, 0, length($_)-8; | |
| } | |
| sub extract_png { # ISO 15948 | |
| (my $magic, local $_) = unpack 'a8a*', shift; | |
| die 'Bad PNG header' unless $magic eq "\x89PNG\r\n\x1a\n"; | |
| my $zdat = ''; | |
| while (length) { | |
| (my ($type, $data, $crc), $_) = unpack '@!4a4@!0Nx4/aN a*'; | |
| if ($type eq 'IHDR') { | |
| my ($w, $h, $b, $c, $z, $f, $i) = unpack 'N2C5', $data; | |
| die 'Invalid parameters' unless $z == 0 && $f == 0; | |
| printf "PNG: %u × %u image, %u-bit %s, %sinterlaced\n", | |
| $w, $h, $b * (2 * (($c%4) == 2) + (!!($c&4)) + 1), | |
| ('grayscale',0,'RGB','indexed')[$c%4].($c & 4 ? '+alpha' : ''), | |
| $i ? '' : 'non-'; | |
| } elsif ($type eq 'IDAT') { | |
| $zdat .= $data; | |
| } elsif ($type eq 'IEND') { | |
| last; | |
| } | |
| } | |
| extract_zlib($zdat); | |
| } | |
| #### Bit reader ############################################################### | |
| sub Bitreader::new { | |
| my ($cls, $buf) = @_; | |
| bless { | |
| src => $buf, | |
| buf => 0, | |
| len => 0, | |
| pos => 0, | |
| }, $cls; | |
| } | |
| sub Bitreader::_refill { | |
| my $this = shift; | |
| my $l = length $$this{src}; | |
| return unless $l; | |
| my $x; | |
| if ($l < 2) { | |
| $x = unpack 'C', $$this{src}; | |
| $$this{buf} |= $x << $$this{len}; | |
| $$this{len} += 8; | |
| $$this{pos}++; | |
| $$this{src} = '' | |
| } else { | |
| ($x, $$this{src}) = unpack 'va*', $$this{src}; | |
| $$this{buf} |= $x << $$this{len}; | |
| $$this{len} += 16; | |
| $$this{pos} += 2; | |
| } | |
| } | |
| sub Bitreader::eof { | |
| my $this = shift; | |
| !($$this{len} || length $$this{src}); | |
| } | |
| sub Bitreader::tell { | |
| my $this = shift; | |
| 8 * $$this{pos} - $$this{len}; | |
| } | |
| sub Bitreader::peek { | |
| my ($this, $n) = @_; | |
| die 'out of bounds' if $n > 16; | |
| $this->_refill if $$this{len} < $n; | |
| return $$this{buf} & ((1 << $n) - 1); | |
| } | |
| sub Bitreader::read { | |
| my ($this, $n) = @_; | |
| my $x = $this->peek($n); | |
| die 'read past eof' if $n > $$this{len};; | |
| $$this{buf} >>= $n; | |
| $$this{len} -= $n; | |
| return $x; | |
| } | |
| sub Bitreader::bytesync { | |
| my $this = shift; | |
| $this->read($$this{len} % 8); | |
| } | |
| #### Huffman code table ####################################################### | |
| sub kraft { | |
| my $s = 0; | |
| $s += $_ for map 1 << (15 - $_), grep $_, @_; | |
| return $s <= 0x8000; | |
| } | |
| sub Huff::new { | |
| my $cls = shift; | |
| my $bit = shift // 9; | |
| bless { | |
| len => $bit, | |
| tab => [(undef) x (1 << $bit)] | |
| }, $cls; | |
| } | |
| sub Huff::new_canon { | |
| my $cls = shift; | |
| die 'not uniquely decodable' unless &kraft; | |
| my ($c, @b, @n) = (0) x 16; | |
| $b[$_]++ for @_; | |
| for (my $b = 1; $b < 16; $b++) { | |
| $c = ($c + $b[$b-1]) << 1; | |
| $n[$b] = $c; | |
| } | |
| my $tab = $cls->new; | |
| for (my $i = 0; $i < @_; $i++) { | |
| my $len = $_[$i]; | |
| if ($len) { | |
| my $c = $n[$len] << (16 - $len); | |
| $c = ($c >> 1 & 0x5555) | ($c << 1 & 0xaaaa); | |
| $c = ($c >> 2 & 0x3333) | ($c << 2 & 0xcccc); | |
| $c = ($c >> 4 & 0x0f0f) | ($c << 4 & 0xf0f0); | |
| $c = ($c >> 8 & 0x00ff) | ($c << 8 & 0xff00); | |
| $tab->add_code($c, $len, $i); | |
| $n[$len]++; | |
| } | |
| } | |
| return $tab; | |
| } | |
| sub Huff::add_code { | |
| my ($self, $code, $len, $value) = @_; | |
| my $k = 1 << $len; | |
| die 'assertion failed' if $code & ~($k - 1); | |
| if ($len <= $$self{len}) { | |
| while ($code < @{$$self{tab}}) { | |
| ${$$self{tab}}[$code] = [$value, $len]; | |
| $code += $k; | |
| } | |
| } else { | |
| my $p = $$self{tab}->[$code&((1<<$$self{len})-1)] | |
| //= Huff->new($len - $$self{len}); | |
| $p->add_code($code >> $$self{len}, $len - $$self{len}, $value); | |
| } | |
| } | |
| sub Huff::read_from { | |
| my ($self, $bitr) = @_; | |
| my $q = $$self{tab}->[ $bitr->peek($$self{len}) ]; | |
| if ('Huff' eq ref $q) { | |
| $bitr->read($$self{len}); | |
| return $q->read_from($bitr); | |
| } else { | |
| $bitr->read($q->[1]); | |
| return $q->[0]; | |
| } | |
| } | |
| #### Analyze block of DEFLATE ################################################# | |
| my $fltab = Huff->new_canon((8) x 144, (9) x 112, (7) x 24, (8) x 8); | |
| my $fdtab = Huff->new_canon((5) x 32); | |
| my @t_lext = ((0)x 8, map(($_) x 4, 1..5), 0); | |
| my @t_llen = qw(3 4 5 6 7 8 9 10 11 13 15 17 19 23 27 31 35 | |
| 43 51 59 67 83 99 115 131 163 195 227 258); | |
| my @t_dext = qw(0 0 0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 | |
| 10 10 11 11 12 12 13 13); | |
| my @t_dist = qw(1 2 3 4 5 7 9 13 17 25 33 49 65 97 129 193 257 385 513 | |
| 769 1025 1537 2049 3073 4097 6145 8193 12289 16385 24577); | |
| my @t_dord = qw(16 17 18 0 8 7 9 6 10 5 11 4 12 3 13 2 14 1 15); | |
| sub read_tables { | |
| my $z = shift; | |
| my $hlit = $z->read(5); | |
| my $hdist = $z->read(5); | |
| my $hclen = $z->read(4); | |
| printf "# HLIT=%u HDIST=%u HCLEN=%u\n", $hlit, $hdist, $hclen; | |
| my @len = (0) x 18; | |
| $len[$t_dord[$_]] = $z->read(3) for 0 .. $hclen+4-1; | |
| print "# HC lengths: (@len)\n"; | |
| my $hctab = Huff->new_canon(@len); | |
| @len = (); | |
| print "# HC codes:"; | |
| while (@len < $hlit + $hdist + 258) { | |
| my $c = $hctab->read_from($z); | |
| print " $c"; | |
| if ($c < 16) { | |
| push @len, $c; | |
| } elsif ($c == 16) { | |
| my $x = $z->read(2); | |
| push @len, ($len[$#len]) x (3 + $x); | |
| } elsif ($c == 17) { | |
| my $x = $z->read(3); | |
| push @len, (0) x (3 + $x); | |
| } elsif ($c == 18) { | |
| my $x = $z->read(7); | |
| push @len, (0) x (11 + $x); | |
| } | |
| } | |
| print "\n"; | |
| die 'Assertion failed' unless @len == $hlit + $hdist + 258; | |
| print "# Literal lengths: (@len[0 .. $hlit+257-1])\n"; | |
| my $hl = Huff->new_canon(@len[0 .. $hlit+257-1]); | |
| print "# Distance lengths: (@len[$hlit+257 .. $#len])\n"; | |
| my $hd = Huff->new_canon(@len[$hlit+257 .. $#len]); | |
| ($hl, $hd); | |
| } | |
| sub read_block { | |
| my $z = shift; | |
| my $p = $z->tell; | |
| my $fin = $z->read(1); | |
| my $typ = $z->read(2); | |
| printf "(%04x:%u)BLOCK BFINAL=%u BTYPE=%02b (%s)\n", | |
| $p>>3, $p%8, | |
| $fin, $typ, ('no compression', 'fixed Huffman codes', | |
| 'dynamic Huffman codes', 'ERROR')[$typ]; | |
| die 'Bad BTYPE' if $typ == 3; | |
| if ($typ == 0) { | |
| $z->bytesync; | |
| my $len = $z->read(16); | |
| my $nle = $z->read(16); | |
| die sprintf "mismatch LEN=0x%04x NLEN=0x%04x", $len, $nle | |
| unless 0xffff == ($len ^ $nle); | |
| my $s = ''; | |
| for (; $len >= 2; $len -= 2) { | |
| $s .= pack 'v', $z->read(16); | |
| } | |
| $s .= chr $z->read(8) if $len; | |
| printf "%s\n", join $/, map { | |
| sprintf " %-39s %s", | |
| join(' ', unpack '(H4)*'), | |
| tr/\x00-\x1f\x7f-\xff/./r | |
| } unpack '(a16)*', $s; | |
| } else { | |
| my ($ltab, $dtab) = ($fltab, $fdtab); | |
| if ($typ == 2) { | |
| ($ltab, $dtab) = read_tables($z); | |
| } | |
| while (!$z->eof) { | |
| $p = $z->tell; | |
| my $c = $ltab->read_from($z); | |
| if ($c < 256) { | |
| printf "(%04x:%u) L %3u <%s>\n", $p>>3, $p%8, $c, | |
| (chr $c) =~ tr/\x00-\x1f\x7f-\xff/./r; | |
| } else { | |
| if ($c == 256) { | |
| printf "(%04x:%u) E 256\n", $p>>3, $p%8; | |
| last; | |
| } else { | |
| $c -= 257; | |
| my $len = $t_llen[$c]; | |
| my $lex = $z->read($t_lext[$c]); | |
| my $oc = $dtab->read_from($z); | |
| my $dist = $t_dist[$oc]; | |
| my $dex = $z->read($t_dext[$oc]); | |
| printf "(%04x:%u) R %3u(+%u) length=%u; %2u(+%u) offset=%u\n", | |
| $p>>3, $p%8, | |
| $c+257, $lex, $len+$lex, | |
| $oc, $dex, $dist+$dex; | |
| } | |
| } | |
| } | |
| } | |
| return $fin; | |
| } | |
| #### Main ##################################################################### | |
| my $extract = sub { @_ }; | |
| my %opts = ( | |
| -raw => sub { $extract = sub { @_ }; }, | |
| -zlib => sub { $extract = \&extract_zlib; }, | |
| -gzip => sub { $extract = \&extract_gzip; }, | |
| -png => sub { $extract = \&extract_png; }, | |
| ); | |
| push @ARGV, '-' unless scalar grep !/^-/, @ARGV; | |
| for (@ARGV) { | |
| ($opts{-lc s/^-+//r} or | |
| die "$0: Unknown option $_.\n")->(), | |
| next if /^-./; | |
| my $name = /^-$/ ? 'stdin' : $_; | |
| eval { | |
| my $data = do { local (@ARGV, $/) = $_; <> } or exit; | |
| $data = Bitreader->new($extract->($data)); | |
| while (!read_block($data)) {} | |
| 1; | |
| } or die "$0: $name: $@"; | |
| } | |
| 1; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment