Skip to content

Instantly share code, notes, and snippets.

@mwgamera
Last active August 29, 2015 14:13
Show Gist options
  • Select an option

  • Save mwgamera/39f86c31467723beb263 to your computer and use it in GitHub Desktop.

Select an option

Save mwgamera/39f86c31467723beb263 to your computer and use it in GitHub Desktop.
#!/usr/bin/env perl
# Examine DEFLATE streams.
# klg, Jan 2015
use strict;
use open IN => ':bytes';
#### Containers ###############################################################
sub extract_zlib { # RFC 1950
(my ($cmf, $flg), local $_) = unpack 'CCa*', shift;
die 'Bad ZLIB header' unless ($cmf * 256 + $flg) % 31 == 0;
printf("ZLIB: CMF=0x%02x FLG=0x%02x (...) ADLER32=0x%08x\n",
$cmf, $flg, unpack 'N', substr $_, length($_)-4);
die 'Unknown compression method' unless ($cmf & 0b1111) == 8;
printf("# Window size: %u, level: %u (%s)\n",
2 ** (8 + ($cmf >> 4)), $flg >> 6, {
0 => 'fastest', 1 => 'fast', 2 => 'default',
3 => 'maximum compression'
}->{$flg >> 6});
if ($flg >> 5 & 1) {
(my $dict, $_) = unpack 'Na*';
printf "# Preset dictionary ID: 0x%08x\n", $dict;
}
substr $_, 0, length($_)-4;
}
sub extract_gzip { # RFC 1952
(my ($id, $cm, $flg, $mtime,
$xfl, $os), local $_) = unpack 'vCCVCCa*', shift;
die 'Bad GZIP header' unless $id == 0x8b1f;
printf("GZIP: FLG=0x%02x MTIME=%u (...) CRC32=0x%08x ISIZE=%u\n",
$flg, $mtime, unpack 'V2', substr $_, length($_)-8);
die 'Unknown compression method' unless $cm == 8;
if ($flg & 4) { # FEXTRA
(my $xtra, $_) = unpack 'v/aa*';
printf "# Extra field: %u bytes\n", length $xtra;
}
if ($flg & 8) { # FNAME
(my $name, $_) = unpack 'Z*a*';
printf "# File name: \"%s\"\n",
$name =~ s/([^ -\[\]-~])/sprintf"\\%03o",ord$1/ger;
}
if ($flg & 16) { # FCOMMENT
(my $com, $_) = unpack 'Z*a*';
printf "# Comment: \"%s\"\n",
$com =~ s/([^ -\[\]-~])/sprintf"\\%03o",ord$1/ger;
}
if ($flg & 2) { # FHCRC
(my $hcrc, $_) = unpack 'va*';
}
substr $_, 0, length($_)-8;
}
sub extract_png { # ISO 15948
(my $magic, local $_) = unpack 'a8a*', shift;
die 'Bad PNG header' unless $magic eq "\x89PNG\r\n\x1a\n";
my $zdat = '';
while (length) {
(my ($type, $data, $crc), $_) = unpack '@!4a4@!0Nx4/aN a*';
if ($type eq 'IHDR') {
my ($w, $h, $b, $c, $z, $f, $i) = unpack 'N2C5', $data;
die 'Invalid parameters' unless $z == 0 && $f == 0;
printf "PNG: %u × %u image, %u-bit %s, %sinterlaced\n",
$w, $h, $b * (2 * (($c%4) == 2) + (!!($c&4)) + 1),
('grayscale',0,'RGB','indexed')[$c%4].($c & 4 ? '+alpha' : ''),
$i ? '' : 'non-';
} elsif ($type eq 'IDAT') {
$zdat .= $data;
} elsif ($type eq 'IEND') {
last;
}
}
extract_zlib($zdat);
}
#### Bit reader ###############################################################
sub Bitreader::new {
my ($cls, $buf) = @_;
bless {
src => $buf,
buf => 0,
len => 0,
pos => 0,
}, $cls;
}
sub Bitreader::_refill {
my $this = shift;
my $l = length $$this{src};
return unless $l;
my $x;
if ($l < 2) {
$x = unpack 'C', $$this{src};
$$this{buf} |= $x << $$this{len};
$$this{len} += 8;
$$this{pos}++;
$$this{src} = ''
} else {
($x, $$this{src}) = unpack 'va*', $$this{src};
$$this{buf} |= $x << $$this{len};
$$this{len} += 16;
$$this{pos} += 2;
}
}
sub Bitreader::eof {
my $this = shift;
!($$this{len} || length $$this{src});
}
sub Bitreader::tell {
my $this = shift;
8 * $$this{pos} - $$this{len};
}
sub Bitreader::peek {
my ($this, $n) = @_;
die 'out of bounds' if $n > 16;
$this->_refill if $$this{len} < $n;
return $$this{buf} & ((1 << $n) - 1);
}
sub Bitreader::read {
my ($this, $n) = @_;
my $x = $this->peek($n);
die 'read past eof' if $n > $$this{len};;
$$this{buf} >>= $n;
$$this{len} -= $n;
return $x;
}
sub Bitreader::bytesync {
my $this = shift;
$this->read($$this{len} % 8);
}
#### Huffman code table #######################################################
sub kraft {
my $s = 0;
$s += $_ for map 1 << (15 - $_), grep $_, @_;
return $s <= 0x8000;
}
sub Huff::new {
my $cls = shift;
my $bit = shift // 9;
bless {
len => $bit,
tab => [(undef) x (1 << $bit)]
}, $cls;
}
sub Huff::new_canon {
my $cls = shift;
die 'not uniquely decodable' unless &kraft;
my ($c, @b, @n) = (0) x 16;
$b[$_]++ for @_;
for (my $b = 1; $b < 16; $b++) {
$c = ($c + $b[$b-1]) << 1;
$n[$b] = $c;
}
my $tab = $cls->new;
for (my $i = 0; $i < @_; $i++) {
my $len = $_[$i];
if ($len) {
my $c = $n[$len] << (16 - $len);
$c = ($c >> 1 & 0x5555) | ($c << 1 & 0xaaaa);
$c = ($c >> 2 & 0x3333) | ($c << 2 & 0xcccc);
$c = ($c >> 4 & 0x0f0f) | ($c << 4 & 0xf0f0);
$c = ($c >> 8 & 0x00ff) | ($c << 8 & 0xff00);
$tab->add_code($c, $len, $i);
$n[$len]++;
}
}
return $tab;
}
sub Huff::add_code {
my ($self, $code, $len, $value) = @_;
my $k = 1 << $len;
die 'assertion failed' if $code & ~($k - 1);
if ($len <= $$self{len}) {
while ($code < @{$$self{tab}}) {
${$$self{tab}}[$code] = [$value, $len];
$code += $k;
}
} else {
my $p = $$self{tab}->[$code&((1<<$$self{len})-1)]
//= Huff->new($len - $$self{len});
$p->add_code($code >> $$self{len}, $len - $$self{len}, $value);
}
}
sub Huff::read_from {
my ($self, $bitr) = @_;
my $q = $$self{tab}->[ $bitr->peek($$self{len}) ];
if ('Huff' eq ref $q) {
$bitr->read($$self{len});
return $q->read_from($bitr);
} else {
$bitr->read($q->[1]);
return $q->[0];
}
}
#### Analyze block of DEFLATE #################################################
my $fltab = Huff->new_canon((8) x 144, (9) x 112, (7) x 24, (8) x 8);
my $fdtab = Huff->new_canon((5) x 32);
my @t_lext = ((0)x 8, map(($_) x 4, 1..5), 0);
my @t_llen = qw(3 4 5 6 7 8 9 10 11 13 15 17 19 23 27 31 35
43 51 59 67 83 99 115 131 163 195 227 258);
my @t_dext = qw(0 0 0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9
10 10 11 11 12 12 13 13);
my @t_dist = qw(1 2 3 4 5 7 9 13 17 25 33 49 65 97 129 193 257 385 513
769 1025 1537 2049 3073 4097 6145 8193 12289 16385 24577);
my @t_dord = qw(16 17 18 0 8 7 9 6 10 5 11 4 12 3 13 2 14 1 15);
sub read_tables {
my $z = shift;
my $hlit = $z->read(5);
my $hdist = $z->read(5);
my $hclen = $z->read(4);
printf "# HLIT=%u HDIST=%u HCLEN=%u\n", $hlit, $hdist, $hclen;
my @len = (0) x 18;
$len[$t_dord[$_]] = $z->read(3) for 0 .. $hclen+4-1;
print "# HC lengths: (@len)\n";
my $hctab = Huff->new_canon(@len);
@len = ();
print "# HC codes:";
while (@len < $hlit + $hdist + 258) {
my $c = $hctab->read_from($z);
print " $c";
if ($c < 16) {
push @len, $c;
} elsif ($c == 16) {
my $x = $z->read(2);
push @len, ($len[$#len]) x (3 + $x);
} elsif ($c == 17) {
my $x = $z->read(3);
push @len, (0) x (3 + $x);
} elsif ($c == 18) {
my $x = $z->read(7);
push @len, (0) x (11 + $x);
}
}
print "\n";
die 'Assertion failed' unless @len == $hlit + $hdist + 258;
print "# Literal lengths: (@len[0 .. $hlit+257-1])\n";
my $hl = Huff->new_canon(@len[0 .. $hlit+257-1]);
print "# Distance lengths: (@len[$hlit+257 .. $#len])\n";
my $hd = Huff->new_canon(@len[$hlit+257 .. $#len]);
($hl, $hd);
}
sub read_block {
my $z = shift;
my $p = $z->tell;
my $fin = $z->read(1);
my $typ = $z->read(2);
printf "(%04x:%u)BLOCK BFINAL=%u BTYPE=%02b (%s)\n",
$p>>3, $p%8,
$fin, $typ, ('no compression', 'fixed Huffman codes',
'dynamic Huffman codes', 'ERROR')[$typ];
die 'Bad BTYPE' if $typ == 3;
if ($typ == 0) {
$z->bytesync;
my $len = $z->read(16);
my $nle = $z->read(16);
die sprintf "mismatch LEN=0x%04x NLEN=0x%04x", $len, $nle
unless 0xffff == ($len ^ $nle);
my $s = '';
for (; $len >= 2; $len -= 2) {
$s .= pack 'v', $z->read(16);
}
$s .= chr $z->read(8) if $len;
printf "%s\n", join $/, map {
sprintf " %-39s %s",
join(' ', unpack '(H4)*'),
tr/\x00-\x1f\x7f-\xff/./r
} unpack '(a16)*', $s;
} else {
my ($ltab, $dtab) = ($fltab, $fdtab);
if ($typ == 2) {
($ltab, $dtab) = read_tables($z);
}
while (!$z->eof) {
$p = $z->tell;
my $c = $ltab->read_from($z);
if ($c < 256) {
printf "(%04x:%u) L %3u <%s>\n", $p>>3, $p%8, $c,
(chr $c) =~ tr/\x00-\x1f\x7f-\xff/./r;
} else {
if ($c == 256) {
printf "(%04x:%u) E 256\n", $p>>3, $p%8;
last;
} else {
$c -= 257;
my $len = $t_llen[$c];
my $lex = $z->read($t_lext[$c]);
my $oc = $dtab->read_from($z);
my $dist = $t_dist[$oc];
my $dex = $z->read($t_dext[$oc]);
printf "(%04x:%u) R %3u(+%u) length=%u; %2u(+%u) offset=%u\n",
$p>>3, $p%8,
$c+257, $lex, $len+$lex,
$oc, $dex, $dist+$dex;
}
}
}
}
return $fin;
}
#### Main #####################################################################
my $extract = sub { @_ };
my %opts = (
-raw => sub { $extract = sub { @_ }; },
-zlib => sub { $extract = \&extract_zlib; },
-gzip => sub { $extract = \&extract_gzip; },
-png => sub { $extract = \&extract_png; },
);
push @ARGV, '-' unless scalar grep !/^-/, @ARGV;
for (@ARGV) {
($opts{-lc s/^-+//r} or
die "$0: Unknown option $_.\n")->(),
next if /^-./;
my $name = /^-$/ ? 'stdin' : $_;
eval {
my $data = do { local (@ARGV, $/) = $_; <> } or exit;
$data = Bitreader->new($extract->($data));
while (!read_block($data)) {}
1;
} or die "$0: $name: $@";
}
1;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment