Skip to content

Instantly share code, notes, and snippets.

@mwgamera
Last active October 13, 2021 21:53
Show Gist options
  • Select an option

  • Save mwgamera/9774095 to your computer and use it in GitHub Desktop.

Select an option

Save mwgamera/9774095 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl
# Analyze GZIP header for anything unusual, -v to print the usual too.
# klg, Dec 2013
use strict;
# Magic number of a gzip member: the ID bytes 0x1f 0x8b as they appear when
# the first two header bytes are unpacked as a little-endian 16-bit value.
use constant GZIP_ID => 0x8b1f;
# Bit masks for the FLG header byte.
use constant {
FLG_FTEXT => (1 << 0),
FLG_FHCRC => (1 << 1),
FLG_FEXTRA => (1 << 2),
FLG_FNAME => (1 << 3),
FLG_FCOMMENT => (1 << 4),
# Everything from bit 5 upwards within one byte, i.e. 0xe0.
FLG_RESERVED => 0xff & (-1 << 5)
};
# Names for the compression method (CM) header byte; any value missing from
# this table is reported as UNKNOWN.
use constant CM => {
8 => 'DEFLATE'
};
# Names for the OS header byte; any value missing from this table is
# reported as UNKNOWN.
use constant OS => {
0 => 'FAT filesystem (MS-DOS, OS/2, NT/Win32)',
1 => 'Amiga',
2 => 'VMS (or OpenVMS)',
3 => 'Unix',
4 => 'VM/CMS',
5 => 'Atari TOS',
6 => 'HPFS filesystem (OS/2, NT)',
7 => 'Macintosh',
8 => 'Z-System',
9 => 'CP/M',
10 => 'TOPS-20',
11 => 'NTFS filesystem (NT)',
12 => 'QDOS',
13 => 'Acorn RISCOS',
14 => 'VFAT file system (Win95, NT)',
15 => 'MVS (code also taken for PRIMOS)',
16 => 'BeOS (BeBox or PowerMac)',
17 => 'Tandem/NSK',
18 => 'THEOS',
19 => 'Mac OS/X (Darwin)',
30 => 'AtheOS',
255 => 'unknown'
};
# Descriptions for the extra flags (XFL) header byte.
use constant XFL => {
0 => 'default compression',
2 => 'maximum compression',
4 => 'fastest algorithm',
};
# Descriptions of known extra-field subfield identifiers.  Keys are the two
# ID bytes read as a big-endian 16-bit number (unpack 'n'), so the first ID
# byte is the high byte: 0x4753 is the byte pair "GS".
use constant EXTRA => {
0x4143 => 'Acorn RISC OS/BBC MOS file type information',
0x4170 => 'Apollo file type information (RFC 1952)',
0x4243 => 'Compressed size (BZGF)',
0x4753 => 'Public-key signature (gzsig)',
0x4947 => 'Member size (mgzip)',
0x4b4e => 'KeyNote assertion (RFC 2704)',
0x4d63 => 'Macintosh info (Type and Creator values)',
0x5241 => 'Random Access (dictzip)',
0x524f => 'Acorn Risc OS file type information',
0x6170 => 'Append information (gzlog)',
0x6370 => 'file compressed by cpio',
0x736c => 'WARC/0.9 skip-lengths',
};
# Placeholder printed when a table lookup misses; wrapped in the ANSI
# escape for colour 31 (red on common terminals) followed by a reset.
use constant UNKNOWN => "\033[31mUNKNOWN\033[0m";
# Wrap a string in the ANSI SGR 1 (bold/bright) escape sequence followed by
# a reset, so it stands out on a terminal.
#
#   $text  - the string to emphasise
# Returns the wrapped string.
#
# The argument is kept in a lexical: assigning it to the global $_ would
# clobber the caller's $_, which matters when this is called from inside a
# substitution that is itself operating on $_.
sub hlight($) {
    my ($text) = @_;
    return "\033[1m$text\033[0m";
}
# Render a raw, NUL-terminated header string (file name or comment) as a
# double-quoted literal that is safe to print on a terminal.
#
#   $raw  - byte string as read from the file; one trailing NUL is dropped
#
# Returns the quoted text as UTF-8 encoded bytes.  The result is prefixed
# with "(UTF-8)" when the input contained non-ASCII bytes that decoded
# successfully as UTF-8; otherwise bytes are treated as Latin-1.  Control
# characters (C0, DEL, C1) as well as the double quote and the backslash
# are replaced by highlighted backslash escapes, so the output is
# unambiguous.
sub binpstr($) {
    my $str = shift // '';
    # Shield the caller's $_ from any helper that assigns to it.
    local $_;

    $str =~ s{\0$}{};

    # Cheap shape check: not pure ASCII, but every byte fits a UTF-8
    # lead/continuation pattern.
    my $utf8 = $str !~ m/^[\0-\x7f]*$/ && $str =~ m{
        ^(
            [\0-\x7f] |
            [\xc0-\xdf][\x80-\xbf] |
            [\xe0-\xef][\x80-\xbf]{2} |
            [\xf0-\xf7][\x80-\xbf]{3} |
            [\xf8-\xfb][\x80-\xbf]{4} |
            [\xfc-\xfd][\x80-\xbf]{5}
        )+$
    }x;
    # The pattern above also admits malformed sequences (e.g. overlong
    # forms).  utf8::decode leaves the string untouched and returns false
    # for those, so only claim UTF-8 when the decode really happened.
    $utf8 = utf8::decode($str) if $utf8;

    my %escape = (
        "\0" => '\0',
        "\a" => '\a',
        "\e" => '\e',
        "\b" => '\b',
        "\f" => '\f',
        "\n" => '\n',
        "\r" => '\r',
        "\t" => '\t',
        "\"" => '\"',
        "\\" => '\\\\',
    );
    # The quote and backslash are part of the class so that their table
    # entries are actually reachable; anything else falls back to octal.
    $str =~ s{(["\\\0-\x1f\x7f-\x9f])}{
        hlight($escape{$1} // sprintf('\\%03o', ord $1))
    }ge;

    utf8::encode($str);
    return ($utf8 ? '(UTF-8)' : '') . "\"$str\"";
}
# List the positions of the bits that are set in an integer.
#
#   $mask  - integer whose bits 0..63 are examined
# Returns the set bit positions in ascending order joined by '+'
# (an empty string when no bit is set).
sub bflags($) {
    my ($mask) = @_;
    my @set_bits;
    for my $bit (0 .. 63) {
        push @set_bits, $bit if $mask & (1 << $bit);
    }
    return join '+', @set_bits;
}
# Format a byte string as a hex dump, one output line per row.
#
#   $data  - the bytes to dump
#   %opts  - optional settings:
#       prefix => string put in front of every line (default '')
#       group  => number of bytes per space-separated hex group (default 2);
#                 the historical misspelling "grou" is still accepted
#       cols   => bytes per row; when absent it is derived from
#                 $ENV{COLUMNS} (default 80) minus the prefix length
#
# Each line is: prefix, hex groups padded to a fixed width, a space, and
# the same bytes as text with everything at or below 0x20 and at or above
# 0x7f shown as '.'.  Returns all lines concatenated ('' for empty input).
sub hexdump($%) {
    my ($data, %opts) = @_;
    my $prefix = $opts{prefix} // '';
    # The option used to be looked up only under the typo "grou", so a
    # caller passing "group" was silently ignored.
    my $group  = $opts{group} || $opts{grou} || 2;
    my $cols   = $opts{cols};
    unless ($cols) {
        # Each group costs 2*$group hex digits + 1 separator + $group text
        # characters; 3 columns are reserved for the gap before the text.
        $cols = ($ENV{COLUMNS} || 80) - length $prefix;
        $cols = int(($cols - 3) / (3 * $group + 1)) * $group;
    }
    $cols = $group if $cols < $group;

    my $hex_width = $cols / $group * (2 * $group + 1);
    my $hex_group = '(a' . (2 * $group) . ')*';
    return join '', map {
        sprintf "%s%-*s %s\n", $prefix, $hex_width,
            join(' ', unpack $hex_group, unpack 'H*', $_),
            $_ =~ tr/\x00-\x20\x7f-\xff/./r
    } unpack "(a$cols)*", $data;
}
# ---- main program -------------------------------------------------------
# Usage: [-v] [FILE ...]
# Examines the gzip header of every FILE ('-' or no argument: standard
# input) and prints whatever is unusual about it; with -v as the first
# argument the ordinary fields are printed as well.  Problems with a file
# are reported on STDERR and processing continues with the next file.
use POSIX 'strftime';

my $verbose = @ARGV && $ARGV[0] eq '-v';
shift @ARGV if $verbose;
@ARGV = ('-') unless @ARGV;

for my $file (@ARGV) {
    my $fn = $file eq '-' ? '' : "$file: ";

    # Three-argument open: a file name taken from the command line must
    # never be able to select the open mode or start a command (as names
    # like ">x" or "cmd |" would with the two-argument form).  '-' keeps
    # its meaning of standard input.
    my $fh;
    my $opened = $file eq '-'
        ? open($fh, '<&', \*STDIN)
        : open($fh, '<', $file);
    unless ($opened) {
        print STDERR "${fn}$!\n";
        next;
    }
    binmode $fh;

    # Fixed 10-byte header: ID1 ID2 CM FLG MTIME(4) XFL OS.
    my $head = '';
    read $fh, $head, 10;
    my ($id, $cm, $flg, $mtime, $xfl, $os) = unpack 'vCCVCC', $head;
    unless (defined $id && $id == GZIP_ID) {
        print STDERR "${fn}not in gzip format\n";
        next;
    }
    if (length($head) < 10) {
        # Magic matched but the rest is missing; analysing undefined
        # fields would only print nonsense.
        print STDERR "${fn}truncated gzip header\n";
        next;
    }

    # Optional extra field: 16-bit little-endian length, then the data.
    my $xtra = '';
    if ($flg & FLG_FEXTRA) {
        my $xlen = '';
        read $fh, $xlen, 2;
        read $fh, $xtra, unpack('v', $xlen) // 0;
    }
    # Optional NUL-terminated original file name (shown only with -v).
    if ($flg & FLG_FNAME) {
        local $/ = "\0";
        my $name = <$fh>;
        printf "%sNAME : %s\n", $fn, binpstr($name) if $verbose;
    }
    # Optional NUL-terminated comment (always shown).
    if ($flg & FLG_FCOMMENT) {
        local $/ = "\0";
        my $comment = <$fh>;
        printf "%sCOMMENT: %s\n", $fn, binpstr($comment);
    }
    close $fh;

    if ($verbose && $mtime) {
        printf "%sMTIME : %s\n", $fn,
            strftime('%Y-%m-%d %H:%M:%S Z', gmtime $mtime);
    }

    my $cm_name = CM->{$cm};
    if ($verbose || uc($cm_name // '') ne 'DEFLATE') {
        printf "%sMETHOD : (%u) %s\n", $fn, $cm, $cm_name // UNKNOWN;
    }

    my $os_name = OS->{$os};
    my $os_is_usual = uc($os_name // '') eq 'UNIX'
                   || uc($os_name // '') eq 'UNKNOWN';
    if ($verbose || !$os_is_usual) {
        printf "%sSYSTEM : (%u) %s\n", $fn, $os, $os_name // UNKNOWN;
    }

    if ($flg & FLG_RESERVED) {
        printf "%sFLG : %s reserved bits set\n", $fn,
            bflags($flg & FLG_RESERVED);
    }
    if ($verbose) {
        printf "%sFTEXT : %s\n", $fn, ($flg & FLG_FTEXT) ? 'yes' : 'no';
        printf "%sFHCRC : %s\n", $fn, ($flg & FLG_FHCRC) ? 'yes' : 'no';
        printf "%sXFL : %s\n", $fn, XFL->{$xfl} // UNKNOWN;
    }

    # Walk the subfields: 2 ID bytes, 16-bit little-endian length, data.
    # ">= 4" so that a final subfield with no data (exactly 4 bytes) is
    # still recognised instead of being reported as garbage.
    while (length($xtra) >= 4) {
        my ($type, $len) = unpack 'nv', $xtra;
        printf "%sEXTRA : (0x%04x) %s (%u bytes)\n",
            $fn, $type, (EXTRA->{$type} // UNKNOWN), $len;
        if ($verbose) {
            print hexdump(substr($xtra, 4, $len),
                prefix => ' ' x (7 + length($fn)) . '> ');
        }
        # A subfield claiming more data than is left: stop here and leave
        # $xtra non-empty so that it is flagged below.
        last if 4 + $len > length($xtra);
        $xtra = substr $xtra, 4 + $len;
    }
    printf "%sEXTRA : non-conformant extra field\n", $fn if length $xtra;
}
exit;
@mwgamera
Copy link
Copy Markdown
Author

Note that "GS" (0x47, 0x53) is the correct identifier for the extra field of gzsig. The official registry in format.txt is in error. The error is of a particularly funny kind as the registry gives (0x1D, 0x53) and 0x1D is an ASCII group separator (abbreviated GS). But the registry also gives a link to an old source tarball which clearly used the first two bytes of "GS" string in C.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment