Last active
October 13, 2021 21:53
-
-
Save mwgamera/9774095 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/perl | |
| # Analyze GZIP header for anything unusual, -v to print the usual too. | |
| # klg, Dec 2013 | |
| use strict; | |
# Magic number in the first two header bytes (0x1f, 0x8b), expressed as the
# 16-bit little-endian word they form.
use constant GZIP_ID => 0x8b1f;

# Bit masks for the FLG header byte.
use constant {
    FLG_FTEXT    => 0x01,
    FLG_FHCRC    => 0x02,
    FLG_FEXTRA   => 0x04,
    FLG_FNAME    => 0x08,
    FLG_FCOMMENT => 0x10,
    FLG_RESERVED => 0xe0,    # bits 5..7
};

# Compression method codes (CM header byte) and their names.
use constant CM => {
    8 => 'DEFLATE',
};

# Originating system codes (OS header byte) and their names.
use constant OS => {
    0   => 'FAT filesystem (MS-DOS, OS/2, NT/Win32)',
    1   => 'Amiga',
    2   => 'VMS (or OpenVMS)',
    3   => 'Unix',
    4   => 'VM/CMS',
    5   => 'Atari TOS',
    6   => 'HPFS filesystem (OS/2, NT)',
    7   => 'Macintosh',
    8   => 'Z-System',
    9   => 'CP/M',
    10  => 'TOPS-20',
    11  => 'NTFS filesystem (NT)',
    12  => 'QDOS',
    13  => 'Acorn RISCOS',
    14  => 'VFAT file system (Win95, NT)',
    15  => 'MVS (code also taken for PRIMOS)',
    16  => 'BeOS (BeBox or PowerMac)',
    17  => 'Tandem/NSK',
    18  => 'THEOS',
    19  => 'Mac OS/X (Darwin)',
    30  => 'AtheOS',
    255 => 'unknown',
};

# Extra flags (XFL header byte) and their meaning.
use constant XFL => {
    0 => 'default compression',
    2 => 'maximum compression',
    4 => 'fastest algorithm',
};

# Known extra-field subfield identifiers.  The key is the two ID bytes read
# as a big-endian 16-bit number, so e.g. "GS" is 0x4753.
use constant EXTRA => {
    0x4143 => 'Acorn RISC OS/BBC MOS file type information',
    0x4170 => 'Apollo file type information (RFC 1952)',
    0x4243 => 'Compressed size (BZGF)',
    0x4753 => 'Public-key signature (gzsig)',
    0x4947 => 'Member size (mgzip)',
    0x4b4e => 'KeyNote assertion (RFC 2704)',
    0x4d63 => 'Macintosh info (Type and Creator values)',
    0x5241 => 'Random Access (dictzip)',
    0x524f => 'Acorn Risc OS file type information',
    0x6170 => 'Append information (gzlog)',
    0x6370 => 'file compressed by cpio',
    0x736c => 'WARC/0.9 skip-lengths',
};

# Placeholder printed (in red) for any code missing from the tables above.
use constant UNKNOWN => "\033[31mUNKNOWN\033[0m";
# Wrap a string in the ANSI "bold" escape sequence, followed by a reset.
#
# Takes one scalar and returns the decorated string.  The argument is kept
# in a lexical so that the caller's $_ is left untouched; this matters
# because the function is invoked from inside an s///e replacement that is
# itself operating on a string.
sub hlight($) {
    my ($text) = @_;
    return "\033[1m$text\033[0m";
}
# Render a NUL-terminated byte string from the header as a quoted,
# printable string.
#
# - One trailing NUL (the field terminator) is removed.
# - If the bytes contain something outside ASCII and decode as UTF-8, the
#   string is decoded and the result is prefixed with "(UTF-8)"; otherwise
#   the bytes are treated as single-byte characters.
# - C0/C1 control characters and DEL are replaced by a highlighted escape:
#   a mnemonic such as \n where one exists, a three-digit octal code
#   otherwise.  Double quotes and backslashes are outside the escaped class
#   and are passed through unchanged.
# - The result is returned as UTF-8 encoded bytes wrapped in double quotes.
#
# An undefined argument is rendered as an empty quoted string.
sub binpstr($) {
    my $str = shift // '';

    # Strip exactly the terminating NUL, anchored at the true end of string.
    $str =~ s{\0\z}{};

    # Cheap structural pre-check for UTF-8; only worth doing when the string
    # has at least one non-ASCII byte.
    my $looks_utf8 = $str =~ m/[^\0-\x7f]/ && $str =~ m{
        \A(?:
            [\0-\x7f]                  |
            [\xc0-\xdf][\x80-\xbf]     |
            [\xe0-\xef][\x80-\xbf]{2}  |
            [\xf0-\xf7][\x80-\xbf]{3}  |
            [\xf8-\xfb][\x80-\xbf]{4}  |
            [\xfc-\xfd][\x80-\xbf]{5}
        )+\z
    }x;

    # The pattern above is deliberately loose (it lets overlong forms
    # through), so only claim UTF-8 when the decoder actually accepted the
    # bytes.  On failure utf8::decode leaves the string untouched.
    my $is_utf8 = $looks_utf8 && utf8::decode($str) ? 1 : 0;

    # Mnemonic escapes; anything else in the control range falls back to octal.
    my %mnemonic = (
        "\0" => '\0',
        "\a" => '\a',
        "\e" => '\e',
        "\b" => '\b',
        "\f" => '\f',
        "\n" => '\n',
        "\r" => '\r',
        "\t" => '\t',
    );
    $str =~ s{([\0-\x1f\x7f-\x9f])}{
        hlight($mnemonic{$1} // sprintf('\\%03o', ord $1))
    }ge;

    # Back to bytes for output; undecoded high bytes become their Latin-1
    # code points encoded as UTF-8.
    utf8::encode($str);
    return ($is_utf8 ? '(UTF-8)' : '') . "\"$str\"";
}
# List the positions of the bits that are set in a number.
#
# Returns the zero-based indices of all set bits among bits 0..63, in
# ascending order, joined with '+' (an empty string when no bit is set).
sub bflags($) {
    my ($mask) = @_;
    my @set_bits;
    for my $bit (0 .. 63) {
        push @set_bits, $bit if $mask & (1 << $bit);
    }
    return join '+', @set_bits;
}
# Format a byte string as hex-dump lines.
#
# Arguments: the data, followed by optional key/value pairs:
#   prefix - string put in front of every line (default: empty)
#   group  - number of bytes per hex group (default: 2); the historical
#            misspelling "grou" is still accepted
#   cols   - number of bytes per line; when absent it is derived from
#            $ENV{COLUMNS} (80 if unset) minus the prefix length
#
# Each line holds the prefix, the space-separated hex groups padded to a
# fixed width, and the same bytes as text with every byte outside
# 0x21..0x7e shown as '.'.  Returns all lines as one string; empty or
# undefined data yields an empty string.
sub hexdump($%) {
    my ($data, %opts) = @_;
    $data //= '';

    my $prefix = $opts{prefix} // '';
    my $group  = $opts{group} || $opts{grou} || 2;
    my $cols   = $opts{cols};
    unless ($cols) {
        # Fit as many whole groups as the terminal width allows.
        my $avail = ($ENV{COLUMNS} || 80) - length $prefix;
        $cols = int(($avail - 3) / (3 * $group + 1)) * $group;
    }
    $cols = $group if $cols < $group;    # always show at least one group

    # Width of the hex column: every group is 2*group digits plus a space.
    my $hex_width = $cols / $group * (2 * $group + 1);

    my @lines;
    for my $chunk (unpack "(a$cols)*", $data) {
        my $hex  = join ' ', unpack('(a' . (2 * $group) . ')*', unpack('H*', $chunk));
        my $text = $chunk =~ tr/\x00-\x20\x7f-\xff/./r;
        push @lines, sprintf "%s%-*s %s\n", $prefix, $hex_width, $hex, $text;
    }
    return join '', @lines;
}
use POSIX 'strftime';

# Command line: an optional leading -v turns on verbose output; every other
# argument is a file to inspect.  With no file arguments, or for the
# argument '-', standard input is read.
my $verbose = @ARGV && $ARGV[0] eq '-v';
shift @ARGV if $verbose;
push @ARGV, '-' unless @ARGV;

for my $file (@ARGV) {
    # Every output line is prefixed with the file name, except for stdin.
    my $fn = $file eq '-' ? '' : "$file: ";

    # Three-argument open: the name is always taken literally, so a name
    # such as ">x" or "cmd |" cannot change the mode or spawn a command.
    # '-' is therefore mapped to STDIN explicitly.
    my $fh;
    if ($file eq '-') {
        $fh = \*STDIN;
    }
    elsif (!open $fh, '<', $file) {
        print STDERR "${fn}$!\n";
        next;
    }
    binmode $fh;

    # Fixed 10-byte header: ID (2), CM, FLG, MTIME (4), XFL, OS.
    my $header = '';
    my $got = read $fh, $header, 10;
    if (!defined $got || $got < 10) {
        print STDERR "${fn}not in gzip format\n";
        next;
    }
    my ($id, $cm, $flg, $mtime, $xfl, $os) = unpack 'vCCVCC', $header;
    if ($id != GZIP_ID) {
        print STDERR "${fn}not in gzip format\n";
        next;
    }

    # Optional extra field: 16-bit little-endian length, then that many bytes.
    my $xtra = '';
    if ($flg & FLG_FEXTRA) {
        my $raw_len = '';
        read $fh, $raw_len, 2;
        my $xlen = unpack('v', $raw_len // '') // 0;
        read $fh, $xtra, $xlen;
        $xtra //= '';
    }

    # Optional NUL-terminated original file name (shown only with -v).
    if ($flg & FLG_FNAME) {
        local $/ = "\0";
        my $name = <$fh>;
        printf "%sNAME : %s\n", $fn, binpstr($name) if $verbose;
    }

    # Optional NUL-terminated comment (always shown).
    if ($flg & FLG_FCOMMENT) {
        local $/ = "\0";
        my $comment = <$fh>;
        printf "%sCOMMENT: %s\n", $fn, binpstr($comment);
    }

    # Nothing past the header is needed.
    close $fh unless $file eq '-';

    if ($verbose && $mtime) {
        printf "%sMTIME : %s\n", $fn,
            strftime('%Y-%m-%d %H:%M:%S Z', gmtime $mtime);
    }

    # Compression method: shown when it is anything other than DEFLATE.
    if ($verbose || uc(CM->{$cm} // '') ne 'DEFLATE') {
        printf "%sMETHOD : (%u) %s\n", $fn, $cm, CM->{$cm} // UNKNOWN;
    }

    # Originating system: "Unix" and "unknown" are considered unremarkable.
    my $system = uc(OS->{$os} // '');
    if ($verbose || ($system ne 'UNIX' && $system ne 'UNKNOWN')) {
        printf "%sSYSTEM : (%u) %s\n", $fn, $os, OS->{$os} // UNKNOWN;
    }

    if ($flg & FLG_RESERVED) {
        printf "%sFLG : %s reserved bits set\n", $fn, bflags($flg & FLG_RESERVED);
    }

    if ($verbose) {
        printf "%sFTEXT : %s\n", $fn, $flg & FLG_FTEXT ? 'yes' : 'no';
        printf "%sFHCRC : %s\n", $fn, $flg & FLG_FHCRC ? 'yes' : 'no';
        printf "%sXFL : %s\n", $fn, XFL->{$xfl} // UNKNOWN;
    }

    # Walk the subfields of the extra field.  Each one is a 2-byte ID, a
    # 16-bit little-endian length and that many data bytes, so a subfield
    # with no data is exactly 4 bytes long and must still be accepted.
    my $dump_prefix = ' ' x (7 + length $fn) . '> ';
    while (length($xtra) >= 4) {
        my ($type, $len) = unpack 'nv', $xtra;

        # A declared length running past the end of the field is malformed;
        # leave the remainder in place so that it is reported below.
        last if 4 + $len > length $xtra;

        printf "%sEXTRA : (0x%04x) %s (%u bytes)\n",
            $fn, $type, EXTRA->{$type} // UNKNOWN, $len;
        if ($verbose) {
            print hexdump(substr($xtra, 4, $len), prefix => $dump_prefix);
        }
        $xtra = substr $xtra, 4 + $len;
    }
    printf "%sEXTRA : non-conformant extra field\n", $fn if length $xtra;
}
exit;
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Note that "GS" (0x47, 0x53) is the correct identifier for the extra field of gzsig. The official registry in format.txt is in error. The error is of a particularly funny kind, as the registry gives (0x1D, 0x53) and 0x1D is the ASCII group separator (abbreviated GS). But the registry also gives a link to an old source tarball which clearly used the first two bytes of the `"GS"` string in C.