Last active
December 31, 2022 18:49
-
-
Save eqhmcow/5389877 to your computer and use it in GitHub Desktop.
Perl unzip example with IO::Uncompress::Unzip
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
# example perl code, but this should now actually work properly, even on | |
# Windows | |
# thanks to everyone who tested this, reported bugs and suggested or | |
# implemented fixes! | |
# this code is licensed under GPL 2 and/or Artistic license; | |
# aka free perl software | |
use strict; | |
use warnings; | |
=pod | |
IO::Uncompress::Unzip works great to process zip files; but, it doesn't | |
include a routine to actually extract an entire zip file. | |
Other modules like Archive::Zip include their own unzip routines, which aren't | |
as robust as IO::Uncompress::Unzip; eg. they don't work on zip64 archive files. | |
So, the following is code to actually use IO::Uncompress::Unzip to extract | |
a zip file. | |
THIS IS EXAMPLE CODE | |
It certainly works and does a good bit of error handling, but it doesn't do | |
every sanity check you might expect. For example, it will, without warning | |
you, gladly overwrite any existing files that have the same name as files in | |
the archive if you extract it to an existing directory. | |
=cut | |
use File::Spec::Functions qw(splitpath); | |
use IO::File; | |
use IO::Uncompress::Unzip qw($UnzipError); | |
use File::Path qw(mkpath); | |
# example code to call unzip: | |
unzip(shift); | |
=head2 unzip | |
Extract a zip file, using IO::Uncompress::Unzip. | |
Arguments: file to extract, destination path | |
unzip('stuff.zip', '/tmp/unzipped'); | |
=cut | |
sub unzip { | |
my ($file, $dest) = @_; | |
die 'Need a file argument' unless defined $file; | |
$dest = "." unless defined $dest; | |
$dest =~ s!/|\\$!!; | |
die "File argument is a directory: $file" | |
if -d $file; | |
die "No such file: $file $!" | |
unless -e $file; | |
my $u = IO::Uncompress::Unzip->new($file) | |
or die "Cannot open $file: $UnzipError"; | |
my $status; | |
my %dirs; | |
for ($status = 1; $status > 0; $status = $u->nextStream()) { | |
# bail on error | |
last if $status < 0; | |
my $header = $u->getHeaderInfo(); | |
my $stored_time = $header->{'Time'}; | |
my (undef, $path, $name) = splitpath($header->{Name}); | |
$path =~ s!/|\\$!!; | |
$name =~ s!/|\\$!!; | |
my $destdir = "$dest/$path"; | |
my $destfile = "$destdir/$name"; | |
# https://cwe.mitre.org/data/definitions/37.html | |
# CWE-37: Path Traversal | |
die "unsafe $destfile" if $destfile =~ m!\Q..\E(/|\\)!; | |
# don't try to overwrite an extant file by creating a directory | |
if (-e $destdir and not -d $destdir) { | |
die "Cannot create directory $destdir: File or path already exists.\nTry extracting " . | |
"to a different directory."; | |
} | |
# skip if the entire path is just an extant directory | |
if (-d $destfile) { | |
next; | |
} | |
# ok let's make a directory for this zip archive entry | |
unless (-d $destdir) { | |
mkpath($destdir) or die "Couldn't mkdir $destdir: $!"; | |
# we're done if the entire path is simply the directory we | |
# just created | |
if (-d $destfile) { | |
# this entry is probably for the directory itself, so store | |
# its mtime, because we have to touch all the dirs after | |
# creating all the files; otherwise as we process the archive, | |
# file creation will just reset each directory's mtime | |
$dirs{$destdir} = $stored_time; | |
next; | |
} | |
} | |
# ok we should have a valid file here we can extract | |
my $buff; | |
my $fh = IO::File->new($destfile, "w") | |
or die "Couldn't write to $destfile: $!"; | |
$fh->binmode(); | |
while (($status = $u->read($buff)) > 0) { | |
$fh->write($buff); | |
} | |
$fh->close(); | |
utime ($stored_time, $stored_time, $destfile) | |
or die "Couldn't touch $destfile: $!"; | |
} | |
die "Error processing $file: $UnzipError $!\n" | |
if $status < 0 ; | |
# touch all the dirs that we created and that also had explicit directory | |
# entries in the archive | |
foreach my $dirpath (keys %dirs) { | |
my $stored_time = $dirs{$dirpath}; | |
utime ($stored_time, $stored_time, $dirpath) | |
or die "Couldn't touch directory $dirpath: $!"; | |
} | |
return; | |
} | |
1; |
OK! People are still using this code I wrote 8 years ago! I guess that's the danger of contributing to open source :)
Many people reported bugs and supplied fixes. Much thanks to @volomike, @mgx259, @tferic, @alambike, @nazarekm,
and special thanks to @blakeyjason , @maikelsteneker and @tiobe for forking and improving the code. I've taken your improvements and now the code is cleaner and works better on Windows and Linux, and probably everywhere Perl 5.8 (or maybe even less) works. Perl will never die! :)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I am also facing the same issue on Windows. Is there a way out to get the original properties of the files ?