Skip to content

Instantly share code, notes, and snippets.

@mlbright
Created September 28, 2013 03:55
Show Gist options
  • Save mlbright/6738195 to your computer and use it in GitHub Desktop.
Save mlbright/6738195 to your computer and use it in GitHub Desktop.
remove duplicate files
#!/usr/bin/env perl
use strict;
use warnings;
use File::Find;
use Digest::MD5;
# Root directory scanned for duplicates. An optional first command-line
# argument overrides the historical default of 'c:/pics'. The argument is made
# absolute because File::Find changes directory while it walks, which would
# otherwise invalidate relative names built from the root.
require File::Spec;
my $pics = @ARGV ? File::Spec->rel2abs( $ARGV[0] ) : 'c:/pics';
# Return the hexadecimal MD5 digest of the file at the given path.
# The file is read in binary mode. Dies with "Could not open <path>: <error>"
# when the file cannot be opened for reading.
sub md5sum {
    my $path = shift;

    open( my $in, '<', $path )
        or die "Could not open $path: $!\n";
    binmode $in;

    my $digest = Digest::MD5->new;
    $digest->addfile($in);
    my $hex = $digest->hexdigest;

    close $in;
    return $hex;
}
# Duplicate-detection state shared with the File::Find callback below.
my %sizes;       # file size  => first path seen with that size
my %path2md5;    # path       => MD5 hex digest of every file hashed so far
my %md52path;    # MD5 digest => first path seen with that content

# Walk $pics and delete every regular file whose content (compared by MD5)
# matches a file met earlier in the walk. Hashing is lazy: a file is only
# hashed once a second file of the same size turns up, and the first file of
# each size is hashed exactly once.
find(
    {
        # Stay in the original working directory so that the path tests
        # below work for relative as well as absolute roots.
        no_chdir => 1,
        wanted   => sub {
            my $path = $File::Find::name;

            # Directories (including '.' and '..') are rejected here.
            return unless -f $path;

            my $size = ( stat $path )[7];
            return unless defined $size;    # stat failed, e.g. file vanished

            my $first = $sizes{$size};

            # First file of this size: remember it, no hashing needed yet.
            unless ( defined $first ) {
                print "$path\n";
                $sizes{$size} = $path;
                return;
            }

            # Hash the first file of this size the first time a same-size
            # candidate appears, and reuse the cached digest afterwards.
            unless ( defined $path2md5{$first} ) {
                my $first_md5 = md5sum($first);
                $path2md5{$first}     = $first_md5;
                $md52path{$first_md5} = $first;
            }

            my $md5sum = md5sum($path);
            $path2md5{$path} = $md5sum;

            if ( defined $md52path{$md5sum} ) {
                print "deleting $path, a duplicate of "
                    . $md52path{$md5sum} . "\n";
                unlink $path
                    or warn "Could not delete $path: $!\n";
            }
            else {
                $md52path{$md5sum} = $path;
            }
        },
    },
    $pics
);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment