Skip to content

Instantly share code, notes, and snippets.

@afresh1
Last active December 22, 2021 22:08
Show Gist options
  • Save afresh1/abfb1099502682a192fdbee94fb948de to your computer and use it in GitHub Desktop.
Save afresh1/abfb1099502682a192fdbee94fb948de to your computer and use it in GitHub Desktop.
A perl duplicate file finder based on size and hash. Uses only core perl modules.
#!/usr/bin/env perl
use v5.16;
use warnings;
#
# Copyright (c) 2021 Andrew Hewus Fresh <[email protected]>
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
use File::Find qw< find >;
use Digest::SHA;
HotKey->import('readkey');
@ARGV = '.' unless @ARGV;
my $alg = 'sha256';
my %files;
find( sub {
return unless -f $_;
my %info = (
#name => $_,
#dir => $File::Find::dir,
path => $File::Find::name,
);
#@info{qw< size atime mtime ctime >} = ( stat _ )[ 7 .. 10 ];
push @{ $files{ -s _ } }, \%info;
}, @ARGV );
foreach my $size ( sort { $b <=> $a } keys %files ) {
next unless @{ $files{$size} } > 1;
my %dupes;
foreach my $info ( @{ $files{$size} } ) {
my $digest = Digest::SHA->new($alg)
->addfile( $info->{path} )
->hexdigest;
push @{ $dupes{$digest} }, $info;
}
foreach my $digest ( sort keys %dupes ) {
next unless @{ $dupes{$digest} } > 1;
my $delete = pick_dupes( $dupes{$digest} );
unlink or warn "Unable to unlink $_: $!"
for map { $_->{path} } @{$delete};
}
}
sub pick_dupes {
my ($dupes) = @_;
my $n = 0;
my @dupes = map { $_->{n} = ++$n; $_ }
sort { length $b->{path} <=> length $a->{path} } @{$dupes};
$dupes[0]{keep} = 1;
die "Too many dupes!" if $n > 9;
while (1) {
say "\n * indicates files to keep.";
printf "%1s [%1s] %s\n", $_->{n}, $_->{keep} ? '*' : ' ', $_->{path}
for @dupes;
printf
"1-$n to toggle, s[w]ap, keep [a]ll [n]one, [d]elete, [q]uit: ";
STDOUT->flush;
my $key = readkey();
print "\n";
if ( $key =~ /^[1-9]$/ and $#dupes >= $key - 1 ) {
my $i = $key - 1;
$dupes[$i]{keep} = $dupes[$i]{keep} ? undef : 1;
}
elsif ( fc $key eq 'w' ) {
$_->{keep} = !$_->{keep} for @dupes;
}
elsif ( fc $key eq 'a' ) {
$_->{keep} = 1 for @dupes;
$key = 'd';
}
elsif ( fc $key eq 'n' ) {
delete $_->{keep} for @dupes;
$key = 'd';
}
elsif ( fc $key eq 'q' ) {
exit;
}
if ( fc $key eq 'd' ) {
my $sure = 'y';
if ( !grep { $_->{keep} } @dupes ) {
print "Are you sure you want to remove all files? y/n ";
STDOUT->flush;
$sure = readkey();
print "\n";
}
return [ grep { !$_->{keep} } @dupes ]
if fc $sure eq 'y';
}
}
}
BEGIN {
# HotKey.pm - From perlfaq8
package HotKey;
use strict;
use warnings;
use parent 'Exporter';
our @EXPORT_OK = qw(cbreak cooked readkey);
use POSIX qw(:termios_h);
my ( $term, $oterm, $echo, $noecho, $fd_stdin );
$fd_stdin = fileno(STDIN);
$term = POSIX::Termios->new();
$term->getattr($fd_stdin);
$oterm = $term->getlflag();
$echo = ECHO | ECHOK | ICANON;
$noecho = $oterm & ~$echo;
sub cbreak {
#$term->setlflag($noecho); # ok, so i don't want echo either
$term->setlflag( $oterm & ~ICANON );
$term->setcc( VTIME, 1 );
$term->setattr( $fd_stdin, TCSANOW );
}
sub cooked {
$term->setlflag($oterm);
$term->setcc( VTIME, 0 );
$term->setattr( $fd_stdin, TCSANOW );
}
sub readkey {
my $key = '';
cbreak();
sysread( STDIN, $key, 1 );
cooked();
return $key;
}
END { cooked() }
1;
}
@fireglow
Copy link

Nice!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment