Created
April 20, 2018 12:48
-
-
Save s1037989/e31b12e3d36d09bf58a6944fb9a1f799 to your computer and use it in GitHub Desktop.
This script deduplicates files based on md5sum. Wherever there are copies of a file, this script removes the copies and replaces them with hardlinks.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use strict;
use warnings;
use String::Truncate qw(elide);

# Replace duplicate files with hardlinks to a single "master" copy.
#
# Input:  'md5sum-sort.txt' in the current directory, one "<md5> <path>" pair
#         per line, with equal checksums on adjacent lines.
#         Presumably produced by something like
#           find DIR -type f -exec md5sum {} + | sort > md5sum-sort.txt
#         (TODO confirm the exact command used to generate it).
# Usage:  script [LIMIT]
#         LIMIT (optional, $ARGV[0]) stops the run once that many lines have
#         been counted.
#
# The first existing regular file seen for a checksum becomes the master;
# every later file with the same checksum is replaced by a hardlink to it.

my $list_path = 'md5sum-sort.txt';
my $_md5sum   = '';    # checksum of the current master
my $_file;             # path of the current master
my $c         = 0;     # number of lines counted so far

open my $md5_fh, '<', $list_path
    or die "Cannot open $list_path: $!\n";

while ( my $line = <$md5_fh> ) {
    chomp $line;
    my ($md5sum, $file) = split /\s+/, $line, 2;

    # Blank or malformed lines yield undef fields; normalize so the tests
    # below behave the same without "uninitialized" warnings.
    $md5sum = '' unless defined $md5sum;
    $file   = '' unless defined $file;

    if ( $md5sum ne $_md5sum ) {
        # New checksum: this file becomes the master, provided it exists and
        # is a regular file. The original `$c++ and next` did not skip when
        # $c was 0 because post-increment returns the old (false) value.
        unless ( -e $file && -f _ ) {
            $c++;
            next;
        }
        $_md5sum = $md5sum;
        $_file   = $file;
        my $nlink = (stat($_file))[3];
        my $pad   = 100 - length($_file);
        $pad = 0 if $pad < 0;
        printf "> %s (%s) %s %s\n",
            elide($_file, 100, { truncate => 'middle' }),
            $nlink, ' ' x $pad, $_md5sum;
    }
    elsif ( length $file && defined $_file && length $_file ) {
        # Same checksum as the master: skip anything that is not a regular
        # file or that is already hardlinked somewhere.
        if ( !-f $file || (stat($file))[3] > 1 ) {
            $c++;
            next;
        }

        # Never operate on the master's own directory entry (e.g. the same
        # path listed twice). lstat is used for the duplicate so a symlink
        # pointing at the master is still replaced by a hardlink.
        my @master = stat $_file;
        my @dup    = lstat $file;
        if ( @master && @dup && $master[0] == $dup[0] && $master[1] == $dup[1] ) {
            $c++;
            next;
        }

        my $master_nlink = @master ? $master[3] : '';
        printf " (%s) unlink %s\n", $master_nlink,
            elide($file, 100, { truncate => 'middle' });
        printf " (%s) link %s, %s\n", $master_nlink,
            elide($_file, 50, { truncate => 'middle' }),
            elide($file,  50, { truncate => 'middle' });

        # Link under a temporary name first, then rename over the duplicate.
        # If the link fails the duplicate is left untouched, whereas
        # unlink-then-link would already have destroyed it.
        my $tmp = "$file.dedup-tmp.$$";
        if ( !link $_file, $tmp ) {
            print "!! link error $!:\n!! $_file => $file\n";
        }
        elsif ( !rename $tmp, $file ) {
            print "!! rename error $!:\n!! $tmp => $file\n";
            unlink $tmp or print "!! unlink error $!:\n!! $tmp\n";
        }
    }
    else {
        warn "Should never get here\n";
    }

    last if $ARGV[0] && ++$c >= $ARGV[0];
}
close $md5_fh;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment