Last active
April 2, 2017 09:16
-
-
Save cyberfox/e9ac5119ff57df6024e0802e570a4ad7 to your computer and use it in GitHub Desktop.
Look for and remove duplicate files between a pair of trees that are similar.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'digest/md5'
# Finds files under the current working directory that have a byte-identical
# counterpart at the same relative path under another tree (+base+), so that
# the local copies can be measured and removed.
#
# Typical use, run from inside the tree that should be cleaned up:
#
#   fd = FindDupes.new('/path/to/other/tree')
#   fd.find_current   # hash the local files
#   fd.find_new       # hash their counterparts under base
#   fd.find_dupes     # relative paths whose contents match
#   fd.total_size     # bytes that nuke would free
#   fd.nuke           # delete the LOCAL copies (never the ones under base)
class FindDupes
  # Number of bytes read per step while hashing, so large files are never
  # loaded into memory in one piece.
  CHUNK_SIZE = 1 << 20

  # Scans the current working directory recursively and remembers every
  # regular file found (the glob does not match dot-files).
  #
  # @param base [String] root of the other tree to compare against
  def initialize(base)
    @allfiles = Dir['**/**']
    @base = base
    @files = @allfiles.select { |f| File.file?(f) }
  end

  # Hashes every local file found by the constructor.
  # Prints the running count every 500 files as a progress indicator.
  #
  # @return [Hash{String => String}] relative path => MD5 hex digest
  def find_current
    @old_hashes = {}
    @files.each do |f|
      @old_hashes[f] = md5_of(f)
      puts @old_hashes.length if (@old_hashes.length % 500).zero?
    end
    @old_hashes
  end

  # Hashes the counterpart of every local file under +base+, where one exists.
  # Prints the running count every 500 files as a progress indicator.
  #
  # @return [Hash{String => String}] relative path => MD5 hex digest of the
  #   file at that path under base
  def find_new
    @new_hashes = {}
    @files.each do |f|
      alt_f = File.join(@base, f)
      # Only regular files can be hashed; a directory with the same name
      # under base is simply "no counterpart".
      next unless File.file?(alt_f)
      # If base resolves to the current tree (or the two paths are links to
      # one file), the "counterpart" is the file itself. Treating it as a
      # duplicate would make nuke delete the only copy.
      next if File.identical?(f, alt_f)

      @new_hashes[f] = md5_of(alt_f)
      puts @new_hashes.length if (@new_hashes.length % 500).zero?
    end
    @new_hashes
  end

  # Determines which local files have an identical counterpart under base.
  # Runs find_current / find_new first if they have not been run yet.
  #
  # @return [Array<String>] relative paths of the duplicated files
  def find_dupes
    find_current unless @old_hashes
    find_new unless @new_hashes
    @dupes = @new_hashes.keys.select { |k| @new_hashes[k] == @old_hashes[k] }
  end

  # Sums the sizes of the local duplicates, i.e. the space nuke would free.
  # Runs find_dupes first if it has not been run yet.
  #
  # @return [Integer] total size in bytes
  def total_size
    find_dupes unless @dupes
    @dupes.inject(0) { |accum, f| accum + File.size(f) }
  end

  # Deletes the local copy of every duplicate; files under base are untouched.
  # Runs find_dupes first if it has not been run yet.
  #
  # @return [Array<Integer>] +[1]+ when at least one file was removed,
  #   +[]+ when there was nothing to remove
  def nuke
    find_dupes unless @dupes
    @dupes.map { |f| File.unlink(f) }.uniq
  end

  private

  # MD5 hex digest of the file at +path+, read in binary mode chunk by chunk.
  # File.open is used instead of Kernel#open so that a file name beginning
  # with "|" is never interpreted as a command to run.
  def md5_of(path)
    md5 = Digest::MD5.new
    File.open(path, 'rb') do |io|
      while (chunk = io.read(CHUNK_SIZE))
        md5.update(chunk)
      end
    end
    md5.hexdigest
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment