Last active
June 25, 2017 10:12
-
-
Save dialupnoises/ea6a2527eaffa5f0f331dcad534ba1bd to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var fs = require('graceful-fs'), | |
log = require('single-line-log').stdout, | |
crypto = require('crypto'), | |
path = require('path'), | |
async = require('async'); | |
// Stream the file at `name` through a sha1 hash and hand the result to `cb`.
//   name - path of the file to read
//   cb   - called once with the hex-encoded digest after the whole file has been read
// Note: no 'error' listener is attached to the stream here, so `cb` is not
// invoked if the read fails.
function hashFile(name, cb)
{
    const sha1 = crypto.createHash('sha1');

    fs.createReadStream(name)
        // feed every chunk into the running hash as it arrives
        .on('data', (chunk) => {
            sha1.update(chunk);
        })
        // all chunks consumed: finalize and report the digest
        .on('end', () => {
            cb(sha1.digest('hex'));
        });
}
// Report each group of files that share a hash and, if asked to, delete the extras.
//   hashes - object whose values are arrays of file names; an array with more
//            than one entry is treated as a set of duplicates
// Deletion only happens when the first command-line argument is 'keep-first';
// in that case the first file of every group is kept and the rest are unlinked.
function runDedup(hashes)
{
    // only groups holding two or more files count as duplicates
    const duplicateGroups = Object.values(hashes).filter((group) => group.length > 1);

    console.log('\n');
    console.log('found duplicates:');
    for (const group of duplicateGroups) {
        console.log(`\t${group.join(', ')}`);
    }

    // without the explicit opt-in this is a report-only run
    if (process.argv[2] !== 'keep-first') {
        return;
    }

    log('deleting duplicates');
    for (const group of duplicateGroups) {
        // skip the first entry so one copy of the content survives
        const [, ...extras] = group;
        for (const file of extras) {
            log(`deleting ${file}`);
            fs.unlinkSync(file);
        }
    }
}
// list the current directory and drop every entry that is itself a directory
log('finding files');
var files = fs.readdirSync('.').filter((entry) => !fs.statSync(entry).isDirectory());
log(`found ${files.length} files`);

// digest -> names of the files whose content produced that digest
var hashes = {};
// number of files hashed so far, used only for the progress line
var numComplete = 0;

// one task per file: hash it, file it under its digest, report progress
var hashTasks = files.map((file) => (done) => {
    hashFile(file, (digest) => {
        if (hashes[digest] == null) {
            hashes[digest] = [];
        }
        hashes[digest].push(file);

        numComplete += 1;
        log(`hashing ${numComplete}/${files.length}`);
        done();
    });
});

// hash with a concurrency limit of ten tasks, then hand the grouped result to runDedup
async.parallelLimit(hashTasks, 10, () => runDedup(hashes));
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment