Last active
December 11, 2017 23:58
-
-
Save MrDrMcCoy/feb265c2d5b340e4a69e2e298399d588 to your computer and use it in GitHub Desktop.
A simple duplicate file finder
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python2
def _hash_file(path, chunk_size=1 << 20):
    """Return the SHA-256 hex digest of the raw bytes stored at *path*."""
    import hashlib

    digest = hashlib.sha256()
    # Binary mode plus chunked reads: no newline translation and no need to
    # hold a whole (possibly huge) file in memory.
    with open(path, 'rb') as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def _hash_audio(path):
    """Return the SHA-256 hex digest of the first 10 s of decoded audio.

    ffmpeg strips the metadata and decodes the first audio stream to raw
    unsigned 8-bit PCM, so files that differ only in their tags hash equal.

    Raises:
        OSError: if ffmpeg cannot be started (e.g. it is not installed).
        RuntimeError: if ffmpeg fails or produces no audio data.
    """
    import hashlib
    import subprocess

    # Argument list instead of a shell string: file names containing quotes,
    # spaces or '$' are passed through verbatim.
    command = [
        'ffmpeg', '-t', '10', '-i', path,
        '-map_metadata', '-1', '-map', '0:a:0', '-f', 'u8', 'pipe:',
    ]
    process = subprocess.Popen(
        command,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # communicate() drains both pipes, closes them and reaps the child.
    pcm, _ = process.communicate()
    # An empty result must never be hashed: every undecodable file would get
    # the same digest and be deleted as a "duplicate" of the first one.
    if process.returncode != 0 or not pcm:
        raise RuntimeError(
            'ffmpeg could not decode %s (exit status %s)'
            % (path, process.returncode)
        )
    return hashlib.sha256(pcm).hexdigest()


def linkdupes(base_dir, loglevel='DEBUG'):
    """Delete duplicate files found underneath *base_dir*.

    Every regular file is fingerprinted: audio files (as guessed from the
    file name by ``mimetypes``) by the first 10 seconds of their decoded
    audio, all other files by their raw content.  The first file seen with a
    given fingerprint is kept; every later file with the same fingerprint is
    removed.  Despite the function's name, duplicates are deleted and not
    replaced by symlinks.

    Symbolic links to files are never hashed or removed.  Linked directories
    are followed, but each real directory is processed only once.

    Args:
        base_dir: Directory tree to scan.
        loglevel: Level for the ``linkdupes`` logger (name or number).

    Returns:
        None.  The scan stops early if a duplicate cannot be removed.
    """
    import logging
    import mimetypes
    import os

    logging.basicConfig(
        format='%(asctime)s | %(levelname)s | %(funcName)s | %(message)s'
    )
    logger = logging.getLogger('linkdupes')
    logger.setLevel(loglevel)
    mimetypes.add_type('audio/ape', '.ape')

    hashes = {}
    visited_dirs = set()
    for root, dirs, files in os.walk(base_dir, followlinks=True):
        # followlinks=True can reach the same real directory through several
        # paths (or loop forever on a cycle); a file reached twice would be
        # deleted as a duplicate of itself.
        real_root = os.path.realpath(root)
        if real_root in visited_dirs:
            logger.debug('Skipping already visited directory: %s', root)
            del dirs[:]
            continue
        visited_dirs.add(real_root)

        logger.info('Processing directory: %s', root)
        for name in files:
            current = os.path.join(root, name)
            logger.debug('Processing file: %s', name)

            if os.path.islink(current):
                # Skip before any hash lookup so a link can never inherit
                # the fingerprint of the previously processed file.
                logger.debug('Skipping link: %s', current)
                continue

            try:
                mime_type = mimetypes.guess_type(current, strict=False)[0]
                if mime_type and 'audio' in mime_type:
                    filehash = _hash_audio(current)
                    logger.debug('ffmpeg hash: %s', filehash)
                else:
                    filehash = _hash_file(current)
                    logger.debug('Plain file hash: %s', filehash)
            except Exception:
                # Best effort: a file that cannot be fingerprinted is left
                # alone and the scan carries on.
                logger.exception('Error processing file: %s', current)
                continue

            if filehash not in hashes:
                hashes[filehash] = current
                continue

            original = hashes[filehash]
            if os.path.realpath(original) == os.path.realpath(current):
                # Same file under another path; removing it would destroy
                # the only copy.
                logger.debug('Skipping alias of kept file: %s', current)
                continue

            logger.info('Found duplicate file: %s', current)
            try:
                os.remove(current)
            except OSError:
                logger.exception('Unable to remove duplicate file: %s', current)
                # A failed removal aborts the whole run, as it always has.
                return
            logger.debug('Removed dupe.')
if __name__ == '__main__':
    # Run the (destructive) scan only when executed as a script, so that
    # importing this module never deletes files as a side effect.
    linkdupes('/media/music', 'DEBUG')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment