Skip to content

Instantly share code, notes, and snippets.

@raggleton
Created December 26, 2018 21:20
Show Gist options
  • Save raggleton/aec43cb5aff8427f66366336cb483e0a to your computer and use it in GitHub Desktop.
Save raggleton/aec43cb5aff8427f66366336cb483e0a to your computer and use it in GitHub Desktop.
Find duplicate music files and empty dirs
#!/usr/bin/env python
"""Look for "bad" directories with duplicate files (xxx.mp3 vs xxx 1.mp3),
empties, only artwork, etc
"""
import os
import re
import sys
from send2trash import send2trash
def folder_size(path='.'):
    """Return the combined size in bytes of the files found under *path*.

    Entries reported as files contribute their ``st_size``; entries
    reported as directories are measured recursively. Anything else
    contributes nothing.
    """
    def _entry_size(item):
        # Size contribution of a single directory entry.
        if item.is_file():
            return item.stat().st_size
        if item.is_dir():
            return folder_size(item.path)
        return 0

    return sum(_entry_size(item) for item in os.scandir(path))
def filter_useless(contents):
    """Return *contents* without macOS Finder metadata entries.

    An entry is dropped only when its final path component is exactly
    ``.DS_Store`` or ``._.DS_Store`` (the AppleDouble companion of
    ``.DS_Store``). Names that merely contain the text ``.DS_Store``
    somewhere are kept, so real files and directories are never hidden
    from the scan.

    :param contents: iterable of file names or paths (strings)
    :return: new list with the metadata entries removed, order preserved
    """
    ignored = (".DS_Store", "._.DS_Store")
    return [c for c in contents if os.path.basename(c) not in ignored]
# File extensions (lower-case, including the leading dot) that count as audio.
AUDIO_FMTS = [
    ".mp3",
    ".wav",
    ".wma",
    ".m4a",
    ".ogg",
    ".flac",
]

# True:  flagged paths are sent to the trash as soon as they are found.
# False: nothing is removed; the flagged paths are written to a text file.
DELETE_NOW = True
def send_to_trash(filename):
    """Send *filename* to the trash, but only when DELETE_NOW is enabled.

    With DELETE_NOW disabled this is a no-op, so callers can invoke it
    unconditionally.
    """
    if not DELETE_NOW:
        return
    send2trash(filename)
# Scan the current working directory two levels deep
# (./<top-level dir>/<album dir>/) and flag:
#   * empty top-level directories,
#   * empty album directories,
#   * album directories containing no audio files,
#   * duplicate files named "<stem> <digit><ext>" next to "<stem><ext>".
# Flagged paths are trashed immediately when DELETE_NOW is set; otherwise
# they are only listed in toDelete.txt at the end.
to_delete = []   # every flagged path, in discovery order
total_size = 0   # bytes accounted for by flagged no-audio dirs and duplicates

for dirname in os.listdir('.'):
    if not os.path.isdir(dirname):
        continue

    dir_contents = filter_useless(os.listdir(dirname))
    if not dir_contents:
        print('*** Empty:', dirname)
        to_delete.append(dirname)
        send_to_trash(dirname)
        continue

    for album in dir_contents:
        full_album_path = os.path.join(dirname, album)
        if not os.path.isdir(full_album_path):
            continue

        contents = filter_useless(os.listdir(full_album_path))
        if not contents:
            print('*** Empty:', full_album_path)
            to_delete.append(full_album_path)
            send_to_trash(full_album_path)
            continue

        # Compare extensions case-insensitively: AUDIO_FMTS is lower-case,
        # and an album of "TRACK.MP3" files must not be treated as music-free.
        found_audio = any(
            os.path.splitext(fn)[1].lower() in AUDIO_FMTS for fn in contents
        )
        if not found_audio:
            print("*** No music in dir:", full_album_path)
            to_delete.append(full_album_path)
            total_size += folder_size(full_album_path)
            send_to_trash(full_album_path)
            # The whole directory is already flagged (and possibly gone from
            # disk); scanning it for duplicates would double-count sizes or
            # fail on files that no longer exist.
            continue

        for filename in contents:
            stem, ext = os.path.splitext(filename)
            # re.escape is vital for filenames with brackets, hyphens, etc.
            # The extension is escaped too, so its "." only matches a literal
            # dot; fullmatch ensures nothing may follow the extension.
            dupe_pattern = re.compile(
                re.escape(stem) + r' [0-9]' + re.escape(ext)
            )
            for fn in contents:
                if dupe_pattern.fullmatch(fn):
                    print('*** Original:', filename, 'Duplicate:', fn)
                    full_filename = os.path.join(full_album_path, fn)
                    to_delete.append(full_filename)
                    # Measure before trashing; the file is gone afterwards.
                    total_size += os.path.getsize(full_filename)
                    send_to_trash(full_filename)

print("You will save %.1f MB" % (total_size / (1024*1024)))

if not DELETE_NOW:
    # Dry run: record what would have been removed, one path per line.
    with open('toDelete.txt', 'w') as of:
        for fname in to_delete:
            of.write(fname + "\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment