Created
October 31, 2014 18:44
-
-
Save LeeMendelowitz/54b9d3e525f0abaa7fdb to your computer and use it in GitHub Desktop.
Rename files in a directory by removing non-alphanumeric characters that shouldn't be in a filename.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Recursively rename all files and directories under a directory by replacing
whitespace with underscores and erasing every character that is not one of
'a-zA-Z0-9', '_', '-' or '.'.
""" | |
import re, os, shutil, argparse, sys | |
# Command-line interface: zero or more directories to process; when none are
# given, the current working directory (captured at import time) is used.
parser = argparse.ArgumentParser(
    description="Rename all files in directory by replacing whitespace with underscores.",
)
parser.add_argument(
    'directories',
    nargs='*',
    metavar="DIR",
    default=[os.getcwd()],
    help="directories to walk. Default: CWD",
)

# Shorthand used throughout the script to report progress on standard error.
err = sys.stderr.write
def new_names(filenames):
    """
    Generate sanitized names for the given filenames.

    Each name is transformed in three steps, in this order:

    1. every run of whitespace becomes a single underscore;
    2. every character other than ASCII letters, digits, '.', '_' and '-'
       is removed;
    3. runs of consecutive underscores are merged into one.

    Returns a new list holding one sanitized name per input name, in the
    same order. A name consisting only of disallowed characters comes back
    as the empty string, so callers must be prepared for that.
    """
    # Raw strings keep the backslash in '\s' from being read as an (invalid)
    # string escape, which newer Python versions warn about.
    whitespace_run = re.compile(r'\s+')
    disallowed_char = re.compile(r'[^a-zA-Z0-9._-]')
    underscore_run = re.compile(r'_+')

    sanitized = []
    for name in filenames:
        name = whitespace_run.sub('_', name)
        name = disallowed_char.sub('', name)
        # Merging underscores last also cleans up runs created by step 2.
        name = underscore_run.sub('_', name)
        sanitized.append(name)
    return sanitized
def _rename_entry(dirpath, old_name, new_name, message):
    """
    Rename one entry of `dirpath` and return the name it has afterwards.

    `message` is a two-placeholder format string used to report a performed
    rename on stderr. The rename is skipped (and the old name returned) when
    the name is unchanged, when the sanitized name is unusable, or when the
    target already exists.
    """
    if new_name == old_name:
        return old_name

    old_path = os.path.join(dirpath, old_name)

    # An empty or dot-only name would make the target the containing
    # directory itself rather than a new entry inside it.
    if new_name in ('', '.', '..'):
        err("Skipping %s: no usable characters left in name\n"
            % os.path.abspath(old_path))
        return old_name

    new_path = os.path.join(dirpath, new_name)

    # Never clobber an existing entry: moving onto an existing file would
    # overwrite it, and moving onto an existing directory would nest inside it.
    if os.path.lexists(new_path):
        err("Skipping %s: %s already exists\n"
            % (os.path.abspath(old_path), os.path.abspath(new_path)))
        return old_name

    err(message % (os.path.abspath(old_path), os.path.abspath(new_path)))
    shutil.move(old_path, new_path)
    return new_name


def rename_files_in_dir(dir):
    """
    Walk a directory tree and rename every file and subdirectory in it.

    New names come from `new_names`. Each rename is reported on stderr.
    Entries whose name is already clean are left alone; entries whose new
    name is unusable or would collide with an existing entry are reported
    and skipped instead of overwriting anything.

    `dir` is the root of the tree to walk; the root itself is not renamed.
    """
    for dirpath, dirnames, filenames in os.walk(dir):
        # Rename the files of this directory.
        for old_name, new_name in zip(filenames, new_names(filenames)):
            _rename_entry(dirpath, old_name, new_name, "Renaming %s to %s\n")

        # Rename the subdirectories, remembering the name each one actually
        # ends up with (a skipped rename keeps its old name).
        current_names = [
            _rename_entry(dirpath, old_name, new_name,
                          "Renaming dir %s to %s\n")
            for old_name, new_name in zip(dirnames, new_names(dirnames))
        ]

        # Update the list in place so os.walk descends into the directories
        # under the names they now have on disk.
        dirnames[:] = current_names
if __name__ == '__main__':
    # Resolve every requested directory to an absolute path before any
    # renaming starts, then process them in the order they were given.
    cli_args = parser.parse_args()
    targets = list(map(os.path.abspath, cli_args.directories))
    for target in targets:
        rename_files_in_dir(target)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Wow, works great! Exactly what I was looking for to remove weird UTF characters from my directory names, so I can work with them using web manager.