Skip to content

Instantly share code, notes, and snippets.

@reagle
Created May 3, 2016 13:53
Show Gist options
  • Save reagle/eab6412de8fe8e76415d143022c85c64 to your computer and use it in GitHub Desktop.
Save reagle/eab6412de8fe8e76415d143022c85c64 to your computer and use it in GitHub Desktop.
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# detox filenames so as to work across most file systems
# (c) Copyright 2016 by Joseph Reagle
# Licensed under the GPLv3, see <http://www.gnu.org/licenses/gpl-3.0.html>
#
import codecs
import glob
import locale
import logging
import os
from os import chdir, environ, mkdir, rename, walk
from os.path import abspath, basename, exists, isdir, isfile, splitext
import re
import sys
import unicodedata
# Home directory of the current user.  $HOME is used when it is set; the
# expanduser() fallback keeps the module importable where it is not
# (e.g. on Windows), instead of dying with a KeyError.
HOME = environ.get('HOME', os.path.expanduser('~'))

# Short aliases for the root-logger convenience functions.
critical = logging.critical
info = logging.info
dbg = logging.debug
warn = logging.warning  # logging.warn is a deprecated alias of warning()
error = logging.error
excpt = logging.exception
def remove_accents(s):
    """Return `s` with accents stripped, leaving the plain base characters.

    The string is first put into NFKD form, which splits an accented
    character into its base character followed by combining marks; every
    character with a non-zero combining class is then discarded.
    """
    def is_base_char(ch):
        # unicodedata.combining() yields 0 for non-combining characters.
        return unicodedata.combining(ch) == 0

    return ''.join(filter(is_base_char, unicodedata.normalize('NFKD', s)))
# Matches every character that is NOT safe across most file systems.
# Permitted: word characters (letters, digits, underscore), space and
#   - . ^ & ' @ { } [ ] , $ = ! # ( ) % + ~
# The hyphen is escaped and nothing else sits next to a bare '-', so the
# class contains no accidental ranges (the old `!-#` range let '"' through).
# http://serverfault.com/questions/124611/special-characters-in-samba-filenames
# https://amigotechnotes.wordpress.com/2015/04/02/invalid-characters-in-file-names/
_RESERVED_CHARS_RE = re.compile(r"[^\w\-. ^&'@{}\[\],$=!#()%+~]")


def remove_reserved(s):
    """Return `s` keeping only characters accepted by most file systems.

    A typographic apostrophe (U+2019) becomes a plain "'", question marks
    are dropped entirely, and every other character outside the permitted
    set is replaced by an underscore.
    """
    s = s.replace('\u2019', "'")  # keep apostrophes, but no smart ones
    s = s.replace('?', '')  # makes more sense just to remove '?'
    return _RESERVED_CHARS_RE.sub('_', s)
def detoxify_fn(path, fn, *, no_rename=None):
    """Rename file `fn` in directory `path` to its detoxified name.

    The new name is `fn` passed through remove_accents() and then
    remove_reserved().  When it equals the old name nothing happens apart
    from an info-level log message.

    Args:
        path: directory containing the file.
        fn: name of the file inside `path` (no directory part).
        no_rename: True for a dry run (only report), False to rename.
            When left as None the value is taken from the module-level
            command-line namespace `args` if one exists, else False.

    An existing file at the target name is never overwritten; the rename
    is skipped and reported instead.  OS-level errors are printed rather
    than raised so that one bad file does not abort a whole run.
    """
    if no_rename is None:
        # The script's __main__ section stores the parsed options in the
        # global `args`; it is absent when this module is merely imported.
        no_rename = getattr(globals().get('args'), 'no_rename', False)

    new_fn = remove_reserved(remove_accents(fn))
    old_path = os.path.join(path, fn)
    new_path = os.path.join(path, new_fn)
    if new_path == old_path:
        info('no need to rename %s', old_path)
        return

    print("detoxing '%s' to '%s'" % (old_path, new_path))
    if no_rename:
        return
    try:
        # os.rename() may silently replace an existing destination, so two
        # names detoxifying to the same target would lose a file.  The
        # samefile() test still lets through targets that the file system
        # resolves to the very file being renamed.
        if os.path.lexists(new_path) and not os.path.samefile(old_path, new_path):
            print("skipping '%s': '%s' already exists" % (old_path, new_path))
            return
        os.rename(old_path, new_path)
    except OSError as e:
        print(e)
def recurse(directory):
    """Walk `directory` and hand every file found beneath it to detoxify_fn().

    Only the entries that walk() reports as files are processed; the
    directory names themselves are left alone.
    """
    # Lazily flatten the walk into (containing directory, file name) pairs.
    file_entries = (
        (parent, name)
        for parent, _subdirs, names in walk(directory)
        for name in names
    )
    for parent, name in file_entries:
        info("fn = %s" % name)
        detoxify_fn(parent, name)
if '__main__' == __name__:
    # Command-line entry point: parse the options, configure logging, then
    # detoxify either a single file or every file below a directory.
    import argparse  # http://docs.python.org/dev/library/argparse.html

    arg_parser = argparse.ArgumentParser(
        description='detoxify filenames so as to work over most file systems')

    # positional arguments
    arg_parser.add_argument('files', nargs='+', metavar='FILE')

    # optional arguments
    arg_parser.add_argument("-n", "--no-rename",
                            action="store_true", default=False,
                            help="no renaming; perform dry run")
    arg_parser.add_argument('-L', '--log-to-file',
                            action="store_true", default=False,
                            help="log to file %(prog)s.log")
    arg_parser.add_argument('-V', '--verbose', action='count', default=0,
                            help="Increase verbosity (specify multiple times for more)")
    arg_parser.add_argument('--version', action='version', version='TBD')
    # `args` must stay a module-level name: detoxify_fn() reads
    # args.no_rename to decide whether this is a dry run.
    args = arg_parser.parse_args()

    # Translate the -V count into a logging threshold.  The default of 100
    # lies above CRITICAL, so nothing is logged unless -V is given.
    log_level = 100
    if args.verbose == 1:
        log_level = logging.CRITICAL  # 50
    elif args.verbose == 2:
        log_level = logging.INFO  # 20
    elif args.verbose >= 3:
        log_level = logging.DEBUG  # 10

    LOG_FORMAT = "%(levelno)s %(funcName).5s: %(message)s"
    if args.log_to_file:
        # Name the log after the program, as the --log-to-file help promises.
        logging.basicConfig(filename=arg_parser.prog + '.log', filemode='w',
                            level=log_level, format=LOG_FORMAT)
    else:
        logging.basicConfig(level=log_level, format=LOG_FORMAT)

    if len(args.files) > 1:
        print("Sorry, I only take a single argument: "
              "a directory to recurse on or filename.")
        sys.exit(1)  # non-zero status: this is a usage error

    fn = args.files[0]
    if isfile(fn):
        # detoxify_fn() wants the containing directory and the bare name
        # separately; the directory part is '' for a name without one.
        directory, name = os.path.split(fn)
        detoxify_fn(directory, name)
    elif isdir(fn):
        recurse(fn)
    else:
        print("Sorry, argument is unknown file type")
        sys.exit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment