Created
March 17, 2021 21:09
-
-
Save ssokolow/1217ea47fecbd10d57c80040eb32d90e to your computer and use it in GitHub Desktop.
Simple script to recursively strip non-BMP Unicode characters from file and folder names
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
"""Strip emoji and other non-BMP codepoints from paths to make them compatible | |
with mkisofs/genisoimage""" | |
# Prevent Python 2.x PyLint from complaining if run on this | |
from __future__ import (absolute_import, division, print_function, | |
with_statement, unicode_literals) | |
__author__ = "Stephan Sokolow (deitarion/SSokolow)" | |
__appname__ = "strip_emoji.py" | |
__version__ = "0.1" | |
__license__ = "MIT" | |
import logging, os, re | |
log = logging.getLogger(__name__)

# Matches any single codepoint outside the Basic Multilingual Plane
# (U+10000 through U+10FFFF), which is where emoji live.
NON_BMP_RE = re.compile(r"[\U00010000-\U0010FFFF]")


def process_path(path):
    """Rename ``path`` so that its final component has no non-BMP codepoints.

    Only the last component of ``path`` is rewritten. Parent directories are
    left exactly as given, so the rename target always lives in the same
    directory as the source, even when an ancestor directory's name
    itself contains non-BMP characters.

    The rename is skipped (and a warning is logged) when:

    * stripping would leave an empty name, or
    * an entry with the stripped name already exists, since ``os.rename``
      would silently replace it on POSIX systems.

    Returns the path the entry has once this call completes: the new path if
    it was renamed, otherwise ``path`` unchanged.

    Raises ``OSError`` if the underlying ``os.rename`` call fails.
    """
    parent, name = os.path.split(path)
    cleaned = NON_BMP_RE.sub('', name)
    if cleaned == name:
        return path

    if not cleaned:
        log.warning("Not renaming %r: nothing would be left of its name",
                    path)
        return path

    target = os.path.join(parent, cleaned)
    # lexists() so that a dangling symlink also counts as "in the way".
    if os.path.lexists(target):
        log.warning("Not renaming %r: %r already exists", path, target)
        return path

    log.info("Renaming %r -> %r", path, target)
    os.rename(path, target)
    return target
def process_arg(path):
    """Walk the tree under ``path`` and pass every entry to process_path().

    Directories are visited top-down. Each directory's subdirectories are
    handled in sorted order before its files, and the names returned by
    process_path() are written back into the list that os.walk() descends
    into, so renamed directories are followed under their new names.
    """
    for root, subdirs, filenames in os.walk(path):
        subdirs.sort()

        current_names = []
        for entry in subdirs:
            new_path = process_path(os.path.join(root, entry))
            current_names.append(os.path.basename(new_path))
        # In-place slice assignment: os.walk() reads this same list object
        # to decide where to recurse next.
        subdirs[:] = current_names

        for fname in filenames:
            process_path(os.path.join(root, fname))
def main():
    """Command-line entry point, usable as a setuptools entry point.

    Parses the arguments, configures logging to match the requested
    verbosity, then runs process_arg() on each given path in order.
    """
    from argparse import ArgumentParser, RawDescriptionHelpFormatter

    # Only the part of the module docstring before a "--snip--" line is
    # shown as the --help description.
    description = __doc__.replace('\r\n', '\n').split('\n--snip--\n')[0]

    parser = ArgumentParser(
        formatter_class=RawDescriptionHelpFormatter,
        description=description,
    )
    parser.add_argument(
        '--version',
        action='version',
        version="%%(prog)s v%s" % __version__,
    )
    parser.add_argument(
        '-v', '--verbose',
        action="count",
        default=2,
        help="Increase the verbosity. Use twice for extra effect.",
    )
    parser.add_argument(
        '-q', '--quiet',
        action="count",
        default=0,
        help="Decrease the verbosity. Use twice for extra effect.",
    )
    parser.add_argument(
        'path',
        action="store",
        nargs="+",
        help="Path to operate on",
    )
    # Reminder: %(default)s can be used in help strings.
    args = parser.parse_args()

    # Set up clean logging to stderr. The levels are ordered from quietest
    # to noisiest; the default verbosity of 2 selects WARNING.
    log_levels = [
        logging.CRITICAL,
        logging.ERROR,
        logging.WARNING,
        logging.INFO,
        logging.DEBUG,
    ]
    highest_index = len(log_levels) - 1
    verbosity = args.verbose - args.quiet
    # Clamp into the valid index range.
    verbosity = max(min(verbosity, highest_index), 0)
    logging.basicConfig(
        level=log_levels[verbosity],
        format='%(levelname)s: %(message)s',
    )

    for target in args.path:
        process_arg(target)


if __name__ == '__main__':  # pragma: nocover
    main()
# vim: set sw=4 sts=4 expandtab : |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment