|
#!/usr/bin/env python |
|
# -*- coding: utf-8 -*- |
|
"""Normalize unicode file names.""" |
|
|
|
from __future__ import unicode_literals |
|
|
|
from argparse import ArgumentParser |
|
from os import rename, walk |
|
from os.path import exists, isfile, join, split |
|
from sys import version_info |
|
from unicodedata import normalize |
|
|
|
|
|
def bytes_saved(old, new):
    """Return a colored tag giving the size change in bytes from old to new.

    Sizes are the UTF-8 encoded lengths of the two names rather than their
    code point counts, so the figure agrees with the "byte(s)" unit shown.

    Args:
        old: The original name (text, or already-encoded bytes).
        new: The new name (text, or already-encoded bytes).

    Returns:
        A bracketed string wrapped in ANSI color codes: green with a negative
        count when new is smaller, red with a "+" count when it is larger,
        blue with "=" when both have the same size.
    """

    def size(name):
        # Already-encoded names are measured as they are.
        if isinstance(name, bytes):
            return len(name)
        # "replace" keeps lone surrogates from raising on Python 3; each one
        # becomes a single "?" byte.
        return len(name.encode("utf-8", "replace"))

    diff = size(new) - size(old)
    if diff < 0:
        body = "32m" + str(diff)
    elif diff > 0:
        body = "31m+" + str(diff)
    else:
        body = "34m="
    # Only magnitudes above one take the plural.
    unit = " bytes" if abs(diff) > 1 else " byte"
    return "[\033[" + body + unit + "\033[0m]"
|
|
|
|
|
def norm(root, file, form, proceed):
    """Normalize one directory entry name and optionally rename it.

    Nothing is printed or changed when the name is already normalized. If the
    normalized name already exists in root, a message is printed and the
    entry is left alone. Otherwise the planned rename is printed and, when
    proceed is true, carried out.

    Args:
        root: Directory that contains the entry.
        file: Current name of the entry (text).
        form: Normalization form handed to unicodedata.normalize, such as
            "NFC" or "NFKC".
        proceed: Rename on disk when true; only report when false.
    """
    # Compatibility normalization can turn fullwidth look-alikes into a real
    # slash, backslash or colon. Map those back to their fullwidth forms so
    # the result remains a single path component.
    normed = (
        normalize(form, file)
        .replace("/", "\uff0f")
        .replace("\\", "\uff3c")
        .replace(":", "\uff1a")
    )
    if file == normed:
        return

    old = join(root, file)
    new = join(root, normed)
    if exists(new):
        print("%s \033[31mcannot be renamed as\033[0m %s \033[31malready exists\033[0m" % (old, normed))
        return

    print("%s ▶︎ %s %s" % (old, normed, bytes_saved(file, normed)))
    if not proceed:
        return
    try:
        rename(old, new)
    except OSError as err:
        # A failed rename (for instance "Directory not empty") is reported
        # and skipped so the remaining entries are still processed.
        print(
            "%s \033[31mcannot be renamed as\033[0m %s \033[31m(%s)\033[0m"
            % (old, normed, err.strerror)
        )
|
|
|
|
|
def main():
    """Parse the command line and normalize the requested file names.

    A file source has just its own name normalized. For a directory source
    the entries directly inside it are normalized and, with --recursive,
    those of every sub-directory as well. Renames only happen with --proceed;
    without it the planned changes are just printed.
    """
    parser = ArgumentParser(description="Normalize unicode file names.")
    parser.add_argument("source", help="the source file or directory")
    parser.add_argument(
        "-c",
        "--compatibility",
        action="store_true",
        help='normalize with compatibility (ex: "\ufb01" becomes "fi")',
    )
    parser.add_argument("-p", "--proceed", action="store_true", help="rename files")
    parser.add_argument(
        "-r",
        "--recursive",
        action="store_true",
        help="go through directories recursively",
    )
    args = parser.parse_args()

    if version_info < (3,):
        # Python 2 only: decode the argument so names are handled as text.
        args.source = unicode(args.source, "utf8")  # noqa: F821

    norm_form = "NFKC" if args.compatibility else "NFC"

    # Source is a file
    if isfile(args.source):
        head, tail = split(args.source)
        norm(head, tail, norm_form, args.proceed)
        return

    # Source is a directory. A recursive run walks bottom-up so that the
    # contents of a directory are handled before the directory itself is
    # renamed; walking top-down would then try to descend into a name that no
    # longer exists and silently skip that subtree. A non-recursive run walks
    # top-down and stops after the first (top-level) listing.
    for root, dirs, files in walk(args.source, topdown=not args.recursive):
        for name in dirs + files:
            norm(root, name, norm_form, args.proceed)
        if not args.recursive:
            break
|
|
|
|
|
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    main()
Just another observation: if a folder would, after normalization, get the same name as an already existing one, the script stops and I get this message:
Traceback (most recent call last):
  File "nfcfn.py", line 82, in <module>
    main()
  File "nfcfn.py", line 74, in main
    norm(root, d, norm_form, args.proceed)
  File "nfcfn.py", line 39, in norm
    rename(old, new)
OSError: [Errno 39] Directory not empty