Created
December 14, 2009 18:15
-
-
Save dbr/256270 to your computer and use it in GitHub Desktop.
Function to ensure a filename is valid
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import re | |
import platform | |
# Device names that Windows reserves regardless of extension or letter case.
_WINDOWS_RESERVED_NAMES = frozenset(
    ["CON", "PRN", "AUX", "NUL"]
    + ["COM%d" % n for n in range(1, 10)]
    + ["LPT%d" % n for n in range(1, 10)]
)


def makeValidFilename(value, normalize_unicode=False, windows_safe=False, custom_blacklist=None):
    """
    Takes a string and makes it into a valid filename.

    The name is split into stem and extension with os.path.splitext, each
    part is sanitised, and the combined result is truncated to at most 255
    characters.

    normalize_unicode replaces accented characters in the stem with their
    ASCII equivalent, and removes characters that cannot be converted
    sensibly to ASCII. This happens before the character blacklist is
    applied, so characters that decompose into a blacklisted one (such as
    a fullwidth slash) are still replaced.

    windows_safe forces Windows-safe filenames, regardless of current platform

    custom_blacklist specifies additional characters that will be replaced
    with an underscore in the stem. This will not touch the extension:

    >>> makeValidFilename("T.est.avi", custom_blacklist=".")
    'T_est.avi'

    Returns the sanitised filename as a string.
    """
    if windows_safe:
        # Allow user to make Windows-safe filenames, if they so choose
        sysname = "Windows"
    else:
        sysname = platform.system()
    posix_like = sysname in ("Darwin", "Linux")

    # Null bytes are never valid. Remove them before splitting so they are
    # dropped from the extension as well as from the stem.
    value = value.replace("\0", "")

    # Treat the extension separately
    value, extension = os.path.splitext(value)

    # Replace accented characters with their ASCII equivalent. This must run
    # before blacklisting: NFKD can turn a compatibility character into a
    # plain "/" or ":" which then still has to be replaced.
    if normalize_unicode:
        import unicodedata
        text_type = type(u"")  # unicode on Python 2, str on Python 3
        value = unicodedata.normalize("NFKD", text_type(value))
        # Round-trip through ASCII so the result is text again, not bytes
        value = value.encode("ascii", "ignore").decode("ascii")

    # If the filename starts with a . prepend it with an underscore, so it
    # doesn't become hidden
    if value.startswith("."):
        value = "_" + value

    # Blacklist of characters
    if sysname == "Darwin":
        # : is technically allowed, but Finder will treat it as / and will
        # generally cause weird behaviour, so treat it as invalid.
        blacklist = r"/:"
    elif sysname == "Linux":
        blacklist = r"/"
    else:
        # platform.system docs say it could also return "Windows" or "Java".
        # Failsafe and use Windows sanitisation for Java, as it could be any
        # operating system.
        blacklist = r"\/:*?\"<>|"

    # The extension only gets the platform blacklist (which never contains
    # "."), so the extension separator is left alone.
    extension = re.sub("[%s]" % re.escape(blacklist), "_", extension)

    # Append custom blacklisted characters; these apply to the stem only
    if custom_blacklist is not None:
        blacklist += custom_blacklist

    # Replace every blacklisted character with an underscore
    value = re.sub("[%s]" % re.escape(blacklist), "_", value)

    # Remove any leading/trailing whitespace
    value = value.strip()

    # There are a bunch of filenames that are not allowed on Windows, in any
    # letter case. As with the character blacklist, treat non Darwin/Linux
    # platforms as Windows.
    if not posix_like and value.upper() in _WINDOWS_RESERVED_NAMES:
        value = "_" + value

    # Truncate to 255 characters, the per-component limit used here for
    # every platform.
    max_len = 255
    if len(value) + len(extension) > max_len:
        if len(extension) > len(value):
            # Truncate extension instead of filename, no extension should be
            # this long..
            extension = extension[:max(max_len - len(value), 0)]
        else:
            value = value[:max(max_len - len(extension), 0)]

    # Final clamp covers the case where both parts alone exceed the limit
    return (value + extension)[:max_len]
def test():
    """
    Self-check for makeValidFilename.

    Each entry is (positional args, keyword args, expected result). The
    cases run in order and the first mismatch raises an AssertionError
    showing the actual and expected values.
    """
    cases = [
        (("test.avi",), {}, "test.avi"),
        (("Test File.avi",), {}, "Test File.avi"),
        (("Test",), {}, "Test"),
        (("Test/File.avi",), {}, "Test_File.avi"),
        (("Test/File",), {}, "Test_File"),
        (("Test/File.avi",), {"windows_safe": True}, "Test_File.avi"),
        (("\\/:*?<Evil>|\"",), {"windows_safe": True}, "______Evil___"),
        (("COM2.txt",), {"windows_safe": True}, "_COM2.txt"),
        (("COM2",), {"windows_safe": True}, "_COM2"),
        ((".",), {}, "_."),
        (("..",), {}, "_.."),
        (("...",), {}, "_..."),
        (("Test.avi",), {"custom_blacklist": "e"}, "T_st.avi"),
        # Length handling: over-long stems and over-long extensions
        (("a" * 300,), {}, "a" * 255),
        (("a" * 255 + ".avi",), {}, "a" * 251 + ".avi"),
        (("a" * 251 + "b" * 10 + ".avi",), {}, "a" * 251 + ".avi"),
        (("test." + "a" * 255,), {}, "test." + "a" * 250),
    ]
    for args, kwargs, expected in cases:
        actual = makeValidFilename(*args, **kwargs)
        assert actual == expected, "Error, %r not equal to %r" % (actual, expected)
# Run the self-checks only when executed as a script, not when imported.
if "__main__" == __name__:
    test()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment