import unicodedata import string valid_filename_chars = "-_.() %s%s" % (string.ascii_letters, string.digits) char_limit = 255 def clean_filename(filename, whitelist=valid_filename_chars, replace=' '): # replace spaces for r in replace: filename = filename.replace(r,'_') # keep only valid ascii chars cleaned_filename = unicodedata.normalize('NFKD', filename).encode('ASCII', 'ignore').decode() # keep only whitelisted chars cleaned_filename = ''.join(c for c in cleaned_filename if c in whitelist) if len(cleaned_filename)>char_limit: print("Warning, filename truncated because it was over {}. Filenames may no longer be unique".format(char_limit)) return cleaned_filename[:char_limit] # test s='fake_folder/\[]}{}|~`"\':;,/? abcABC 0123 !@#$%^&*()_+ clᐖﯫⅺຶ 讀ϯՋ㉘ ⅮRㇻᎠ⍩ 𝁱C ℿ؛ἂeuឃC ᅕ ᑉﺜͧ bⓝ s⡽Հᛕ\ue063 牢𐐥er ᐛŴ n წş .ھڱ df df dsfsdfgsg!zip' clean_filename(s) # 'fake_folder_abcABC_0123_____clxi_28_DR_C_euC___bn_s_er_W_n_s_.zip'