Created
October 4, 2015 19:13
-
-
Save icedraco/4eb4f311d01452ee733f to your computer and use it in GitHub Desktop.
A script used to recursively check a path for suspicious (less compatible with portable devices) filenames
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/python | |
| import os | |
| import sys | |
# Full paths longer than this produce a warning
MAX_PATH_LEN = 256

# Characters considered safe in a file or directory name
GOOD_CHARS = (
    " "
    "0123456789"
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "!?'._-()[]"
)

# Extension whitelist/blacklist (compared against the lower-cased text
# after the last period of a file name)
EXT_WHITELIST = ['mp3', 'ogg', 'flac', 'wma', 'aac']
EXT_BLACKLIST = ['db']

# Generate a warning if non-whitelisted extensions are present?
WARN_UNKNOWN_EXT = True

# Print the notes listed under each warning/error line?
PRINT_NOTES = True
def warn(path, notes=()):
    """Print a warning line for ``path``, followed by its notes if enabled.

    path  -- the path the warning refers to
    notes -- iterable of strings; each one is printed on its own line
             under the warning, but only when PRINT_NOTES is true
    """
    # A single pre-formatted argument prints the same text on Python 2 and 3.
    print("[W] %s" % (path,))
    if PRINT_NOTES:
        for note in notes:
            print(" * %s" % (note,))
def err(path, notes=()):
    """Print an error line for ``path``, followed by its notes if enabled.

    path  -- the path the error refers to
    notes -- iterable of strings; each one is printed on its own line
             under the error, but only when PRINT_NOTES is true
    """
    # A single pre-formatted argument prints the same text on Python 2 and 3.
    print("[E] %s" % (path,))
    if PRINT_NOTES:
        for note in notes:
            print(" * %s" % (note,))
def hilight_chars(s, idx=()):
    """Return ``s`` with the character at each index in ``idx`` wrapped in <>.

    s   -- the string to annotate
    idx -- iterable of integer positions into ``s``; defaults to none

    Example: hilight_chars("a:b", [1]) returns "a<:>b".
    """
    # Work on a list of characters so each position can be replaced in place
    # without shifting the indices of the positions still to be processed.
    marked = list(s)
    for i in idx:
        marked[i] = "<%s>" % marked[i]
    return "".join(marked)
def count_dots(name):
    """Return the number of period characters ('.') in ``name``."""
    # str.count works on both Python 2 and 3; len(filter(...)) fails on
    # Python 3 because filter() returns an iterator there.
    return name.count('.')
def get_bad_chars(name):
    """Return the positions in ``name`` whose character is not in GOOD_CHARS."""
    return [pos for pos, ch in enumerate(name) if ch not in GOOD_CHARS]
def get_non_ascii(name):
    """Return the positions in ``name`` whose code point is outside 0x20-0x7E."""
    return [
        pos
        for pos, ch in enumerate(name)
        if not 0x20 <= ord(ch) <= 0x7E
    ]
def check_dirs(path, dirs):
    """Check every directory name in ``dirs`` and print what was found.

    path -- the directory that contains the entries in ``dirs``
    dirs -- iterable of directory names (not full paths)

    Warnings are raised for periods in the name, a leading period,
    characters reported by get_bad_chars() and a joined path longer than
    MAX_PATH_LEN.  Errors are raised for an empty name and for characters
    reported by get_non_ascii().  Results are printed through err() and
    warn(); nothing is returned.
    """
    for d in dirs:
        warnings = []
        errors = []
        full_path = os.path.join(path, d)

        if d == "":
            # None of the checks below make sense for an empty name, so
            # report it right away instead of collecting it and skipping
            # the printing step at the bottom of the loop.
            err(full_path, ["WEIRD: EMPTY DIRECTORY NAME"])
            continue

        # Check how many dots does this have
        if count_dots(d) > 0:
            warnings.append("One or more period in directory name")

        # Does it start with a period?
        if d.startswith('.'):
            warnings.append("Directory starts with a period (.something)")

        # Does it have non-good characters?
        non_good_idx = get_bad_chars(d)
        if non_good_idx:
            warnings.append("Potentially bad characters in name")
            warnings.append(" -> " + hilight_chars(d, non_good_idx))

        # Does it have non-ASCII characters?
        non_ascii_idx = get_non_ascii(d)
        if non_ascii_idx:
            errors.append("Non-ASCII characters in name!")
            errors.append(" -> " + hilight_chars(d, non_ascii_idx))

        # Check path length
        path_len = len(full_path)
        if path_len > MAX_PATH_LEN:
            warnings.append("Long path (%d > %d)" % (path_len, MAX_PATH_LEN))

        # Print results (errors first, then warnings)
        if errors:
            err(full_path, errors)
        if warnings:
            warn(full_path, warnings)
def check_files(path, files):
    """Check every file name in ``files`` and print what was found.

    path  -- the directory that contains the entries in ``files``
    files -- iterable of file names (not full paths)

    Errors are raised for an empty name, a name without any period (no
    extension), a blacklisted extension and characters reported by
    get_non_ascii().  Warnings are raised for more than one period, a
    leading period, an extension that is not 3 characters long, an
    extension missing from EXT_WHITELIST (only when WARN_UNKNOWN_EXT is
    true), characters reported by get_bad_chars() and a joined path longer
    than MAX_PATH_LEN.  Results are printed through err() and warn();
    nothing is returned.
    """
    for f in files:
        warnings = []
        errors = []
        full_path = os.path.join(path, f)

        if f == "":
            # Nothing else can be checked on an empty name; report it now,
            # because `continue` skips the printing step at the bottom.
            err(full_path, ["WEIRD: EMPTY FILE NAME"])
            continue

        # Does it have an extension?
        num_dots = count_dots(f)
        if num_dots == 0:
            # The remaining checks are skipped for extension-less files, so
            # the error has to be printed here or it would be lost.
            err(full_path, ["No file extension!"])
            continue

        # Check if it has several dots
        if num_dots > 1:
            warnings.append("More than one period in file name")

        # Does it start with a period?
        if f.startswith('.'):
            warnings.append("File starts with a period (.something)")

        # Extension = text after the last period, compared case-insensitively
        ext = f.split('.')[-1].lower()

        # Check extension length
        if len(ext) != 3:
            warnings.append("File extension is not 3 characters - suspicious")

        # Check whitelist/blacklist (the whitelist takes precedence)
        if ext not in EXT_WHITELIST:
            if ext in EXT_BLACKLIST:
                errors.append("File extension is blacklisted!")
            elif WARN_UNKNOWN_EXT:
                warnings.append("File extension (%s) not in whitelist!" % ext)

        # Does it have non-good characters?
        non_good_idx = get_bad_chars(f)
        if non_good_idx:
            warnings.append("Potentially bad characters in name")
            warnings.append(" -> " + hilight_chars(f, non_good_idx))

        # Does it have non-ASCII characters?
        non_ascii_idx = get_non_ascii(f)
        if non_ascii_idx:
            errors.append("Non-ASCII characters in name!")
            errors.append(" -> " + hilight_chars(f, non_ascii_idx))

        # Check path length
        path_len = len(full_path)
        if path_len > MAX_PATH_LEN:
            warnings.append("Long path (%d > %d)" % (path_len, MAX_PATH_LEN))

        # Print results (errors first, then warnings)
        if errors:
            err(full_path, errors)
        if warnings:
            warn(full_path, warnings)
def check(path, dirs, files):
    """Inspect one directory level: flag it if empty, then vet its entries."""
    # Is it an empty dir?
    nothing_inside = (dirs == []) and (files == [])
    if nothing_inside:
        warn(path, ["Empty directory"])

    # Sub-directory names are checked first, then file names.
    check_dirs(path, dirs)
    check_files(path, files)
def main(argv):
    """Walk a directory tree and run check() on every level of it.

    argv -- argument list in sys.argv form; argv[1], when present, is the
            root path to scan, otherwise the current directory ('.') is used

    Returns 1 (after writing a message to stderr) when the path does not
    exist, and 0 once the whole tree has been walked.
    """
    path = argv[1] if len(argv) > 1 else '.'

    # Ensure it exists
    if not os.path.exists(path):
        # Interpolate the offending path; without the argument the literal
        # "%s" placeholder would be shown to the user.
        sys.stderr.write("Path '%s' does not exist\n" % (path,))
        return 1

    # Walk through...
    for pathname, dirs, files in os.walk(path):
        check(pathname, dirs, files)

    # Done
    print("DONE")
    return 0
###--# Initialization #--######################################################

if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    sys.exit(main(sys.argv))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment