Last active
April 10, 2018 21:12
-
-
Save kevinpostal/bb2968f3100669091ae397126af78189 to your computer and use it in GitHub Desktop.
Attempt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import itertools | |
import sys | |
from collections import Counter | |
from os import listdir | |
from os.path import isfile, join | |
# Module-level accumulators. They are kept (and mutated in place) so code that
# inspects them after a run keeps working; LSS empties both at the start of
# every run so results from a previous directory never leak into the next one.
filelist = []    # one dict per group of files that share a name template
fileset = set()  # (count, display_name) pairs that are finally printed


def parse_number(num):
    """Return a zero-padded ``%d`` format spec as wide as the digit string *num*.

    Example: ``parse_number("007")`` gives ``"%03d"``.
    """
    return "%%0%sd" % len(num)


class LSS(object):
    """Print the regular files found in a directory, grouped by numbered name.

    Every run of digits in a file name is replaced by the literal marker
    ``(\\d+)`` to obtain a *template*; files with the same template form one
    group. Constructing an instance scans ``location`` immediately, fills the
    module-level ``filelist`` / ``fileset`` and prints one ``"<count> <name>"``
    line per result, sorted by name.

    NOTE(review): as in the original implementation, a multi-file group only
    reports first numeric fields that are exactly one digit long, and only
    the first two numeric fields of a template are expanded. Confirm the
    intended output format before extending this.
    """

    # Literal marker that stands in for a run of digits inside a template.
    _PLACEHOLDER = r"(\d+)"
    _DIGITS = re.compile(r"\d+")

    def __init__(self, location="."):
        """Scan *location* (default: current directory) and print the listing."""
        self.location = location
        # Reset the shared accumulators in place: without this a second LSS()
        # call would print the previous directory's entries again.
        del filelist[:]
        fileset.clear()
        self.__process_filenames()

    def __normalize_filename(self, filename):
        """Return ``{"name": filename, "group": template}`` for one file name."""
        # A callable replacement is used because a "\d" inside a replacement
        # *string* is rejected as a bad escape by Python 3.7+.
        template = self._DIGITS.sub(lambda _match: self._PLACEHOLDER, filename)
        return {"name": filename, "group": template}

    def __process_filenames(self):
        """Collect the regular files of ``self.location`` and group them by template."""
        records = [
            self.__normalize_filename(entry)
            for entry in listdir(self.location)
            if isfile(join(self.location, entry))
        ]
        # groupby() only merges *adjacent* items, so the list must be ordered
        # by the grouping key first (name is the tie-breaker inside a group).
        records.sort(key=lambda rec: (rec["group"], rec["name"]))
        groups = itertools.groupby(records, lambda rec: rec["group"])
        self.__process_groups(groups)

    def __process_groups(self, groups):
        """Turn each ``(template, records)`` group into an entry of ``filelist``."""
        for _template, members in groups:
            members = list(members)
            first = members[0]
            if len(members) == 1:
                filelist.append({"file": first["name"], "count": 1})
                continue
            # Digit runs are read straight from the name. Using the template as
            # a regex would break on names containing metacharacters such as
            # "+" or "(". The representative file is part of the group, so it
            # is included in the tuples and the counts.
            numbers = [
                tuple(self._DIGITS.findall(member["name"])) for member in members
            ]
            filelist.append({
                "file": first["name"],
                "group": first["group"],
                "tuple": numbers,
                "digits": self._DIGITS.findall(first["name"]),
                "tuple_count": Counter(item[0] for item in numbers),
                "count": len(numbers),
            })
        self.__process_occurrences(filelist)

    def __process_occurrences(self, filelist):
        """Fill ``fileset`` from *filelist* and print it sorted by name."""
        for entry in filelist:
            template = entry.get("group")
            if not template:
                # Ungrouped file: reported as-is.
                fileset.add((entry.get("count"), entry.get("file")))
                continue
            for first_number, occurrences in entry["tuple_count"].items():
                if len(first_number) != 1:
                    continue
                # Fill the first numeric field; this is loop-invariant for the
                # inner loop, so it is computed once.
                partial = template.replace(self._PLACEHOLDER, first_number, 1)
                for digits in entry["digits"]:
                    final_name = partial.replace(self._PLACEHOLDER, digits, 1)
                    fileset.add((occurrences, final_name))
        # Count is the secondary key so that equal names print in a stable order.
        for count, filename in sorted(fileset, key=lambda pair: (pair[1], pair[0])):
            print("%d %s" % (count, filename))
if __name__ == '__main__':
    # Forward the optional first command-line argument as the directory to
    # list; the slice is empty when no argument was given, so LSS then runs
    # with its own default location.
    LSS(*sys.argv[1:2])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment