Created
October 21, 2016 06:33
-
-
Save chaonan99/bdf0c0bfcfc517727a5b3f52b8cd3568 to your computer and use it in GitHub Desktop.
Organize a list of strings in a structured way (nested dictionary/list structure)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Organize a list of strings in a structured way (very slow)
:Author: chaonan99
:Date: 2016/10/21
""" | |
import re | |
import numpy as np | |
class StructureDir(object):
    """Group strings into nested dicts/lists keyed by regex capture groups.

    Every string is matched against ``pattern``; its captured groups form a
    key tuple ``(g0, g1, ..., gk)``. Level ``i`` of the result is organised
    according to ``format_list[i]``:

    * ``"dict"`` -- a dictionary keyed by the value of group ``i``;
    * ``"list"`` -- a list with one entry per distinct value of group ``i``
      (the group value itself is not kept).

    The outermost level is special: when ``format_list[0]`` is ``"dict"`` its
    keys are 1-tuples ``(g0,)`` rather than bare strings.

    :type pattern: compiled regular expression (returned by ``re.compile``)
    :param pattern: a regular expression object to match each string in the list
    :type format_list: `list` of `string`
    :value format_list: "list" or "dict"
    :param format_list: indicates whether each group should be organized as a
        dictionary or a list; must have one entry per capture group
    """

    def __init__(self, pattern, format_list):
        super(StructureDir, self).__init__()
        self.pattern = pattern
        self.format_list = format_list
        assert pattern.groups == len(format_list), "length of pattern groups and format list do not match!"

    def structured(self, list_of_dir):
        """Return ``list_of_dir`` organised as nested dicts/lists.

        :param list_of_dir: iterable of strings, each matching ``self.pattern``
        :returns: nested structure described in the class docstring; for empty
            input, ``[]`` if the outermost format is ``"list"``, else ``{}``
        :raises IndexError: if a string is not matched by the pattern
        :raises ValueError: if a format indicator is neither "list" nor "dict"
        """
        # Strings whose captured groups are identical collapse to one entry
        # (the last one wins), exactly like building a dict from key/value pairs.
        merged = {}
        for path in list_of_dir:
            merged[self._match_key(path)] = path
        if not merged:
            if self.format_list and self.format_list[0] == 'list':
                return []
            return {}
        return self.__merge_one_layer(merged)

    def _match_key(self, path):
        """Return the capture groups of the first match in ``path`` as a tuple."""
        found = self.pattern.findall(path)
        if not found:
            raise IndexError(
                "pattern {!r} does not match {!r}".format(
                    getattr(self.pattern, 'pattern', self.pattern), path))
        first = found[0]
        # ``findall`` yields bare strings (not tuples) unless the pattern has
        # two or more groups; normalise so every key is a tuple.
        if isinstance(first, tuple):
            return first
        return (first,)

    def __merge_one_layer(self, current_merge):
        """Fold the last key component of ``current_merge`` into its values.

        ``current_merge`` maps equal-length key tuples to already-built
        sub-structures. The last component is merged according to the matching
        ``format_list`` entry and the method recurses until the keys are used
        up. Groups are emitted in first-seen order (this relies on dicts
        preserving insertion order, i.e. Python 3.7+).
        """
        depth = len(next(iter(current_merge))) - 1
        if depth == -1:
            # Keys are empty tuples: nothing left to merge.
            return current_merge

        indicator = self.format_list[depth]
        if indicator not in ('list', 'dict'):
            raise ValueError("Unexpected format indicator: {}".format(indicator))

        if depth == 0:
            if indicator == 'list':
                return list(current_merge.values())
            # Top-level dict keeps the 1-tuple keys.
            return dict(current_merge)

        # Single pass: bucket every entry under its key prefix.
        grouped = {}
        for key, value in current_merge.items():
            prefix = tuple(key[:depth])
            if indicator == 'list':
                grouped.setdefault(prefix, []).append(value)
            else:
                grouped.setdefault(prefix, {})[key[depth]] = value
        return self.__merge_one_layer(grouped)
if __name__ == '__main__':
    # Example: organise a handful of file paths by the four numeric fields
    # captured from each file name, then open an interactive shell to
    # inspect the result.
    pattern = re.compile(
        r'/data/datasets/MARS/bbox_test/\d+/(\d+)C(\d+)T(\d+)F(\d+)'
    )
    files = [
        "/data/datasets/MARS/bbox_test/0001/0001C2T001F0001",
        "/data/datasets/MARS/bbox_test/0001/0001C2T001F0002",
        "/data/datasets/MARS/bbox_test/0001/0001C2T001F0003",
        "/data/datasets/MARS/bbox_test/0001/0001C2T002F0001",
        "/data/datasets/MARS/bbox_test/0001/0001C2T002F0002",
        "/data/datasets/MARS/bbox_test/0001/0001C2T002F0003",
        "/data/datasets/MARS/bbox_test/0001/0001C4T003F0001",
        "/data/datasets/MARS/bbox_test/0001/0001C4T003F0002",
        "/data/datasets/MARS/bbox_test/0001/0002C1T001F0001",
        "/data/datasets/MARS/bbox_test/0001/0002C1T001F0002",
        "/data/datasets/MARS/bbox_test/0001/0002C1T002F0001",
        "/data/datasets/MARS/bbox_test/0001/0002C2T003F0001",
        "/data/datasets/MARS/bbox_test/0001/0002C2T003F0002",
    ]
    # Outer two levels become dictionaries, inner two become lists.
    stc = StructureDir(pattern, ["dict", "dict", "list", "list"])
    res = stc.structured(files)

    # `res` (and the names above) are available inside the shell.
    from IPython import embed
    embed()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment