Last active
July 29, 2016 16:17
-
-
Save categulario/9422aa048cbdbca942ef82bfd515af91 to your computer and use it in GitHub Desktop.
given an array of strings make a tree of common prefixes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import unittest | |
| from itertools import takewhile, dropwhile, zip_longest | |
| import json | |
| from pprint import pprint | |
| def largest_prefix(string1, string2): | |
| return '/'.join( | |
| map( | |
| lambda x:x[0], | |
| takewhile( | |
| lambda x:x[0]==x[1], | |
| zip(string1.split('/'), string2.split('/')) | |
| ) | |
| ) | |
| ) | |
| def discard_prefix(prefix, string): | |
| return '/'.join( | |
| map( | |
| lambda x: x[1], | |
| dropwhile( | |
| lambda x: x[0] == x[1], | |
| zip_longest(prefix.split('/'), string.split('/')) | |
| ) | |
| ) | |
| ) | |
| class Group: | |
| """A group""" | |
| def __init__(self, string): | |
| self.name = string | |
| self.prefix = string.split('/')[0] | |
| self.children = [] | |
| def has_common_prefix(self, string): | |
| return self.prefix == string.split('/')[0] | |
| def process(self, string): | |
| lcp = largest_prefix(self.name, string) | |
| if lcp == self.name: | |
| for group in self.children: | |
| subname = discard_prefix(lcp, string) | |
| if group.has_common_prefix(subname): | |
| group.process(subname) | |
| break | |
| else: | |
| # just append this group to the children | |
| self.children.append(Group(discard_prefix(self.name, string))) | |
| else: | |
| if not self.children: | |
| # fork this group into subgroups | |
| self.children.append(Group(discard_prefix(lcp, self.name))) | |
| self.children.append(Group(discard_prefix(lcp, string))) | |
| self.name = lcp | |
| else: | |
| # move children to a subgroup and fork | |
| g1 = Group(discard_prefix(lcp, self.name)) | |
| g1.children = self.children | |
| self.children = [] | |
| self.children.append(g1) | |
| self.children.append(Group(discard_prefix(lcp, string))) | |
| self.name = lcp | |
| def __repr__(self): | |
| return repr({ | |
| "name": self.name, | |
| "children": self.children | |
| }) | |
| def make_tree(string_array): | |
| groups = [] | |
| for string in string_array: | |
| for group in groups: | |
| if group.has_common_prefix(string): | |
| group.process(string) | |
| break | |
| else: | |
| groups.append(Group(string)) | |
| return groups | |
| class TestMakeTree(unittest.TestCase): | |
| def test_largest_prefix(self): | |
| self.assertEqual('ags/ags', largest_prefix('ags/ags/foo', 'ags/ags/var')) | |
| self.assertEqual('ags', largest_prefix('ags/log/foo', 'ags/ags/var')) | |
| self.assertEqual('', largest_prefix('var/log/foo', 'ags/ags/var')) | |
| self.assertEqual('ags/ags', largest_prefix('ags/ags/foo', 'ags/ags/var/vaz')) | |
| self.assertEqual('ags/ags/var', largest_prefix('ags/ags/var', 'ags/ags/var/vaz')) | |
| def test_discard_prefix(self): | |
| self.assertEqual('var', discard_prefix('foo/var', 'foo/var/var')) | |
| self.assertEqual('var/log', discard_prefix('foo', 'foo/var/log')) | |
| self.assertEqual('var/log/baz', discard_prefix('', 'var/log/baz')) | |
| def test_one(self): | |
| res = make_tree(['ags/ags/uni']) | |
| self.assertEqual(1, len(res)) | |
| self.assertEqual('ags/ags/uni', res[0].name) | |
| def test_two(self): | |
| res = make_tree(['ags/ags/uni', 'ags/ags/log']) | |
| self.assertEqual(1, len(res)) | |
| self.assertEqual('ags/ags', res[0].name) | |
| self.assertEqual('uni', res[0].children[0].name) | |
| self.assertEqual('log', res[0].children[1].name) | |
| def test_two2(self): | |
| res = make_tree(['ags/ags/uni', 'ags/var/log']) | |
| self.assertEqual(1, len(res)) | |
| self.assertEqual('ags', res[0].name) | |
| self.assertEqual('ags/uni', res[0].children[0].name) | |
| self.assertEqual('var/log', res[0].children[1].name) | |
| def test_break_broken(self): | |
| res = make_tree(['ags/ags/foo', 'ags/ags/var', 'ags/log/baz']) | |
| self.assertEqual(1, len(res)) | |
| self.assertEqual('ags', res[0].name) | |
| self.assertEqual(2, len(res[0].children)) | |
| self.assertEqual('ags', res[0].children[0].name) | |
| self.assertEqual('log/baz', res[0].children[1].name) | |
| self.assertEqual(2, len(res[0].children[0].children)) | |
| self.assertEqual('foo', res[0].children[0].children[0].name) | |
| self.assertEqual('var', res[0].children[0].children[1].name) | |
| def test_deep_break(self): | |
| res = make_tree([ | |
| 'ags/log/baz', | |
| 'ags/var/foo', | |
| 'ags/log/foo', | |
| ]) | |
| self.assertEqual(1, len(res)) | |
| self.assertEqual('ags', res[0].name) | |
| self.assertEqual(2, len(res[0].children)) | |
| self.assertEqual('log', res[0].children[0].name) | |
| self.assertEqual('var/foo', res[0].children[1].name) | |
| self.assertEqual(2, len(res[0].children[0].children)) | |
| self.assertEqual('baz', res[0].children[0].children[0].name) | |
| self.assertEqual('foo', res[0].children[0].children[1].name) | |
| def test_cdmx_case(self): | |
| res = make_tree([ | |
| 'CDMX/JDP/JDP', | |
| 'CDMX/JRC/JRC', | |
| 'CDMX/MEX/ATI', | |
| 'CDMX/MEX/NAU', | |
| ]) | |
| self.assertEqual(1, len(res)) | |
| self.assertEqual('CDMX', res[0].name) | |
| self.assertEqual(3, len(res[0].children)) | |
| self.assertEqual('JDP/JDP', res[0].children[0].name) | |
| self.assertEqual('JRC/JRC', res[0].children[1].name) | |
| self.assertEqual('MEX', res[0].children[2].name) | |
| self.assertEqual(2, len(res[0].children[2].children)) | |
| self.assertEqual('ATI', res[0].children[2].children[0].name) | |
| self.assertEqual('NAU', res[0].children[2].children[1].name) | |
| if __name__ == '__main__': | |
| res = make_tree(['ags/ags/foo', 'ags/ags/var', 'ags/log/baz']) | |
| pprint(res) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment